r600_shader.c revision 29c4a15bf61a76cd71ffa5b8f09706d0eab84281
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "pipe/p_shader_tokens.h"
24#include "tgsi/tgsi_parse.h"
25#include "tgsi/tgsi_scan.h"
26#include "tgsi/tgsi_dump.h"
27#include "util/u_format.h"
28#include "r600_pipe.h"
29#include "r600_asm.h"
30#include "r600_sq.h"
31#include "r600_opcodes.h"
32#include "r600d.h"
33#include <stdio.h>
34#include <errno.h>
35
36static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
37{
38	struct r600_pipe_state *rstate = &shader->rstate;
39	struct r600_shader *rshader = &shader->shader;
40	unsigned spi_vs_out_id[10];
41	unsigned i, tmp;
42
43	/* clear previous register */
44	rstate->nregs = 0;
45
46	/* so far never got proper semantic id from tgsi */
47	for (i = 0; i < 10; i++) {
48		spi_vs_out_id[i] = 0;
49	}
50	for (i = 0; i < 32; i++) {
51		tmp = i << ((i & 3) * 8);
52		spi_vs_out_id[i / 4] |= tmp;
53	}
54	for (i = 0; i < 10; i++) {
55		r600_pipe_state_add_reg(rstate,
56					R_028614_SPI_VS_OUT_ID_0 + i * 4,
57					spi_vs_out_id[i], 0xFFFFFFFF, NULL);
58	}
59
60	r600_pipe_state_add_reg(rstate,
61			R_0286C4_SPI_VS_OUT_CONFIG,
62			S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
63			0xFFFFFFFF, NULL);
64	r600_pipe_state_add_reg(rstate,
65			R_028868_SQ_PGM_RESOURCES_VS,
66			S_028868_NUM_GPRS(rshader->bc.ngpr) |
67			S_028868_STACK_SIZE(rshader->bc.nstack),
68			0xFFFFFFFF, NULL);
69	r600_pipe_state_add_reg(rstate,
70			R_0288A4_SQ_PGM_RESOURCES_FS,
71			0x00000000, 0xFFFFFFFF, NULL);
72	r600_pipe_state_add_reg(rstate,
73			R_0288D0_SQ_PGM_CF_OFFSET_VS,
74			0x00000000, 0xFFFFFFFF, NULL);
75	r600_pipe_state_add_reg(rstate,
76			R_0288DC_SQ_PGM_CF_OFFSET_FS,
77			0x00000000, 0xFFFFFFFF, NULL);
78	r600_pipe_state_add_reg(rstate,
79			R_028858_SQ_PGM_START_VS,
80			r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
81	r600_pipe_state_add_reg(rstate,
82			R_028894_SQ_PGM_START_FS,
83			r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
84
85	r600_pipe_state_add_reg(rstate,
86				R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
87				0xFFFFFFFF, NULL);
88
89}
90
91int r600_find_vs_semantic_index(struct r600_shader *vs,
92				struct r600_shader *ps, int id)
93{
94	struct r600_shader_io *input = &ps->input[id];
95
96	for (int i = 0; i < vs->noutput; i++) {
97		if (input->name == vs->output[i].name &&
98			input->sid == vs->output[i].sid) {
99			return i - 1;
100		}
101	}
102	return 0;
103}
104
105static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
106{
107	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
108	struct r600_pipe_state *rstate = &shader->rstate;
109	struct r600_shader *rshader = &shader->shader;
110	unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
111	int pos_index = -1, face_index = -1;
112
113	/* clear previous register */
114	rstate->nregs = 0;
115
116	for (i = 0; i < rshader->ninput; i++) {
117		tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
118		if (rshader->input[i].centroid)
119			tmp |= S_028644_SEL_CENTROID(1);
120		if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
121			tmp |= S_028644_SEL_LINEAR(1);
122
123		if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
124			pos_index = i;
125		if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
126		    rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
127		    rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
128			tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
129		}
130		if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
131			face_index = i;
132		if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
133			rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
134			tmp |= S_028644_PT_SPRITE_TEX(1);
135		}
136		r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
137	}
138	for (i = 0; i < rshader->noutput; i++) {
139		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
140			r600_pipe_state_add_reg(rstate,
141						R_02880C_DB_SHADER_CONTROL,
142						S_02880C_Z_EXPORT_ENABLE(1),
143						S_02880C_Z_EXPORT_ENABLE(1), NULL);
144		if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
145			r600_pipe_state_add_reg(rstate,
146						R_02880C_DB_SHADER_CONTROL,
147						S_02880C_STENCIL_REF_EXPORT_ENABLE(1),
148						S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL);
149	}
150
151	exports_ps = 0;
152	num_cout = 0;
153	for (i = 0; i < rshader->noutput; i++) {
154		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
155			exports_ps |= 1;
156		else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
157			num_cout++;
158		}
159	}
160	exports_ps |= S_028854_EXPORT_COLORS(num_cout);
161	if (!exports_ps) {
162		/* always at least export 1 component per pixel */
163		exports_ps = 2;
164	}
165
166	spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
167				S_0286CC_PERSP_GRADIENT_ENA(1);
168	spi_input_z = 0;
169	if (pos_index != -1) {
170		spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
171					S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
172					S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
173					S_0286CC_BARYC_SAMPLE_CNTL(1));
174		spi_input_z |= 1;
175	}
176
177	spi_ps_in_control_1 = 0;
178	if (face_index != -1) {
179		spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
180			S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
181	}
182
183	r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
184	r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
185	r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
186	r600_pipe_state_add_reg(rstate,
187				R_028840_SQ_PGM_START_PS,
188				r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
189	r600_pipe_state_add_reg(rstate,
190				R_028850_SQ_PGM_RESOURCES_PS,
191				S_028868_NUM_GPRS(rshader->bc.ngpr) |
192				S_028868_STACK_SIZE(rshader->bc.nstack),
193				0xFFFFFFFF, NULL);
194	r600_pipe_state_add_reg(rstate,
195				R_028854_SQ_PGM_EXPORTS_PS,
196				exports_ps, 0xFFFFFFFF, NULL);
197	r600_pipe_state_add_reg(rstate,
198				R_0288CC_SQ_PGM_CF_OFFSET_PS,
199				0x00000000, 0xFFFFFFFF, NULL);
200
201	if (rshader->uses_kill) {
202		/* only set some bits here, the other bits are set in the dsa state */
203		r600_pipe_state_add_reg(rstate,
204					R_02880C_DB_SHADER_CONTROL,
205					S_02880C_KILL_ENABLE(1),
206					S_02880C_KILL_ENABLE(1), NULL);
207	}
208	r600_pipe_state_add_reg(rstate,
209				R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
210				0xFFFFFFFF, NULL);
211}
212
213static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
214{
215	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
216	struct r600_shader *rshader = &shader->shader;
217	void *ptr;
218
219	/* copy new shader */
220	if (shader->bo == NULL) {
221		shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
222		if (shader->bo == NULL) {
223			return -ENOMEM;
224		}
225		ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
226		memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
227		r600_bo_unmap(rctx->radeon, shader->bo);
228	}
229	/* build state */
230	rshader->flat_shade = rctx->flatshade;
231	switch (rshader->processor_type) {
232	case TGSI_PROCESSOR_VERTEX:
233		if (rshader->family >= CHIP_CEDAR) {
234			evergreen_pipe_shader_vs(ctx, shader);
235		} else {
236			r600_pipe_shader_vs(ctx, shader);
237		}
238		break;
239	case TGSI_PROCESSOR_FRAGMENT:
240		if (rshader->family >= CHIP_CEDAR) {
241			evergreen_pipe_shader_ps(ctx, shader);
242		} else {
243			r600_pipe_shader_ps(ctx, shader);
244		}
245		break;
246	default:
247		return -EINVAL;
248	}
249	r600_context_pipe_state_set(&rctx->ctx, &shader->rstate);
250	return 0;
251}
252
253static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader)
254{
255	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
256	struct r600_shader *shader = &rshader->shader;
257	const struct util_format_description *desc;
258	enum pipe_format resource_format[160];
259	unsigned i, nresources = 0;
260	struct r600_bc *bc = &shader->bc;
261	struct r600_bc_cf *cf;
262	struct r600_bc_vtx *vtx;
263
264	if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
265		return 0;
266	/* doing a full memcmp fell over the refcount */
267	if ((rshader->vertex_elements.count == rctx->vertex_elements->count) &&
268	    (!memcmp(&rshader->vertex_elements.elements, &rctx->vertex_elements->elements, 32 * sizeof(struct pipe_vertex_element)))) {
269		return 0;
270	}
271	rshader->vertex_elements = *rctx->vertex_elements;
272	for (i = 0; i < rctx->vertex_elements->count; i++) {
273		resource_format[nresources++] = rctx->vertex_elements->hw_format[i];
274	}
275	r600_bo_reference(rctx->radeon, &rshader->bo, NULL);
276	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
277		switch (cf->inst) {
278		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
279		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
280			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
281				desc = util_format_description(resource_format[vtx->buffer_id]);
282				if (desc == NULL) {
283					R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
284					return -EINVAL;
285				}
286				vtx->dst_sel_x = desc->swizzle[0];
287				vtx->dst_sel_y = desc->swizzle[1];
288				vtx->dst_sel_z = desc->swizzle[2];
289				vtx->dst_sel_w = desc->swizzle[3];
290			}
291			break;
292		default:
293			break;
294		}
295	}
296	return r600_bc_build(&shader->bc);
297}
298
299int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader)
300{
301	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
302	int r;
303
304	if (shader == NULL)
305		return -EINVAL;
306	/* there should be enough input */
307	if (rctx->vertex_elements->count < shader->shader.bc.nresource) {
308		R600_ERR("%d resources provided, expecting %d\n",
309			rctx->vertex_elements->count, shader->shader.bc.nresource);
310		return -EINVAL;
311	}
312	r = r600_shader_update(ctx, shader);
313	if (r)
314		return r;
315	return r600_pipe_shader(ctx, shader);
316}
317
318int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
319int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
320{
321	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
322	int r;
323
324//fprintf(stderr, "--------------------------------------------------------------\n");
325//tgsi_dump(tokens, 0);
326	shader->shader.family = r600_get_family(rctx->radeon);
327	r = r600_shader_from_tgsi(tokens, &shader->shader);
328	if (r) {
329		R600_ERR("translation from TGSI failed !\n");
330		return r;
331	}
332	r = r600_bc_build(&shader->shader.bc);
333	if (r) {
334		R600_ERR("building bytecode failed !\n");
335		return r;
336	}
337//fprintf(stderr, "______________________________________________________________\n");
338	return 0;
339}
340
341void
342r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
343{
344	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
345
346	r600_bo_reference(rctx->radeon, &shader->bo, NULL);
347
348	r600_bc_clear(&shader->shader.bc);
349
350	/* FIXME: is there more stuff to free? */
351}
352
353/*
354 * tgsi -> r600 shader
355 */
356struct r600_shader_tgsi_instruction;
357
358struct r600_shader_ctx {
359	struct tgsi_shader_info			info;
360	struct tgsi_parse_context		parse;
361	const struct tgsi_token			*tokens;
362	unsigned				type;
363	unsigned				file_offset[TGSI_FILE_COUNT];
364	unsigned				temp_reg;
365	struct r600_shader_tgsi_instruction	*inst_info;
366	struct r600_bc				*bc;
367	struct r600_shader			*shader;
368	u32					value[4];
369	u32					*literals;
370	u32					nliterals;
371	u32					max_driver_temp_used;
372	/* needed for evergreen interpolation */
373	boolean                                 input_centroid;
374	boolean                                 input_linear;
375	boolean                                 input_perspective;
376	int					num_interp_gpr;
377};
378
/* One entry of the per-opcode translation tables. */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry describes */
	unsigned tgsi_opcode;
	unsigned is_op3;
	/* hardware opcode used by the handler */
	unsigned r600_opcode;
	/* handler that emits the instruction; returns 0 or a negative errno */
	int (*process)(struct r600_shader_ctx *ctx);
};

/* translation tables, indexed by TGSI opcode */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[];

static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
388
389static int tgsi_is_supported(struct r600_shader_ctx *ctx)
390{
391	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
392	int j;
393
394	if (i->Instruction.NumDstRegs > 1) {
395		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
396		return -EINVAL;
397	}
398	if (i->Instruction.Predicate) {
399		R600_ERR("predicate unsupported\n");
400		return -EINVAL;
401	}
402#if 0
403	if (i->Instruction.Label) {
404		R600_ERR("label unsupported\n");
405		return -EINVAL;
406	}
407#endif
408	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
409		if (i->Src[j].Register.Dimension) {
410			R600_ERR("unsupported src %d (dimension %d)\n", j,
411				 i->Src[j].Register.Dimension);
412			return -EINVAL;
413		}
414	}
415	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
416		if (i->Dst[j].Register.Dimension) {
417			R600_ERR("unsupported dst (dimension)\n");
418			return -EINVAL;
419		}
420	}
421	return 0;
422}
423
424static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
425{
426	int i, r;
427	struct r600_bc_alu alu;
428	int gpr = 0, base_chan = 0;
429	int ij_index = 0;
430
431	if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
432		ij_index = 0;
433		if (ctx->shader->input[input].centroid)
434			ij_index++;
435	} else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
436		ij_index = 0;
437		/* if we have perspective add one */
438		if (ctx->input_perspective)  {
439			ij_index++;
440			/* if we have perspective centroid */
441			if (ctx->input_centroid)
442				ij_index++;
443		}
444		if (ctx->shader->input[input].centroid)
445			ij_index++;
446	}
447
448	/* work out gpr and base_chan from index */
449	gpr = ij_index / 2;
450	base_chan = (2 * (ij_index % 2)) + 1;
451
452	for (i = 0; i < 8; i++) {
453		memset(&alu, 0, sizeof(struct r600_bc_alu));
454
455		if (i < 4)
456			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
457		else
458			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
459
460		if ((i > 1) && (i < 6)) {
461			alu.dst.sel = ctx->shader->input[input].gpr;
462			alu.dst.write = 1;
463		}
464
465		alu.dst.chan = i % 4;
466
467		alu.src[0].sel = gpr;
468		alu.src[0].chan = (base_chan - (i % 2));
469
470		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
471
472		alu.bank_swizzle_force = SQ_ALU_VEC_210;
473		if ((i % 4) == 3)
474			alu.last = 1;
475		r = r600_bc_add_alu(ctx->bc, &alu);
476		if (r)
477			return r;
478	}
479	return 0;
480}
481
482
483static int tgsi_declaration(struct r600_shader_ctx *ctx)
484{
485	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
486	struct r600_bc_vtx vtx;
487	unsigned i;
488	int r;
489
490	switch (d->Declaration.File) {
491	case TGSI_FILE_INPUT:
492		i = ctx->shader->ninput++;
493		ctx->shader->input[i].name = d->Semantic.Name;
494		ctx->shader->input[i].sid = d->Semantic.Index;
495		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
496		ctx->shader->input[i].centroid = d->Declaration.Centroid;
497		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
498		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
499			/* turn input into fetch */
500			memset(&vtx, 0, sizeof(struct r600_bc_vtx));
501			vtx.inst = 0;
502			vtx.fetch_type = 0;
503			vtx.buffer_id = i;
504			/* register containing the index into the buffer */
505			vtx.src_gpr = 0;
506			vtx.src_sel_x = 0;
507			vtx.mega_fetch_count = 0x1F;
508			vtx.dst_gpr = ctx->shader->input[i].gpr;
509			vtx.dst_sel_x = 0;
510			vtx.dst_sel_y = 1;
511			vtx.dst_sel_z = 2;
512			vtx.dst_sel_w = 3;
513			vtx.use_const_fields = 1;
514			r = r600_bc_add_vtx(ctx->bc, &vtx);
515			if (r)
516				return r;
517		}
518		if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == 2) {
519			/* turn input into interpolate on EG */
520			if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
521				if (ctx->shader->input[i].interpolate > 0) {
522					ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
523					evergreen_interp_alu(ctx, i);
524				}
525			}
526		}
527		break;
528	case TGSI_FILE_OUTPUT:
529		i = ctx->shader->noutput++;
530		ctx->shader->output[i].name = d->Semantic.Name;
531		ctx->shader->output[i].sid = d->Semantic.Index;
532		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
533		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
534		break;
535	case TGSI_FILE_CONSTANT:
536	case TGSI_FILE_TEMPORARY:
537	case TGSI_FILE_SAMPLER:
538	case TGSI_FILE_ADDRESS:
539		break;
540	default:
541		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
542		return -EINVAL;
543	}
544	return 0;
545}
546
547static int r600_get_temp(struct r600_shader_ctx *ctx)
548{
549	return ctx->temp_reg + ctx->max_driver_temp_used++;
550}
551
552/*
553 * for evergreen we need to scan the shader to find the number of GPRs we need to
554 * reserve for interpolation.
555 *
556 * we need to know if we are going to emit
557 * any centroid inputs
558 * if perspective and linear are required
559*/
560static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
561{
562	int i;
563	int num_baryc;
564
565	ctx->input_linear = FALSE;
566	ctx->input_perspective = FALSE;
567	ctx->input_centroid = FALSE;
568	ctx->num_interp_gpr = 1;
569
570	/* any centroid inputs */
571	for (i = 0; i < ctx->info.num_inputs; i++) {
572		/* skip position/face */
573		if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
574		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
575			continue;
576		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
577			ctx->input_linear = TRUE;
578		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
579			ctx->input_perspective = TRUE;
580		if (ctx->info.input_centroid[i])
581			ctx->input_centroid = TRUE;
582	}
583
584	num_baryc = 0;
585	/* ignoring sample for now */
586	if (ctx->input_perspective)
587		num_baryc++;
588	if (ctx->input_linear)
589		num_baryc++;
590	if (ctx->input_centroid)
591		num_baryc *= 2;
592
593	ctx->num_interp_gpr += (num_baryc + 1) >> 1;
594
595	/* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
596	return ctx->num_interp_gpr;
597}
598
599int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
600{
601	struct tgsi_full_immediate *immediate;
602	struct r600_shader_ctx ctx;
603	struct r600_bc_output output[32];
604	unsigned output_done, noutput;
605	unsigned opcode;
606	int i, r = 0, pos0;
607
608	ctx.bc = &shader->bc;
609	ctx.shader = shader;
610	r = r600_bc_init(ctx.bc, shader->family);
611	if (r)
612		return r;
613	ctx.tokens = tokens;
614	tgsi_scan_shader(tokens, &ctx.info);
615	tgsi_parse_init(&ctx.parse, tokens);
616	ctx.type = ctx.parse.FullHeader.Processor.Processor;
617	shader->processor_type = ctx.type;
618
619	/* register allocations */
620	/* Values [0,127] correspond to GPR[0..127].
621	 * Values [128,159] correspond to constant buffer bank 0
622	 * Values [160,191] correspond to constant buffer bank 1
623	 * Values [256,511] correspond to cfile constants c[0..255].
624	 * Other special values are shown in the list below.
625	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
626	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
627	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
628	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
629	 * 248	SQ_ALU_SRC_0: special constant 0.0.
630	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
631	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
632	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
633	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
634	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
635	 * 254	SQ_ALU_SRC_PV: previous vector result.
636	 * 255	SQ_ALU_SRC_PS: previous scalar result.
637	 */
638	for (i = 0; i < TGSI_FILE_COUNT; i++) {
639		ctx.file_offset[i] = 0;
640	}
641	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
642		ctx.file_offset[TGSI_FILE_INPUT] = 1;
643	}
644	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == 2) {
645		ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
646	}
647	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
648						ctx.info.file_count[TGSI_FILE_INPUT];
649	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
650						ctx.info.file_count[TGSI_FILE_OUTPUT];
651
652	ctx.file_offset[TGSI_FILE_CONSTANT] = 128;
653
654	ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
655	ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
656			ctx.info.file_count[TGSI_FILE_TEMPORARY];
657
658	ctx.nliterals = 0;
659	ctx.literals = NULL;
660
661	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
662		tgsi_parse_token(&ctx.parse);
663		switch (ctx.parse.FullToken.Token.Type) {
664		case TGSI_TOKEN_TYPE_IMMEDIATE:
665			immediate = &ctx.parse.FullToken.FullImmediate;
666			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
667			if(ctx.literals == NULL) {
668				r = -ENOMEM;
669				goto out_err;
670			}
671			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
672			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
673			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
674			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
675			ctx.nliterals++;
676			break;
677		case TGSI_TOKEN_TYPE_DECLARATION:
678			r = tgsi_declaration(&ctx);
679			if (r)
680				goto out_err;
681			break;
682		case TGSI_TOKEN_TYPE_INSTRUCTION:
683			r = tgsi_is_supported(&ctx);
684			if (r)
685				goto out_err;
686			ctx.max_driver_temp_used = 0;
687			/* reserve first tmp for everyone */
688			r600_get_temp(&ctx);
689			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
690			if (ctx.bc->chiprev == 2)
691				ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
692			else
693				ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
694			r = ctx.inst_info->process(&ctx);
695			if (r)
696				goto out_err;
697			r = r600_bc_add_literal(ctx.bc, ctx.value);
698			if (r)
699				goto out_err;
700			break;
701		default:
702			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
703			r = -EINVAL;
704			goto out_err;
705		}
706	}
707	/* export output */
708	noutput = shader->noutput;
709	for (i = 0, pos0 = 0; i < noutput; i++) {
710		memset(&output[i], 0, sizeof(struct r600_bc_output));
711		output[i].gpr = shader->output[i].gpr;
712		output[i].elem_size = 3;
713		output[i].swizzle_x = 0;
714		output[i].swizzle_y = 1;
715		output[i].swizzle_z = 2;
716		output[i].swizzle_w = 3;
717		output[i].barrier = 1;
718		output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
719		output[i].array_base = i - pos0;
720		output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
721		switch (ctx.type) {
722		case TGSI_PROCESSOR_VERTEX:
723			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
724				output[i].array_base = 60;
725				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
726				/* position doesn't count in array_base */
727				pos0++;
728			}
729			if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
730				output[i].array_base = 61;
731				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
732				/* position doesn't count in array_base */
733				pos0++;
734			}
735			break;
736		case TGSI_PROCESSOR_FRAGMENT:
737			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
738				output[i].array_base = shader->output[i].sid;
739				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
740			} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
741				output[i].array_base = 61;
742				output[i].swizzle_x = 2;
743				output[i].swizzle_y = 7;
744				output[i].swizzle_z = output[i].swizzle_w = 7;
745				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
746			} else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
747				output[i].array_base = 61;
748				output[i].swizzle_x = 7;
749				output[i].swizzle_y = 1;
750				output[i].swizzle_z = output[i].swizzle_w = 7;
751				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
752			} else {
753				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
754				r = -EINVAL;
755				goto out_err;
756			}
757			break;
758		default:
759			R600_ERR("unsupported processor type %d\n", ctx.type);
760			r = -EINVAL;
761			goto out_err;
762		}
763	}
764	/* add fake param output for vertex shader if no param is exported */
765	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
766		for (i = 0, pos0 = 0; i < noutput; i++) {
767			if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
768				pos0 = 1;
769				break;
770			}
771		}
772		if (!pos0) {
773			memset(&output[i], 0, sizeof(struct r600_bc_output));
774			output[i].gpr = 0;
775			output[i].elem_size = 3;
776			output[i].swizzle_x = 0;
777			output[i].swizzle_y = 1;
778			output[i].swizzle_z = 2;
779			output[i].swizzle_w = 3;
780			output[i].barrier = 1;
781			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
782			output[i].array_base = 0;
783			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
784			noutput++;
785		}
786	}
787	/* add fake pixel export */
788	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
789		memset(&output[0], 0, sizeof(struct r600_bc_output));
790		output[0].gpr = 0;
791		output[0].elem_size = 3;
792		output[0].swizzle_x = 7;
793		output[0].swizzle_y = 7;
794		output[0].swizzle_z = 7;
795		output[0].swizzle_w = 7;
796		output[0].barrier = 1;
797		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
798		output[0].array_base = 0;
799		output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
800		noutput++;
801	}
802	/* set export done on last export of each type */
803	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
804		if (i == (noutput - 1)) {
805			output[i].end_of_program = 1;
806		}
807		if (!(output_done & (1 << output[i].type))) {
808			output_done |= (1 << output[i].type);
809			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
810		}
811	}
812	/* add output to bytecode */
813	for (i = 0; i < noutput; i++) {
814		r = r600_bc_add_output(ctx.bc, &output[i]);
815		if (r)
816			goto out_err;
817	}
818	free(ctx.literals);
819	tgsi_parse_free(&ctx.parse);
820	return 0;
821out_err:
822	free(ctx.literals);
823	tgsi_parse_free(&ctx.parse);
824	return r;
825}
826
827static int tgsi_unsupported(struct r600_shader_ctx *ctx)
828{
829	R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
830	return -EINVAL;
831}
832
/* Handler that emits nothing and always succeeds. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;
	return 0;
}
837
838static int tgsi_src(struct r600_shader_ctx *ctx,
839			const struct tgsi_full_src_register *tgsi_src,
840			struct r600_bc_alu_src *r600_src)
841{
842	int index;
843	memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
844	r600_src->sel = tgsi_src->Register.Index;
845	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
846		r600_src->sel = 0;
847		index = tgsi_src->Register.Index;
848		ctx->value[0] = ctx->literals[index * 4 + 0];
849		ctx->value[1] = ctx->literals[index * 4 + 1];
850		ctx->value[2] = ctx->literals[index * 4 + 2];
851		ctx->value[3] = ctx->literals[index * 4 + 3];
852	}
853	if (tgsi_src->Register.Indirect)
854		r600_src->rel = V_SQ_REL_RELATIVE;
855	r600_src->neg = tgsi_src->Register.Negate;
856	r600_src->abs = tgsi_src->Register.Absolute;
857	r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
858	return 0;
859}
860
861static int tgsi_dst(struct r600_shader_ctx *ctx,
862			const struct tgsi_full_dst_register *tgsi_dst,
863			unsigned swizzle,
864			struct r600_bc_alu_dst *r600_dst)
865{
866	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
867
868	r600_dst->sel = tgsi_dst->Register.Index;
869	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
870	r600_dst->chan = swizzle;
871	r600_dst->write = 1;
872	if (tgsi_dst->Register.Indirect)
873		r600_dst->rel = V_SQ_REL_RELATIVE;
874	if (inst->Instruction.Saturate) {
875		r600_dst->clamp = 1;
876	}
877	return 0;
878}
879
880static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
881{
882	switch (swizzle) {
883	case 0:
884		return tgsi_src->Register.SwizzleX;
885	case 1:
886		return tgsi_src->Register.SwizzleY;
887	case 2:
888		return tgsi_src->Register.SwizzleZ;
889	case 3:
890		return tgsi_src->Register.SwizzleW;
891	default:
892		return 0;
893	}
894}
895
896static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
897{
898	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
899	struct r600_bc_alu alu;
900	int i, j, k, nconst, r;
901
902	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
903		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
904			nconst++;
905		}
906		r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
907		if (r) {
908			return r;
909		}
910	}
911	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
912		if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
913			int treg = r600_get_temp(ctx);
914			for (k = 0; k < 4; k++) {
915				memset(&alu, 0, sizeof(struct r600_bc_alu));
916				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
917				alu.src[0].sel = r600_src[i].sel;
918				alu.src[0].chan = k;
919				alu.src[0].rel = r600_src[i].rel;
920				alu.dst.sel = treg;
921				alu.dst.chan = k;
922				alu.dst.write = 1;
923				if (k == 3)
924					alu.last = 1;
925				r = r600_bc_add_alu(ctx->bc, &alu);
926				if (r)
927					return r;
928			}
929			r600_src[i].sel = treg;
930			r600_src[i].rel =0;
931			j--;
932		}
933	}
934	return 0;
935}
936
937/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
938static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
939{
940	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
941	struct r600_bc_alu alu;
942	int i, j, k, nliteral, r;
943
944	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
945		if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
946			nliteral++;
947		}
948	}
949	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
950		if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
951			int treg = r600_get_temp(ctx);
952			for (k = 0; k < 4; k++) {
953				memset(&alu, 0, sizeof(struct r600_bc_alu));
954				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
955				alu.src[0].sel = r600_src[i].sel;
956				alu.src[0].chan = k;
957				alu.dst.sel = treg;
958				alu.dst.chan = k;
959				alu.dst.write = 1;
960				if (k == 3)
961					alu.last = 1;
962				r = r600_bc_add_alu(ctx->bc, &alu);
963				if (r)
964					return r;
965			}
966			r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]);
967			if (r)
968				return r;
969			r600_src[i].sel = treg;
970			j--;
971		}
972	}
973	return 0;
974}
975
976static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
977{
978	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
979	struct r600_bc_alu_src r600_src[3];
980	struct r600_bc_alu alu;
981	int i, j, r;
982	int lasti = 0;
983
984	for (i = 0; i < 4; i++) {
985		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
986			lasti = i;
987		}
988	}
989
990	r = tgsi_split_constant(ctx, r600_src);
991	if (r)
992		return r;
993	r = tgsi_split_literal_constant(ctx, r600_src);
994	if (r)
995		return r;
996	for (i = 0; i < lasti + 1; i++) {
997		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
998			continue;
999
1000		memset(&alu, 0, sizeof(struct r600_bc_alu));
1001		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1002		if (r)
1003			return r;
1004
1005		alu.inst = ctx->inst_info->r600_opcode;
1006		if (!swap) {
1007			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1008				alu.src[j] = r600_src[j];
1009				alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1010			}
1011		} else {
1012			alu.src[0] = r600_src[1];
1013			alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
1014
1015			alu.src[1] = r600_src[0];
1016			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1017		}
1018		/* handle some special cases */
1019		switch (ctx->inst_info->tgsi_opcode) {
1020		case TGSI_OPCODE_SUB:
1021			alu.src[1].neg = 1;
1022			break;
1023		case TGSI_OPCODE_ABS:
1024			alu.src[0].abs = 1;
1025			break;
1026		default:
1027			break;
1028		}
1029		if (i == lasti) {
1030			alu.last = 1;
1031		}
1032		r = r600_bc_add_alu(ctx->bc, &alu);
1033		if (r)
1034			return r;
1035	}
1036	return 0;
1037}
1038
/* Per-channel ALU instruction with the sources in TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
1043
/* Per-channel ALU instruction with the first two sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
1048
1049/*
1050 * r600 - trunc to -PI..PI range
1051 * r700 - normalize by dividing by 2PI
1052 * see fdo bug 27901
1053 */
1054static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
1055			   struct r600_bc_alu_src r600_src[3])
1056{
1057	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1058	int r;
1059	uint32_t lit_vals[4];
1060	struct r600_bc_alu alu;
1061
1062	memset(lit_vals, 0, 4*4);
1063	r = tgsi_split_constant(ctx, r600_src);
1064	if (r)
1065		return r;
1066	r = tgsi_split_literal_constant(ctx, r600_src);
1067	if (r)
1068		return r;
1069
1070	r = tgsi_split_literal_constant(ctx, r600_src);
1071	if (r)
1072		return r;
1073
1074	lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
1075	lit_vals[1] = fui(0.5f);
1076
1077	memset(&alu, 0, sizeof(struct r600_bc_alu));
1078	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1079	alu.is_op3 = 1;
1080
1081	alu.dst.chan = 0;
1082	alu.dst.sel = ctx->temp_reg;
1083	alu.dst.write = 1;
1084
1085	alu.src[0] = r600_src[0];
1086	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1087
1088	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1089	alu.src[1].chan = 0;
1090	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1091	alu.src[2].chan = 1;
1092	alu.last = 1;
1093	r = r600_bc_add_alu(ctx->bc, &alu);
1094	if (r)
1095		return r;
1096	r = r600_bc_add_literal(ctx->bc, lit_vals);
1097	if (r)
1098		return r;
1099
1100	memset(&alu, 0, sizeof(struct r600_bc_alu));
1101	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1102
1103	alu.dst.chan = 0;
1104	alu.dst.sel = ctx->temp_reg;
1105	alu.dst.write = 1;
1106
1107	alu.src[0].sel = ctx->temp_reg;
1108	alu.src[0].chan = 0;
1109	alu.last = 1;
1110	r = r600_bc_add_alu(ctx->bc, &alu);
1111	if (r)
1112		return r;
1113
1114	if (ctx->bc->chiprev == 0) {
1115		lit_vals[0] = fui(3.1415926535897f * 2.0f);
1116		lit_vals[1] = fui(-3.1415926535897f);
1117	} else {
1118		lit_vals[0] = fui(1.0f);
1119		lit_vals[1] = fui(-0.5f);
1120	}
1121
1122	memset(&alu, 0, sizeof(struct r600_bc_alu));
1123	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1124	alu.is_op3 = 1;
1125
1126	alu.dst.chan = 0;
1127	alu.dst.sel = ctx->temp_reg;
1128	alu.dst.write = 1;
1129
1130	alu.src[0].sel = ctx->temp_reg;
1131	alu.src[0].chan = 0;
1132
1133	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1134	alu.src[1].chan = 0;
1135	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1136	alu.src[2].chan = 1;
1137	alu.last = 1;
1138	r = r600_bc_add_alu(ctx->bc, &alu);
1139	if (r)
1140		return r;
1141	r = r600_bc_add_literal(ctx->bc, lit_vals);
1142	if (r)
1143		return r;
1144	return 0;
1145}
1146
1147static int tgsi_trig(struct r600_shader_ctx *ctx)
1148{
1149	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1150	struct r600_bc_alu_src r600_src[3];
1151	struct r600_bc_alu alu;
1152	int i, r;
1153	int lasti = 0;
1154
1155	r = tgsi_setup_trig(ctx, r600_src);
1156	if (r)
1157		return r;
1158
1159	memset(&alu, 0, sizeof(struct r600_bc_alu));
1160	alu.inst = ctx->inst_info->r600_opcode;
1161	alu.dst.chan = 0;
1162	alu.dst.sel = ctx->temp_reg;
1163	alu.dst.write = 1;
1164
1165	alu.src[0].sel = ctx->temp_reg;
1166	alu.src[0].chan = 0;
1167	alu.last = 1;
1168	r = r600_bc_add_alu(ctx->bc, &alu);
1169	if (r)
1170		return r;
1171
1172	/* replicate result */
1173	for (i = 0; i < 4; i++) {
1174		if (inst->Dst[0].Register.WriteMask & (1 << i))
1175			lasti = i;
1176	}
1177	for (i = 0; i < lasti + 1; i++) {
1178		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1179			continue;
1180
1181		memset(&alu, 0, sizeof(struct r600_bc_alu));
1182		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1183
1184		alu.src[0].sel = ctx->temp_reg;
1185		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1186		if (r)
1187			return r;
1188		if (i == lasti)
1189			alu.last = 1;
1190		r = r600_bc_add_alu(ctx->bc, &alu);
1191		if (r)
1192			return r;
1193	}
1194	return 0;
1195}
1196
1197static int tgsi_scs(struct r600_shader_ctx *ctx)
1198{
1199	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1200	struct r600_bc_alu_src r600_src[3];
1201	struct r600_bc_alu alu;
1202	int r;
1203
1204	/* We'll only need the trig stuff if we are going to write to the
1205	 * X or Y components of the destination vector.
1206	 */
1207	if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1208		r = tgsi_setup_trig(ctx, r600_src);
1209		if (r)
1210			return r;
1211	}
1212
1213	/* dst.x = COS */
1214	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1215		memset(&alu, 0, sizeof(struct r600_bc_alu));
1216		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1217		r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1218		if (r)
1219			return r;
1220
1221		alu.src[0].sel = ctx->temp_reg;
1222		alu.src[0].chan = 0;
1223		alu.last = 1;
1224		r = r600_bc_add_alu(ctx->bc, &alu);
1225		if (r)
1226			return r;
1227	}
1228
1229	/* dst.y = SIN */
1230	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1231		memset(&alu, 0, sizeof(struct r600_bc_alu));
1232		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1233		r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1234		if (r)
1235			return r;
1236
1237		alu.src[0].sel = ctx->temp_reg;
1238		alu.src[0].chan = 0;
1239		alu.last = 1;
1240		r = r600_bc_add_alu(ctx->bc, &alu);
1241		if (r)
1242			return r;
1243	}
1244
1245	/* dst.z = 0.0; */
1246	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1247		memset(&alu, 0, sizeof(struct r600_bc_alu));
1248
1249		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1250
1251		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1252		if (r)
1253			return r;
1254
1255		alu.src[0].sel = V_SQ_ALU_SRC_0;
1256		alu.src[0].chan = 0;
1257
1258		alu.last = 1;
1259
1260		r = r600_bc_add_alu(ctx->bc, &alu);
1261		if (r)
1262			return r;
1263
1264		r = r600_bc_add_literal(ctx->bc, ctx->value);
1265		if (r)
1266			return r;
1267	}
1268
1269	/* dst.w = 1.0; */
1270	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1271		memset(&alu, 0, sizeof(struct r600_bc_alu));
1272
1273		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1274
1275		r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1276		if (r)
1277			return r;
1278
1279		alu.src[0].sel = V_SQ_ALU_SRC_1;
1280		alu.src[0].chan = 0;
1281
1282		alu.last = 1;
1283
1284		r = r600_bc_add_alu(ctx->bc, &alu);
1285		if (r)
1286			return r;
1287
1288		r = r600_bc_add_literal(ctx->bc, ctx->value);
1289		if (r)
1290			return r;
1291	}
1292
1293	return 0;
1294}
1295
1296static int tgsi_kill(struct r600_shader_ctx *ctx)
1297{
1298	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1299	struct r600_bc_alu alu;
1300	int i, r;
1301
1302	for (i = 0; i < 4; i++) {
1303		memset(&alu, 0, sizeof(struct r600_bc_alu));
1304		alu.inst = ctx->inst_info->r600_opcode;
1305
1306		alu.dst.chan = i;
1307
1308		alu.src[0].sel = V_SQ_ALU_SRC_0;
1309
1310		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1311			alu.src[1].sel = V_SQ_ALU_SRC_1;
1312			alu.src[1].neg = 1;
1313		} else {
1314			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1315			if (r)
1316				return r;
1317			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1318		}
1319		if (i == 3) {
1320			alu.last = 1;
1321		}
1322		r = r600_bc_add_alu(ctx->bc, &alu);
1323		if (r)
1324			return r;
1325	}
1326	r = r600_bc_add_literal(ctx->bc, ctx->value);
1327	if (r)
1328		return r;
1329
1330	/* kill must be last in ALU */
1331	ctx->bc->force_add_cf = 1;
1332	ctx->shader->uses_kill = TRUE;
1333	return 0;
1334}
1335
1336static int tgsi_lit(struct r600_shader_ctx *ctx)
1337{
1338	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1339	struct r600_bc_alu alu;
1340	struct r600_bc_alu_src r600_src[3];
1341	int r;
1342
1343	r = tgsi_split_constant(ctx, r600_src);
1344	if (r)
1345		return r;
1346	r = tgsi_split_literal_constant(ctx, r600_src);
1347	if (r)
1348		return r;
1349
1350	/* dst.x, <- 1.0  */
1351	memset(&alu, 0, sizeof(struct r600_bc_alu));
1352	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1353	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
1354	alu.src[0].chan = 0;
1355	r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1356	if (r)
1357		return r;
1358	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1359	r = r600_bc_add_alu(ctx->bc, &alu);
1360	if (r)
1361		return r;
1362
1363	/* dst.y = max(src.x, 0.0) */
1364	memset(&alu, 0, sizeof(struct r600_bc_alu));
1365	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1366	alu.src[0] = r600_src[0];
1367	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1368	alu.src[1].chan = 0;
1369	r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1370	if (r)
1371		return r;
1372	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1373	r = r600_bc_add_alu(ctx->bc, &alu);
1374	if (r)
1375		return r;
1376
1377	/* dst.w, <- 1.0  */
1378	memset(&alu, 0, sizeof(struct r600_bc_alu));
1379	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1380	alu.src[0].sel  = V_SQ_ALU_SRC_1;
1381	alu.src[0].chan = 0;
1382	r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1383	if (r)
1384		return r;
1385	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1386	alu.last = 1;
1387	r = r600_bc_add_alu(ctx->bc, &alu);
1388	if (r)
1389		return r;
1390
1391	r = r600_bc_add_literal(ctx->bc, ctx->value);
1392	if (r)
1393		return r;
1394
1395	if (inst->Dst[0].Register.WriteMask & (1 << 2))
1396	{
1397		int chan;
1398		int sel;
1399
1400		/* dst.z = log(src.y) */
1401		memset(&alu, 0, sizeof(struct r600_bc_alu));
1402		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1403		alu.src[0] = r600_src[0];
1404		alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1405		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1406		if (r)
1407			return r;
1408		alu.last = 1;
1409		r = r600_bc_add_alu(ctx->bc, &alu);
1410		if (r)
1411			return r;
1412
1413		r = r600_bc_add_literal(ctx->bc, ctx->value);
1414		if (r)
1415			return r;
1416
1417		chan = alu.dst.chan;
1418		sel = alu.dst.sel;
1419
1420		/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1421		memset(&alu, 0, sizeof(struct r600_bc_alu));
1422		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1423		alu.src[0] = r600_src[0];
1424		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1425		alu.src[1].sel  = sel;
1426		alu.src[1].chan = chan;
1427
1428		alu.src[2] = r600_src[0];
1429		alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
1430		alu.dst.sel = ctx->temp_reg;
1431		alu.dst.chan = 0;
1432		alu.dst.write = 1;
1433		alu.is_op3 = 1;
1434		alu.last = 1;
1435		r = r600_bc_add_alu(ctx->bc, &alu);
1436		if (r)
1437			return r;
1438
1439		r = r600_bc_add_literal(ctx->bc, ctx->value);
1440		if (r)
1441			return r;
1442		/* dst.z = exp(tmp.x) */
1443		memset(&alu, 0, sizeof(struct r600_bc_alu));
1444		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1445		alu.src[0].sel = ctx->temp_reg;
1446		alu.src[0].chan = 0;
1447		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1448		if (r)
1449			return r;
1450		alu.last = 1;
1451		r = r600_bc_add_alu(ctx->bc, &alu);
1452		if (r)
1453			return r;
1454	}
1455	return 0;
1456}
1457
1458static int tgsi_rsq(struct r600_shader_ctx *ctx)
1459{
1460	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1461	struct r600_bc_alu alu;
1462	int i, r;
1463
1464	memset(&alu, 0, sizeof(struct r600_bc_alu));
1465
1466	/* FIXME:
1467	 * For state trackers other than OpenGL, we'll want to use
1468	 * _RECIPSQRT_IEEE instead.
1469	 */
1470	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1471
1472	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1473		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1474		if (r)
1475			return r;
1476		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1477		alu.src[i].abs = 1;
1478	}
1479	alu.dst.sel = ctx->temp_reg;
1480	alu.dst.write = 1;
1481	alu.last = 1;
1482	r = r600_bc_add_alu(ctx->bc, &alu);
1483	if (r)
1484		return r;
1485	r = r600_bc_add_literal(ctx->bc, ctx->value);
1486	if (r)
1487		return r;
1488	/* replicate result */
1489	return tgsi_helper_tempx_replicate(ctx);
1490}
1491
1492static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1493{
1494	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1495	struct r600_bc_alu alu;
1496	int i, r;
1497
1498	for (i = 0; i < 4; i++) {
1499		memset(&alu, 0, sizeof(struct r600_bc_alu));
1500		alu.src[0].sel = ctx->temp_reg;
1501		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1502		alu.dst.chan = i;
1503		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1504		if (r)
1505			return r;
1506		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1507		if (i == 3)
1508			alu.last = 1;
1509		r = r600_bc_add_alu(ctx->bc, &alu);
1510		if (r)
1511			return r;
1512	}
1513	return 0;
1514}
1515
1516static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1517{
1518	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1519	struct r600_bc_alu alu;
1520	int i, r;
1521
1522	memset(&alu, 0, sizeof(struct r600_bc_alu));
1523	alu.inst = ctx->inst_info->r600_opcode;
1524	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1525		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1526		if (r)
1527			return r;
1528		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1529	}
1530	alu.dst.sel = ctx->temp_reg;
1531	alu.dst.write = 1;
1532	alu.last = 1;
1533	r = r600_bc_add_alu(ctx->bc, &alu);
1534	if (r)
1535		return r;
1536	r = r600_bc_add_literal(ctx->bc, ctx->value);
1537	if (r)
1538		return r;
1539	/* replicate result */
1540	return tgsi_helper_tempx_replicate(ctx);
1541}
1542
1543static int tgsi_pow(struct r600_shader_ctx *ctx)
1544{
1545	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1546	struct r600_bc_alu alu;
1547	int r;
1548
1549	/* LOG2(a) */
1550	memset(&alu, 0, sizeof(struct r600_bc_alu));
1551	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1552	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1553	if (r)
1554		return r;
1555	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1556	alu.dst.sel = ctx->temp_reg;
1557	alu.dst.write = 1;
1558	alu.last = 1;
1559	r = r600_bc_add_alu(ctx->bc, &alu);
1560	if (r)
1561		return r;
1562	r = r600_bc_add_literal(ctx->bc,ctx->value);
1563	if (r)
1564		return r;
1565	/* b * LOG2(a) */
1566	memset(&alu, 0, sizeof(struct r600_bc_alu));
1567	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE);
1568	r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1569	if (r)
1570		return r;
1571	alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1572	alu.src[1].sel = ctx->temp_reg;
1573	alu.dst.sel = ctx->temp_reg;
1574	alu.dst.write = 1;
1575	alu.last = 1;
1576	r = r600_bc_add_alu(ctx->bc, &alu);
1577	if (r)
1578		return r;
1579	r = r600_bc_add_literal(ctx->bc,ctx->value);
1580	if (r)
1581		return r;
1582	/* POW(a,b) = EXP2(b * LOG2(a))*/
1583	memset(&alu, 0, sizeof(struct r600_bc_alu));
1584	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1585	alu.src[0].sel = ctx->temp_reg;
1586	alu.dst.sel = ctx->temp_reg;
1587	alu.dst.write = 1;
1588	alu.last = 1;
1589	r = r600_bc_add_alu(ctx->bc, &alu);
1590	if (r)
1591		return r;
1592	r = r600_bc_add_literal(ctx->bc,ctx->value);
1593	if (r)
1594		return r;
1595	return tgsi_helper_tempx_replicate(ctx);
1596}
1597
1598static int tgsi_ssg(struct r600_shader_ctx *ctx)
1599{
1600	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1601	struct r600_bc_alu alu;
1602	struct r600_bc_alu_src r600_src[3];
1603	int i, r;
1604
1605	r = tgsi_split_constant(ctx, r600_src);
1606	if (r)
1607		return r;
1608	r = tgsi_split_literal_constant(ctx, r600_src);
1609	if (r)
1610		return r;
1611
1612	/* tmp = (src > 0 ? 1 : src) */
1613	for (i = 0; i < 4; i++) {
1614		memset(&alu, 0, sizeof(struct r600_bc_alu));
1615		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1616		alu.is_op3 = 1;
1617
1618		alu.dst.sel = ctx->temp_reg;
1619		alu.dst.chan = i;
1620
1621		alu.src[0] = r600_src[0];
1622		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1623
1624		alu.src[1].sel = V_SQ_ALU_SRC_1;
1625
1626		alu.src[2] = r600_src[0];
1627		alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1628		if (i == 3)
1629			alu.last = 1;
1630		r = r600_bc_add_alu(ctx->bc, &alu);
1631		if (r)
1632			return r;
1633	}
1634	r = r600_bc_add_literal(ctx->bc, ctx->value);
1635	if (r)
1636		return r;
1637
1638	/* dst = (-tmp > 0 ? -1 : tmp) */
1639	for (i = 0; i < 4; i++) {
1640		memset(&alu, 0, sizeof(struct r600_bc_alu));
1641		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1642		alu.is_op3 = 1;
1643		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1644		if (r)
1645			return r;
1646
1647		alu.src[0].sel = ctx->temp_reg;
1648		alu.src[0].chan = i;
1649		alu.src[0].neg = 1;
1650
1651		alu.src[1].sel = V_SQ_ALU_SRC_1;
1652		alu.src[1].neg = 1;
1653
1654		alu.src[2].sel = ctx->temp_reg;
1655		alu.src[2].chan = i;
1656
1657		if (i == 3)
1658			alu.last = 1;
1659		r = r600_bc_add_alu(ctx->bc, &alu);
1660		if (r)
1661			return r;
1662	}
1663	return 0;
1664}
1665
1666static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1667{
1668	struct r600_bc_alu alu;
1669	int i, r;
1670
1671	r = r600_bc_add_literal(ctx->bc, ctx->value);
1672	if (r)
1673		return r;
1674	for (i = 0; i < 4; i++) {
1675		memset(&alu, 0, sizeof(struct r600_bc_alu));
1676		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1677			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1678			alu.dst.chan = i;
1679		} else {
1680			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1681			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1682			if (r)
1683				return r;
1684			alu.src[0].sel = ctx->temp_reg;
1685			alu.src[0].chan = i;
1686		}
1687		if (i == 3) {
1688			alu.last = 1;
1689		}
1690		r = r600_bc_add_alu(ctx->bc, &alu);
1691		if (r)
1692			return r;
1693	}
1694	return 0;
1695}
1696
1697static int tgsi_op3(struct r600_shader_ctx *ctx)
1698{
1699	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1700	struct r600_bc_alu_src r600_src[3];
1701	struct r600_bc_alu alu;
1702	int i, j, r;
1703
1704	r = tgsi_split_constant(ctx, r600_src);
1705	if (r)
1706		return r;
1707	r = tgsi_split_literal_constant(ctx, r600_src);
1708	if (r)
1709		return r;
1710	/* do it in 2 step as op3 doesn't support writemask */
1711	for (i = 0; i < 4; i++) {
1712		memset(&alu, 0, sizeof(struct r600_bc_alu));
1713		alu.inst = ctx->inst_info->r600_opcode;
1714		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1715			alu.src[j] = r600_src[j];
1716			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1717		}
1718		alu.dst.sel = ctx->temp_reg;
1719		alu.dst.chan = i;
1720		alu.dst.write = 1;
1721		alu.is_op3 = 1;
1722		if (i == 3) {
1723			alu.last = 1;
1724		}
1725		r = r600_bc_add_alu(ctx->bc, &alu);
1726		if (r)
1727			return r;
1728	}
1729	return tgsi_helper_copy(ctx, inst);
1730}
1731
1732static int tgsi_dp(struct r600_shader_ctx *ctx)
1733{
1734	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1735	struct r600_bc_alu_src r600_src[3];
1736	struct r600_bc_alu alu;
1737	int i, j, r;
1738
1739	r = tgsi_split_constant(ctx, r600_src);
1740	if (r)
1741		return r;
1742	r = tgsi_split_literal_constant(ctx, r600_src);
1743	if (r)
1744		return r;
1745	for (i = 0; i < 4; i++) {
1746		memset(&alu, 0, sizeof(struct r600_bc_alu));
1747		alu.inst = ctx->inst_info->r600_opcode;
1748		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1749			alu.src[j] = r600_src[j];
1750			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1751		}
1752		alu.dst.sel = ctx->temp_reg;
1753		alu.dst.chan = i;
1754		alu.dst.write = 1;
1755		/* handle some special cases */
1756		switch (ctx->inst_info->tgsi_opcode) {
1757		case TGSI_OPCODE_DP2:
1758			if (i > 1) {
1759				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1760				alu.src[0].chan = alu.src[1].chan = 0;
1761			}
1762			break;
1763		case TGSI_OPCODE_DP3:
1764			if (i > 2) {
1765				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1766				alu.src[0].chan = alu.src[1].chan = 0;
1767			}
1768			break;
1769		case TGSI_OPCODE_DPH:
1770			if (i == 3) {
1771				alu.src[0].sel = V_SQ_ALU_SRC_1;
1772				alu.src[0].chan = 0;
1773				alu.src[0].neg = 0;
1774			}
1775			break;
1776		default:
1777			break;
1778		}
1779		if (i == 3) {
1780			alu.last = 1;
1781		}
1782		r = r600_bc_add_alu(ctx->bc, &alu);
1783		if (r)
1784			return r;
1785	}
1786	return tgsi_helper_copy(ctx, inst);
1787}
1788
1789static int tgsi_tex(struct r600_shader_ctx *ctx)
1790{
1791	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1792	struct r600_bc_tex tex;
1793	struct r600_bc_alu alu;
1794	unsigned src_gpr;
1795	int r, i;
1796	int opcode;
1797	boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY;
1798	uint32_t lit_vals[4];
1799
1800	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1801
1802	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1803		/* Add perspective divide */
1804		memset(&alu, 0, sizeof(struct r600_bc_alu));
1805		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1806		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1807		if (r)
1808			return r;
1809
1810		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1811		alu.dst.sel = ctx->temp_reg;
1812		alu.dst.chan = 3;
1813		alu.last = 1;
1814		alu.dst.write = 1;
1815		r = r600_bc_add_alu(ctx->bc, &alu);
1816		if (r)
1817			return r;
1818
1819		for (i = 0; i < 3; i++) {
1820			memset(&alu, 0, sizeof(struct r600_bc_alu));
1821			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1822			alu.src[0].sel = ctx->temp_reg;
1823			alu.src[0].chan = 3;
1824			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1825			if (r)
1826				return r;
1827			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1828			alu.dst.sel = ctx->temp_reg;
1829			alu.dst.chan = i;
1830			alu.dst.write = 1;
1831			r = r600_bc_add_alu(ctx->bc, &alu);
1832			if (r)
1833				return r;
1834		}
1835		memset(&alu, 0, sizeof(struct r600_bc_alu));
1836		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1837		alu.src[0].sel = V_SQ_ALU_SRC_1;
1838		alu.src[0].chan = 0;
1839		alu.dst.sel = ctx->temp_reg;
1840		alu.dst.chan = 3;
1841		alu.last = 1;
1842		alu.dst.write = 1;
1843		r = r600_bc_add_alu(ctx->bc, &alu);
1844		if (r)
1845			return r;
1846		src_not_temp = FALSE;
1847		src_gpr = ctx->temp_reg;
1848	}
1849
1850	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1851		int src_chan, src2_chan;
1852
1853		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1854		for (i = 0; i < 4; i++) {
1855			memset(&alu, 0, sizeof(struct r600_bc_alu));
1856			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1857			switch (i) {
1858			case 0:
1859				src_chan = 2;
1860				src2_chan = 1;
1861				break;
1862			case 1:
1863				src_chan = 2;
1864				src2_chan = 0;
1865				break;
1866			case 2:
1867				src_chan = 0;
1868				src2_chan = 2;
1869				break;
1870			case 3:
1871				src_chan = 1;
1872				src2_chan = 2;
1873				break;
1874			default:
1875				assert(0);
1876				src_chan = 0;
1877				src2_chan = 0;
1878				break;
1879			}
1880			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1881			if (r)
1882				return r;
1883			alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
1884			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1885			if (r)
1886				return r;
1887			alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
1888			alu.dst.sel = ctx->temp_reg;
1889			alu.dst.chan = i;
1890			if (i == 3)
1891				alu.last = 1;
1892			alu.dst.write = 1;
1893			r = r600_bc_add_alu(ctx->bc, &alu);
1894			if (r)
1895				return r;
1896		}
1897
1898		/* tmp1.z = RCP_e(|tmp1.z|) */
1899		memset(&alu, 0, sizeof(struct r600_bc_alu));
1900		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1901		alu.src[0].sel = ctx->temp_reg;
1902		alu.src[0].chan = 2;
1903		alu.src[0].abs = 1;
1904		alu.dst.sel = ctx->temp_reg;
1905		alu.dst.chan = 2;
1906		alu.dst.write = 1;
1907		alu.last = 1;
1908		r = r600_bc_add_alu(ctx->bc, &alu);
1909		if (r)
1910			return r;
1911
1912		/* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
1913		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
1914		 * muladd has no writemask, have to use another temp
1915		 */
1916		memset(&alu, 0, sizeof(struct r600_bc_alu));
1917		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1918		alu.is_op3 = 1;
1919
1920		alu.src[0].sel = ctx->temp_reg;
1921		alu.src[0].chan = 0;
1922		alu.src[1].sel = ctx->temp_reg;
1923		alu.src[1].chan = 2;
1924
1925		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1926		alu.src[2].chan = 0;
1927
1928		alu.dst.sel = ctx->temp_reg;
1929		alu.dst.chan = 0;
1930		alu.dst.write = 1;
1931
1932		r = r600_bc_add_alu(ctx->bc, &alu);
1933		if (r)
1934			return r;
1935
1936		memset(&alu, 0, sizeof(struct r600_bc_alu));
1937		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1938		alu.is_op3 = 1;
1939
1940		alu.src[0].sel = ctx->temp_reg;
1941		alu.src[0].chan = 1;
1942		alu.src[1].sel = ctx->temp_reg;
1943		alu.src[1].chan = 2;
1944
1945		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1946		alu.src[2].chan = 0;
1947
1948		alu.dst.sel = ctx->temp_reg;
1949		alu.dst.chan = 1;
1950		alu.dst.write = 1;
1951
1952		alu.last = 1;
1953		r = r600_bc_add_alu(ctx->bc, &alu);
1954		if (r)
1955			return r;
1956
1957		lit_vals[0] = fui(1.5f);
1958
1959		r = r600_bc_add_literal(ctx->bc, lit_vals);
1960		if (r)
1961			return r;
1962		src_not_temp = FALSE;
1963		src_gpr = ctx->temp_reg;
1964	}
1965
1966	if (src_not_temp) {
1967		for (i = 0; i < 4; i++) {
1968			memset(&alu, 0, sizeof(struct r600_bc_alu));
1969			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1970			alu.src[0].sel = src_gpr;
1971			alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1972			alu.dst.sel = ctx->temp_reg;
1973			alu.dst.chan = i;
1974			if (i == 3)
1975				alu.last = 1;
1976			alu.dst.write = 1;
1977			r = r600_bc_add_alu(ctx->bc, &alu);
1978			if (r)
1979				return r;
1980		}
1981		src_gpr = ctx->temp_reg;
1982	}
1983
1984	opcode = ctx->inst_info->r600_opcode;
1985	if (opcode == SQ_TEX_INST_SAMPLE &&
1986	    (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
1987		opcode = SQ_TEX_INST_SAMPLE_C;
1988
1989	memset(&tex, 0, sizeof(struct r600_bc_tex));
1990	tex.inst = opcode;
1991	tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1992	tex.resource_id = tex.sampler_id;
1993	if (ctx->shader->processor_type == TGSI_PROCESSOR_VERTEX)
1994		tex.resource_id += PIPE_MAX_ATTRIBS;
1995	tex.src_gpr = src_gpr;
1996	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1997	tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
1998	tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
1999	tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
2000	tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
2001	tex.src_sel_x = 0;
2002	tex.src_sel_y = 1;
2003	tex.src_sel_z = 2;
2004	tex.src_sel_w = 3;
2005
2006	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
2007		tex.src_sel_x = 1;
2008		tex.src_sel_y = 0;
2009		tex.src_sel_z = 3;
2010		tex.src_sel_w = 1;
2011	}
2012
2013	if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
2014		tex.coord_type_x = 1;
2015		tex.coord_type_y = 1;
2016		tex.coord_type_z = 1;
2017		tex.coord_type_w = 1;
2018	}
2019
2020	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
2021		tex.src_sel_w = 2;
2022
2023	r = r600_bc_add_tex(ctx->bc, &tex);
2024	if (r)
2025		return r;
2026
2027	/* add shadow ambient support  - gallium doesn't do it yet */
2028	return 0;
2029
2030}
2031
2032static int tgsi_lrp(struct r600_shader_ctx *ctx)
2033{
2034	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2035	struct r600_bc_alu_src r600_src[3];
2036	struct r600_bc_alu alu;
2037	unsigned i;
2038	int r;
2039
2040	r = tgsi_split_constant(ctx, r600_src);
2041	if (r)
2042		return r;
2043	r = tgsi_split_literal_constant(ctx, r600_src);
2044	if (r)
2045		return r;
2046	/* 1 - src0 */
2047	for (i = 0; i < 4; i++) {
2048		memset(&alu, 0, sizeof(struct r600_bc_alu));
2049		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2050		alu.src[0].sel = V_SQ_ALU_SRC_1;
2051		alu.src[0].chan = 0;
2052		alu.src[1] = r600_src[0];
2053		alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
2054		alu.src[1].neg = 1;
2055		alu.dst.sel = ctx->temp_reg;
2056		alu.dst.chan = i;
2057		if (i == 3) {
2058			alu.last = 1;
2059		}
2060		alu.dst.write = 1;
2061		r = r600_bc_add_alu(ctx->bc, &alu);
2062		if (r)
2063			return r;
2064	}
2065	r = r600_bc_add_literal(ctx->bc, ctx->value);
2066	if (r)
2067		return r;
2068
2069	/* (1 - src0) * src2 */
2070	for (i = 0; i < 4; i++) {
2071		memset(&alu, 0, sizeof(struct r600_bc_alu));
2072		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2073		alu.src[0].sel = ctx->temp_reg;
2074		alu.src[0].chan = i;
2075		alu.src[1] = r600_src[2];
2076		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
2077		alu.dst.sel = ctx->temp_reg;
2078		alu.dst.chan = i;
2079		if (i == 3) {
2080			alu.last = 1;
2081		}
2082		alu.dst.write = 1;
2083		r = r600_bc_add_alu(ctx->bc, &alu);
2084		if (r)
2085			return r;
2086	}
2087	r = r600_bc_add_literal(ctx->bc, ctx->value);
2088	if (r)
2089		return r;
2090
2091	/* src0 * src1 + (1 - src0) * src2 */
2092	for (i = 0; i < 4; i++) {
2093		memset(&alu, 0, sizeof(struct r600_bc_alu));
2094		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2095		alu.is_op3 = 1;
2096		alu.src[0] = r600_src[0];
2097		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2098		alu.src[1] = r600_src[1];
2099		alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2100		alu.src[2].sel = ctx->temp_reg;
2101		alu.src[2].chan = i;
2102		alu.dst.sel = ctx->temp_reg;
2103		alu.dst.chan = i;
2104		if (i == 3) {
2105			alu.last = 1;
2106		}
2107		r = r600_bc_add_alu(ctx->bc, &alu);
2108		if (r)
2109			return r;
2110	}
2111	return tgsi_helper_copy(ctx, inst);
2112}
2113
2114static int tgsi_cmp(struct r600_shader_ctx *ctx)
2115{
2116	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2117	struct r600_bc_alu_src r600_src[3];
2118	struct r600_bc_alu alu;
2119	int use_temp = 0;
2120	int i, r;
2121
2122	r = tgsi_split_constant(ctx, r600_src);
2123	if (r)
2124		return r;
2125	r = tgsi_split_literal_constant(ctx, r600_src);
2126	if (r)
2127		return r;
2128
2129	if (inst->Dst[0].Register.WriteMask != 0xf)
2130		use_temp = 1;
2131
2132	for (i = 0; i < 4; i++) {
2133		memset(&alu, 0, sizeof(struct r600_bc_alu));
2134		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
2135		alu.src[0] = r600_src[0];
2136		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2137
2138		alu.src[1] = r600_src[2];
2139		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
2140
2141		alu.src[2] = r600_src[1];
2142		alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
2143
2144		if (use_temp)
2145			alu.dst.sel = ctx->temp_reg;
2146		else {
2147			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2148			if (r)
2149				return r;
2150		}
2151		alu.dst.chan = i;
2152		alu.dst.write = 1;
2153		alu.is_op3 = 1;
2154		if (i == 3)
2155			alu.last = 1;
2156		r = r600_bc_add_alu(ctx->bc, &alu);
2157		if (r)
2158			return r;
2159	}
2160	if (use_temp)
2161		return tgsi_helper_copy(ctx, inst);
2162	return 0;
2163}
2164
2165static int tgsi_xpd(struct r600_shader_ctx *ctx)
2166{
2167	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2168	struct r600_bc_alu_src r600_src[3];
2169	struct r600_bc_alu alu;
2170	uint32_t use_temp = 0;
2171	int i, r;
2172
2173	if (inst->Dst[0].Register.WriteMask != 0xf)
2174		use_temp = 1;
2175
2176	r = tgsi_split_constant(ctx, r600_src);
2177	if (r)
2178		return r;
2179	r = tgsi_split_literal_constant(ctx, r600_src);
2180	if (r)
2181		return r;
2182
2183	for (i = 0; i < 4; i++) {
2184		memset(&alu, 0, sizeof(struct r600_bc_alu));
2185		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2186
2187		alu.src[0] = r600_src[0];
2188		switch (i) {
2189		case 0:
2190			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2191			break;
2192		case 1:
2193			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2194			break;
2195		case 2:
2196			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2197			break;
2198		case 3:
2199			alu.src[0].sel = V_SQ_ALU_SRC_0;
2200			alu.src[0].chan = i;
2201		}
2202
2203		alu.src[1] = r600_src[1];
2204		switch (i) {
2205		case 0:
2206			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2207			break;
2208		case 1:
2209			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2210			break;
2211		case 2:
2212			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2213			break;
2214		case 3:
2215			alu.src[1].sel = V_SQ_ALU_SRC_0;
2216			alu.src[1].chan = i;
2217		}
2218
2219		alu.dst.sel = ctx->temp_reg;
2220		alu.dst.chan = i;
2221		alu.dst.write = 1;
2222
2223		if (i == 3)
2224			alu.last = 1;
2225		r = r600_bc_add_alu(ctx->bc, &alu);
2226		if (r)
2227			return r;
2228
2229		r = r600_bc_add_literal(ctx->bc, ctx->value);
2230		if (r)
2231			return r;
2232	}
2233
2234	for (i = 0; i < 4; i++) {
2235		memset(&alu, 0, sizeof(struct r600_bc_alu));
2236		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2237
2238		alu.src[0] = r600_src[0];
2239		switch (i) {
2240		case 0:
2241			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2242			break;
2243		case 1:
2244			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2245			break;
2246		case 2:
2247			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2248			break;
2249		case 3:
2250			alu.src[0].sel = V_SQ_ALU_SRC_0;
2251			alu.src[0].chan = i;
2252		}
2253
2254		alu.src[1] = r600_src[1];
2255		switch (i) {
2256		case 0:
2257			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2258			break;
2259		case 1:
2260			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2261			break;
2262		case 2:
2263			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2264			break;
2265		case 3:
2266			alu.src[1].sel = V_SQ_ALU_SRC_0;
2267			alu.src[1].chan = i;
2268		}
2269
2270		alu.src[2].sel = ctx->temp_reg;
2271		alu.src[2].neg = 1;
2272		alu.src[2].chan = i;
2273
2274		if (use_temp)
2275			alu.dst.sel = ctx->temp_reg;
2276		else {
2277			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2278			if (r)
2279				return r;
2280		}
2281		alu.dst.chan = i;
2282		alu.dst.write = 1;
2283		alu.is_op3 = 1;
2284		if (i == 3)
2285			alu.last = 1;
2286		r = r600_bc_add_alu(ctx->bc, &alu);
2287		if (r)
2288			return r;
2289
2290		r = r600_bc_add_literal(ctx->bc, ctx->value);
2291		if (r)
2292			return r;
2293	}
2294	if (use_temp)
2295		return tgsi_helper_copy(ctx, inst);
2296	return 0;
2297}
2298
2299static int tgsi_exp(struct r600_shader_ctx *ctx)
2300{
2301	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2302	struct r600_bc_alu_src r600_src[3] = { { 0 } };
2303	struct r600_bc_alu alu;
2304	int r;
2305
2306	/* result.x = 2^floor(src); */
2307	if (inst->Dst[0].Register.WriteMask & 1) {
2308		memset(&alu, 0, sizeof(struct r600_bc_alu));
2309
2310		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2311		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2312		if (r)
2313			return r;
2314
2315		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2316
2317		alu.dst.sel = ctx->temp_reg;
2318		alu.dst.chan = 0;
2319		alu.dst.write = 1;
2320		alu.last = 1;
2321		r = r600_bc_add_alu(ctx->bc, &alu);
2322		if (r)
2323			return r;
2324
2325		r = r600_bc_add_literal(ctx->bc, ctx->value);
2326		if (r)
2327			return r;
2328
2329		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2330		alu.src[0].sel = ctx->temp_reg;
2331		alu.src[0].chan = 0;
2332
2333		alu.dst.sel = ctx->temp_reg;
2334		alu.dst.chan = 0;
2335		alu.dst.write = 1;
2336		alu.last = 1;
2337		r = r600_bc_add_alu(ctx->bc, &alu);
2338		if (r)
2339			return r;
2340
2341		r = r600_bc_add_literal(ctx->bc, ctx->value);
2342		if (r)
2343			return r;
2344	}
2345
2346	/* result.y = tmp - floor(tmp); */
2347	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2348		memset(&alu, 0, sizeof(struct r600_bc_alu));
2349
2350		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2351		alu.src[0] = r600_src[0];
2352		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2353		if (r)
2354			return r;
2355		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2356
2357		alu.dst.sel = ctx->temp_reg;
2358//		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2359//		if (r)
2360//			return r;
2361		alu.dst.write = 1;
2362		alu.dst.chan = 1;
2363
2364		alu.last = 1;
2365
2366		r = r600_bc_add_alu(ctx->bc, &alu);
2367		if (r)
2368			return r;
2369		r = r600_bc_add_literal(ctx->bc, ctx->value);
2370		if (r)
2371			return r;
2372	}
2373
2374	/* result.z = RoughApprox2ToX(tmp);*/
2375	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2376		memset(&alu, 0, sizeof(struct r600_bc_alu));
2377		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2378		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2379		if (r)
2380			return r;
2381		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2382
2383		alu.dst.sel = ctx->temp_reg;
2384		alu.dst.write = 1;
2385		alu.dst.chan = 2;
2386
2387		alu.last = 1;
2388
2389		r = r600_bc_add_alu(ctx->bc, &alu);
2390		if (r)
2391			return r;
2392		r = r600_bc_add_literal(ctx->bc, ctx->value);
2393		if (r)
2394			return r;
2395	}
2396
2397	/* result.w = 1.0;*/
2398	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2399		memset(&alu, 0, sizeof(struct r600_bc_alu));
2400
2401		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2402		alu.src[0].sel = V_SQ_ALU_SRC_1;
2403		alu.src[0].chan = 0;
2404
2405		alu.dst.sel = ctx->temp_reg;
2406		alu.dst.chan = 3;
2407		alu.dst.write = 1;
2408		alu.last = 1;
2409		r = r600_bc_add_alu(ctx->bc, &alu);
2410		if (r)
2411			return r;
2412		r = r600_bc_add_literal(ctx->bc, ctx->value);
2413		if (r)
2414			return r;
2415	}
2416	return tgsi_helper_copy(ctx, inst);
2417}
2418
2419static int tgsi_log(struct r600_shader_ctx *ctx)
2420{
2421	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2422	struct r600_bc_alu alu;
2423	int r;
2424
2425	/* result.x = floor(log2(src)); */
2426	if (inst->Dst[0].Register.WriteMask & 1) {
2427		memset(&alu, 0, sizeof(struct r600_bc_alu));
2428
2429		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2430		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2431		if (r)
2432			return r;
2433
2434		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2435
2436		alu.dst.sel = ctx->temp_reg;
2437		alu.dst.chan = 0;
2438		alu.dst.write = 1;
2439		alu.last = 1;
2440		r = r600_bc_add_alu(ctx->bc, &alu);
2441		if (r)
2442			return r;
2443
2444		r = r600_bc_add_literal(ctx->bc, ctx->value);
2445		if (r)
2446			return r;
2447
2448		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2449		alu.src[0].sel = ctx->temp_reg;
2450		alu.src[0].chan = 0;
2451
2452		alu.dst.sel = ctx->temp_reg;
2453		alu.dst.chan = 0;
2454		alu.dst.write = 1;
2455		alu.last = 1;
2456
2457		r = r600_bc_add_alu(ctx->bc, &alu);
2458		if (r)
2459			return r;
2460
2461		r = r600_bc_add_literal(ctx->bc, ctx->value);
2462		if (r)
2463			return r;
2464	}
2465
2466	/* result.y = src.x / (2 ^ floor(log2(src.x))); */
2467	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2468		memset(&alu, 0, sizeof(struct r600_bc_alu));
2469
2470		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2471		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2472		if (r)
2473			return r;
2474
2475		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2476
2477		alu.dst.sel = ctx->temp_reg;
2478		alu.dst.chan = 1;
2479		alu.dst.write = 1;
2480		alu.last = 1;
2481
2482		r = r600_bc_add_alu(ctx->bc, &alu);
2483		if (r)
2484			return r;
2485
2486		r = r600_bc_add_literal(ctx->bc, ctx->value);
2487		if (r)
2488			return r;
2489
2490		memset(&alu, 0, sizeof(struct r600_bc_alu));
2491
2492		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2493		alu.src[0].sel = ctx->temp_reg;
2494		alu.src[0].chan = 1;
2495
2496		alu.dst.sel = ctx->temp_reg;
2497		alu.dst.chan = 1;
2498		alu.dst.write = 1;
2499		alu.last = 1;
2500
2501		r = r600_bc_add_alu(ctx->bc, &alu);
2502		if (r)
2503			return r;
2504
2505		r = r600_bc_add_literal(ctx->bc, ctx->value);
2506		if (r)
2507			return r;
2508
2509		memset(&alu, 0, sizeof(struct r600_bc_alu));
2510
2511		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2512		alu.src[0].sel = ctx->temp_reg;
2513		alu.src[0].chan = 1;
2514
2515		alu.dst.sel = ctx->temp_reg;
2516		alu.dst.chan = 1;
2517		alu.dst.write = 1;
2518		alu.last = 1;
2519
2520		r = r600_bc_add_alu(ctx->bc, &alu);
2521		if (r)
2522			return r;
2523
2524		r = r600_bc_add_literal(ctx->bc, ctx->value);
2525		if (r)
2526			return r;
2527
2528		memset(&alu, 0, sizeof(struct r600_bc_alu));
2529
2530		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2531		alu.src[0].sel = ctx->temp_reg;
2532		alu.src[0].chan = 1;
2533
2534		alu.dst.sel = ctx->temp_reg;
2535		alu.dst.chan = 1;
2536		alu.dst.write = 1;
2537		alu.last = 1;
2538
2539		r = r600_bc_add_alu(ctx->bc, &alu);
2540		if (r)
2541			return r;
2542
2543		r = r600_bc_add_literal(ctx->bc, ctx->value);
2544		if (r)
2545			return r;
2546
2547		memset(&alu, 0, sizeof(struct r600_bc_alu));
2548
2549		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2550
2551		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2552		if (r)
2553			return r;
2554
2555		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2556
2557		alu.src[1].sel = ctx->temp_reg;
2558		alu.src[1].chan = 1;
2559
2560		alu.dst.sel = ctx->temp_reg;
2561		alu.dst.chan = 1;
2562		alu.dst.write = 1;
2563		alu.last = 1;
2564
2565		r = r600_bc_add_alu(ctx->bc, &alu);
2566		if (r)
2567			return r;
2568
2569		r = r600_bc_add_literal(ctx->bc, ctx->value);
2570		if (r)
2571			return r;
2572	}
2573
2574	/* result.z = log2(src);*/
2575	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2576		memset(&alu, 0, sizeof(struct r600_bc_alu));
2577
2578		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2579		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2580		if (r)
2581			return r;
2582
2583		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2584
2585		alu.dst.sel = ctx->temp_reg;
2586		alu.dst.write = 1;
2587		alu.dst.chan = 2;
2588		alu.last = 1;
2589
2590		r = r600_bc_add_alu(ctx->bc, &alu);
2591		if (r)
2592			return r;
2593
2594		r = r600_bc_add_literal(ctx->bc, ctx->value);
2595		if (r)
2596			return r;
2597	}
2598
2599	/* result.w = 1.0; */
2600	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2601		memset(&alu, 0, sizeof(struct r600_bc_alu));
2602
2603		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2604		alu.src[0].sel = V_SQ_ALU_SRC_1;
2605		alu.src[0].chan = 0;
2606
2607		alu.dst.sel = ctx->temp_reg;
2608		alu.dst.chan = 3;
2609		alu.dst.write = 1;
2610		alu.last = 1;
2611
2612		r = r600_bc_add_alu(ctx->bc, &alu);
2613		if (r)
2614			return r;
2615
2616		r = r600_bc_add_literal(ctx->bc, ctx->value);
2617		if (r)
2618			return r;
2619	}
2620
2621	return tgsi_helper_copy(ctx, inst);
2622}
2623
2624/* r6/7 only for now */
2625static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2626{
2627	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2628	struct r600_bc_alu alu;
2629	int r;
2630
2631	memset(&alu, 0, sizeof(struct r600_bc_alu));
2632
2633	alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2634	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2635	if (r)
2636		return r;
2637	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2638	alu.last = 1;
2639	alu.dst.chan = 0;
2640	alu.dst.sel = ctx->temp_reg;
2641	alu.dst.write = 1;
2642	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2643	if (r)
2644		return r;
2645	memset(&alu, 0, sizeof(struct r600_bc_alu));
2646	alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2647	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2648	if (r)
2649		return r;
2650	alu.src[0].sel = ctx->temp_reg;
2651	alu.src[0].chan = 0;
2652	alu.last = 1;
2653	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2654	if (r)
2655		return r;
2656	return 0;
2657}
2658static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2659{
2660	/* TODO from r600c, ar values don't persist between clauses */
2661	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2662	struct r600_bc_alu alu;
2663	int r;
2664	memset(&alu, 0, sizeof(struct r600_bc_alu));
2665
2666	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
2667
2668	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2669	if (r)
2670		return r;
2671	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2672
2673	alu.last = 1;
2674
2675	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2676	if (r)
2677		return r;
2678	ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2679	return 0;
2680}
2681
2682static int tgsi_opdst(struct r600_shader_ctx *ctx)
2683{
2684	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2685	struct r600_bc_alu alu;
2686	int i, r = 0;
2687
2688	for (i = 0; i < 4; i++) {
2689		memset(&alu, 0, sizeof(struct r600_bc_alu));
2690
2691		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2692		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2693		if (r)
2694			return r;
2695
2696	        if (i == 0 || i == 3) {
2697			alu.src[0].sel = V_SQ_ALU_SRC_1;
2698		} else {
2699			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2700			if (r)
2701				return r;
2702			alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2703		}
2704
2705	        if (i == 0 || i == 2) {
2706			alu.src[1].sel = V_SQ_ALU_SRC_1;
2707		} else {
2708			r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
2709			if (r)
2710				return r;
2711			alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2712		}
2713		if (i == 3)
2714			alu.last = 1;
2715		r = r600_bc_add_alu(ctx->bc, &alu);
2716		if (r)
2717			return r;
2718	}
2719	return 0;
2720}
2721
2722static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2723{
2724	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2725	struct r600_bc_alu alu;
2726	int r;
2727
2728	memset(&alu, 0, sizeof(struct r600_bc_alu));
2729	alu.inst = opcode;
2730	alu.predicate = 1;
2731
2732	alu.dst.sel = ctx->temp_reg;
2733	alu.dst.write = 1;
2734	alu.dst.chan = 0;
2735
2736	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2737	if (r)
2738		return r;
2739	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2740	alu.src[1].sel = V_SQ_ALU_SRC_0;
2741	alu.src[1].chan = 0;
2742
2743	alu.last = 1;
2744
2745	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2746	if (r)
2747		return r;
2748	return 0;
2749}
2750
2751static int pops(struct r600_shader_ctx *ctx, int pops)
2752{
2753	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2754	ctx->bc->cf_last->pop_count = pops;
2755	return 0;
2756}
2757
2758static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2759{
2760	switch(reason) {
2761	case FC_PUSH_VPM:
2762		ctx->bc->callstack[ctx->bc->call_sp].current--;
2763		break;
2764	case FC_PUSH_WQM:
2765	case FC_LOOP:
2766		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2767		break;
2768	case FC_REP:
2769		/* TOODO : for 16 vp asic should -= 2; */
2770		ctx->bc->callstack[ctx->bc->call_sp].current --;
2771		break;
2772	}
2773}
2774
2775static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2776{
2777	if (check_max_only) {
2778		int diff;
2779		switch (reason) {
2780		case FC_PUSH_VPM:
2781			diff = 1;
2782			break;
2783		case FC_PUSH_WQM:
2784			diff = 4;
2785			break;
2786		default:
2787			assert(0);
2788			diff = 0;
2789		}
2790		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2791		    ctx->bc->callstack[ctx->bc->call_sp].max) {
2792			ctx->bc->callstack[ctx->bc->call_sp].max =
2793				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2794		}
2795		return;
2796	}
2797	switch (reason) {
2798	case FC_PUSH_VPM:
2799		ctx->bc->callstack[ctx->bc->call_sp].current++;
2800		break;
2801	case FC_PUSH_WQM:
2802	case FC_LOOP:
2803		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2804		break;
2805	case FC_REP:
2806		ctx->bc->callstack[ctx->bc->call_sp].current++;
2807		break;
2808	}
2809
2810	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2811	    ctx->bc->callstack[ctx->bc->call_sp].max) {
2812		ctx->bc->callstack[ctx->bc->call_sp].max =
2813			ctx->bc->callstack[ctx->bc->call_sp].current;
2814	}
2815}
2816
2817static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2818{
2819	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2820
2821	sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2822						sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2823	sp->mid[sp->num_mid] = ctx->bc->cf_last;
2824	sp->num_mid++;
2825}
2826
2827static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2828{
2829	ctx->bc->fc_sp++;
2830	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2831	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2832}
2833
2834static void fc_poplevel(struct r600_shader_ctx *ctx)
2835{
2836	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2837	if (sp->mid) {
2838		free(sp->mid);
2839		sp->mid = NULL;
2840	}
2841	sp->num_mid = 0;
2842	sp->start = NULL;
2843	sp->type = 0;
2844	ctx->bc->fc_sp--;
2845}
2846
#if 0
/*
 * Everything up to the matching #endif is compiled out. These are unfinished
 * helpers (several are empty stubs) for RETURN/JUMP emission and for
 * flag-based return/break handling.
 */

/* Append a RETURN control-flow instruction; always reports success. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/*
 * Append a JUMP control-flow instruction with pop_count set to @pops.
 * @offset is currently unused (see the TODO below).
 */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: does nothing and returns 0. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: empty body. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

/*
 * Sequence: test flag, jump, set the flag helper with V_SQ_ALU_SRC_0,
 * pop @ifidx + 1 levels, then return.
 */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Sequence: test flag, append the current instruction's r600 opcode as a CF
 * with pop_count 1, record it in the mid list of level @fc_sp, then pop one
 * level.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2894
2895static int tgsi_if(struct r600_shader_ctx *ctx)
2896{
2897	emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2898
2899	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2900
2901	fc_pushlevel(ctx, FC_IF);
2902
2903	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2904	return 0;
2905}
2906
2907static int tgsi_else(struct r600_shader_ctx *ctx)
2908{
2909	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2910	ctx->bc->cf_last->pop_count = 1;
2911
2912	fc_set_mid(ctx, ctx->bc->fc_sp);
2913	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2914	return 0;
2915}
2916
2917static int tgsi_endif(struct r600_shader_ctx *ctx)
2918{
2919	pops(ctx, 1);
2920	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2921		R600_ERR("if/endif unbalanced in shader\n");
2922		return -1;
2923	}
2924
2925	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2926		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2927		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2928	} else {
2929		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2930	}
2931	fc_poplevel(ctx);
2932
2933	callstack_decrease_current(ctx, FC_PUSH_VPM);
2934	return 0;
2935}
2936
2937static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2938{
2939	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2940
2941	fc_pushlevel(ctx, FC_LOOP);
2942
2943	/* check stack depth */
2944	callstack_check_depth(ctx, FC_LOOP, 0);
2945	return 0;
2946}
2947
2948static int tgsi_endloop(struct r600_shader_ctx *ctx)
2949{
2950	int i;
2951
2952	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
2953
2954	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2955		R600_ERR("loop/endloop in shader code are not paired.\n");
2956		return -EINVAL;
2957	}
2958
2959	/* fixup loop pointers - from r600isa
2960	   LOOP END points to CF after LOOP START,
2961	   LOOP START point to CF after LOOP END
2962	   BRK/CONT point to LOOP END CF
2963	*/
2964	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2965
2966	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2967
2968	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2969		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2970	}
2971	/* TODO add LOOPRET support */
2972	fc_poplevel(ctx);
2973	callstack_decrease_current(ctx, FC_LOOP);
2974	return 0;
2975}
2976
2977static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2978{
2979	unsigned int fscp;
2980
2981	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2982	{
2983		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2984			break;
2985	}
2986
2987	if (fscp == 0) {
2988		R600_ERR("Break not inside loop/endloop pair\n");
2989		return -EINVAL;
2990	}
2991
2992	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2993	ctx->bc->cf_last->pop_count = 1;
2994
2995	fc_set_mid(ctx, fscp);
2996
2997	pops(ctx, 1);
2998	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2999	return 0;
3000}
3001
3002static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
3003	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3004	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3005	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3006
3007	/* FIXME:
3008	 * For state trackers other than OpenGL, we'll want to use
3009	 * _RECIP_IEEE instead.
3010	 */
3011	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
3012
3013	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
3014	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3015	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3016	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3017	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3018	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3019	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3020	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3021	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3022	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3023	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3024	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3025	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3026	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3027	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3028	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3029	/* gap */
3030	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3031	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3032	/* gap */
3033	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3034	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3035	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3036	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3037	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3038	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3039	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3040	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3041	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3042	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3043	/* gap */
3044	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3045	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3046	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3047	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3048	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3049	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3050	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3051	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3052	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3053	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3054	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3055	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3056	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3057	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3058	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3059	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3060	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3061	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3062	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3063	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3064	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3065	{TGSI_OPCODE_TXD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3066	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3067	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3068	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3069	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3070	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3071	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3072	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3073	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3074	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3075	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3076	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3077	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3078	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3079	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3080	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3081	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3082	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3083	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3084	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3085	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3086	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3087	/* gap */
3088	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3089	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3090	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3091	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3092	/* gap */
3093	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3094	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3095	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3096	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3097	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3098	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3099	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3100	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
3101	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3102	/* gap */
3103	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3104	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3105	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3106	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3107	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3108	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3109	{TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3110	{TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3111	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3112	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3113	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3114	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3115	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3116	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3117	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3118	/* gap */
3119	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3120	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3121	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3122	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3123	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3124	/* gap */
3125	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3126	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3127	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3128	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3129	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3130	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3131	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3132	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3133	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3134	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3135	/* gap */
3136	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3137	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3138	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3139	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3140	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3141	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3142	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3143	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3144	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3145	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3146	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3147	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3148	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3149	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3150	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3151	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3152	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3153	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3154	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3155	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3156	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3157	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3158	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3159	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3160	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3161	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3162	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3163	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3164};
3165
/*
 * TGSI opcode translation table for the "eg" code path: its ALU and CF rows
 * use the EG_-prefixed opcode constants (presumably Evergreen -- confirm).
 *
 * Each initializer lists, in order:
 *   - the TGSI opcode, or a bare number for a slot marked as a gap;
 *   - a flag that is 1 only on the MAD row (the one row using an OP3 opcode
 *     constant) and 0 everywhere else;
 *   - the hardware opcode constant;
 *   - the handler function for the instruction.
 * The struct's field names are declared outside this table.
 *
 * Rows appear to follow ascending TGSI opcode value (the numeric gap fillers
 * 20, 22, 23, 32, 75, ... ascend), which suggests the table is indexed
 * directly by opcode number.  NOTE(review): confirm at the lookup site before
 * reordering, inserting or removing any row.
 *
 * Rows routed to tgsi_unsupported all carry the NOP opcode constant as a
 * placeholder.  Texture rows (handler tgsi_tex) use the un-prefixed
 * SQ_TEX_INST_* constants rather than EG_-prefixed ones.
 */
3166static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
3167	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3168	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3169	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3170	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
3171	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
3172	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3173	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3174	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3175	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	/* DP3, DP4, DPH and DP2 all share the DOT4 opcode and the tgsi_dp handler. */
3176	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3177	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3178	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3179	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3180	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
	/* SLT reuses SETGT through tgsi_op2_swap (presumably swapping the operands -- confirm). */
3181	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3182	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
	/* Only row with the second field set to 1; it is also the only OP3 opcode. */
3183	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
	/* SUB shares the ADD opcode; NOTE(review): negation is presumably applied inside tgsi_op2. */
3184	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3185	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3186	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3187	/* gap */
3188	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3189	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3190	/* gap */
3191	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3192	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3193	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3194	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3195	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3196	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3197	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3198	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3199	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3200	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3201	/* gap */
3202	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* ABS shares the MOV opcode; NOTE(review): the abs modifier is presumably set inside tgsi_op2. */
3203	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3204	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3205	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3206	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
	/* DDX/DDY go through the texture handler using the GET_GRADIENTS opcodes. */
3207	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3208	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3209	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3210	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3211	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3212	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3213	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3214	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3215	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3216	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3217	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3218	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
	/* SLE reuses SETGE through tgsi_op2_swap (presumably swapping the operands -- confirm). */
3219	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3220	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3221	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3222	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3223	{TGSI_OPCODE_TXD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* TXP uses the same SAMPLE opcode as TEX; any projection is left to tgsi_tex (not visible here). */
3224	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3225	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3226	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3227	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3228	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3229	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3230	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3231	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3232	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3233	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3234	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3235	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3236	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3237	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
	/*
	 * NOTE(review): TXB maps to the same SAMPLE_L opcode as TXL below.
	 * Confirm that LOD bias and explicit LOD are really meant to share
	 * one hardware instruction, or that tgsi_tex tells them apart.
	 */
3238	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3239	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3240	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3241	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3242	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	/* BRK and CONT carry CF (control-flow) opcodes and share tgsi_loop_brk_cont. */
3243	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3244	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3245	/* gap */
3246	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3247	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3248	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3249	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3250	/* gap */
3251	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3252	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3253	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3254	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3255	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3256	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3257	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3258	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
3259	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3260	/* gap */
3261	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3262	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3263	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3264	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3265	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3266	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3267	{TGSI_OPCODE_TXF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3268	{TGSI_OPCODE_TXQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3269	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3270	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3271	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3272	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3273	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3274	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3275	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3276	/* gap */
3277	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3278	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3279	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3280	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* NOTE(review): TGSI NOP is routed to tgsi_unsupported, not to a no-op handler -- confirm intended. */
3281	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3282	/* gap */
3283	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3284	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3285	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3286	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3287	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3288	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3289	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3290	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3291	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3292	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3293	/* gap */
3294	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* Every row from here to the end of the table is routed to tgsi_unsupported. */
3295	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3296	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3297	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3298	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3299	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3300	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3301	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3302	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3303	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3304	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3305	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3306	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3307	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3308	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3309	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3310	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3311	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3312	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3313	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3314	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3315	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3316	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3317	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3318	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3319	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3320	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3321	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3322};
3323