r600_shader.c revision ccb9be105602edaaff196046e324c8cb4a12fe0a
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "pipe/p_shader_tokens.h"
24#include "tgsi/tgsi_parse.h"
25#include "tgsi/tgsi_scan.h"
26#include "tgsi/tgsi_dump.h"
27#include "util/u_format.h"
28#include "r600_pipe.h"
29#include "r600_asm.h"
30#include "r600_sq.h"
31#include "r600_opcodes.h"
32#include "r600d.h"
33#include <stdio.h>
34#include <errno.h>
35
36static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
37{
38	struct r600_pipe_state *rstate = &shader->rstate;
39	struct r600_shader *rshader = &shader->shader;
40	unsigned spi_vs_out_id[10];
41	unsigned i, tmp;
42
43	/* clear previous register */
44	rstate->nregs = 0;
45
46	/* so far never got proper semantic id from tgsi */
47	for (i = 0; i < 10; i++) {
48		spi_vs_out_id[i] = 0;
49	}
50	for (i = 0; i < 32; i++) {
51		tmp = i << ((i & 3) * 8);
52		spi_vs_out_id[i / 4] |= tmp;
53	}
54	for (i = 0; i < 10; i++) {
55		r600_pipe_state_add_reg(rstate,
56					R_028614_SPI_VS_OUT_ID_0 + i * 4,
57					spi_vs_out_id[i], 0xFFFFFFFF, NULL);
58	}
59
60	r600_pipe_state_add_reg(rstate,
61			R_0286C4_SPI_VS_OUT_CONFIG,
62			S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
63			0xFFFFFFFF, NULL);
64	r600_pipe_state_add_reg(rstate,
65			R_028868_SQ_PGM_RESOURCES_VS,
66			S_028868_NUM_GPRS(rshader->bc.ngpr) |
67			S_028868_STACK_SIZE(rshader->bc.nstack),
68			0xFFFFFFFF, NULL);
69	r600_pipe_state_add_reg(rstate,
70			R_0288A4_SQ_PGM_RESOURCES_FS,
71			0x00000000, 0xFFFFFFFF, NULL);
72	r600_pipe_state_add_reg(rstate,
73			R_0288D0_SQ_PGM_CF_OFFSET_VS,
74			0x00000000, 0xFFFFFFFF, NULL);
75	r600_pipe_state_add_reg(rstate,
76			R_0288DC_SQ_PGM_CF_OFFSET_FS,
77			0x00000000, 0xFFFFFFFF, NULL);
78	r600_pipe_state_add_reg(rstate,
79			R_028858_SQ_PGM_START_VS,
80			r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
81	r600_pipe_state_add_reg(rstate,
82			R_028894_SQ_PGM_START_FS,
83			r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
84
85	r600_pipe_state_add_reg(rstate,
86				R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
87				0xFFFFFFFF, NULL);
88
89}
90
91int r600_find_vs_semantic_index(struct r600_shader *vs,
92				struct r600_shader *ps, int id)
93{
94	struct r600_shader_io *input = &ps->input[id];
95
96	for (int i = 0; i < vs->noutput; i++) {
97		if (input->name == vs->output[i].name &&
98			input->sid == vs->output[i].sid) {
99			return i - 1;
100		}
101	}
102	return 0;
103}
104
105static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
106{
107	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
108	struct r600_pipe_state *rstate = &shader->rstate;
109	struct r600_shader *rshader = &shader->shader;
110	unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
111	int pos_index = -1, face_index = -1;
112
113	/* clear previous register */
114	rstate->nregs = 0;
115
116	for (i = 0; i < rshader->ninput; i++) {
117		tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
118		if (rshader->input[i].centroid)
119			tmp |= S_028644_SEL_CENTROID(1);
120		if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
121			tmp |= S_028644_SEL_LINEAR(1);
122
123		if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
124			pos_index = i;
125		if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
126		    rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
127		    rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
128			tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
129		}
130		if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
131			face_index = i;
132		if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
133			rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
134			tmp |= S_028644_PT_SPRITE_TEX(1);
135		}
136		r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
137	}
138	for (i = 0; i < rshader->noutput; i++) {
139		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
140			r600_pipe_state_add_reg(rstate,
141						R_02880C_DB_SHADER_CONTROL,
142						S_02880C_Z_EXPORT_ENABLE(1),
143						S_02880C_Z_EXPORT_ENABLE(1), NULL);
144		if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
145			r600_pipe_state_add_reg(rstate,
146						R_02880C_DB_SHADER_CONTROL,
147						S_02880C_STENCIL_REF_EXPORT_ENABLE(1),
148						S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL);
149	}
150
151	exports_ps = 0;
152	num_cout = 0;
153	for (i = 0; i < rshader->noutput; i++) {
154		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
155			exports_ps |= 1;
156		else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
157			num_cout++;
158		}
159	}
160	exports_ps |= S_028854_EXPORT_COLORS(num_cout);
161	if (!exports_ps) {
162		/* always at least export 1 component per pixel */
163		exports_ps = 2;
164	}
165
166	spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
167				S_0286CC_PERSP_GRADIENT_ENA(1);
168	spi_input_z = 0;
169	if (pos_index != -1) {
170		spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
171					S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
172					S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
173					S_0286CC_BARYC_SAMPLE_CNTL(1));
174		spi_input_z |= 1;
175	}
176
177	spi_ps_in_control_1 = 0;
178	if (face_index != -1) {
179		spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
180			S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
181	}
182
183	r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
184	r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
185	r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
186	r600_pipe_state_add_reg(rstate,
187				R_028840_SQ_PGM_START_PS,
188				r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
189	r600_pipe_state_add_reg(rstate,
190				R_028850_SQ_PGM_RESOURCES_PS,
191				S_028868_NUM_GPRS(rshader->bc.ngpr) |
192				S_028868_STACK_SIZE(rshader->bc.nstack),
193				0xFFFFFFFF, NULL);
194	r600_pipe_state_add_reg(rstate,
195				R_028854_SQ_PGM_EXPORTS_PS,
196				exports_ps, 0xFFFFFFFF, NULL);
197	r600_pipe_state_add_reg(rstate,
198				R_0288CC_SQ_PGM_CF_OFFSET_PS,
199				0x00000000, 0xFFFFFFFF, NULL);
200
201	if (rshader->uses_kill) {
202		/* only set some bits here, the other bits are set in the dsa state */
203		r600_pipe_state_add_reg(rstate,
204					R_02880C_DB_SHADER_CONTROL,
205					S_02880C_KILL_ENABLE(1),
206					S_02880C_KILL_ENABLE(1), NULL);
207	}
208	r600_pipe_state_add_reg(rstate,
209				R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
210				0xFFFFFFFF, NULL);
211}
212
213static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
214{
215	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
216	struct r600_shader *rshader = &shader->shader;
217	void *ptr;
218
219	/* copy new shader */
220	if (shader->bo == NULL) {
221		shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0);
222		if (shader->bo == NULL) {
223			return -ENOMEM;
224		}
225		ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
226		memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
227		r600_bo_unmap(rctx->radeon, shader->bo);
228	}
229	/* build state */
230	rshader->flat_shade = rctx->flatshade;
231	switch (rshader->processor_type) {
232	case TGSI_PROCESSOR_VERTEX:
233		if (rshader->family >= CHIP_CEDAR) {
234			evergreen_pipe_shader_vs(ctx, shader);
235		} else {
236			r600_pipe_shader_vs(ctx, shader);
237		}
238		break;
239	case TGSI_PROCESSOR_FRAGMENT:
240		if (rshader->family >= CHIP_CEDAR) {
241			evergreen_pipe_shader_ps(ctx, shader);
242		} else {
243			r600_pipe_shader_ps(ctx, shader);
244		}
245		break;
246	default:
247		return -EINVAL;
248	}
249	r600_context_pipe_state_set(&rctx->ctx, &shader->rstate);
250	return 0;
251}
252
253static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader)
254{
255	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
256	struct r600_shader *shader = &rshader->shader;
257	const struct util_format_description *desc;
258	enum pipe_format resource_format[160];
259	unsigned i, nresources = 0;
260	struct r600_bc *bc = &shader->bc;
261	struct r600_bc_cf *cf;
262	struct r600_bc_vtx *vtx;
263
264	if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
265		return 0;
266	/* doing a full memcmp fell over the refcount */
267	if ((rshader->vertex_elements.count == rctx->vertex_elements->count) &&
268	    (!memcmp(&rshader->vertex_elements.elements, &rctx->vertex_elements->elements, 32 * sizeof(struct pipe_vertex_element)))) {
269		return 0;
270	}
271	rshader->vertex_elements = *rctx->vertex_elements;
272	for (i = 0; i < rctx->vertex_elements->count; i++) {
273		resource_format[nresources++] = rctx->vertex_elements->hw_format[i];
274	}
275	r600_bo_reference(rctx->radeon, &rshader->bo, NULL);
276	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
277		switch (cf->inst) {
278		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
279		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
280			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
281				desc = util_format_description(resource_format[vtx->buffer_id]);
282				if (desc == NULL) {
283					R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
284					return -EINVAL;
285				}
286				vtx->dst_sel_x = desc->swizzle[0];
287				vtx->dst_sel_y = desc->swizzle[1];
288				vtx->dst_sel_z = desc->swizzle[2];
289				vtx->dst_sel_w = desc->swizzle[3];
290			}
291			break;
292		default:
293			break;
294		}
295	}
296	return r600_bc_build(&shader->bc);
297}
298
299int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader)
300{
301	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
302	int r;
303
304	if (shader == NULL)
305		return -EINVAL;
306	/* there should be enough input */
307	if (rctx->vertex_elements->count < shader->shader.bc.nresource) {
308		R600_ERR("%d resources provided, expecting %d\n",
309			rctx->vertex_elements->count, shader->shader.bc.nresource);
310		return -EINVAL;
311	}
312	r = r600_shader_update(ctx, shader);
313	if (r)
314		return r;
315	return r600_pipe_shader(ctx, shader);
316}
317
318int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
319int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
320{
321	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
322	int r;
323
324//fprintf(stderr, "--------------------------------------------------------------\n");
325//tgsi_dump(tokens, 0);
326	shader->shader.family = r600_get_family(rctx->radeon);
327	r = r600_shader_from_tgsi(tokens, &shader->shader);
328	if (r) {
329		R600_ERR("translation from TGSI failed !\n");
330		return r;
331	}
332	r = r600_bc_build(&shader->shader.bc);
333	if (r) {
334		R600_ERR("building bytecode failed !\n");
335		return r;
336	}
337//fprintf(stderr, "______________________________________________________________\n");
338	return 0;
339}
340
341void
342r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
343{
344	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
345	struct r600_bc_cf *cf, *next_cf;
346
347	r600_bo_reference(rctx->radeon, &shader->bo, NULL);
348
349	/* FIXME: is there more stuff to free? */
350}
351
352/*
353 * tgsi -> r600 shader
354 */
355struct r600_shader_tgsi_instruction;
356
357struct r600_shader_ctx {
358	struct tgsi_shader_info			info;
359	struct tgsi_parse_context		parse;
360	const struct tgsi_token			*tokens;
361	unsigned				type;
362	unsigned				file_offset[TGSI_FILE_COUNT];
363	unsigned				temp_reg;
364	struct r600_shader_tgsi_instruction	*inst_info;
365	struct r600_bc				*bc;
366	struct r600_shader			*shader;
367	u32					value[4];
368	u32					*literals;
369	u32					nliterals;
370	u32					max_driver_temp_used;
371	/* needed for evergreen interpolation */
372	boolean                                 input_centroid;
373	boolean                                 input_linear;
374	boolean                                 input_perspective;
375	int					num_interp_gpr;
376};
377
/* One entry of the TGSI opcode translation tables. */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry describes */
	unsigned tgsi_opcode;
	/* NOTE(review): presumably non-zero for three-source ALU ops - confirm at the table definitions */
	unsigned is_op3;
	/* hardware opcode used by the handlers for the emitted ALU instruction */
	unsigned r600_opcode;
	/* translation handler; a non-zero return aborts the translation */
	int (*process)(struct r600_shader_ctx *ctx);
};

/* opcode tables, indexed by TGSI opcode; defined further down in this file */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
387
388static int tgsi_is_supported(struct r600_shader_ctx *ctx)
389{
390	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
391	int j;
392
393	if (i->Instruction.NumDstRegs > 1) {
394		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
395		return -EINVAL;
396	}
397	if (i->Instruction.Predicate) {
398		R600_ERR("predicate unsupported\n");
399		return -EINVAL;
400	}
401#if 0
402	if (i->Instruction.Label) {
403		R600_ERR("label unsupported\n");
404		return -EINVAL;
405	}
406#endif
407	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
408		if (i->Src[j].Register.Dimension) {
409			R600_ERR("unsupported src %d (dimension %d)\n", j,
410				 i->Src[j].Register.Dimension);
411			return -EINVAL;
412		}
413	}
414	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
415		if (i->Dst[j].Register.Dimension) {
416			R600_ERR("unsupported dst (dimension)\n");
417			return -EINVAL;
418		}
419	}
420	return 0;
421}
422
423static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
424{
425	int i, r;
426	struct r600_bc_alu alu;
427	int gpr = 0, base_chan = 0;
428	int ij_index = 0;
429
430	if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
431		ij_index = 0;
432		if (ctx->shader->input[input].centroid)
433			ij_index++;
434	} else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
435		ij_index = 0;
436		/* if we have perspective add one */
437		if (ctx->input_perspective)  {
438			ij_index++;
439			/* if we have perspective centroid */
440			if (ctx->input_centroid)
441				ij_index++;
442		}
443		if (ctx->shader->input[input].centroid)
444			ij_index++;
445	}
446
447	/* work out gpr and base_chan from index */
448	gpr = ij_index / 2;
449	base_chan = (2 * (ij_index % 2)) + 1;
450
451	for (i = 0; i < 8; i++) {
452		memset(&alu, 0, sizeof(struct r600_bc_alu));
453
454		if (i < 4)
455			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
456		else
457			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
458
459		if ((i > 1) && (i < 6)) {
460			alu.dst.sel = ctx->shader->input[input].gpr;
461			alu.dst.write = 1;
462		}
463
464		alu.dst.chan = i % 4;
465
466		alu.src[0].sel = gpr;
467		alu.src[0].chan = (base_chan - (i % 2));
468
469		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
470
471		alu.bank_swizzle_force = SQ_ALU_VEC_210;
472		if ((i % 4) == 3)
473			alu.last = 1;
474		r = r600_bc_add_alu(ctx->bc, &alu);
475		if (r)
476			return r;
477	}
478	return 0;
479}
480
481
482static int tgsi_declaration(struct r600_shader_ctx *ctx)
483{
484	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
485	struct r600_bc_vtx vtx;
486	unsigned i;
487	int r;
488
489	switch (d->Declaration.File) {
490	case TGSI_FILE_INPUT:
491		i = ctx->shader->ninput++;
492		ctx->shader->input[i].name = d->Semantic.Name;
493		ctx->shader->input[i].sid = d->Semantic.Index;
494		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
495		ctx->shader->input[i].centroid = d->Declaration.Centroid;
496		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
497		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
498			/* turn input into fetch */
499			memset(&vtx, 0, sizeof(struct r600_bc_vtx));
500			vtx.inst = 0;
501			vtx.fetch_type = 0;
502			vtx.buffer_id = i;
503			/* register containing the index into the buffer */
504			vtx.src_gpr = 0;
505			vtx.src_sel_x = 0;
506			vtx.mega_fetch_count = 0x1F;
507			vtx.dst_gpr = ctx->shader->input[i].gpr;
508			vtx.dst_sel_x = 0;
509			vtx.dst_sel_y = 1;
510			vtx.dst_sel_z = 2;
511			vtx.dst_sel_w = 3;
512			vtx.use_const_fields = 1;
513			r = r600_bc_add_vtx(ctx->bc, &vtx);
514			if (r)
515				return r;
516		}
517		if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == 2) {
518			/* turn input into interpolate on EG */
519			if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
520				if (ctx->shader->input[i].interpolate > 0) {
521					ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
522					evergreen_interp_alu(ctx, i);
523				}
524			}
525		}
526		break;
527	case TGSI_FILE_OUTPUT:
528		i = ctx->shader->noutput++;
529		ctx->shader->output[i].name = d->Semantic.Name;
530		ctx->shader->output[i].sid = d->Semantic.Index;
531		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
532		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
533		break;
534	case TGSI_FILE_CONSTANT:
535	case TGSI_FILE_TEMPORARY:
536	case TGSI_FILE_SAMPLER:
537	case TGSI_FILE_ADDRESS:
538		break;
539	default:
540		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
541		return -EINVAL;
542	}
543	return 0;
544}
545
546static int r600_get_temp(struct r600_shader_ctx *ctx)
547{
548	return ctx->temp_reg + ctx->max_driver_temp_used++;
549}
550
551/*
552 * for evergreen we need to scan the shader to find the number of GPRs we need to
553 * reserve for interpolation.
554 *
555 * we need to know if we are going to emit
556 * any centroid inputs
557 * if perspective and linear are required
558*/
559static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
560{
561	int i;
562	int num_baryc;
563
564	ctx->input_linear = FALSE;
565	ctx->input_perspective = FALSE;
566	ctx->input_centroid = FALSE;
567	ctx->num_interp_gpr = 1;
568
569	/* any centroid inputs */
570	for (i = 0; i < ctx->info.num_inputs; i++) {
571		/* skip position/face */
572		if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
573		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
574			continue;
575		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
576			ctx->input_linear = TRUE;
577		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
578			ctx->input_perspective = TRUE;
579		if (ctx->info.input_centroid[i])
580			ctx->input_centroid = TRUE;
581	}
582
583	num_baryc = 0;
584	/* ignoring sample for now */
585	if (ctx->input_perspective)
586		num_baryc++;
587	if (ctx->input_linear)
588		num_baryc++;
589	if (ctx->input_centroid)
590		num_baryc *= 2;
591
592	ctx->num_interp_gpr += (num_baryc + 1) >> 1;
593
594	/* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
595	return ctx->num_interp_gpr;
596}
597
598int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
599{
600	struct tgsi_full_immediate *immediate;
601	struct r600_shader_ctx ctx;
602	struct r600_bc_output output[32];
603	unsigned output_done, noutput;
604	unsigned opcode;
605	int i, r = 0, pos0;
606
607	ctx.bc = &shader->bc;
608	ctx.shader = shader;
609	r = r600_bc_init(ctx.bc, shader->family);
610	if (r)
611		return r;
612	ctx.tokens = tokens;
613	tgsi_scan_shader(tokens, &ctx.info);
614	tgsi_parse_init(&ctx.parse, tokens);
615	ctx.type = ctx.parse.FullHeader.Processor.Processor;
616	shader->processor_type = ctx.type;
617
618	/* register allocations */
619	/* Values [0,127] correspond to GPR[0..127].
620	 * Values [128,159] correspond to constant buffer bank 0
621	 * Values [160,191] correspond to constant buffer bank 1
622	 * Values [256,511] correspond to cfile constants c[0..255].
623	 * Other special values are shown in the list below.
624	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
625	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
626	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
627	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
628	 * 248	SQ_ALU_SRC_0: special constant 0.0.
629	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
630	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
631	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
632	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
633	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
634	 * 254	SQ_ALU_SRC_PV: previous vector result.
635	 * 255	SQ_ALU_SRC_PS: previous scalar result.
636	 */
637	for (i = 0; i < TGSI_FILE_COUNT; i++) {
638		ctx.file_offset[i] = 0;
639	}
640	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
641		ctx.file_offset[TGSI_FILE_INPUT] = 1;
642	}
643	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == 2) {
644		ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
645	}
646	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
647						ctx.info.file_count[TGSI_FILE_INPUT];
648	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
649						ctx.info.file_count[TGSI_FILE_OUTPUT];
650
651	ctx.file_offset[TGSI_FILE_CONSTANT] = 128;
652
653	ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
654	ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
655			ctx.info.file_count[TGSI_FILE_TEMPORARY];
656
657	ctx.nliterals = 0;
658	ctx.literals = NULL;
659
660	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
661		tgsi_parse_token(&ctx.parse);
662		switch (ctx.parse.FullToken.Token.Type) {
663		case TGSI_TOKEN_TYPE_IMMEDIATE:
664			immediate = &ctx.parse.FullToken.FullImmediate;
665			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
666			if(ctx.literals == NULL) {
667				r = -ENOMEM;
668				goto out_err;
669			}
670			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
671			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
672			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
673			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
674			ctx.nliterals++;
675			break;
676		case TGSI_TOKEN_TYPE_DECLARATION:
677			r = tgsi_declaration(&ctx);
678			if (r)
679				goto out_err;
680			break;
681		case TGSI_TOKEN_TYPE_INSTRUCTION:
682			r = tgsi_is_supported(&ctx);
683			if (r)
684				goto out_err;
685			ctx.max_driver_temp_used = 0;
686			/* reserve first tmp for everyone */
687			r600_get_temp(&ctx);
688			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
689			if (ctx.bc->chiprev == 2)
690				ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
691			else
692				ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
693			r = ctx.inst_info->process(&ctx);
694			if (r)
695				goto out_err;
696			r = r600_bc_add_literal(ctx.bc, ctx.value);
697			if (r)
698				goto out_err;
699			break;
700		default:
701			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
702			r = -EINVAL;
703			goto out_err;
704		}
705	}
706	/* export output */
707	noutput = shader->noutput;
708	for (i = 0, pos0 = 0; i < noutput; i++) {
709		memset(&output[i], 0, sizeof(struct r600_bc_output));
710		output[i].gpr = shader->output[i].gpr;
711		output[i].elem_size = 3;
712		output[i].swizzle_x = 0;
713		output[i].swizzle_y = 1;
714		output[i].swizzle_z = 2;
715		output[i].swizzle_w = 3;
716		output[i].barrier = 1;
717		output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
718		output[i].array_base = i - pos0;
719		output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
720		switch (ctx.type) {
721		case TGSI_PROCESSOR_VERTEX:
722			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
723				output[i].array_base = 60;
724				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
725				/* position doesn't count in array_base */
726				pos0++;
727			}
728			if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
729				output[i].array_base = 61;
730				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
731				/* position doesn't count in array_base */
732				pos0++;
733			}
734			break;
735		case TGSI_PROCESSOR_FRAGMENT:
736			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
737				output[i].array_base = shader->output[i].sid;
738				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
739			} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
740				output[i].array_base = 61;
741				output[i].swizzle_x = 2;
742				output[i].swizzle_y = 7;
743				output[i].swizzle_z = output[i].swizzle_w = 7;
744				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
745			} else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
746				output[i].array_base = 61;
747				output[i].swizzle_x = 7;
748				output[i].swizzle_y = 1;
749				output[i].swizzle_z = output[i].swizzle_w = 7;
750				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
751			} else {
752				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
753				r = -EINVAL;
754				goto out_err;
755			}
756			break;
757		default:
758			R600_ERR("unsupported processor type %d\n", ctx.type);
759			r = -EINVAL;
760			goto out_err;
761		}
762	}
763	/* add fake param output for vertex shader if no param is exported */
764	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
765		for (i = 0, pos0 = 0; i < noutput; i++) {
766			if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
767				pos0 = 1;
768				break;
769			}
770		}
771		if (!pos0) {
772			memset(&output[i], 0, sizeof(struct r600_bc_output));
773			output[i].gpr = 0;
774			output[i].elem_size = 3;
775			output[i].swizzle_x = 0;
776			output[i].swizzle_y = 1;
777			output[i].swizzle_z = 2;
778			output[i].swizzle_w = 3;
779			output[i].barrier = 1;
780			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
781			output[i].array_base = 0;
782			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
783			noutput++;
784		}
785	}
786	/* add fake pixel export */
787	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
788		memset(&output[0], 0, sizeof(struct r600_bc_output));
789		output[0].gpr = 0;
790		output[0].elem_size = 3;
791		output[0].swizzle_x = 7;
792		output[0].swizzle_y = 7;
793		output[0].swizzle_z = 7;
794		output[0].swizzle_w = 7;
795		output[0].barrier = 1;
796		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
797		output[0].array_base = 0;
798		output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
799		noutput++;
800	}
801	/* set export done on last export of each type */
802	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
803		if (i == (noutput - 1)) {
804			output[i].end_of_program = 1;
805		}
806		if (!(output_done & (1 << output[i].type))) {
807			output_done |= (1 << output[i].type);
808			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
809		}
810	}
811	/* add output to bytecode */
812	for (i = 0; i < noutput; i++) {
813		r = r600_bc_add_output(ctx.bc, &output[i]);
814		if (r)
815			goto out_err;
816	}
817	free(ctx.literals);
818	tgsi_parse_free(&ctx.parse);
819	return 0;
820out_err:
821	free(ctx.literals);
822	tgsi_parse_free(&ctx.parse);
823	return r;
824}
825
826static int tgsi_unsupported(struct r600_shader_ctx *ctx)
827{
828	R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
829	return -EINVAL;
830}
831
/* Handler that emits nothing and always succeeds. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* nothing to translate */
	return 0;
}
836
837static int tgsi_src(struct r600_shader_ctx *ctx,
838			const struct tgsi_full_src_register *tgsi_src,
839			struct r600_bc_alu_src *r600_src)
840{
841	int index;
842	memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
843	r600_src->sel = tgsi_src->Register.Index;
844	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
845		r600_src->sel = 0;
846		index = tgsi_src->Register.Index;
847		ctx->value[0] = ctx->literals[index * 4 + 0];
848		ctx->value[1] = ctx->literals[index * 4 + 1];
849		ctx->value[2] = ctx->literals[index * 4 + 2];
850		ctx->value[3] = ctx->literals[index * 4 + 3];
851	}
852	if (tgsi_src->Register.Indirect)
853		r600_src->rel = V_SQ_REL_RELATIVE;
854	r600_src->neg = tgsi_src->Register.Negate;
855	r600_src->abs = tgsi_src->Register.Absolute;
856	r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
857	return 0;
858}
859
860static int tgsi_dst(struct r600_shader_ctx *ctx,
861			const struct tgsi_full_dst_register *tgsi_dst,
862			unsigned swizzle,
863			struct r600_bc_alu_dst *r600_dst)
864{
865	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
866
867	r600_dst->sel = tgsi_dst->Register.Index;
868	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
869	r600_dst->chan = swizzle;
870	r600_dst->write = 1;
871	if (tgsi_dst->Register.Indirect)
872		r600_dst->rel = V_SQ_REL_RELATIVE;
873	if (inst->Instruction.Saturate) {
874		r600_dst->clamp = 1;
875	}
876	return 0;
877}
878
879static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
880{
881	switch (swizzle) {
882	case 0:
883		return tgsi_src->Register.SwizzleX;
884	case 1:
885		return tgsi_src->Register.SwizzleY;
886	case 2:
887		return tgsi_src->Register.SwizzleZ;
888	case 3:
889		return tgsi_src->Register.SwizzleW;
890	default:
891		return 0;
892	}
893}
894
895static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
896{
897	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
898	struct r600_bc_alu alu;
899	int i, j, k, nconst, r;
900
901	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
902		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
903			nconst++;
904		}
905		r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
906		if (r) {
907			return r;
908		}
909	}
910	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
911		if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
912			int treg = r600_get_temp(ctx);
913			for (k = 0; k < 4; k++) {
914				memset(&alu, 0, sizeof(struct r600_bc_alu));
915				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
916				alu.src[0].sel = r600_src[i].sel;
917				alu.src[0].chan = k;
918				alu.src[0].rel = r600_src[i].rel;
919				alu.dst.sel = treg;
920				alu.dst.chan = k;
921				alu.dst.write = 1;
922				if (k == 3)
923					alu.last = 1;
924				r = r600_bc_add_alu(ctx->bc, &alu);
925				if (r)
926					return r;
927			}
928			r600_src[i].sel = treg;
929			r600_src[i].rel =0;
930			j--;
931		}
932	}
933	return 0;
934}
935
936/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
937static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
938{
939	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
940	struct r600_bc_alu alu;
941	int i, j, k, nliteral, r;
942
943	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
944		if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
945			nliteral++;
946		}
947	}
948	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
949		if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
950			int treg = r600_get_temp(ctx);
951			for (k = 0; k < 4; k++) {
952				memset(&alu, 0, sizeof(struct r600_bc_alu));
953				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
954				alu.src[0].sel = r600_src[i].sel;
955				alu.src[0].chan = k;
956				alu.dst.sel = treg;
957				alu.dst.chan = k;
958				alu.dst.write = 1;
959				if (k == 3)
960					alu.last = 1;
961				r = r600_bc_add_alu(ctx->bc, &alu);
962				if (r)
963					return r;
964			}
965			r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]);
966			if (r)
967				return r;
968			r600_src[i].sel = treg;
969			j--;
970		}
971	}
972	return 0;
973}
974
975static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
976{
977	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
978	struct r600_bc_alu_src r600_src[3];
979	struct r600_bc_alu alu;
980	int i, j, r;
981	int lasti = 0;
982
983	for (i = 0; i < 4; i++) {
984		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
985			lasti = i;
986		}
987	}
988
989	r = tgsi_split_constant(ctx, r600_src);
990	if (r)
991		return r;
992	r = tgsi_split_literal_constant(ctx, r600_src);
993	if (r)
994		return r;
995	for (i = 0; i < lasti + 1; i++) {
996		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
997			continue;
998
999		memset(&alu, 0, sizeof(struct r600_bc_alu));
1000		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1001		if (r)
1002			return r;
1003
1004		alu.inst = ctx->inst_info->r600_opcode;
1005		if (!swap) {
1006			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1007				alu.src[j] = r600_src[j];
1008				alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1009			}
1010		} else {
1011			alu.src[0] = r600_src[1];
1012			alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
1013
1014			alu.src[1] = r600_src[0];
1015			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1016		}
1017		/* handle some special cases */
1018		switch (ctx->inst_info->tgsi_opcode) {
1019		case TGSI_OPCODE_SUB:
1020			alu.src[1].neg = 1;
1021			break;
1022		case TGSI_OPCODE_ABS:
1023			alu.src[0].abs = 1;
1024			break;
1025		default:
1026			break;
1027		}
1028		if (i == lasti) {
1029			alu.last = 1;
1030		}
1031		r = r600_bc_add_alu(ctx->bc, &alu);
1032		if (r)
1033			return r;
1034	}
1035	return 0;
1036}
1037
/* Per-channel translation with the source operands in TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_operands = 0;

	return tgsi_op2_s(ctx, swap_operands);
}
1042
/* Per-channel translation with the first two source operands exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_operands = 1;

	return tgsi_op2_s(ctx, swap_operands);
}
1047
1048/*
1049 * r600 - trunc to -PI..PI range
1050 * r700 - normalize by dividing by 2PI
1051 * see fdo bug 27901
1052 */
1053static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
1054			   struct r600_bc_alu_src r600_src[3])
1055{
1056	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1057	int r;
1058	uint32_t lit_vals[4];
1059	struct r600_bc_alu alu;
1060
1061	memset(lit_vals, 0, 4*4);
1062	r = tgsi_split_constant(ctx, r600_src);
1063	if (r)
1064		return r;
1065	r = tgsi_split_literal_constant(ctx, r600_src);
1066	if (r)
1067		return r;
1068
1069	r = tgsi_split_literal_constant(ctx, r600_src);
1070	if (r)
1071		return r;
1072
1073	lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
1074	lit_vals[1] = fui(0.5f);
1075
1076	memset(&alu, 0, sizeof(struct r600_bc_alu));
1077	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1078	alu.is_op3 = 1;
1079
1080	alu.dst.chan = 0;
1081	alu.dst.sel = ctx->temp_reg;
1082	alu.dst.write = 1;
1083
1084	alu.src[0] = r600_src[0];
1085	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1086
1087	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1088	alu.src[1].chan = 0;
1089	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1090	alu.src[2].chan = 1;
1091	alu.last = 1;
1092	r = r600_bc_add_alu(ctx->bc, &alu);
1093	if (r)
1094		return r;
1095	r = r600_bc_add_literal(ctx->bc, lit_vals);
1096	if (r)
1097		return r;
1098
1099	memset(&alu, 0, sizeof(struct r600_bc_alu));
1100	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1101
1102	alu.dst.chan = 0;
1103	alu.dst.sel = ctx->temp_reg;
1104	alu.dst.write = 1;
1105
1106	alu.src[0].sel = ctx->temp_reg;
1107	alu.src[0].chan = 0;
1108	alu.last = 1;
1109	r = r600_bc_add_alu(ctx->bc, &alu);
1110	if (r)
1111		return r;
1112
1113	if (ctx->bc->chiprev == 0) {
1114		lit_vals[0] = fui(3.1415926535897f * 2.0f);
1115		lit_vals[1] = fui(-3.1415926535897f);
1116	} else {
1117		lit_vals[0] = fui(1.0f);
1118		lit_vals[1] = fui(-0.5f);
1119	}
1120
1121	memset(&alu, 0, sizeof(struct r600_bc_alu));
1122	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1123	alu.is_op3 = 1;
1124
1125	alu.dst.chan = 0;
1126	alu.dst.sel = ctx->temp_reg;
1127	alu.dst.write = 1;
1128
1129	alu.src[0].sel = ctx->temp_reg;
1130	alu.src[0].chan = 0;
1131
1132	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1133	alu.src[1].chan = 0;
1134	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1135	alu.src[2].chan = 1;
1136	alu.last = 1;
1137	r = r600_bc_add_alu(ctx->bc, &alu);
1138	if (r)
1139		return r;
1140	r = r600_bc_add_literal(ctx->bc, lit_vals);
1141	if (r)
1142		return r;
1143	return 0;
1144}
1145
1146static int tgsi_trig(struct r600_shader_ctx *ctx)
1147{
1148	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1149	struct r600_bc_alu_src r600_src[3];
1150	struct r600_bc_alu alu;
1151	int i, r;
1152	int lasti = 0;
1153
1154	r = tgsi_setup_trig(ctx, r600_src);
1155	if (r)
1156		return r;
1157
1158	memset(&alu, 0, sizeof(struct r600_bc_alu));
1159	alu.inst = ctx->inst_info->r600_opcode;
1160	alu.dst.chan = 0;
1161	alu.dst.sel = ctx->temp_reg;
1162	alu.dst.write = 1;
1163
1164	alu.src[0].sel = ctx->temp_reg;
1165	alu.src[0].chan = 0;
1166	alu.last = 1;
1167	r = r600_bc_add_alu(ctx->bc, &alu);
1168	if (r)
1169		return r;
1170
1171	/* replicate result */
1172	for (i = 0; i < 4; i++) {
1173		if (inst->Dst[0].Register.WriteMask & (1 << i))
1174			lasti = i;
1175	}
1176	for (i = 0; i < lasti + 1; i++) {
1177		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1178			continue;
1179
1180		memset(&alu, 0, sizeof(struct r600_bc_alu));
1181		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1182
1183		alu.src[0].sel = ctx->temp_reg;
1184		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1185		if (r)
1186			return r;
1187		if (i == lasti)
1188			alu.last = 1;
1189		r = r600_bc_add_alu(ctx->bc, &alu);
1190		if (r)
1191			return r;
1192	}
1193	return 0;
1194}
1195
1196static int tgsi_scs(struct r600_shader_ctx *ctx)
1197{
1198	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1199	struct r600_bc_alu_src r600_src[3];
1200	struct r600_bc_alu alu;
1201	int r;
1202
1203	/* We'll only need the trig stuff if we are going to write to the
1204	 * X or Y components of the destination vector.
1205	 */
1206	if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1207		r = tgsi_setup_trig(ctx, r600_src);
1208		if (r)
1209			return r;
1210	}
1211
1212	/* dst.x = COS */
1213	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1214		memset(&alu, 0, sizeof(struct r600_bc_alu));
1215		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1216		r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1217		if (r)
1218			return r;
1219
1220		alu.src[0].sel = ctx->temp_reg;
1221		alu.src[0].chan = 0;
1222		alu.last = 1;
1223		r = r600_bc_add_alu(ctx->bc, &alu);
1224		if (r)
1225			return r;
1226	}
1227
1228	/* dst.y = SIN */
1229	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1230		memset(&alu, 0, sizeof(struct r600_bc_alu));
1231		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1232		r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1233		if (r)
1234			return r;
1235
1236		alu.src[0].sel = ctx->temp_reg;
1237		alu.src[0].chan = 0;
1238		alu.last = 1;
1239		r = r600_bc_add_alu(ctx->bc, &alu);
1240		if (r)
1241			return r;
1242	}
1243
1244	/* dst.z = 0.0; */
1245	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1246		memset(&alu, 0, sizeof(struct r600_bc_alu));
1247
1248		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1249
1250		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1251		if (r)
1252			return r;
1253
1254		alu.src[0].sel = V_SQ_ALU_SRC_0;
1255		alu.src[0].chan = 0;
1256
1257		alu.last = 1;
1258
1259		r = r600_bc_add_alu(ctx->bc, &alu);
1260		if (r)
1261			return r;
1262
1263		r = r600_bc_add_literal(ctx->bc, ctx->value);
1264		if (r)
1265			return r;
1266	}
1267
1268	/* dst.w = 1.0; */
1269	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1270		memset(&alu, 0, sizeof(struct r600_bc_alu));
1271
1272		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1273
1274		r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1275		if (r)
1276			return r;
1277
1278		alu.src[0].sel = V_SQ_ALU_SRC_1;
1279		alu.src[0].chan = 0;
1280
1281		alu.last = 1;
1282
1283		r = r600_bc_add_alu(ctx->bc, &alu);
1284		if (r)
1285			return r;
1286
1287		r = r600_bc_add_literal(ctx->bc, ctx->value);
1288		if (r)
1289			return r;
1290	}
1291
1292	return 0;
1293}
1294
1295static int tgsi_kill(struct r600_shader_ctx *ctx)
1296{
1297	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1298	struct r600_bc_alu alu;
1299	int i, r;
1300
1301	for (i = 0; i < 4; i++) {
1302		memset(&alu, 0, sizeof(struct r600_bc_alu));
1303		alu.inst = ctx->inst_info->r600_opcode;
1304
1305		alu.dst.chan = i;
1306
1307		alu.src[0].sel = V_SQ_ALU_SRC_0;
1308
1309		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1310			alu.src[1].sel = V_SQ_ALU_SRC_1;
1311			alu.src[1].neg = 1;
1312		} else {
1313			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1314			if (r)
1315				return r;
1316			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1317		}
1318		if (i == 3) {
1319			alu.last = 1;
1320		}
1321		r = r600_bc_add_alu(ctx->bc, &alu);
1322		if (r)
1323			return r;
1324	}
1325	r = r600_bc_add_literal(ctx->bc, ctx->value);
1326	if (r)
1327		return r;
1328
1329	/* kill must be last in ALU */
1330	ctx->bc->force_add_cf = 1;
1331	ctx->shader->uses_kill = TRUE;
1332	return 0;
1333}
1334
1335static int tgsi_lit(struct r600_shader_ctx *ctx)
1336{
1337	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1338	struct r600_bc_alu alu;
1339	struct r600_bc_alu_src r600_src[3];
1340	int r;
1341
1342	r = tgsi_split_constant(ctx, r600_src);
1343	if (r)
1344		return r;
1345	r = tgsi_split_literal_constant(ctx, r600_src);
1346	if (r)
1347		return r;
1348
1349	/* dst.x, <- 1.0  */
1350	memset(&alu, 0, sizeof(struct r600_bc_alu));
1351	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1352	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
1353	alu.src[0].chan = 0;
1354	r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1355	if (r)
1356		return r;
1357	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1358	r = r600_bc_add_alu(ctx->bc, &alu);
1359	if (r)
1360		return r;
1361
1362	/* dst.y = max(src.x, 0.0) */
1363	memset(&alu, 0, sizeof(struct r600_bc_alu));
1364	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1365	alu.src[0] = r600_src[0];
1366	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1367	alu.src[1].chan = 0;
1368	r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1369	if (r)
1370		return r;
1371	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1372	r = r600_bc_add_alu(ctx->bc, &alu);
1373	if (r)
1374		return r;
1375
1376	/* dst.w, <- 1.0  */
1377	memset(&alu, 0, sizeof(struct r600_bc_alu));
1378	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1379	alu.src[0].sel  = V_SQ_ALU_SRC_1;
1380	alu.src[0].chan = 0;
1381	r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1382	if (r)
1383		return r;
1384	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1385	alu.last = 1;
1386	r = r600_bc_add_alu(ctx->bc, &alu);
1387	if (r)
1388		return r;
1389
1390	r = r600_bc_add_literal(ctx->bc, ctx->value);
1391	if (r)
1392		return r;
1393
1394	if (inst->Dst[0].Register.WriteMask & (1 << 2))
1395	{
1396		int chan;
1397		int sel;
1398
1399		/* dst.z = log(src.y) */
1400		memset(&alu, 0, sizeof(struct r600_bc_alu));
1401		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1402		alu.src[0] = r600_src[0];
1403		alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1404		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1405		if (r)
1406			return r;
1407		alu.last = 1;
1408		r = r600_bc_add_alu(ctx->bc, &alu);
1409		if (r)
1410			return r;
1411
1412		r = r600_bc_add_literal(ctx->bc, ctx->value);
1413		if (r)
1414			return r;
1415
1416		chan = alu.dst.chan;
1417		sel = alu.dst.sel;
1418
1419		/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1420		memset(&alu, 0, sizeof(struct r600_bc_alu));
1421		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1422		alu.src[0] = r600_src[0];
1423		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1424		alu.src[1].sel  = sel;
1425		alu.src[1].chan = chan;
1426
1427		alu.src[2] = r600_src[0];
1428		alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
1429		alu.dst.sel = ctx->temp_reg;
1430		alu.dst.chan = 0;
1431		alu.dst.write = 1;
1432		alu.is_op3 = 1;
1433		alu.last = 1;
1434		r = r600_bc_add_alu(ctx->bc, &alu);
1435		if (r)
1436			return r;
1437
1438		r = r600_bc_add_literal(ctx->bc, ctx->value);
1439		if (r)
1440			return r;
1441		/* dst.z = exp(tmp.x) */
1442		memset(&alu, 0, sizeof(struct r600_bc_alu));
1443		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1444		alu.src[0].sel = ctx->temp_reg;
1445		alu.src[0].chan = 0;
1446		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1447		if (r)
1448			return r;
1449		alu.last = 1;
1450		r = r600_bc_add_alu(ctx->bc, &alu);
1451		if (r)
1452			return r;
1453	}
1454	return 0;
1455}
1456
1457static int tgsi_rsq(struct r600_shader_ctx *ctx)
1458{
1459	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1460	struct r600_bc_alu alu;
1461	int i, r;
1462
1463	memset(&alu, 0, sizeof(struct r600_bc_alu));
1464
1465	/* FIXME:
1466	 * For state trackers other than OpenGL, we'll want to use
1467	 * _RECIPSQRT_IEEE instead.
1468	 */
1469	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1470
1471	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1472		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1473		if (r)
1474			return r;
1475		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1476		alu.src[i].abs = 1;
1477	}
1478	alu.dst.sel = ctx->temp_reg;
1479	alu.dst.write = 1;
1480	alu.last = 1;
1481	r = r600_bc_add_alu(ctx->bc, &alu);
1482	if (r)
1483		return r;
1484	r = r600_bc_add_literal(ctx->bc, ctx->value);
1485	if (r)
1486		return r;
1487	/* replicate result */
1488	return tgsi_helper_tempx_replicate(ctx);
1489}
1490
1491static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1492{
1493	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1494	struct r600_bc_alu alu;
1495	int i, r;
1496
1497	for (i = 0; i < 4; i++) {
1498		memset(&alu, 0, sizeof(struct r600_bc_alu));
1499		alu.src[0].sel = ctx->temp_reg;
1500		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1501		alu.dst.chan = i;
1502		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1503		if (r)
1504			return r;
1505		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1506		if (i == 3)
1507			alu.last = 1;
1508		r = r600_bc_add_alu(ctx->bc, &alu);
1509		if (r)
1510			return r;
1511	}
1512	return 0;
1513}
1514
1515static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1516{
1517	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1518	struct r600_bc_alu alu;
1519	int i, r;
1520
1521	memset(&alu, 0, sizeof(struct r600_bc_alu));
1522	alu.inst = ctx->inst_info->r600_opcode;
1523	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1524		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1525		if (r)
1526			return r;
1527		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1528	}
1529	alu.dst.sel = ctx->temp_reg;
1530	alu.dst.write = 1;
1531	alu.last = 1;
1532	r = r600_bc_add_alu(ctx->bc, &alu);
1533	if (r)
1534		return r;
1535	r = r600_bc_add_literal(ctx->bc, ctx->value);
1536	if (r)
1537		return r;
1538	/* replicate result */
1539	return tgsi_helper_tempx_replicate(ctx);
1540}
1541
1542static int tgsi_pow(struct r600_shader_ctx *ctx)
1543{
1544	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1545	struct r600_bc_alu alu;
1546	int r;
1547
1548	/* LOG2(a) */
1549	memset(&alu, 0, sizeof(struct r600_bc_alu));
1550	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1551	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1552	if (r)
1553		return r;
1554	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1555	alu.dst.sel = ctx->temp_reg;
1556	alu.dst.write = 1;
1557	alu.last = 1;
1558	r = r600_bc_add_alu(ctx->bc, &alu);
1559	if (r)
1560		return r;
1561	r = r600_bc_add_literal(ctx->bc,ctx->value);
1562	if (r)
1563		return r;
1564	/* b * LOG2(a) */
1565	memset(&alu, 0, sizeof(struct r600_bc_alu));
1566	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE);
1567	r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1568	if (r)
1569		return r;
1570	alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1571	alu.src[1].sel = ctx->temp_reg;
1572	alu.dst.sel = ctx->temp_reg;
1573	alu.dst.write = 1;
1574	alu.last = 1;
1575	r = r600_bc_add_alu(ctx->bc, &alu);
1576	if (r)
1577		return r;
1578	r = r600_bc_add_literal(ctx->bc,ctx->value);
1579	if (r)
1580		return r;
1581	/* POW(a,b) = EXP2(b * LOG2(a))*/
1582	memset(&alu, 0, sizeof(struct r600_bc_alu));
1583	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1584	alu.src[0].sel = ctx->temp_reg;
1585	alu.dst.sel = ctx->temp_reg;
1586	alu.dst.write = 1;
1587	alu.last = 1;
1588	r = r600_bc_add_alu(ctx->bc, &alu);
1589	if (r)
1590		return r;
1591	r = r600_bc_add_literal(ctx->bc,ctx->value);
1592	if (r)
1593		return r;
1594	return tgsi_helper_tempx_replicate(ctx);
1595}
1596
1597static int tgsi_ssg(struct r600_shader_ctx *ctx)
1598{
1599	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1600	struct r600_bc_alu alu;
1601	struct r600_bc_alu_src r600_src[3];
1602	int i, r;
1603
1604	r = tgsi_split_constant(ctx, r600_src);
1605	if (r)
1606		return r;
1607	r = tgsi_split_literal_constant(ctx, r600_src);
1608	if (r)
1609		return r;
1610
1611	/* tmp = (src > 0 ? 1 : src) */
1612	for (i = 0; i < 4; i++) {
1613		memset(&alu, 0, sizeof(struct r600_bc_alu));
1614		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1615		alu.is_op3 = 1;
1616
1617		alu.dst.sel = ctx->temp_reg;
1618		alu.dst.chan = i;
1619
1620		alu.src[0] = r600_src[0];
1621		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1622
1623		alu.src[1].sel = V_SQ_ALU_SRC_1;
1624
1625		alu.src[2] = r600_src[0];
1626		alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1627		if (i == 3)
1628			alu.last = 1;
1629		r = r600_bc_add_alu(ctx->bc, &alu);
1630		if (r)
1631			return r;
1632	}
1633	r = r600_bc_add_literal(ctx->bc, ctx->value);
1634	if (r)
1635		return r;
1636
1637	/* dst = (-tmp > 0 ? -1 : tmp) */
1638	for (i = 0; i < 4; i++) {
1639		memset(&alu, 0, sizeof(struct r600_bc_alu));
1640		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1641		alu.is_op3 = 1;
1642		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1643		if (r)
1644			return r;
1645
1646		alu.src[0].sel = ctx->temp_reg;
1647		alu.src[0].chan = i;
1648		alu.src[0].neg = 1;
1649
1650		alu.src[1].sel = V_SQ_ALU_SRC_1;
1651		alu.src[1].neg = 1;
1652
1653		alu.src[2].sel = ctx->temp_reg;
1654		alu.src[2].chan = i;
1655
1656		if (i == 3)
1657			alu.last = 1;
1658		r = r600_bc_add_alu(ctx->bc, &alu);
1659		if (r)
1660			return r;
1661	}
1662	return 0;
1663}
1664
1665static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1666{
1667	struct r600_bc_alu alu;
1668	int i, r;
1669
1670	r = r600_bc_add_literal(ctx->bc, ctx->value);
1671	if (r)
1672		return r;
1673	for (i = 0; i < 4; i++) {
1674		memset(&alu, 0, sizeof(struct r600_bc_alu));
1675		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1676			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1677			alu.dst.chan = i;
1678		} else {
1679			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1680			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1681			if (r)
1682				return r;
1683			alu.src[0].sel = ctx->temp_reg;
1684			alu.src[0].chan = i;
1685		}
1686		if (i == 3) {
1687			alu.last = 1;
1688		}
1689		r = r600_bc_add_alu(ctx->bc, &alu);
1690		if (r)
1691			return r;
1692	}
1693	return 0;
1694}
1695
1696static int tgsi_op3(struct r600_shader_ctx *ctx)
1697{
1698	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1699	struct r600_bc_alu_src r600_src[3];
1700	struct r600_bc_alu alu;
1701	int i, j, r;
1702
1703	r = tgsi_split_constant(ctx, r600_src);
1704	if (r)
1705		return r;
1706	r = tgsi_split_literal_constant(ctx, r600_src);
1707	if (r)
1708		return r;
1709	/* do it in 2 step as op3 doesn't support writemask */
1710	for (i = 0; i < 4; i++) {
1711		memset(&alu, 0, sizeof(struct r600_bc_alu));
1712		alu.inst = ctx->inst_info->r600_opcode;
1713		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1714			alu.src[j] = r600_src[j];
1715			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1716		}
1717		alu.dst.sel = ctx->temp_reg;
1718		alu.dst.chan = i;
1719		alu.dst.write = 1;
1720		alu.is_op3 = 1;
1721		if (i == 3) {
1722			alu.last = 1;
1723		}
1724		r = r600_bc_add_alu(ctx->bc, &alu);
1725		if (r)
1726			return r;
1727	}
1728	return tgsi_helper_copy(ctx, inst);
1729}
1730
1731static int tgsi_dp(struct r600_shader_ctx *ctx)
1732{
1733	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1734	struct r600_bc_alu_src r600_src[3];
1735	struct r600_bc_alu alu;
1736	int i, j, r;
1737
1738	r = tgsi_split_constant(ctx, r600_src);
1739	if (r)
1740		return r;
1741	r = tgsi_split_literal_constant(ctx, r600_src);
1742	if (r)
1743		return r;
1744	for (i = 0; i < 4; i++) {
1745		memset(&alu, 0, sizeof(struct r600_bc_alu));
1746		alu.inst = ctx->inst_info->r600_opcode;
1747		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1748			alu.src[j] = r600_src[j];
1749			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1750		}
1751		alu.dst.sel = ctx->temp_reg;
1752		alu.dst.chan = i;
1753		alu.dst.write = 1;
1754		/* handle some special cases */
1755		switch (ctx->inst_info->tgsi_opcode) {
1756		case TGSI_OPCODE_DP2:
1757			if (i > 1) {
1758				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1759				alu.src[0].chan = alu.src[1].chan = 0;
1760			}
1761			break;
1762		case TGSI_OPCODE_DP3:
1763			if (i > 2) {
1764				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1765				alu.src[0].chan = alu.src[1].chan = 0;
1766			}
1767			break;
1768		case TGSI_OPCODE_DPH:
1769			if (i == 3) {
1770				alu.src[0].sel = V_SQ_ALU_SRC_1;
1771				alu.src[0].chan = 0;
1772				alu.src[0].neg = 0;
1773			}
1774			break;
1775		default:
1776			break;
1777		}
1778		if (i == 3) {
1779			alu.last = 1;
1780		}
1781		r = r600_bc_add_alu(ctx->bc, &alu);
1782		if (r)
1783			return r;
1784	}
1785	return tgsi_helper_copy(ctx, inst);
1786}
1787
1788static int tgsi_tex(struct r600_shader_ctx *ctx)
1789{
1790	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1791	struct r600_bc_tex tex;
1792	struct r600_bc_alu alu;
1793	unsigned src_gpr;
1794	int r, i;
1795	int opcode;
1796	boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY;
1797	uint32_t lit_vals[4];
1798
1799	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1800
1801	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1802		/* Add perspective divide */
1803		memset(&alu, 0, sizeof(struct r600_bc_alu));
1804		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1805		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1806		if (r)
1807			return r;
1808
1809		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1810		alu.dst.sel = ctx->temp_reg;
1811		alu.dst.chan = 3;
1812		alu.last = 1;
1813		alu.dst.write = 1;
1814		r = r600_bc_add_alu(ctx->bc, &alu);
1815		if (r)
1816			return r;
1817
1818		for (i = 0; i < 3; i++) {
1819			memset(&alu, 0, sizeof(struct r600_bc_alu));
1820			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1821			alu.src[0].sel = ctx->temp_reg;
1822			alu.src[0].chan = 3;
1823			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1824			if (r)
1825				return r;
1826			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1827			alu.dst.sel = ctx->temp_reg;
1828			alu.dst.chan = i;
1829			alu.dst.write = 1;
1830			r = r600_bc_add_alu(ctx->bc, &alu);
1831			if (r)
1832				return r;
1833		}
1834		memset(&alu, 0, sizeof(struct r600_bc_alu));
1835		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1836		alu.src[0].sel = V_SQ_ALU_SRC_1;
1837		alu.src[0].chan = 0;
1838		alu.dst.sel = ctx->temp_reg;
1839		alu.dst.chan = 3;
1840		alu.last = 1;
1841		alu.dst.write = 1;
1842		r = r600_bc_add_alu(ctx->bc, &alu);
1843		if (r)
1844			return r;
1845		src_not_temp = FALSE;
1846		src_gpr = ctx->temp_reg;
1847	}
1848
1849	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1850		int src_chan, src2_chan;
1851
1852		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1853		for (i = 0; i < 4; i++) {
1854			memset(&alu, 0, sizeof(struct r600_bc_alu));
1855			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1856			switch (i) {
1857			case 0:
1858				src_chan = 2;
1859				src2_chan = 1;
1860				break;
1861			case 1:
1862				src_chan = 2;
1863				src2_chan = 0;
1864				break;
1865			case 2:
1866				src_chan = 0;
1867				src2_chan = 2;
1868				break;
1869			case 3:
1870				src_chan = 1;
1871				src2_chan = 2;
1872				break;
1873			default:
1874				assert(0);
1875				src_chan = 0;
1876				src2_chan = 0;
1877				break;
1878			}
1879			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1880			if (r)
1881				return r;
1882			alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
1883			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1884			if (r)
1885				return r;
1886			alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
1887			alu.dst.sel = ctx->temp_reg;
1888			alu.dst.chan = i;
1889			if (i == 3)
1890				alu.last = 1;
1891			alu.dst.write = 1;
1892			r = r600_bc_add_alu(ctx->bc, &alu);
1893			if (r)
1894				return r;
1895		}
1896
1897		/* tmp1.z = RCP_e(|tmp1.z|) */
1898		memset(&alu, 0, sizeof(struct r600_bc_alu));
1899		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1900		alu.src[0].sel = ctx->temp_reg;
1901		alu.src[0].chan = 2;
1902		alu.src[0].abs = 1;
1903		alu.dst.sel = ctx->temp_reg;
1904		alu.dst.chan = 2;
1905		alu.dst.write = 1;
1906		alu.last = 1;
1907		r = r600_bc_add_alu(ctx->bc, &alu);
1908		if (r)
1909			return r;
1910
1911		/* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
1912		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
1913		 * muladd has no writemask, have to use another temp
1914		 */
1915		memset(&alu, 0, sizeof(struct r600_bc_alu));
1916		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1917		alu.is_op3 = 1;
1918
1919		alu.src[0].sel = ctx->temp_reg;
1920		alu.src[0].chan = 0;
1921		alu.src[1].sel = ctx->temp_reg;
1922		alu.src[1].chan = 2;
1923
1924		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1925		alu.src[2].chan = 0;
1926
1927		alu.dst.sel = ctx->temp_reg;
1928		alu.dst.chan = 0;
1929		alu.dst.write = 1;
1930
1931		r = r600_bc_add_alu(ctx->bc, &alu);
1932		if (r)
1933			return r;
1934
1935		memset(&alu, 0, sizeof(struct r600_bc_alu));
1936		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1937		alu.is_op3 = 1;
1938
1939		alu.src[0].sel = ctx->temp_reg;
1940		alu.src[0].chan = 1;
1941		alu.src[1].sel = ctx->temp_reg;
1942		alu.src[1].chan = 2;
1943
1944		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1945		alu.src[2].chan = 0;
1946
1947		alu.dst.sel = ctx->temp_reg;
1948		alu.dst.chan = 1;
1949		alu.dst.write = 1;
1950
1951		alu.last = 1;
1952		r = r600_bc_add_alu(ctx->bc, &alu);
1953		if (r)
1954			return r;
1955
1956		lit_vals[0] = fui(1.5f);
1957
1958		r = r600_bc_add_literal(ctx->bc, lit_vals);
1959		if (r)
1960			return r;
1961		src_not_temp = FALSE;
1962		src_gpr = ctx->temp_reg;
1963	}
1964
1965	if (src_not_temp) {
1966		for (i = 0; i < 4; i++) {
1967			memset(&alu, 0, sizeof(struct r600_bc_alu));
1968			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1969			alu.src[0].sel = src_gpr;
1970			alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1971			alu.dst.sel = ctx->temp_reg;
1972			alu.dst.chan = i;
1973			if (i == 3)
1974				alu.last = 1;
1975			alu.dst.write = 1;
1976			r = r600_bc_add_alu(ctx->bc, &alu);
1977			if (r)
1978				return r;
1979		}
1980		src_gpr = ctx->temp_reg;
1981	}
1982
1983	opcode = ctx->inst_info->r600_opcode;
1984	if (opcode == SQ_TEX_INST_SAMPLE &&
1985	    (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
1986		opcode = SQ_TEX_INST_SAMPLE_C;
1987
1988	memset(&tex, 0, sizeof(struct r600_bc_tex));
1989	tex.inst = opcode;
1990	tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1991	tex.resource_id = tex.sampler_id;
1992	if (ctx->shader->processor_type == TGSI_PROCESSOR_VERTEX)
1993		tex.resource_id += PIPE_MAX_ATTRIBS;
1994	tex.src_gpr = src_gpr;
1995	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1996	tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
1997	tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
1998	tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
1999	tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
2000	tex.src_sel_x = 0;
2001	tex.src_sel_y = 1;
2002	tex.src_sel_z = 2;
2003	tex.src_sel_w = 3;
2004
2005	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
2006		tex.src_sel_x = 1;
2007		tex.src_sel_y = 0;
2008		tex.src_sel_z = 3;
2009		tex.src_sel_w = 1;
2010	}
2011
2012	if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
2013		tex.coord_type_x = 1;
2014		tex.coord_type_y = 1;
2015		tex.coord_type_z = 1;
2016		tex.coord_type_w = 1;
2017	}
2018
2019	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
2020		tex.src_sel_w = 2;
2021
2022	r = r600_bc_add_tex(ctx->bc, &tex);
2023	if (r)
2024		return r;
2025
2026	/* add shadow ambient support  - gallium doesn't do it yet */
2027	return 0;
2028
2029}
2030
2031static int tgsi_lrp(struct r600_shader_ctx *ctx)
2032{
2033	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2034	struct r600_bc_alu_src r600_src[3];
2035	struct r600_bc_alu alu;
2036	unsigned i;
2037	int r;
2038
2039	r = tgsi_split_constant(ctx, r600_src);
2040	if (r)
2041		return r;
2042	r = tgsi_split_literal_constant(ctx, r600_src);
2043	if (r)
2044		return r;
2045	/* 1 - src0 */
2046	for (i = 0; i < 4; i++) {
2047		memset(&alu, 0, sizeof(struct r600_bc_alu));
2048		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2049		alu.src[0].sel = V_SQ_ALU_SRC_1;
2050		alu.src[0].chan = 0;
2051		alu.src[1] = r600_src[0];
2052		alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
2053		alu.src[1].neg = 1;
2054		alu.dst.sel = ctx->temp_reg;
2055		alu.dst.chan = i;
2056		if (i == 3) {
2057			alu.last = 1;
2058		}
2059		alu.dst.write = 1;
2060		r = r600_bc_add_alu(ctx->bc, &alu);
2061		if (r)
2062			return r;
2063	}
2064	r = r600_bc_add_literal(ctx->bc, ctx->value);
2065	if (r)
2066		return r;
2067
2068	/* (1 - src0) * src2 */
2069	for (i = 0; i < 4; i++) {
2070		memset(&alu, 0, sizeof(struct r600_bc_alu));
2071		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2072		alu.src[0].sel = ctx->temp_reg;
2073		alu.src[0].chan = i;
2074		alu.src[1] = r600_src[2];
2075		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
2076		alu.dst.sel = ctx->temp_reg;
2077		alu.dst.chan = i;
2078		if (i == 3) {
2079			alu.last = 1;
2080		}
2081		alu.dst.write = 1;
2082		r = r600_bc_add_alu(ctx->bc, &alu);
2083		if (r)
2084			return r;
2085	}
2086	r = r600_bc_add_literal(ctx->bc, ctx->value);
2087	if (r)
2088		return r;
2089
2090	/* src0 * src1 + (1 - src0) * src2 */
2091	for (i = 0; i < 4; i++) {
2092		memset(&alu, 0, sizeof(struct r600_bc_alu));
2093		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2094		alu.is_op3 = 1;
2095		alu.src[0] = r600_src[0];
2096		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2097		alu.src[1] = r600_src[1];
2098		alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2099		alu.src[2].sel = ctx->temp_reg;
2100		alu.src[2].chan = i;
2101		alu.dst.sel = ctx->temp_reg;
2102		alu.dst.chan = i;
2103		if (i == 3) {
2104			alu.last = 1;
2105		}
2106		r = r600_bc_add_alu(ctx->bc, &alu);
2107		if (r)
2108			return r;
2109	}
2110	return tgsi_helper_copy(ctx, inst);
2111}
2112
2113static int tgsi_cmp(struct r600_shader_ctx *ctx)
2114{
2115	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2116	struct r600_bc_alu_src r600_src[3];
2117	struct r600_bc_alu alu;
2118	int use_temp = 0;
2119	int i, r;
2120
2121	r = tgsi_split_constant(ctx, r600_src);
2122	if (r)
2123		return r;
2124	r = tgsi_split_literal_constant(ctx, r600_src);
2125	if (r)
2126		return r;
2127
2128	if (inst->Dst[0].Register.WriteMask != 0xf)
2129		use_temp = 1;
2130
2131	for (i = 0; i < 4; i++) {
2132		memset(&alu, 0, sizeof(struct r600_bc_alu));
2133		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
2134		alu.src[0] = r600_src[0];
2135		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2136
2137		alu.src[1] = r600_src[2];
2138		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
2139
2140		alu.src[2] = r600_src[1];
2141		alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
2142
2143		if (use_temp)
2144			alu.dst.sel = ctx->temp_reg;
2145		else {
2146			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2147			if (r)
2148				return r;
2149		}
2150		alu.dst.chan = i;
2151		alu.dst.write = 1;
2152		alu.is_op3 = 1;
2153		if (i == 3)
2154			alu.last = 1;
2155		r = r600_bc_add_alu(ctx->bc, &alu);
2156		if (r)
2157			return r;
2158	}
2159	if (use_temp)
2160		return tgsi_helper_copy(ctx, inst);
2161	return 0;
2162}
2163
2164static int tgsi_xpd(struct r600_shader_ctx *ctx)
2165{
2166	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2167	struct r600_bc_alu_src r600_src[3];
2168	struct r600_bc_alu alu;
2169	uint32_t use_temp = 0;
2170	int i, r;
2171
2172	if (inst->Dst[0].Register.WriteMask != 0xf)
2173		use_temp = 1;
2174
2175	r = tgsi_split_constant(ctx, r600_src);
2176	if (r)
2177		return r;
2178	r = tgsi_split_literal_constant(ctx, r600_src);
2179	if (r)
2180		return r;
2181
2182	for (i = 0; i < 4; i++) {
2183		memset(&alu, 0, sizeof(struct r600_bc_alu));
2184		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2185
2186		alu.src[0] = r600_src[0];
2187		switch (i) {
2188		case 0:
2189			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2190			break;
2191		case 1:
2192			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2193			break;
2194		case 2:
2195			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2196			break;
2197		case 3:
2198			alu.src[0].sel = V_SQ_ALU_SRC_0;
2199			alu.src[0].chan = i;
2200		}
2201
2202		alu.src[1] = r600_src[1];
2203		switch (i) {
2204		case 0:
2205			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2206			break;
2207		case 1:
2208			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2209			break;
2210		case 2:
2211			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2212			break;
2213		case 3:
2214			alu.src[1].sel = V_SQ_ALU_SRC_0;
2215			alu.src[1].chan = i;
2216		}
2217
2218		alu.dst.sel = ctx->temp_reg;
2219		alu.dst.chan = i;
2220		alu.dst.write = 1;
2221
2222		if (i == 3)
2223			alu.last = 1;
2224		r = r600_bc_add_alu(ctx->bc, &alu);
2225		if (r)
2226			return r;
2227
2228		r = r600_bc_add_literal(ctx->bc, ctx->value);
2229		if (r)
2230			return r;
2231	}
2232
2233	for (i = 0; i < 4; i++) {
2234		memset(&alu, 0, sizeof(struct r600_bc_alu));
2235		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2236
2237		alu.src[0] = r600_src[0];
2238		switch (i) {
2239		case 0:
2240			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2241			break;
2242		case 1:
2243			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2244			break;
2245		case 2:
2246			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2247			break;
2248		case 3:
2249			alu.src[0].sel = V_SQ_ALU_SRC_0;
2250			alu.src[0].chan = i;
2251		}
2252
2253		alu.src[1] = r600_src[1];
2254		switch (i) {
2255		case 0:
2256			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2257			break;
2258		case 1:
2259			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2260			break;
2261		case 2:
2262			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2263			break;
2264		case 3:
2265			alu.src[1].sel = V_SQ_ALU_SRC_0;
2266			alu.src[1].chan = i;
2267		}
2268
2269		alu.src[2].sel = ctx->temp_reg;
2270		alu.src[2].neg = 1;
2271		alu.src[2].chan = i;
2272
2273		if (use_temp)
2274			alu.dst.sel = ctx->temp_reg;
2275		else {
2276			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2277			if (r)
2278				return r;
2279		}
2280		alu.dst.chan = i;
2281		alu.dst.write = 1;
2282		alu.is_op3 = 1;
2283		if (i == 3)
2284			alu.last = 1;
2285		r = r600_bc_add_alu(ctx->bc, &alu);
2286		if (r)
2287			return r;
2288
2289		r = r600_bc_add_literal(ctx->bc, ctx->value);
2290		if (r)
2291			return r;
2292	}
2293	if (use_temp)
2294		return tgsi_helper_copy(ctx, inst);
2295	return 0;
2296}
2297
2298static int tgsi_exp(struct r600_shader_ctx *ctx)
2299{
2300	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2301	struct r600_bc_alu_src r600_src[3] = { { 0 } };
2302	struct r600_bc_alu alu;
2303	int r;
2304
2305	/* result.x = 2^floor(src); */
2306	if (inst->Dst[0].Register.WriteMask & 1) {
2307		memset(&alu, 0, sizeof(struct r600_bc_alu));
2308
2309		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2310		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2311		if (r)
2312			return r;
2313
2314		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2315
2316		alu.dst.sel = ctx->temp_reg;
2317		alu.dst.chan = 0;
2318		alu.dst.write = 1;
2319		alu.last = 1;
2320		r = r600_bc_add_alu(ctx->bc, &alu);
2321		if (r)
2322			return r;
2323
2324		r = r600_bc_add_literal(ctx->bc, ctx->value);
2325		if (r)
2326			return r;
2327
2328		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2329		alu.src[0].sel = ctx->temp_reg;
2330		alu.src[0].chan = 0;
2331
2332		alu.dst.sel = ctx->temp_reg;
2333		alu.dst.chan = 0;
2334		alu.dst.write = 1;
2335		alu.last = 1;
2336		r = r600_bc_add_alu(ctx->bc, &alu);
2337		if (r)
2338			return r;
2339
2340		r = r600_bc_add_literal(ctx->bc, ctx->value);
2341		if (r)
2342			return r;
2343	}
2344
2345	/* result.y = tmp - floor(tmp); */
2346	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2347		memset(&alu, 0, sizeof(struct r600_bc_alu));
2348
2349		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2350		alu.src[0] = r600_src[0];
2351		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2352		if (r)
2353			return r;
2354		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2355
2356		alu.dst.sel = ctx->temp_reg;
2357//		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2358//		if (r)
2359//			return r;
2360		alu.dst.write = 1;
2361		alu.dst.chan = 1;
2362
2363		alu.last = 1;
2364
2365		r = r600_bc_add_alu(ctx->bc, &alu);
2366		if (r)
2367			return r;
2368		r = r600_bc_add_literal(ctx->bc, ctx->value);
2369		if (r)
2370			return r;
2371	}
2372
2373	/* result.z = RoughApprox2ToX(tmp);*/
2374	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2375		memset(&alu, 0, sizeof(struct r600_bc_alu));
2376		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2377		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2378		if (r)
2379			return r;
2380		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2381
2382		alu.dst.sel = ctx->temp_reg;
2383		alu.dst.write = 1;
2384		alu.dst.chan = 2;
2385
2386		alu.last = 1;
2387
2388		r = r600_bc_add_alu(ctx->bc, &alu);
2389		if (r)
2390			return r;
2391		r = r600_bc_add_literal(ctx->bc, ctx->value);
2392		if (r)
2393			return r;
2394	}
2395
2396	/* result.w = 1.0;*/
2397	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2398		memset(&alu, 0, sizeof(struct r600_bc_alu));
2399
2400		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2401		alu.src[0].sel = V_SQ_ALU_SRC_1;
2402		alu.src[0].chan = 0;
2403
2404		alu.dst.sel = ctx->temp_reg;
2405		alu.dst.chan = 3;
2406		alu.dst.write = 1;
2407		alu.last = 1;
2408		r = r600_bc_add_alu(ctx->bc, &alu);
2409		if (r)
2410			return r;
2411		r = r600_bc_add_literal(ctx->bc, ctx->value);
2412		if (r)
2413			return r;
2414	}
2415	return tgsi_helper_copy(ctx, inst);
2416}
2417
2418static int tgsi_log(struct r600_shader_ctx *ctx)
2419{
2420	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2421	struct r600_bc_alu alu;
2422	int r;
2423
2424	/* result.x = floor(log2(src)); */
2425	if (inst->Dst[0].Register.WriteMask & 1) {
2426		memset(&alu, 0, sizeof(struct r600_bc_alu));
2427
2428		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2429		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2430		if (r)
2431			return r;
2432
2433		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2434
2435		alu.dst.sel = ctx->temp_reg;
2436		alu.dst.chan = 0;
2437		alu.dst.write = 1;
2438		alu.last = 1;
2439		r = r600_bc_add_alu(ctx->bc, &alu);
2440		if (r)
2441			return r;
2442
2443		r = r600_bc_add_literal(ctx->bc, ctx->value);
2444		if (r)
2445			return r;
2446
2447		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2448		alu.src[0].sel = ctx->temp_reg;
2449		alu.src[0].chan = 0;
2450
2451		alu.dst.sel = ctx->temp_reg;
2452		alu.dst.chan = 0;
2453		alu.dst.write = 1;
2454		alu.last = 1;
2455
2456		r = r600_bc_add_alu(ctx->bc, &alu);
2457		if (r)
2458			return r;
2459
2460		r = r600_bc_add_literal(ctx->bc, ctx->value);
2461		if (r)
2462			return r;
2463	}
2464
2465	/* result.y = src.x / (2 ^ floor(log2(src.x))); */
2466	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2467		memset(&alu, 0, sizeof(struct r600_bc_alu));
2468
2469		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2470		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2471		if (r)
2472			return r;
2473
2474		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2475
2476		alu.dst.sel = ctx->temp_reg;
2477		alu.dst.chan = 1;
2478		alu.dst.write = 1;
2479		alu.last = 1;
2480
2481		r = r600_bc_add_alu(ctx->bc, &alu);
2482		if (r)
2483			return r;
2484
2485		r = r600_bc_add_literal(ctx->bc, ctx->value);
2486		if (r)
2487			return r;
2488
2489		memset(&alu, 0, sizeof(struct r600_bc_alu));
2490
2491		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2492		alu.src[0].sel = ctx->temp_reg;
2493		alu.src[0].chan = 1;
2494
2495		alu.dst.sel = ctx->temp_reg;
2496		alu.dst.chan = 1;
2497		alu.dst.write = 1;
2498		alu.last = 1;
2499
2500		r = r600_bc_add_alu(ctx->bc, &alu);
2501		if (r)
2502			return r;
2503
2504		r = r600_bc_add_literal(ctx->bc, ctx->value);
2505		if (r)
2506			return r;
2507
2508		memset(&alu, 0, sizeof(struct r600_bc_alu));
2509
2510		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2511		alu.src[0].sel = ctx->temp_reg;
2512		alu.src[0].chan = 1;
2513
2514		alu.dst.sel = ctx->temp_reg;
2515		alu.dst.chan = 1;
2516		alu.dst.write = 1;
2517		alu.last = 1;
2518
2519		r = r600_bc_add_alu(ctx->bc, &alu);
2520		if (r)
2521			return r;
2522
2523		r = r600_bc_add_literal(ctx->bc, ctx->value);
2524		if (r)
2525			return r;
2526
2527		memset(&alu, 0, sizeof(struct r600_bc_alu));
2528
2529		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2530		alu.src[0].sel = ctx->temp_reg;
2531		alu.src[0].chan = 1;
2532
2533		alu.dst.sel = ctx->temp_reg;
2534		alu.dst.chan = 1;
2535		alu.dst.write = 1;
2536		alu.last = 1;
2537
2538		r = r600_bc_add_alu(ctx->bc, &alu);
2539		if (r)
2540			return r;
2541
2542		r = r600_bc_add_literal(ctx->bc, ctx->value);
2543		if (r)
2544			return r;
2545
2546		memset(&alu, 0, sizeof(struct r600_bc_alu));
2547
2548		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2549
2550		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2551		if (r)
2552			return r;
2553
2554		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2555
2556		alu.src[1].sel = ctx->temp_reg;
2557		alu.src[1].chan = 1;
2558
2559		alu.dst.sel = ctx->temp_reg;
2560		alu.dst.chan = 1;
2561		alu.dst.write = 1;
2562		alu.last = 1;
2563
2564		r = r600_bc_add_alu(ctx->bc, &alu);
2565		if (r)
2566			return r;
2567
2568		r = r600_bc_add_literal(ctx->bc, ctx->value);
2569		if (r)
2570			return r;
2571	}
2572
2573	/* result.z = log2(src);*/
2574	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2575		memset(&alu, 0, sizeof(struct r600_bc_alu));
2576
2577		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2578		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2579		if (r)
2580			return r;
2581
2582		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2583
2584		alu.dst.sel = ctx->temp_reg;
2585		alu.dst.write = 1;
2586		alu.dst.chan = 2;
2587		alu.last = 1;
2588
2589		r = r600_bc_add_alu(ctx->bc, &alu);
2590		if (r)
2591			return r;
2592
2593		r = r600_bc_add_literal(ctx->bc, ctx->value);
2594		if (r)
2595			return r;
2596	}
2597
2598	/* result.w = 1.0; */
2599	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2600		memset(&alu, 0, sizeof(struct r600_bc_alu));
2601
2602		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2603		alu.src[0].sel = V_SQ_ALU_SRC_1;
2604		alu.src[0].chan = 0;
2605
2606		alu.dst.sel = ctx->temp_reg;
2607		alu.dst.chan = 3;
2608		alu.dst.write = 1;
2609		alu.last = 1;
2610
2611		r = r600_bc_add_alu(ctx->bc, &alu);
2612		if (r)
2613			return r;
2614
2615		r = r600_bc_add_literal(ctx->bc, ctx->value);
2616		if (r)
2617			return r;
2618	}
2619
2620	return tgsi_helper_copy(ctx, inst);
2621}
2622
2623/* r6/7 only for now */
2624static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2625{
2626	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2627	struct r600_bc_alu alu;
2628	int r;
2629
2630	memset(&alu, 0, sizeof(struct r600_bc_alu));
2631
2632	alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2633	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2634	if (r)
2635		return r;
2636	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2637	alu.last = 1;
2638	alu.dst.chan = 0;
2639	alu.dst.sel = ctx->temp_reg;
2640	alu.dst.write = 1;
2641	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2642	if (r)
2643		return r;
2644	memset(&alu, 0, sizeof(struct r600_bc_alu));
2645	alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2646	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2647	if (r)
2648		return r;
2649	alu.src[0].sel = ctx->temp_reg;
2650	alu.src[0].chan = 0;
2651	alu.last = 1;
2652	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2653	if (r)
2654		return r;
2655	return 0;
2656}
2657static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2658{
2659	/* TODO from r600c, ar values don't persist between clauses */
2660	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2661	struct r600_bc_alu alu;
2662	int r;
2663	memset(&alu, 0, sizeof(struct r600_bc_alu));
2664
2665	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
2666
2667	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2668	if (r)
2669		return r;
2670	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2671
2672	alu.last = 1;
2673
2674	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2675	if (r)
2676		return r;
2677	ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2678	return 0;
2679}
2680
2681static int tgsi_opdst(struct r600_shader_ctx *ctx)
2682{
2683	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2684	struct r600_bc_alu alu;
2685	int i, r = 0;
2686
2687	for (i = 0; i < 4; i++) {
2688		memset(&alu, 0, sizeof(struct r600_bc_alu));
2689
2690		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2691		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2692		if (r)
2693			return r;
2694
2695	        if (i == 0 || i == 3) {
2696			alu.src[0].sel = V_SQ_ALU_SRC_1;
2697		} else {
2698			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2699			if (r)
2700				return r;
2701			alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2702		}
2703
2704	        if (i == 0 || i == 2) {
2705			alu.src[1].sel = V_SQ_ALU_SRC_1;
2706		} else {
2707			r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
2708			if (r)
2709				return r;
2710			alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2711		}
2712		if (i == 3)
2713			alu.last = 1;
2714		r = r600_bc_add_alu(ctx->bc, &alu);
2715		if (r)
2716			return r;
2717	}
2718	return 0;
2719}
2720
2721static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2722{
2723	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2724	struct r600_bc_alu alu;
2725	int r;
2726
2727	memset(&alu, 0, sizeof(struct r600_bc_alu));
2728	alu.inst = opcode;
2729	alu.predicate = 1;
2730
2731	alu.dst.sel = ctx->temp_reg;
2732	alu.dst.write = 1;
2733	alu.dst.chan = 0;
2734
2735	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2736	if (r)
2737		return r;
2738	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2739	alu.src[1].sel = V_SQ_ALU_SRC_0;
2740	alu.src[1].chan = 0;
2741
2742	alu.last = 1;
2743
2744	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2745	if (r)
2746		return r;
2747	return 0;
2748}
2749
2750static int pops(struct r600_shader_ctx *ctx, int pops)
2751{
2752	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2753	ctx->bc->cf_last->pop_count = pops;
2754	return 0;
2755}
2756
2757static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2758{
2759	switch(reason) {
2760	case FC_PUSH_VPM:
2761		ctx->bc->callstack[ctx->bc->call_sp].current--;
2762		break;
2763	case FC_PUSH_WQM:
2764	case FC_LOOP:
2765		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2766		break;
2767	case FC_REP:
2768		/* TOODO : for 16 vp asic should -= 2; */
2769		ctx->bc->callstack[ctx->bc->call_sp].current --;
2770		break;
2771	}
2772}
2773
2774static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2775{
2776	if (check_max_only) {
2777		int diff;
2778		switch (reason) {
2779		case FC_PUSH_VPM:
2780			diff = 1;
2781			break;
2782		case FC_PUSH_WQM:
2783			diff = 4;
2784			break;
2785		default:
2786			assert(0);
2787			diff = 0;
2788		}
2789		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2790		    ctx->bc->callstack[ctx->bc->call_sp].max) {
2791			ctx->bc->callstack[ctx->bc->call_sp].max =
2792				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2793		}
2794		return;
2795	}
2796	switch (reason) {
2797	case FC_PUSH_VPM:
2798		ctx->bc->callstack[ctx->bc->call_sp].current++;
2799		break;
2800	case FC_PUSH_WQM:
2801	case FC_LOOP:
2802		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2803		break;
2804	case FC_REP:
2805		ctx->bc->callstack[ctx->bc->call_sp].current++;
2806		break;
2807	}
2808
2809	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2810	    ctx->bc->callstack[ctx->bc->call_sp].max) {
2811		ctx->bc->callstack[ctx->bc->call_sp].max =
2812			ctx->bc->callstack[ctx->bc->call_sp].current;
2813	}
2814}
2815
2816static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2817{
2818	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2819
2820	sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2821						sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2822	sp->mid[sp->num_mid] = ctx->bc->cf_last;
2823	sp->num_mid++;
2824}
2825
2826static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2827{
2828	ctx->bc->fc_sp++;
2829	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2830	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2831}
2832
2833static void fc_poplevel(struct r600_shader_ctx *ctx)
2834{
2835	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2836	if (sp->mid) {
2837		free(sp->mid);
2838		sp->mid = NULL;
2839	}
2840	sp->num_mid = 0;
2841	sp->start = NULL;
2842	sp->type = 0;
2843	ctx->bc->fc_sp--;
2844}
2845
#if 0
/*
 * Disabled, unfinished helpers for RETURN / flag-driven loop exits.
 * Nothing in this region is compiled.
 */

/* Append a RETURN control-flow instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/* Append a JUMP with the given pop count; the target offset is not set yet. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Placeholder: emits nothing. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Placeholder: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/* Test the flag, then jump / clear flag / pop ifidx + 1 levels / return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/* Test the flag, emit the current opcode as a CF and register it as a mid of fc_sp. */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2893
2894static int tgsi_if(struct r600_shader_ctx *ctx)
2895{
2896	emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2897
2898	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2899
2900	fc_pushlevel(ctx, FC_IF);
2901
2902	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2903	return 0;
2904}
2905
2906static int tgsi_else(struct r600_shader_ctx *ctx)
2907{
2908	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2909	ctx->bc->cf_last->pop_count = 1;
2910
2911	fc_set_mid(ctx, ctx->bc->fc_sp);
2912	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2913	return 0;
2914}
2915
2916static int tgsi_endif(struct r600_shader_ctx *ctx)
2917{
2918	pops(ctx, 1);
2919	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2920		R600_ERR("if/endif unbalanced in shader\n");
2921		return -1;
2922	}
2923
2924	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2925		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2926		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2927	} else {
2928		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2929	}
2930	fc_poplevel(ctx);
2931
2932	callstack_decrease_current(ctx, FC_PUSH_VPM);
2933	return 0;
2934}
2935
2936static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2937{
2938	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2939
2940	fc_pushlevel(ctx, FC_LOOP);
2941
2942	/* check stack depth */
2943	callstack_check_depth(ctx, FC_LOOP, 0);
2944	return 0;
2945}
2946
2947static int tgsi_endloop(struct r600_shader_ctx *ctx)
2948{
2949	int i;
2950
2951	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
2952
2953	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2954		R600_ERR("loop/endloop in shader code are not paired.\n");
2955		return -EINVAL;
2956	}
2957
2958	/* fixup loop pointers - from r600isa
2959	   LOOP END points to CF after LOOP START,
2960	   LOOP START point to CF after LOOP END
2961	   BRK/CONT point to LOOP END CF
2962	*/
2963	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2964
2965	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2966
2967	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2968		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2969	}
2970	/* TODO add LOOPRET support */
2971	fc_poplevel(ctx);
2972	callstack_decrease_current(ctx, FC_LOOP);
2973	return 0;
2974}
2975
2976static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2977{
2978	unsigned int fscp;
2979
2980	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2981	{
2982		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2983			break;
2984	}
2985
2986	if (fscp == 0) {
2987		R600_ERR("Break not inside loop/endloop pair\n");
2988		return -EINVAL;
2989	}
2990
2991	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2992	ctx->bc->cf_last->pop_count = 1;
2993
2994	fc_set_mid(ctx, fscp);
2995
2996	pops(ctx, 1);
2997	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2998	return 0;
2999}
3000
3001static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
3002	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3003	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3004	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3005
3006	/* FIXME:
3007	 * For state trackers other than OpenGL, we'll want to use
3008	 * _RECIP_IEEE instead.
3009	 */
3010	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
3011
3012	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
3013	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3014	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3015	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3016	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3017	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3018	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3019	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3020	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3021	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3022	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3023	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3024	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3025	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3026	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3027	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3028	/* gap */
3029	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3030	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3031	/* gap */
3032	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3033	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3034	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3035	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3036	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3037	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3038	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3039	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3040	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3041	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3042	/* gap */
3043	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3044	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3045	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3046	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3047	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3048	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3049	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3050	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3051	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3052	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3053	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3054	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3055	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3056	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3057	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3058	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3059	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3060	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3061	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3062	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3063	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3064	{TGSI_OPCODE_TXD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3065	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3066	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3067	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3068	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3069	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3070	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3071	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3072	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3073	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3074	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3075	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3076	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3077	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3078	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3079	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3080	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3081	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3082	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3083	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3084	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3085	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3086	/* gap */
3087	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3088	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3089	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3090	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3091	/* gap */
3092	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3093	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3094	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3095	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3096	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3097	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3098	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3099	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
3100	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3101	/* gap */
3102	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3103	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3104	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3105	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3106	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3107	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3108	{TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3109	{TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3110	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3111	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3112	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3113	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3114	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3115	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3116	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3117	/* gap */
3118	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3119	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3120	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3121	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3122	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3123	/* gap */
3124	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3125	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3126	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3127	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3128	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3129	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3130	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3131	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3132	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3133	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3134	/* gap */
3135	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3136	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3137	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3138	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3139	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3140	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3141	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3142	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3143	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3144	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3145	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3146	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3147	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3148	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3149	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3150	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3151	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3152	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3153	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3154	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3155	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3156	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3157	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3158	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3159	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3160	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3161	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3162	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3163};
3164
/*
 * TGSI instruction translation table built from the EG_-prefixed hardware
 * opcode constants ("eg" presumably stands for Evergreen -- confirm against
 * the code that selects this table).
 *
 * Each entry lists, in order:
 *   1. the TGSI opcode: a TGSI_OPCODE_* name, or a bare number in the
 *      "gap" positions that have no named opcode;
 *   2. an integer flag that is 1 only for MAD, the single entry whose
 *      hardware opcode is a WORD1_OP3 constant (presumably an "is op3"
 *      marker -- verify against struct r600_shader_tgsi_instruction);
 *   3. the hardware opcode constant associated with the instruction: ALU
 *      (EG_V_SQ_ALU_*), control flow (EG_V_SQ_CF_*) or texture
 *      (SQ_TEX_INST_*);
 *   4. the handler function for the instruction.
 *
 * Entries whose handler is tgsi_unsupported carry the NOP opcode as a
 * placeholder value.
 *
 * NOTE(review): the numbered placeholders appear in ascending order and
 * fill every gap, which suggests the table is indexed directly by TGSI
 * opcode value.  If so, entries must not be reordered, inserted or removed
 * without keeping position == opcode -- confirm at the lookup site.
 */
3165static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
3166	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3167	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3168	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3169	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
3170	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
3171	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3172	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3173	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3174	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3175	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3176	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3177	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3178	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3179	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3180	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},  /* SETGT via the _swap handler; presumably operands are exchanged -- confirm in tgsi_op2_swap */
3181	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3182	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},  /* only entry with flag 1 and a WORD1_OP3 opcode */
3183	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},  /* shares the ADD opcode; presumably tgsi_op2 negates an operand for SUB -- confirm */
3184	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3185	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3186	/* gap */
3187	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3188	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3189	/* gap */
3190	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3191	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3192	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3193	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3194	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3195	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3196	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3197	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3198	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3199	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3200	/* gap */
3201	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3202	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},  /* shares the MOV opcode; presumably tgsi_op2 applies an abs modifier -- confirm */
3203	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3204	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3205	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3206	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3207	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3208	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3209	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3210	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3211	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3212	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3213	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3214	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3215	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3216	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3217	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3218	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},  /* SETGE via the _swap handler; presumably operands are exchanged -- confirm in tgsi_op2_swap */
3219	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3220	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3221	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3222	{TGSI_OPCODE_TXD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3223	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3224	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3225	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3226	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3227	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3228	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3229	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3230	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3231	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3232	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3233	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3234	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3235	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3236	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3237	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},  /* NOTE(review): same SAMPLE_L opcode as TXL below -- confirm this is intended for TXB */
3238	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3239	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3240	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3241	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3242	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3243	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3244	/* gap */
3245	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3246	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3247	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3248	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3249	/* gap */
3250	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3251	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3252	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3253	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3254	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3255	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3256	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3257	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
3258	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3259	/* gap */
3260	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3261	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3262	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3263	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3264	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3265	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3266	{TGSI_OPCODE_TXF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3267	{TGSI_OPCODE_TXQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3268	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3269	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3270	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3271	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3272	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3273	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3274	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3275	/* gap */
3276	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3277	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3278	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3279	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3280	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3281	/* gap */
3282	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3283	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3284	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3285	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3286	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3287	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3288	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3289	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3290	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3291	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3292	/* gap */
3293	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3294	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3295	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3296	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3297	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3298	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3299	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3300	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3301	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3302	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3303	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3304	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3305	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3306	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3307	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3308	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3309	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3310	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3311	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3312	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3313	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3314	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3315	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3316	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3317	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3318	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3319	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3320	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3321};
3322