r600_shader.c revision a6a710cbe7425819e1cd5ad5f2085311c092f2e7
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "pipe/p_shader_tokens.h"
24#include "tgsi/tgsi_parse.h"
25#include "tgsi/tgsi_scan.h"
26#include "tgsi/tgsi_dump.h"
27#include "util/u_format.h"
28#include "r600_pipe.h"
29#include "r600_asm.h"
30#include "r600_sq.h"
31#include "r600_opcodes.h"
32#include "r600d.h"
33#include <stdio.h>
34#include <errno.h>
35
36static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
37{
38	struct r600_pipe_state *rstate = &shader->rstate;
39	struct r600_shader *rshader = &shader->shader;
40	unsigned spi_vs_out_id[10];
41	unsigned i, tmp;
42
43	/* clear previous register */
44	rstate->nregs = 0;
45
46	/* so far never got proper semantic id from tgsi */
47	/* FIXME better to move this in config things so they get emited
48	 * only one time per cs
49	 */
50	for (i = 0; i < 10; i++) {
51		spi_vs_out_id[i] = 0;
52	}
53	for (i = 0; i < 32; i++) {
54		tmp = i << ((i & 3) * 8);
55		spi_vs_out_id[i / 4] |= tmp;
56	}
57	for (i = 0; i < 10; i++) {
58		r600_pipe_state_add_reg(rstate,
59					R_028614_SPI_VS_OUT_ID_0 + i * 4,
60					spi_vs_out_id[i], 0xFFFFFFFF, NULL);
61	}
62
63	r600_pipe_state_add_reg(rstate,
64			R_0286C4_SPI_VS_OUT_CONFIG,
65			S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
66			0xFFFFFFFF, NULL);
67	r600_pipe_state_add_reg(rstate,
68			R_028868_SQ_PGM_RESOURCES_VS,
69			S_028868_NUM_GPRS(rshader->bc.ngpr) |
70			S_028868_STACK_SIZE(rshader->bc.nstack),
71			0xFFFFFFFF, NULL);
72	r600_pipe_state_add_reg(rstate,
73			R_0288D0_SQ_PGM_CF_OFFSET_VS,
74			0x00000000, 0xFFFFFFFF, NULL);
75	r600_pipe_state_add_reg(rstate,
76			R_028858_SQ_PGM_START_VS,
77			r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
78
79	r600_pipe_state_add_reg(rstate,
80				R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
81				0xFFFFFFFF, NULL);
82
83}
84
85int r600_find_vs_semantic_index(struct r600_shader *vs,
86				struct r600_shader *ps, int id)
87{
88	struct r600_shader_io *input = &ps->input[id];
89
90	for (int i = 0; i < vs->noutput; i++) {
91		if (input->name == vs->output[i].name &&
92			input->sid == vs->output[i].sid) {
93			return i - 1;
94		}
95	}
96	return 0;
97}
98
99static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
100{
101	struct r600_pipe_state *rstate = &shader->rstate;
102	struct r600_shader *rshader = &shader->shader;
103	unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
104	int pos_index = -1, face_index = -1;
105
106	rstate->nregs = 0;
107
108	for (i = 0; i < rshader->ninput; i++) {
109		if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
110			pos_index = i;
111		if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
112			face_index = i;
113	}
114
115	for (i = 0; i < rshader->noutput; i++) {
116		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
117			r600_pipe_state_add_reg(rstate,
118						R_02880C_DB_SHADER_CONTROL,
119						S_02880C_Z_EXPORT_ENABLE(1),
120						S_02880C_Z_EXPORT_ENABLE(1), NULL);
121		if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
122			r600_pipe_state_add_reg(rstate,
123						R_02880C_DB_SHADER_CONTROL,
124						S_02880C_STENCIL_REF_EXPORT_ENABLE(1),
125						S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL);
126	}
127
128	exports_ps = 0;
129	num_cout = 0;
130	for (i = 0; i < rshader->noutput; i++) {
131		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
132			exports_ps |= 1;
133		else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
134			num_cout++;
135		}
136	}
137	exports_ps |= S_028854_EXPORT_COLORS(num_cout);
138	if (!exports_ps) {
139		/* always at least export 1 component per pixel */
140		exports_ps = 2;
141	}
142
143	spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
144				S_0286CC_PERSP_GRADIENT_ENA(1);
145	spi_input_z = 0;
146	if (pos_index != -1) {
147		spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
148					S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
149					S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
150					S_0286CC_BARYC_SAMPLE_CNTL(1));
151		spi_input_z |= 1;
152	}
153
154	spi_ps_in_control_1 = 0;
155	if (face_index != -1) {
156		spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
157			S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
158	}
159
160	r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
161	r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
162	r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
163	r600_pipe_state_add_reg(rstate,
164				R_028840_SQ_PGM_START_PS,
165				r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
166	r600_pipe_state_add_reg(rstate,
167				R_028850_SQ_PGM_RESOURCES_PS,
168				S_028868_NUM_GPRS(rshader->bc.ngpr) |
169				S_028868_STACK_SIZE(rshader->bc.nstack),
170				0xFFFFFFFF, NULL);
171	r600_pipe_state_add_reg(rstate,
172				R_028854_SQ_PGM_EXPORTS_PS,
173				exports_ps, 0xFFFFFFFF, NULL);
174	r600_pipe_state_add_reg(rstate,
175				R_0288CC_SQ_PGM_CF_OFFSET_PS,
176				0x00000000, 0xFFFFFFFF, NULL);
177
178	if (rshader->fs_write_all) {
179		r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
180					S_028808_MULTIWRITE_ENABLE(1),
181					S_028808_MULTIWRITE_ENABLE(1),
182					NULL);
183	}
184
185	if (rshader->uses_kill) {
186		/* only set some bits here, the other bits are set in the dsa state */
187		r600_pipe_state_add_reg(rstate,
188					R_02880C_DB_SHADER_CONTROL,
189					S_02880C_KILL_ENABLE(1),
190					S_02880C_KILL_ENABLE(1), NULL);
191	}
192	r600_pipe_state_add_reg(rstate,
193				R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
194				0xFFFFFFFF, NULL);
195}
196
197static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
198{
199	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
200	struct r600_shader *rshader = &shader->shader;
201	void *ptr;
202
203	/* copy new shader */
204	if (shader->bo == NULL) {
205		shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
206		if (shader->bo == NULL) {
207			return -ENOMEM;
208		}
209		ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
210		memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
211		r600_bo_unmap(rctx->radeon, shader->bo);
212	}
213	/* build state */
214	switch (rshader->processor_type) {
215	case TGSI_PROCESSOR_VERTEX:
216		if (rshader->family >= CHIP_CEDAR) {
217			evergreen_pipe_shader_vs(ctx, shader);
218		} else {
219			r600_pipe_shader_vs(ctx, shader);
220		}
221		break;
222	case TGSI_PROCESSOR_FRAGMENT:
223		if (rshader->family >= CHIP_CEDAR) {
224			evergreen_pipe_shader_ps(ctx, shader);
225		} else {
226			r600_pipe_shader_ps(ctx, shader);
227		}
228		break;
229	default:
230		return -EINVAL;
231	}
232	return 0;
233}
234
235int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals);
236int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
237{
238	static int dump_shaders = -1;
239	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
240	u32 *literals;
241	int r;
242
243        /* Would like some magic "get_bool_option_once" routine.
244         */
245        if (dump_shaders == -1)
246                dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
247
248	if (dump_shaders) {
249		fprintf(stderr, "--------------------------------------------------------------\n");
250		tgsi_dump(tokens, 0);
251	}
252	shader->shader.family = r600_get_family(rctx->radeon);
253	r = r600_shader_from_tgsi(tokens, &shader->shader, &literals);
254	if (r) {
255		R600_ERR("translation from TGSI failed !\n");
256		return r;
257	}
258	r = r600_bc_build(&shader->shader.bc);
259	free(literals);
260	if (r) {
261		R600_ERR("building bytecode failed !\n");
262		return r;
263	}
264	if (dump_shaders) {
265		r600_bc_dump(&shader->shader.bc);
266		fprintf(stderr, "______________________________________________________________\n");
267	}
268	return r600_pipe_shader(ctx, shader);
269}
270
271void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
272{
273	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
274
275	r600_bo_reference(rctx->radeon, &shader->bo, NULL);
276	r600_bc_clear(&shader->shader.bc);
277}
278
279/*
280 * tgsi -> r600 shader
281 */
282struct r600_shader_tgsi_instruction;
283
284struct r600_shader_ctx {
285	struct tgsi_shader_info			info;
286	struct tgsi_parse_context		parse;
287	const struct tgsi_token			*tokens;
288	unsigned				type;
289	unsigned				file_offset[TGSI_FILE_COUNT];
290	unsigned				temp_reg;
291	struct r600_shader_tgsi_instruction	*inst_info;
292	struct r600_bc				*bc;
293	struct r600_shader			*shader;
294	u32					*literals;
295	u32					nliterals;
296	u32					max_driver_temp_used;
297	/* needed for evergreen interpolation */
298	boolean                                 input_centroid;
299	boolean                                 input_linear;
300	boolean                                 input_perspective;
301	int					num_interp_gpr;
302};
303
/* One entry of the TGSI opcode -> r600 translation tables. */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry describes */
	unsigned tgsi_opcode;
	/* NOTE(review): presumably non-zero when r600_opcode is a
	 * three-operand instruction — confirm against the tables
	 */
	unsigned is_op3;
	/* hardware opcode the handler emits */
	unsigned r600_opcode;
	/* handler that translates the current instruction; non-zero on error */
	int (*process)(struct r600_shader_ctx *ctx);
};
310
311static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[];
312static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
313
314static int tgsi_is_supported(struct r600_shader_ctx *ctx)
315{
316	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
317	int j;
318
319	if (i->Instruction.NumDstRegs > 1) {
320		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
321		return -EINVAL;
322	}
323	if (i->Instruction.Predicate) {
324		R600_ERR("predicate unsupported\n");
325		return -EINVAL;
326	}
327#if 0
328	if (i->Instruction.Label) {
329		R600_ERR("label unsupported\n");
330		return -EINVAL;
331	}
332#endif
333	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
334		if (i->Src[j].Register.Dimension) {
335			R600_ERR("unsupported src %d (dimension %d)\n", j,
336				 i->Src[j].Register.Dimension);
337			return -EINVAL;
338		}
339	}
340	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
341		if (i->Dst[j].Register.Dimension) {
342			R600_ERR("unsupported dst (dimension)\n");
343			return -EINVAL;
344		}
345	}
346	return 0;
347}
348
349static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
350{
351	int i, r;
352	struct r600_bc_alu alu;
353	int gpr = 0, base_chan = 0;
354	int ij_index = 0;
355
356	if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
357		ij_index = 0;
358		if (ctx->shader->input[input].centroid)
359			ij_index++;
360	} else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
361		ij_index = 0;
362		/* if we have perspective add one */
363		if (ctx->input_perspective)  {
364			ij_index++;
365			/* if we have perspective centroid */
366			if (ctx->input_centroid)
367				ij_index++;
368		}
369		if (ctx->shader->input[input].centroid)
370			ij_index++;
371	}
372
373	/* work out gpr and base_chan from index */
374	gpr = ij_index / 2;
375	base_chan = (2 * (ij_index % 2)) + 1;
376
377	for (i = 0; i < 8; i++) {
378		memset(&alu, 0, sizeof(struct r600_bc_alu));
379
380		if (i < 4)
381			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
382		else
383			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
384
385		if ((i > 1) && (i < 6)) {
386			alu.dst.sel = ctx->shader->input[input].gpr;
387			alu.dst.write = 1;
388		}
389
390		alu.dst.chan = i % 4;
391
392		alu.src[0].sel = gpr;
393		alu.src[0].chan = (base_chan - (i % 2));
394
395		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
396
397		alu.bank_swizzle_force = SQ_ALU_VEC_210;
398		if ((i % 4) == 3)
399			alu.last = 1;
400		r = r600_bc_add_alu(ctx->bc, &alu);
401		if (r)
402			return r;
403	}
404	return 0;
405}
406
407
408static int tgsi_declaration(struct r600_shader_ctx *ctx)
409{
410	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
411	unsigned i;
412
413	switch (d->Declaration.File) {
414	case TGSI_FILE_INPUT:
415		i = ctx->shader->ninput++;
416		ctx->shader->input[i].name = d->Semantic.Name;
417		ctx->shader->input[i].sid = d->Semantic.Index;
418		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
419		ctx->shader->input[i].centroid = d->Declaration.Centroid;
420		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
421		if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
422			/* turn input into interpolate on EG */
423			if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
424				if (ctx->shader->input[i].interpolate > 0) {
425					ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
426					evergreen_interp_alu(ctx, i);
427				}
428			}
429		}
430		break;
431	case TGSI_FILE_OUTPUT:
432		i = ctx->shader->noutput++;
433		ctx->shader->output[i].name = d->Semantic.Name;
434		ctx->shader->output[i].sid = d->Semantic.Index;
435		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
436		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
437		break;
438	case TGSI_FILE_CONSTANT:
439	case TGSI_FILE_TEMPORARY:
440	case TGSI_FILE_SAMPLER:
441	case TGSI_FILE_ADDRESS:
442		break;
443	default:
444		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
445		return -EINVAL;
446	}
447	return 0;
448}
449
450static int r600_get_temp(struct r600_shader_ctx *ctx)
451{
452	return ctx->temp_reg + ctx->max_driver_temp_used++;
453}
454
455/*
456 * for evergreen we need to scan the shader to find the number of GPRs we need to
457 * reserve for interpolation.
458 *
459 * we need to know if we are going to emit
460 * any centroid inputs
461 * if perspective and linear are required
462*/
463static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
464{
465	int i;
466	int num_baryc;
467
468	ctx->input_linear = FALSE;
469	ctx->input_perspective = FALSE;
470	ctx->input_centroid = FALSE;
471	ctx->num_interp_gpr = 1;
472
473	/* any centroid inputs */
474	for (i = 0; i < ctx->info.num_inputs; i++) {
475		/* skip position/face */
476		if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
477		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
478			continue;
479		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
480			ctx->input_linear = TRUE;
481		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
482			ctx->input_perspective = TRUE;
483		if (ctx->info.input_centroid[i])
484			ctx->input_centroid = TRUE;
485	}
486
487	num_baryc = 0;
488	/* ignoring sample for now */
489	if (ctx->input_perspective)
490		num_baryc++;
491	if (ctx->input_linear)
492		num_baryc++;
493	if (ctx->input_centroid)
494		num_baryc *= 2;
495
496	ctx->num_interp_gpr += (num_baryc + 1) >> 1;
497
498	/* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
499	return ctx->num_interp_gpr;
500}
501
502int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals)
503{
504	struct tgsi_full_immediate *immediate;
505	struct tgsi_full_property *property;
506	struct r600_shader_ctx ctx;
507	struct r600_bc_output output[32];
508	unsigned output_done, noutput;
509	unsigned opcode;
510	int i, r = 0, pos0;
511
512	ctx.bc = &shader->bc;
513	ctx.shader = shader;
514	r = r600_bc_init(ctx.bc, shader->family);
515	if (r)
516		return r;
517	ctx.tokens = tokens;
518	tgsi_scan_shader(tokens, &ctx.info);
519	tgsi_parse_init(&ctx.parse, tokens);
520	ctx.type = ctx.parse.FullHeader.Processor.Processor;
521	shader->processor_type = ctx.type;
522	ctx.bc->type = shader->processor_type;
523
524	/* register allocations */
525	/* Values [0,127] correspond to GPR[0..127].
526	 * Values [128,159] correspond to constant buffer bank 0
527	 * Values [160,191] correspond to constant buffer bank 1
528	 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
529	 * Values [256,287] correspond to constant buffer bank 2 (EG)
530	 * Values [288,319] correspond to constant buffer bank 3 (EG)
531	 * Other special values are shown in the list below.
532	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
533	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
534	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
535	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
536	 * 248	SQ_ALU_SRC_0: special constant 0.0.
537	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
538	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
539	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
540	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
541	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
542	 * 254	SQ_ALU_SRC_PV: previous vector result.
543	 * 255	SQ_ALU_SRC_PS: previous scalar result.
544	 */
545	for (i = 0; i < TGSI_FILE_COUNT; i++) {
546		ctx.file_offset[i] = 0;
547	}
548	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
549		ctx.file_offset[TGSI_FILE_INPUT] = 1;
550		if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
551			r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
552		} else {
553			r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
554		}
555	}
556	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) {
557		ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
558	}
559	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
560						ctx.info.file_count[TGSI_FILE_INPUT];
561	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
562						ctx.info.file_count[TGSI_FILE_OUTPUT];
563
564	/* Outside the GPR range. This will be translated to one of the
565	 * kcache banks later. */
566	ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
567
568	ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
569	ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
570			ctx.info.file_count[TGSI_FILE_TEMPORARY];
571
572	ctx.nliterals = 0;
573	ctx.literals = NULL;
574	shader->fs_write_all = FALSE;
575	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
576		tgsi_parse_token(&ctx.parse);
577		switch (ctx.parse.FullToken.Token.Type) {
578		case TGSI_TOKEN_TYPE_IMMEDIATE:
579			immediate = &ctx.parse.FullToken.FullImmediate;
580			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
581			if(ctx.literals == NULL) {
582				r = -ENOMEM;
583				goto out_err;
584			}
585			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
586			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
587			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
588			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
589			ctx.nliterals++;
590			break;
591		case TGSI_TOKEN_TYPE_DECLARATION:
592			r = tgsi_declaration(&ctx);
593			if (r)
594				goto out_err;
595			break;
596		case TGSI_TOKEN_TYPE_INSTRUCTION:
597			r = tgsi_is_supported(&ctx);
598			if (r)
599				goto out_err;
600			ctx.max_driver_temp_used = 0;
601			/* reserve first tmp for everyone */
602			r600_get_temp(&ctx);
603			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
604			if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
605				ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
606			else
607				ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
608			r = ctx.inst_info->process(&ctx);
609			if (r)
610				goto out_err;
611			break;
612		case TGSI_TOKEN_TYPE_PROPERTY:
613			property = &ctx.parse.FullToken.FullProperty;
614			if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
615				if (property->u[0].Data == 1)
616					shader->fs_write_all = TRUE;
617			}
618			break;
619		default:
620			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
621			r = -EINVAL;
622			goto out_err;
623		}
624	}
625	/* export output */
626	noutput = shader->noutput;
627	for (i = 0, pos0 = 0; i < noutput; i++) {
628		memset(&output[i], 0, sizeof(struct r600_bc_output));
629		output[i].gpr = shader->output[i].gpr;
630		output[i].elem_size = 3;
631		output[i].swizzle_x = 0;
632		output[i].swizzle_y = 1;
633		output[i].swizzle_z = 2;
634		output[i].swizzle_w = 3;
635		output[i].burst_count = 1;
636		output[i].barrier = 1;
637		output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
638		output[i].array_base = i - pos0;
639		output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
640		switch (ctx.type) {
641		case TGSI_PROCESSOR_VERTEX:
642			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
643				output[i].array_base = 60;
644				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
645				/* position doesn't count in array_base */
646				pos0++;
647			}
648			if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
649				output[i].array_base = 61;
650				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
651				/* position doesn't count in array_base */
652				pos0++;
653			}
654			break;
655		case TGSI_PROCESSOR_FRAGMENT:
656			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
657				output[i].array_base = shader->output[i].sid;
658				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
659			} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
660				output[i].array_base = 61;
661				output[i].swizzle_x = 2;
662				output[i].swizzle_y = 7;
663				output[i].swizzle_z = output[i].swizzle_w = 7;
664				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
665			} else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
666				output[i].array_base = 61;
667				output[i].swizzle_x = 7;
668				output[i].swizzle_y = 1;
669				output[i].swizzle_z = output[i].swizzle_w = 7;
670				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
671			} else {
672				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
673				r = -EINVAL;
674				goto out_err;
675			}
676			break;
677		default:
678			R600_ERR("unsupported processor type %d\n", ctx.type);
679			r = -EINVAL;
680			goto out_err;
681		}
682	}
683	/* add fake param output for vertex shader if no param is exported */
684	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
685		for (i = 0, pos0 = 0; i < noutput; i++) {
686			if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
687				pos0 = 1;
688				break;
689			}
690		}
691		if (!pos0) {
692			memset(&output[i], 0, sizeof(struct r600_bc_output));
693			output[i].gpr = 0;
694			output[i].elem_size = 3;
695			output[i].swizzle_x = 0;
696			output[i].swizzle_y = 1;
697			output[i].swizzle_z = 2;
698			output[i].swizzle_w = 3;
699			output[i].burst_count = 1;
700			output[i].barrier = 1;
701			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
702			output[i].array_base = 0;
703			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
704			noutput++;
705		}
706	}
707	/* add fake pixel export */
708	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
709		memset(&output[0], 0, sizeof(struct r600_bc_output));
710		output[0].gpr = 0;
711		output[0].elem_size = 3;
712		output[0].swizzle_x = 7;
713		output[0].swizzle_y = 7;
714		output[0].swizzle_z = 7;
715		output[0].swizzle_w = 7;
716		output[0].burst_count = 1;
717		output[0].barrier = 1;
718		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
719		output[0].array_base = 0;
720		output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
721		noutput++;
722	}
723	/* set export done on last export of each type */
724	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
725		if (i == (noutput - 1)) {
726			output[i].end_of_program = 1;
727		}
728		if (!(output_done & (1 << output[i].type))) {
729			output_done |= (1 << output[i].type);
730			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
731		}
732	}
733	/* add output to bytecode */
734	for (i = 0; i < noutput; i++) {
735		r = r600_bc_add_output(ctx.bc, &output[i]);
736		if (r)
737			goto out_err;
738	}
739	*literals = ctx.literals;
740	tgsi_parse_free(&ctx.parse);
741	return 0;
742out_err:
743	free(ctx.literals);
744	tgsi_parse_free(&ctx.parse);
745	return r;
746}
747
748static int tgsi_unsupported(struct r600_shader_ctx *ctx)
749{
750	R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
751	return -EINVAL;
752}
753
/* Handler that emits nothing and always succeeds. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* nothing to emit */
	return 0;
}
758
759static int tgsi_src(struct r600_shader_ctx *ctx,
760			const struct tgsi_full_src_register *tgsi_src,
761			struct r600_bc_alu_src *r600_src)
762{
763	memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
764	r600_src->neg = tgsi_src->Register.Negate;
765	r600_src->abs = tgsi_src->Register.Absolute;
766	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
767		int index;
768		if((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
769			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
770			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
771
772			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
773			r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
774			if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
775				return 0;
776		}
777		index = tgsi_src->Register.Index;
778		r600_src->sel = V_SQ_ALU_SRC_LITERAL;
779		r600_src->value = ctx->literals + index * 4;
780	} else {
781		if (tgsi_src->Register.Indirect)
782			r600_src->rel = V_SQ_REL_RELATIVE;
783		r600_src->sel = tgsi_src->Register.Index;
784		r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
785	}
786	return 0;
787}
788
789static int tgsi_dst(struct r600_shader_ctx *ctx,
790			const struct tgsi_full_dst_register *tgsi_dst,
791			unsigned swizzle,
792			struct r600_bc_alu_dst *r600_dst)
793{
794	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
795
796	r600_dst->sel = tgsi_dst->Register.Index;
797	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
798	r600_dst->chan = swizzle;
799	r600_dst->write = 1;
800	if (tgsi_dst->Register.Indirect)
801		r600_dst->rel = V_SQ_REL_RELATIVE;
802	if (inst->Instruction.Saturate) {
803		r600_dst->clamp = 1;
804	}
805	return 0;
806}
807
808static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
809{
810	switch (swizzle) {
811	case 0:
812		return tgsi_src->Register.SwizzleX;
813	case 1:
814		return tgsi_src->Register.SwizzleY;
815	case 2:
816		return tgsi_src->Register.SwizzleZ;
817	case 3:
818		return tgsi_src->Register.SwizzleW;
819	default:
820		return 0;
821	}
822}
823
824static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
825{
826	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
827	struct r600_bc_alu alu;
828	int i, j, k, nconst, r;
829
830	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
831		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
832			nconst++;
833		}
834		r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
835		if (r) {
836			return r;
837		}
838	}
839	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
840		if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
841			int treg = r600_get_temp(ctx);
842			for (k = 0; k < 4; k++) {
843				memset(&alu, 0, sizeof(struct r600_bc_alu));
844				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
845				alu.src[0].sel = r600_src[i].sel;
846				alu.src[0].chan = k;
847				alu.src[0].rel = r600_src[i].rel;
848				alu.dst.sel = treg;
849				alu.dst.chan = k;
850				alu.dst.write = 1;
851				if (k == 3)
852					alu.last = 1;
853				r = r600_bc_add_alu(ctx->bc, &alu);
854				if (r)
855					return r;
856			}
857			r600_src[i].sel = treg;
858			r600_src[i].rel =0;
859			j--;
860		}
861	}
862	return 0;
863}
864
865/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
866static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
867{
868	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
869	struct r600_bc_alu alu;
870	int i, j, k, nliteral, r;
871
872	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
873		if (r600_src[i].sel == V_SQ_ALU_SRC_LITERAL) {
874			nliteral++;
875		}
876	}
877	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
878		if (j > 0 && r600_src[i].sel == V_SQ_ALU_SRC_LITERAL) {
879			int treg = r600_get_temp(ctx);
880			for (k = 0; k < 4; k++) {
881				memset(&alu, 0, sizeof(struct r600_bc_alu));
882				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
883				alu.src[0].sel = r600_src[i].sel;
884				alu.src[0].chan = k;
885				alu.src[0].value = r600_src[i].value;
886				alu.dst.sel = treg;
887				alu.dst.chan = k;
888				alu.dst.write = 1;
889				if (k == 3)
890					alu.last = 1;
891				r = r600_bc_add_alu(ctx->bc, &alu);
892				if (r)
893					return r;
894			}
895			r600_src[i].sel = treg;
896			j--;
897		}
898	}
899	return 0;
900}
901
/*
 * Return the index (0-3) of the highest channel enabled in the low four bits
 * of writemask, or 0 when none of them is set.  Higher bits are ignored.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1u << chan))
			return chan;
	}
	return 0;
}
913
914static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
915{
916	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
917	struct r600_bc_alu_src r600_src[3];
918	struct r600_bc_alu alu;
919	int i, j, r;
920	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
921
922	r = tgsi_split_constant(ctx, r600_src);
923	if (r)
924		return r;
925	r = tgsi_split_literal_constant(ctx, r600_src);
926	if (r)
927		return r;
928	for (i = 0; i < lasti + 1; i++) {
929		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
930			continue;
931
932		memset(&alu, 0, sizeof(struct r600_bc_alu));
933		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
934		if (r)
935			return r;
936
937		alu.inst = ctx->inst_info->r600_opcode;
938		if (!swap) {
939			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
940				alu.src[j] = r600_src[j];
941				alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
942			}
943		} else {
944			alu.src[0] = r600_src[1];
945			alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
946
947			alu.src[1] = r600_src[0];
948			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
949		}
950		/* handle some special cases */
951		switch (ctx->inst_info->tgsi_opcode) {
952		case TGSI_OPCODE_SUB:
953			alu.src[1].neg = 1;
954			break;
955		case TGSI_OPCODE_ABS:
956			alu.src[0].abs = 1;
957			break;
958		default:
959			break;
960		}
961		if (i == lasti) {
962			alu.last = 1;
963		}
964		r = r600_bc_add_alu(ctx->bc, &alu);
965		if (r)
966			return r;
967	}
968	return 0;
969}
970
/* Two-operand ALU op with the TGSI sources in their natural order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
975
/* Two-operand ALU op with the first two TGSI sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
980
981/*
982 * r600 - trunc to -PI..PI range
983 * r700 - normalize by dividing by 2PI
984 * see fdo bug 27901
985 */
986static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
987			   struct r600_bc_alu_src r600_src[3])
988{
989	static float half_inv_pi = 1.0 /(3.1415926535 * 2);
990	static float double_pi = 3.1415926535 * 2;
991	static float neg_pi = -3.1415926535;
992
993	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
994	int r;
995	struct r600_bc_alu alu;
996
997	r = tgsi_split_constant(ctx, r600_src);
998	if (r)
999		return r;
1000	r = tgsi_split_literal_constant(ctx, r600_src);
1001	if (r)
1002		return r;
1003
1004	memset(&alu, 0, sizeof(struct r600_bc_alu));
1005	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1006	alu.is_op3 = 1;
1007
1008	alu.dst.chan = 0;
1009	alu.dst.sel = ctx->temp_reg;
1010	alu.dst.write = 1;
1011
1012	alu.src[0] = r600_src[0];
1013	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1014
1015	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1016	alu.src[1].chan = 0;
1017	alu.src[1].value = (uint32_t *)&half_inv_pi;
1018	alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1019	alu.src[2].chan = 0;
1020	alu.last = 1;
1021	r = r600_bc_add_alu(ctx->bc, &alu);
1022	if (r)
1023		return r;
1024
1025	memset(&alu, 0, sizeof(struct r600_bc_alu));
1026	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1027
1028	alu.dst.chan = 0;
1029	alu.dst.sel = ctx->temp_reg;
1030	alu.dst.write = 1;
1031
1032	alu.src[0].sel = ctx->temp_reg;
1033	alu.src[0].chan = 0;
1034	alu.last = 1;
1035	r = r600_bc_add_alu(ctx->bc, &alu);
1036	if (r)
1037		return r;
1038
1039	memset(&alu, 0, sizeof(struct r600_bc_alu));
1040	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1041	alu.is_op3 = 1;
1042
1043	alu.dst.chan = 0;
1044	alu.dst.sel = ctx->temp_reg;
1045	alu.dst.write = 1;
1046
1047	alu.src[0].sel = ctx->temp_reg;
1048	alu.src[0].chan = 0;
1049
1050	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1051	alu.src[1].chan = 0;
1052	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1053	alu.src[2].chan = 0;
1054
1055	if (ctx->bc->chiprev == CHIPREV_R600) {
1056		alu.src[1].value = (uint32_t *)&double_pi;
1057		alu.src[2].value = (uint32_t *)&neg_pi;
1058	} else {
1059		alu.src[1].sel = V_SQ_ALU_SRC_1;
1060		alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1061		alu.src[2].neg = 1;
1062	}
1063
1064	alu.last = 1;
1065	r = r600_bc_add_alu(ctx->bc, &alu);
1066	if (r)
1067		return r;
1068	return 0;
1069}
1070
1071static int tgsi_trig(struct r600_shader_ctx *ctx)
1072{
1073	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1074	struct r600_bc_alu_src r600_src[3];
1075	struct r600_bc_alu alu;
1076	int i, r;
1077	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1078
1079	r = tgsi_setup_trig(ctx, r600_src);
1080	if (r)
1081		return r;
1082
1083	memset(&alu, 0, sizeof(struct r600_bc_alu));
1084	alu.inst = ctx->inst_info->r600_opcode;
1085	alu.dst.chan = 0;
1086	alu.dst.sel = ctx->temp_reg;
1087	alu.dst.write = 1;
1088
1089	alu.src[0].sel = ctx->temp_reg;
1090	alu.src[0].chan = 0;
1091	alu.last = 1;
1092	r = r600_bc_add_alu(ctx->bc, &alu);
1093	if (r)
1094		return r;
1095
1096	/* replicate result */
1097	for (i = 0; i < lasti + 1; i++) {
1098		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1099			continue;
1100
1101		memset(&alu, 0, sizeof(struct r600_bc_alu));
1102		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1103
1104		alu.src[0].sel = ctx->temp_reg;
1105		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1106		if (r)
1107			return r;
1108		if (i == lasti)
1109			alu.last = 1;
1110		r = r600_bc_add_alu(ctx->bc, &alu);
1111		if (r)
1112			return r;
1113	}
1114	return 0;
1115}
1116
1117static int tgsi_scs(struct r600_shader_ctx *ctx)
1118{
1119	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1120	struct r600_bc_alu_src r600_src[3];
1121	struct r600_bc_alu alu;
1122	int r;
1123
1124	/* We'll only need the trig stuff if we are going to write to the
1125	 * X or Y components of the destination vector.
1126	 */
1127	if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1128		r = tgsi_setup_trig(ctx, r600_src);
1129		if (r)
1130			return r;
1131	}
1132
1133	/* dst.x = COS */
1134	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1135		memset(&alu, 0, sizeof(struct r600_bc_alu));
1136		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1137		r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1138		if (r)
1139			return r;
1140
1141		alu.src[0].sel = ctx->temp_reg;
1142		alu.src[0].chan = 0;
1143		alu.last = 1;
1144		r = r600_bc_add_alu(ctx->bc, &alu);
1145		if (r)
1146			return r;
1147	}
1148
1149	/* dst.y = SIN */
1150	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1151		memset(&alu, 0, sizeof(struct r600_bc_alu));
1152		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1153		r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1154		if (r)
1155			return r;
1156
1157		alu.src[0].sel = ctx->temp_reg;
1158		alu.src[0].chan = 0;
1159		alu.last = 1;
1160		r = r600_bc_add_alu(ctx->bc, &alu);
1161		if (r)
1162			return r;
1163	}
1164
1165	/* dst.z = 0.0; */
1166	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1167		memset(&alu, 0, sizeof(struct r600_bc_alu));
1168
1169		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1170
1171		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1172		if (r)
1173			return r;
1174
1175		alu.src[0].sel = V_SQ_ALU_SRC_0;
1176		alu.src[0].chan = 0;
1177
1178		alu.last = 1;
1179
1180		r = r600_bc_add_alu(ctx->bc, &alu);
1181		if (r)
1182			return r;
1183	}
1184
1185	/* dst.w = 1.0; */
1186	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1187		memset(&alu, 0, sizeof(struct r600_bc_alu));
1188
1189		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1190
1191		r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1192		if (r)
1193			return r;
1194
1195		alu.src[0].sel = V_SQ_ALU_SRC_1;
1196		alu.src[0].chan = 0;
1197
1198		alu.last = 1;
1199
1200		r = r600_bc_add_alu(ctx->bc, &alu);
1201		if (r)
1202			return r;
1203	}
1204
1205	return 0;
1206}
1207
1208static int tgsi_kill(struct r600_shader_ctx *ctx)
1209{
1210	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1211	struct r600_bc_alu alu;
1212	int i, r;
1213
1214	for (i = 0; i < 4; i++) {
1215		memset(&alu, 0, sizeof(struct r600_bc_alu));
1216		alu.inst = ctx->inst_info->r600_opcode;
1217
1218		alu.dst.chan = i;
1219
1220		alu.src[0].sel = V_SQ_ALU_SRC_0;
1221
1222		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1223			alu.src[1].sel = V_SQ_ALU_SRC_1;
1224			alu.src[1].neg = 1;
1225		} else {
1226			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1227			if (r)
1228				return r;
1229			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1230		}
1231		if (i == 3) {
1232			alu.last = 1;
1233		}
1234		r = r600_bc_add_alu(ctx->bc, &alu);
1235		if (r)
1236			return r;
1237	}
1238
1239	/* kill must be last in ALU */
1240	ctx->bc->force_add_cf = 1;
1241	ctx->shader->uses_kill = TRUE;
1242	return 0;
1243}
1244
1245static int tgsi_lit(struct r600_shader_ctx *ctx)
1246{
1247	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1248	struct r600_bc_alu alu;
1249	struct r600_bc_alu_src r600_src[3];
1250	int r;
1251
1252	r = tgsi_split_constant(ctx, r600_src);
1253	if (r)
1254		return r;
1255	r = tgsi_split_literal_constant(ctx, r600_src);
1256	if (r)
1257		return r;
1258
1259	/* dst.x, <- 1.0  */
1260	memset(&alu, 0, sizeof(struct r600_bc_alu));
1261	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1262	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
1263	alu.src[0].chan = 0;
1264	r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1265	if (r)
1266		return r;
1267	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1268	r = r600_bc_add_alu(ctx->bc, &alu);
1269	if (r)
1270		return r;
1271
1272	/* dst.y = max(src.x, 0.0) */
1273	memset(&alu, 0, sizeof(struct r600_bc_alu));
1274	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1275	alu.src[0] = r600_src[0];
1276	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1277	alu.src[1].chan = 0;
1278	r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1279	if (r)
1280		return r;
1281	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1282	r = r600_bc_add_alu(ctx->bc, &alu);
1283	if (r)
1284		return r;
1285
1286	/* dst.w, <- 1.0  */
1287	memset(&alu, 0, sizeof(struct r600_bc_alu));
1288	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1289	alu.src[0].sel  = V_SQ_ALU_SRC_1;
1290	alu.src[0].chan = 0;
1291	r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1292	if (r)
1293		return r;
1294	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1295	alu.last = 1;
1296	r = r600_bc_add_alu(ctx->bc, &alu);
1297	if (r)
1298		return r;
1299
1300	if (inst->Dst[0].Register.WriteMask & (1 << 2))
1301	{
1302		int chan;
1303		int sel;
1304
1305		/* dst.z = log(src.y) */
1306		memset(&alu, 0, sizeof(struct r600_bc_alu));
1307		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1308		alu.src[0] = r600_src[0];
1309		alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1310		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1311		if (r)
1312			return r;
1313		alu.last = 1;
1314		r = r600_bc_add_alu(ctx->bc, &alu);
1315		if (r)
1316			return r;
1317
1318		chan = alu.dst.chan;
1319		sel = alu.dst.sel;
1320
1321		/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1322		memset(&alu, 0, sizeof(struct r600_bc_alu));
1323		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1324		alu.src[0] = r600_src[0];
1325		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1326		alu.src[1].sel  = sel;
1327		alu.src[1].chan = chan;
1328
1329		alu.src[2] = r600_src[0];
1330		alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
1331		alu.dst.sel = ctx->temp_reg;
1332		alu.dst.chan = 0;
1333		alu.dst.write = 1;
1334		alu.is_op3 = 1;
1335		alu.last = 1;
1336		r = r600_bc_add_alu(ctx->bc, &alu);
1337		if (r)
1338			return r;
1339
1340		/* dst.z = exp(tmp.x) */
1341		memset(&alu, 0, sizeof(struct r600_bc_alu));
1342		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1343		alu.src[0].sel = ctx->temp_reg;
1344		alu.src[0].chan = 0;
1345		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1346		if (r)
1347			return r;
1348		alu.last = 1;
1349		r = r600_bc_add_alu(ctx->bc, &alu);
1350		if (r)
1351			return r;
1352	}
1353	return 0;
1354}
1355
1356static int tgsi_rsq(struct r600_shader_ctx *ctx)
1357{
1358	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1359	struct r600_bc_alu alu;
1360	int i, r;
1361
1362	memset(&alu, 0, sizeof(struct r600_bc_alu));
1363
1364	/* FIXME:
1365	 * For state trackers other than OpenGL, we'll want to use
1366	 * _RECIPSQRT_IEEE instead.
1367	 */
1368	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1369
1370	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1371		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1372		if (r)
1373			return r;
1374		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1375		alu.src[i].abs = 1;
1376	}
1377	alu.dst.sel = ctx->temp_reg;
1378	alu.dst.write = 1;
1379	alu.last = 1;
1380	r = r600_bc_add_alu(ctx->bc, &alu);
1381	if (r)
1382		return r;
1383	/* replicate result */
1384	return tgsi_helper_tempx_replicate(ctx);
1385}
1386
1387static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1388{
1389	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1390	struct r600_bc_alu alu;
1391	int i, r;
1392
1393	for (i = 0; i < 4; i++) {
1394		memset(&alu, 0, sizeof(struct r600_bc_alu));
1395		alu.src[0].sel = ctx->temp_reg;
1396		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1397		alu.dst.chan = i;
1398		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1399		if (r)
1400			return r;
1401		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1402		if (i == 3)
1403			alu.last = 1;
1404		r = r600_bc_add_alu(ctx->bc, &alu);
1405		if (r)
1406			return r;
1407	}
1408	return 0;
1409}
1410
1411static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1412{
1413	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1414	struct r600_bc_alu alu;
1415	int i, r;
1416
1417	memset(&alu, 0, sizeof(struct r600_bc_alu));
1418	alu.inst = ctx->inst_info->r600_opcode;
1419	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1420		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1421		if (r)
1422			return r;
1423		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1424	}
1425	alu.dst.sel = ctx->temp_reg;
1426	alu.dst.write = 1;
1427	alu.last = 1;
1428	r = r600_bc_add_alu(ctx->bc, &alu);
1429	if (r)
1430		return r;
1431	/* replicate result */
1432	return tgsi_helper_tempx_replicate(ctx);
1433}
1434
1435static int tgsi_pow(struct r600_shader_ctx *ctx)
1436{
1437	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1438	struct r600_bc_alu alu;
1439	int r;
1440
1441	/* LOG2(a) */
1442	memset(&alu, 0, sizeof(struct r600_bc_alu));
1443	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1444	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1445	if (r)
1446		return r;
1447	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1448	alu.dst.sel = ctx->temp_reg;
1449	alu.dst.write = 1;
1450	alu.last = 1;
1451	r = r600_bc_add_alu(ctx->bc, &alu);
1452	if (r)
1453		return r;
1454	/* b * LOG2(a) */
1455	memset(&alu, 0, sizeof(struct r600_bc_alu));
1456	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1457	r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1458	if (r)
1459		return r;
1460	alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1461	alu.src[1].sel = ctx->temp_reg;
1462	alu.dst.sel = ctx->temp_reg;
1463	alu.dst.write = 1;
1464	alu.last = 1;
1465	r = r600_bc_add_alu(ctx->bc, &alu);
1466	if (r)
1467		return r;
1468	/* POW(a,b) = EXP2(b * LOG2(a))*/
1469	memset(&alu, 0, sizeof(struct r600_bc_alu));
1470	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1471	alu.src[0].sel = ctx->temp_reg;
1472	alu.dst.sel = ctx->temp_reg;
1473	alu.dst.write = 1;
1474	alu.last = 1;
1475	r = r600_bc_add_alu(ctx->bc, &alu);
1476	if (r)
1477		return r;
1478	return tgsi_helper_tempx_replicate(ctx);
1479}
1480
1481static int tgsi_ssg(struct r600_shader_ctx *ctx)
1482{
1483	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1484	struct r600_bc_alu alu;
1485	struct r600_bc_alu_src r600_src[3];
1486	int i, r;
1487
1488	r = tgsi_split_constant(ctx, r600_src);
1489	if (r)
1490		return r;
1491	r = tgsi_split_literal_constant(ctx, r600_src);
1492	if (r)
1493		return r;
1494
1495	/* tmp = (src > 0 ? 1 : src) */
1496	for (i = 0; i < 4; i++) {
1497		memset(&alu, 0, sizeof(struct r600_bc_alu));
1498		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1499		alu.is_op3 = 1;
1500
1501		alu.dst.sel = ctx->temp_reg;
1502		alu.dst.chan = i;
1503
1504		alu.src[0] = r600_src[0];
1505		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1506
1507		alu.src[1].sel = V_SQ_ALU_SRC_1;
1508
1509		alu.src[2] = r600_src[0];
1510		alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1511		if (i == 3)
1512			alu.last = 1;
1513		r = r600_bc_add_alu(ctx->bc, &alu);
1514		if (r)
1515			return r;
1516	}
1517
1518	/* dst = (-tmp > 0 ? -1 : tmp) */
1519	for (i = 0; i < 4; i++) {
1520		memset(&alu, 0, sizeof(struct r600_bc_alu));
1521		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1522		alu.is_op3 = 1;
1523		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1524		if (r)
1525			return r;
1526
1527		alu.src[0].sel = ctx->temp_reg;
1528		alu.src[0].chan = i;
1529		alu.src[0].neg = 1;
1530
1531		alu.src[1].sel = V_SQ_ALU_SRC_1;
1532		alu.src[1].neg = 1;
1533
1534		alu.src[2].sel = ctx->temp_reg;
1535		alu.src[2].chan = i;
1536
1537		if (i == 3)
1538			alu.last = 1;
1539		r = r600_bc_add_alu(ctx->bc, &alu);
1540		if (r)
1541			return r;
1542	}
1543	return 0;
1544}
1545
1546static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1547{
1548	struct r600_bc_alu alu;
1549	int i, r;
1550
1551	for (i = 0; i < 4; i++) {
1552		memset(&alu, 0, sizeof(struct r600_bc_alu));
1553		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1554			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1555			alu.dst.chan = i;
1556		} else {
1557			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1558			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1559			if (r)
1560				return r;
1561			alu.src[0].sel = ctx->temp_reg;
1562			alu.src[0].chan = i;
1563		}
1564		if (i == 3) {
1565			alu.last = 1;
1566		}
1567		r = r600_bc_add_alu(ctx->bc, &alu);
1568		if (r)
1569			return r;
1570	}
1571	return 0;
1572}
1573
1574static int tgsi_op3(struct r600_shader_ctx *ctx)
1575{
1576	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1577	struct r600_bc_alu_src r600_src[3];
1578	struct r600_bc_alu alu;
1579	int i, j, r;
1580	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1581
1582	r = tgsi_split_constant(ctx, r600_src);
1583	if (r)
1584		return r;
1585	r = tgsi_split_literal_constant(ctx, r600_src);
1586	if (r)
1587		return r;
1588	for (i = 0; i < lasti + 1; i++) {
1589		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1590			continue;
1591
1592		memset(&alu, 0, sizeof(struct r600_bc_alu));
1593		alu.inst = ctx->inst_info->r600_opcode;
1594		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1595			alu.src[j] = r600_src[j];
1596			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1597		}
1598
1599		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1600		if (r)
1601			return r;
1602
1603		alu.dst.chan = i;
1604		alu.dst.write = 1;
1605		alu.is_op3 = 1;
1606		if (i == lasti) {
1607			alu.last = 1;
1608		}
1609		r = r600_bc_add_alu(ctx->bc, &alu);
1610		if (r)
1611			return r;
1612	}
1613	return 0;
1614}
1615
1616static int tgsi_dp(struct r600_shader_ctx *ctx)
1617{
1618	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1619	struct r600_bc_alu_src r600_src[3];
1620	struct r600_bc_alu alu;
1621	int i, j, r;
1622
1623	r = tgsi_split_constant(ctx, r600_src);
1624	if (r)
1625		return r;
1626	r = tgsi_split_literal_constant(ctx, r600_src);
1627	if (r)
1628		return r;
1629	for (i = 0; i < 4; i++) {
1630		memset(&alu, 0, sizeof(struct r600_bc_alu));
1631		alu.inst = ctx->inst_info->r600_opcode;
1632		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1633			alu.src[j] = r600_src[j];
1634			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1635		}
1636
1637		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1638		if (r)
1639			return r;
1640
1641		alu.dst.chan = i;
1642		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1643		/* handle some special cases */
1644		switch (ctx->inst_info->tgsi_opcode) {
1645		case TGSI_OPCODE_DP2:
1646			if (i > 1) {
1647				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1648				alu.src[0].chan = alu.src[1].chan = 0;
1649			}
1650			break;
1651		case TGSI_OPCODE_DP3:
1652			if (i > 2) {
1653				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1654				alu.src[0].chan = alu.src[1].chan = 0;
1655			}
1656			break;
1657		case TGSI_OPCODE_DPH:
1658			if (i == 3) {
1659				alu.src[0].sel = V_SQ_ALU_SRC_1;
1660				alu.src[0].chan = 0;
1661				alu.src[0].neg = 0;
1662			}
1663			break;
1664		default:
1665			break;
1666		}
1667		if (i == 3) {
1668			alu.last = 1;
1669		}
1670		r = r600_bc_add_alu(ctx->bc, &alu);
1671		if (r)
1672			return r;
1673	}
1674	return 0;
1675}
1676
1677static int tgsi_tex(struct r600_shader_ctx *ctx)
1678{
1679	static float one_point_five = 1.5f;
1680	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1681	struct r600_bc_tex tex;
1682	struct r600_bc_alu alu;
1683	unsigned src_gpr;
1684	int r, i;
1685	int opcode;
1686	boolean src_not_temp =
1687		inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&
1688		inst->Src[0].Register.File != TGSI_FILE_INPUT;
1689
1690	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1691
1692	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1693		/* Add perspective divide */
1694		memset(&alu, 0, sizeof(struct r600_bc_alu));
1695		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1696		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1697		if (r)
1698			return r;
1699
1700		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1701		alu.dst.sel = ctx->temp_reg;
1702		alu.dst.chan = 3;
1703		alu.last = 1;
1704		alu.dst.write = 1;
1705		r = r600_bc_add_alu(ctx->bc, &alu);
1706		if (r)
1707			return r;
1708
1709		for (i = 0; i < 3; i++) {
1710			memset(&alu, 0, sizeof(struct r600_bc_alu));
1711			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1712			alu.src[0].sel = ctx->temp_reg;
1713			alu.src[0].chan = 3;
1714			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1715			if (r)
1716				return r;
1717			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1718			alu.dst.sel = ctx->temp_reg;
1719			alu.dst.chan = i;
1720			alu.dst.write = 1;
1721			r = r600_bc_add_alu(ctx->bc, &alu);
1722			if (r)
1723				return r;
1724		}
1725		memset(&alu, 0, sizeof(struct r600_bc_alu));
1726		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1727		alu.src[0].sel = V_SQ_ALU_SRC_1;
1728		alu.src[0].chan = 0;
1729		alu.dst.sel = ctx->temp_reg;
1730		alu.dst.chan = 3;
1731		alu.last = 1;
1732		alu.dst.write = 1;
1733		r = r600_bc_add_alu(ctx->bc, &alu);
1734		if (r)
1735			return r;
1736		src_not_temp = FALSE;
1737		src_gpr = ctx->temp_reg;
1738	}
1739
1740	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1741		int src_chan, src2_chan;
1742
1743		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1744		for (i = 0; i < 4; i++) {
1745			memset(&alu, 0, sizeof(struct r600_bc_alu));
1746			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1747			switch (i) {
1748			case 0:
1749				src_chan = 2;
1750				src2_chan = 1;
1751				break;
1752			case 1:
1753				src_chan = 2;
1754				src2_chan = 0;
1755				break;
1756			case 2:
1757				src_chan = 0;
1758				src2_chan = 2;
1759				break;
1760			case 3:
1761				src_chan = 1;
1762				src2_chan = 2;
1763				break;
1764			default:
1765				assert(0);
1766				src_chan = 0;
1767				src2_chan = 0;
1768				break;
1769			}
1770			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1771			if (r)
1772				return r;
1773			alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
1774			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1775			if (r)
1776				return r;
1777			alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
1778			alu.dst.sel = ctx->temp_reg;
1779			alu.dst.chan = i;
1780			if (i == 3)
1781				alu.last = 1;
1782			alu.dst.write = 1;
1783			r = r600_bc_add_alu(ctx->bc, &alu);
1784			if (r)
1785				return r;
1786		}
1787
1788		/* tmp1.z = RCP_e(|tmp1.z|) */
1789		memset(&alu, 0, sizeof(struct r600_bc_alu));
1790		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1791		alu.src[0].sel = ctx->temp_reg;
1792		alu.src[0].chan = 2;
1793		alu.src[0].abs = 1;
1794		alu.dst.sel = ctx->temp_reg;
1795		alu.dst.chan = 2;
1796		alu.dst.write = 1;
1797		alu.last = 1;
1798		r = r600_bc_add_alu(ctx->bc, &alu);
1799		if (r)
1800			return r;
1801
1802		/* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
1803		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
1804		 * muladd has no writemask, have to use another temp
1805		 */
1806		memset(&alu, 0, sizeof(struct r600_bc_alu));
1807		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1808		alu.is_op3 = 1;
1809
1810		alu.src[0].sel = ctx->temp_reg;
1811		alu.src[0].chan = 0;
1812		alu.src[1].sel = ctx->temp_reg;
1813		alu.src[1].chan = 2;
1814
1815		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1816		alu.src[2].chan = 0;
1817		alu.src[2].value = (u32*)&one_point_five;
1818
1819		alu.dst.sel = ctx->temp_reg;
1820		alu.dst.chan = 0;
1821		alu.dst.write = 1;
1822
1823		r = r600_bc_add_alu(ctx->bc, &alu);
1824		if (r)
1825			return r;
1826
1827		memset(&alu, 0, sizeof(struct r600_bc_alu));
1828		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1829		alu.is_op3 = 1;
1830
1831		alu.src[0].sel = ctx->temp_reg;
1832		alu.src[0].chan = 1;
1833		alu.src[1].sel = ctx->temp_reg;
1834		alu.src[1].chan = 2;
1835
1836		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1837		alu.src[2].chan = 0;
1838		alu.src[2].value = (u32*)&one_point_five;
1839
1840		alu.dst.sel = ctx->temp_reg;
1841		alu.dst.chan = 1;
1842		alu.dst.write = 1;
1843
1844		alu.last = 1;
1845		r = r600_bc_add_alu(ctx->bc, &alu);
1846		if (r)
1847			return r;
1848
1849		src_not_temp = FALSE;
1850		src_gpr = ctx->temp_reg;
1851	}
1852
1853	if (src_not_temp) {
1854		for (i = 0; i < 4; i++) {
1855			memset(&alu, 0, sizeof(struct r600_bc_alu));
1856			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1857			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1858			if (r)
1859				return r;
1860			alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1861			alu.dst.sel = ctx->temp_reg;
1862			alu.dst.chan = i;
1863			if (i == 3)
1864				alu.last = 1;
1865			alu.dst.write = 1;
1866			r = r600_bc_add_alu(ctx->bc, &alu);
1867			if (r)
1868				return r;
1869		}
1870		src_gpr = ctx->temp_reg;
1871	}
1872
1873	opcode = ctx->inst_info->r600_opcode;
1874	if (opcode == SQ_TEX_INST_SAMPLE &&
1875	    (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
1876		opcode = SQ_TEX_INST_SAMPLE_C;
1877
1878	memset(&tex, 0, sizeof(struct r600_bc_tex));
1879	tex.inst = opcode;
1880	tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1881	tex.resource_id = tex.sampler_id;
1882	tex.src_gpr = src_gpr;
1883	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1884	tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
1885	tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
1886	tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
1887	tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
1888	tex.src_sel_x = 0;
1889	tex.src_sel_y = 1;
1890	tex.src_sel_z = 2;
1891	tex.src_sel_w = 3;
1892
1893	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1894		tex.src_sel_x = 1;
1895		tex.src_sel_y = 0;
1896		tex.src_sel_z = 3;
1897		tex.src_sel_w = 1;
1898	}
1899
1900	if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1901		tex.coord_type_x = 1;
1902		tex.coord_type_y = 1;
1903		tex.coord_type_z = 1;
1904		tex.coord_type_w = 1;
1905	}
1906
1907	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
1908		tex.src_sel_w = 2;
1909
1910	r = r600_bc_add_tex(ctx->bc, &tex);
1911	if (r)
1912		return r;
1913
1914	/* add shadow ambient support  - gallium doesn't do it yet */
1915	return 0;
1916}
1917
1918static int tgsi_lrp(struct r600_shader_ctx *ctx)
1919{
1920	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1921	struct r600_bc_alu_src r600_src[3];
1922	struct r600_bc_alu alu;
1923	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1924	unsigned i;
1925	int r;
1926
1927	r = tgsi_split_constant(ctx, r600_src);
1928	if (r)
1929		return r;
1930	r = tgsi_split_literal_constant(ctx, r600_src);
1931	if (r)
1932		return r;
1933
1934	/* optimize if it's just an equal balance */
1935	if(r600_src[0].sel == V_SQ_ALU_SRC_0_5) {
1936		for (i = 0; i < lasti + 1; i++) {
1937			if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1938				continue;
1939
1940			memset(&alu, 0, sizeof(struct r600_bc_alu));
1941			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1942			alu.src[0] = r600_src[1];
1943			alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
1944			alu.src[1] = r600_src[2];
1945			alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1946			alu.omod = 3;
1947			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1948			if (r)
1949				return r;
1950
1951			alu.dst.chan = i;
1952			if (i == lasti) {
1953				alu.last = 1;
1954			}
1955			r = r600_bc_add_alu(ctx->bc, &alu);
1956			if (r)
1957				return r;
1958		}
1959		return 0;
1960	}
1961
1962	/* 1 - src0 */
1963	for (i = 0; i < lasti + 1; i++) {
1964		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1965			continue;
1966
1967		memset(&alu, 0, sizeof(struct r600_bc_alu));
1968		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1969		alu.src[0].sel = V_SQ_ALU_SRC_1;
1970		alu.src[0].chan = 0;
1971		alu.src[1] = r600_src[0];
1972		alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1973		alu.src[1].neg = 1;
1974		alu.dst.sel = ctx->temp_reg;
1975		alu.dst.chan = i;
1976		if (i == lasti) {
1977			alu.last = 1;
1978		}
1979		alu.dst.write = 1;
1980		r = r600_bc_add_alu(ctx->bc, &alu);
1981		if (r)
1982			return r;
1983	}
1984
1985	/* (1 - src0) * src2 */
1986	for (i = 0; i < lasti + 1; i++) {
1987		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1988			continue;
1989
1990		memset(&alu, 0, sizeof(struct r600_bc_alu));
1991		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1992		alu.src[0].sel = ctx->temp_reg;
1993		alu.src[0].chan = i;
1994		alu.src[1] = r600_src[2];
1995		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1996		alu.dst.sel = ctx->temp_reg;
1997		alu.dst.chan = i;
1998		if (i == lasti) {
1999			alu.last = 1;
2000		}
2001		alu.dst.write = 1;
2002		r = r600_bc_add_alu(ctx->bc, &alu);
2003		if (r)
2004			return r;
2005	}
2006
2007	/* src0 * src1 + (1 - src0) * src2 */
2008	for (i = 0; i < lasti + 1; i++) {
2009		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2010			continue;
2011
2012		memset(&alu, 0, sizeof(struct r600_bc_alu));
2013		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2014		alu.is_op3 = 1;
2015		alu.src[0] = r600_src[0];
2016		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2017		alu.src[1] = r600_src[1];
2018		alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2019		alu.src[2].sel = ctx->temp_reg;
2020		alu.src[2].chan = i;
2021
2022		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2023		if (r)
2024			return r;
2025
2026		alu.dst.chan = i;
2027		if (i == lasti) {
2028			alu.last = 1;
2029		}
2030		r = r600_bc_add_alu(ctx->bc, &alu);
2031		if (r)
2032			return r;
2033	}
2034	return 0;
2035}
2036
2037static int tgsi_cmp(struct r600_shader_ctx *ctx)
2038{
2039	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2040	struct r600_bc_alu_src r600_src[3];
2041	struct r600_bc_alu alu;
2042	int i, r;
2043	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2044
2045	r = tgsi_split_constant(ctx, r600_src);
2046	if (r)
2047		return r;
2048	r = tgsi_split_literal_constant(ctx, r600_src);
2049	if (r)
2050		return r;
2051
2052	for (i = 0; i < lasti + 1; i++) {
2053		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2054			continue;
2055
2056		memset(&alu, 0, sizeof(struct r600_bc_alu));
2057		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
2058		alu.src[0] = r600_src[0];
2059		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2060
2061		alu.src[1] = r600_src[2];
2062		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
2063
2064		alu.src[2] = r600_src[1];
2065		alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
2066
2067		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2068		if (r)
2069			return r;
2070
2071		alu.dst.chan = i;
2072		alu.dst.write = 1;
2073		alu.is_op3 = 1;
2074		if (i == lasti)
2075			alu.last = 1;
2076		r = r600_bc_add_alu(ctx->bc, &alu);
2077		if (r)
2078			return r;
2079	}
2080	return 0;
2081}
2082
2083static int tgsi_xpd(struct r600_shader_ctx *ctx)
2084{
2085	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2086	struct r600_bc_alu_src r600_src[3];
2087	struct r600_bc_alu alu;
2088	uint32_t use_temp = 0;
2089	int i, r;
2090
2091	if (inst->Dst[0].Register.WriteMask != 0xf)
2092		use_temp = 1;
2093
2094	r = tgsi_split_constant(ctx, r600_src);
2095	if (r)
2096		return r;
2097	r = tgsi_split_literal_constant(ctx, r600_src);
2098	if (r)
2099		return r;
2100
2101	for (i = 0; i < 4; i++) {
2102		memset(&alu, 0, sizeof(struct r600_bc_alu));
2103		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2104
2105		alu.src[0] = r600_src[0];
2106		switch (i) {
2107		case 0:
2108			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2109			break;
2110		case 1:
2111			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2112			break;
2113		case 2:
2114			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2115			break;
2116		case 3:
2117			alu.src[0].sel = V_SQ_ALU_SRC_0;
2118			alu.src[0].chan = i;
2119		}
2120
2121		alu.src[1] = r600_src[1];
2122		switch (i) {
2123		case 0:
2124			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2125			break;
2126		case 1:
2127			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2128			break;
2129		case 2:
2130			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2131			break;
2132		case 3:
2133			alu.src[1].sel = V_SQ_ALU_SRC_0;
2134			alu.src[1].chan = i;
2135		}
2136
2137		alu.dst.sel = ctx->temp_reg;
2138		alu.dst.chan = i;
2139		alu.dst.write = 1;
2140
2141		if (i == 3)
2142			alu.last = 1;
2143		r = r600_bc_add_alu(ctx->bc, &alu);
2144		if (r)
2145			return r;
2146	}
2147
2148	for (i = 0; i < 4; i++) {
2149		memset(&alu, 0, sizeof(struct r600_bc_alu));
2150		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2151
2152		alu.src[0] = r600_src[0];
2153		switch (i) {
2154		case 0:
2155			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2156			break;
2157		case 1:
2158			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2159			break;
2160		case 2:
2161			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2162			break;
2163		case 3:
2164			alu.src[0].sel = V_SQ_ALU_SRC_0;
2165			alu.src[0].chan = i;
2166		}
2167
2168		alu.src[1] = r600_src[1];
2169		switch (i) {
2170		case 0:
2171			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2172			break;
2173		case 1:
2174			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2175			break;
2176		case 2:
2177			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2178			break;
2179		case 3:
2180			alu.src[1].sel = V_SQ_ALU_SRC_0;
2181			alu.src[1].chan = i;
2182		}
2183
2184		alu.src[2].sel = ctx->temp_reg;
2185		alu.src[2].neg = 1;
2186		alu.src[2].chan = i;
2187
2188		if (use_temp)
2189			alu.dst.sel = ctx->temp_reg;
2190		else {
2191			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2192			if (r)
2193				return r;
2194		}
2195		alu.dst.chan = i;
2196		alu.dst.write = 1;
2197		alu.is_op3 = 1;
2198		if (i == 3)
2199			alu.last = 1;
2200		r = r600_bc_add_alu(ctx->bc, &alu);
2201		if (r)
2202			return r;
2203	}
2204	if (use_temp)
2205		return tgsi_helper_copy(ctx, inst);
2206	return 0;
2207}
2208
2209static int tgsi_exp(struct r600_shader_ctx *ctx)
2210{
2211	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2212	struct r600_bc_alu_src r600_src[3] = { { 0 } };
2213	struct r600_bc_alu alu;
2214	int r;
2215
2216	/* result.x = 2^floor(src); */
2217	if (inst->Dst[0].Register.WriteMask & 1) {
2218		memset(&alu, 0, sizeof(struct r600_bc_alu));
2219
2220		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2221		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2222		if (r)
2223			return r;
2224
2225		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2226
2227		alu.dst.sel = ctx->temp_reg;
2228		alu.dst.chan = 0;
2229		alu.dst.write = 1;
2230		alu.last = 1;
2231		r = r600_bc_add_alu(ctx->bc, &alu);
2232		if (r)
2233			return r;
2234
2235		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2236		alu.src[0].sel = ctx->temp_reg;
2237		alu.src[0].chan = 0;
2238
2239		alu.dst.sel = ctx->temp_reg;
2240		alu.dst.chan = 0;
2241		alu.dst.write = 1;
2242		alu.last = 1;
2243		r = r600_bc_add_alu(ctx->bc, &alu);
2244		if (r)
2245			return r;
2246	}
2247
2248	/* result.y = tmp - floor(tmp); */
2249	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2250		memset(&alu, 0, sizeof(struct r600_bc_alu));
2251
2252		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2253		alu.src[0] = r600_src[0];
2254		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2255		if (r)
2256			return r;
2257		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2258
2259		alu.dst.sel = ctx->temp_reg;
2260//		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2261//		if (r)
2262//			return r;
2263		alu.dst.write = 1;
2264		alu.dst.chan = 1;
2265
2266		alu.last = 1;
2267
2268		r = r600_bc_add_alu(ctx->bc, &alu);
2269		if (r)
2270			return r;
2271	}
2272
2273	/* result.z = RoughApprox2ToX(tmp);*/
2274	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2275		memset(&alu, 0, sizeof(struct r600_bc_alu));
2276		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2277		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2278		if (r)
2279			return r;
2280		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2281
2282		alu.dst.sel = ctx->temp_reg;
2283		alu.dst.write = 1;
2284		alu.dst.chan = 2;
2285
2286		alu.last = 1;
2287
2288		r = r600_bc_add_alu(ctx->bc, &alu);
2289		if (r)
2290			return r;
2291	}
2292
2293	/* result.w = 1.0;*/
2294	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2295		memset(&alu, 0, sizeof(struct r600_bc_alu));
2296
2297		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2298		alu.src[0].sel = V_SQ_ALU_SRC_1;
2299		alu.src[0].chan = 0;
2300
2301		alu.dst.sel = ctx->temp_reg;
2302		alu.dst.chan = 3;
2303		alu.dst.write = 1;
2304		alu.last = 1;
2305		r = r600_bc_add_alu(ctx->bc, &alu);
2306		if (r)
2307			return r;
2308	}
2309	return tgsi_helper_copy(ctx, inst);
2310}
2311
2312static int tgsi_log(struct r600_shader_ctx *ctx)
2313{
2314	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2315	struct r600_bc_alu alu;
2316	int r;
2317
2318	/* result.x = floor(log2(src)); */
2319	if (inst->Dst[0].Register.WriteMask & 1) {
2320		memset(&alu, 0, sizeof(struct r600_bc_alu));
2321
2322		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2323		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2324		if (r)
2325			return r;
2326
2327		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2328
2329		alu.dst.sel = ctx->temp_reg;
2330		alu.dst.chan = 0;
2331		alu.dst.write = 1;
2332		alu.last = 1;
2333		r = r600_bc_add_alu(ctx->bc, &alu);
2334		if (r)
2335			return r;
2336
2337		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2338		alu.src[0].sel = ctx->temp_reg;
2339		alu.src[0].chan = 0;
2340
2341		alu.dst.sel = ctx->temp_reg;
2342		alu.dst.chan = 0;
2343		alu.dst.write = 1;
2344		alu.last = 1;
2345
2346		r = r600_bc_add_alu(ctx->bc, &alu);
2347		if (r)
2348			return r;
2349	}
2350
2351	/* result.y = src.x / (2 ^ floor(log2(src.x))); */
2352	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2353		memset(&alu, 0, sizeof(struct r600_bc_alu));
2354
2355		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2356		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2357		if (r)
2358			return r;
2359
2360		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2361
2362		alu.dst.sel = ctx->temp_reg;
2363		alu.dst.chan = 1;
2364		alu.dst.write = 1;
2365		alu.last = 1;
2366
2367		r = r600_bc_add_alu(ctx->bc, &alu);
2368		if (r)
2369			return r;
2370
2371		memset(&alu, 0, sizeof(struct r600_bc_alu));
2372
2373		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2374		alu.src[0].sel = ctx->temp_reg;
2375		alu.src[0].chan = 1;
2376
2377		alu.dst.sel = ctx->temp_reg;
2378		alu.dst.chan = 1;
2379		alu.dst.write = 1;
2380		alu.last = 1;
2381
2382		r = r600_bc_add_alu(ctx->bc, &alu);
2383		if (r)
2384			return r;
2385
2386		memset(&alu, 0, sizeof(struct r600_bc_alu));
2387
2388		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2389		alu.src[0].sel = ctx->temp_reg;
2390		alu.src[0].chan = 1;
2391
2392		alu.dst.sel = ctx->temp_reg;
2393		alu.dst.chan = 1;
2394		alu.dst.write = 1;
2395		alu.last = 1;
2396
2397		r = r600_bc_add_alu(ctx->bc, &alu);
2398		if (r)
2399			return r;
2400
2401		memset(&alu, 0, sizeof(struct r600_bc_alu));
2402
2403		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2404		alu.src[0].sel = ctx->temp_reg;
2405		alu.src[0].chan = 1;
2406
2407		alu.dst.sel = ctx->temp_reg;
2408		alu.dst.chan = 1;
2409		alu.dst.write = 1;
2410		alu.last = 1;
2411
2412		r = r600_bc_add_alu(ctx->bc, &alu);
2413		if (r)
2414			return r;
2415
2416		memset(&alu, 0, sizeof(struct r600_bc_alu));
2417
2418		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2419
2420		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2421		if (r)
2422			return r;
2423
2424		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2425
2426		alu.src[1].sel = ctx->temp_reg;
2427		alu.src[1].chan = 1;
2428
2429		alu.dst.sel = ctx->temp_reg;
2430		alu.dst.chan = 1;
2431		alu.dst.write = 1;
2432		alu.last = 1;
2433
2434		r = r600_bc_add_alu(ctx->bc, &alu);
2435		if (r)
2436			return r;
2437	}
2438
2439	/* result.z = log2(src);*/
2440	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2441		memset(&alu, 0, sizeof(struct r600_bc_alu));
2442
2443		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2444		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2445		if (r)
2446			return r;
2447
2448		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2449
2450		alu.dst.sel = ctx->temp_reg;
2451		alu.dst.write = 1;
2452		alu.dst.chan = 2;
2453		alu.last = 1;
2454
2455		r = r600_bc_add_alu(ctx->bc, &alu);
2456		if (r)
2457			return r;
2458	}
2459
2460	/* result.w = 1.0; */
2461	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2462		memset(&alu, 0, sizeof(struct r600_bc_alu));
2463
2464		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2465		alu.src[0].sel = V_SQ_ALU_SRC_1;
2466		alu.src[0].chan = 0;
2467
2468		alu.dst.sel = ctx->temp_reg;
2469		alu.dst.chan = 3;
2470		alu.dst.write = 1;
2471		alu.last = 1;
2472
2473		r = r600_bc_add_alu(ctx->bc, &alu);
2474		if (r)
2475			return r;
2476	}
2477
2478	return tgsi_helper_copy(ctx, inst);
2479}
2480
2481static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2482{
2483	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2484	struct r600_bc_alu alu;
2485	int r;
2486	memset(&alu, 0, sizeof(struct r600_bc_alu));
2487
2488	switch (inst->Instruction.Opcode) {
2489	case TGSI_OPCODE_ARL:
2490		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2491		break;
2492	case TGSI_OPCODE_ARR:
2493		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2494		break;
2495	default:
2496		assert(0);
2497		return -1;
2498	}
2499
2500	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2501	if (r)
2502		return r;
2503	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2504	alu.last = 1;
2505	alu.dst.chan = 0;
2506	alu.dst.sel = ctx->temp_reg;
2507	alu.dst.write = 1;
2508	r = r600_bc_add_alu(ctx->bc, &alu);
2509	if (r)
2510		return r;
2511	memset(&alu, 0, sizeof(struct r600_bc_alu));
2512	alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2513	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2514	if (r)
2515		return r;
2516	alu.src[0].sel = ctx->temp_reg;
2517	alu.src[0].chan = 0;
2518	alu.last = 1;
2519	r = r600_bc_add_alu(ctx->bc, &alu);
2520	if (r)
2521		return r;
2522	return 0;
2523}
2524static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2525{
2526	/* TODO from r600c, ar values don't persist between clauses */
2527	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2528	struct r600_bc_alu alu;
2529	int r;
2530	memset(&alu, 0, sizeof(struct r600_bc_alu));
2531
2532	switch (inst->Instruction.Opcode) {
2533	case TGSI_OPCODE_ARL:
2534		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
2535		break;
2536	case TGSI_OPCODE_ARR:
2537		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA;
2538		break;
2539	default:
2540		assert(0);
2541		return -1;
2542	}
2543
2544
2545	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2546	if (r)
2547		return r;
2548	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2549
2550	alu.last = 1;
2551
2552	r = r600_bc_add_alu(ctx->bc, &alu);
2553	if (r)
2554		return r;
2555	ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2556	return 0;
2557}
2558
2559static int tgsi_opdst(struct r600_shader_ctx *ctx)
2560{
2561	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2562	struct r600_bc_alu alu;
2563	int i, r = 0;
2564
2565	for (i = 0; i < 4; i++) {
2566		memset(&alu, 0, sizeof(struct r600_bc_alu));
2567
2568		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2569		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2570		if (r)
2571			return r;
2572
2573		if (i == 0 || i == 3) {
2574			alu.src[0].sel = V_SQ_ALU_SRC_1;
2575		} else {
2576			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2577			if (r)
2578				return r;
2579			alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2580		}
2581
2582	        if (i == 0 || i == 2) {
2583			alu.src[1].sel = V_SQ_ALU_SRC_1;
2584		} else {
2585			r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
2586			if (r)
2587				return r;
2588			alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2589		}
2590		if (i == 3)
2591			alu.last = 1;
2592		r = r600_bc_add_alu(ctx->bc, &alu);
2593		if (r)
2594			return r;
2595	}
2596	return 0;
2597}
2598
2599static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2600{
2601	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2602	struct r600_bc_alu alu;
2603	int r;
2604
2605	memset(&alu, 0, sizeof(struct r600_bc_alu));
2606	alu.inst = opcode;
2607	alu.predicate = 1;
2608
2609	alu.dst.sel = ctx->temp_reg;
2610	alu.dst.write = 1;
2611	alu.dst.chan = 0;
2612
2613	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2614	if (r)
2615		return r;
2616	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2617	alu.src[1].sel = V_SQ_ALU_SRC_0;
2618	alu.src[1].chan = 0;
2619
2620	alu.last = 1;
2621
2622	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2623	if (r)
2624		return r;
2625	return 0;
2626}
2627
2628static int pops(struct r600_shader_ctx *ctx, int pops)
2629{
2630	int alu_pop = 3;
2631	if (ctx->bc->cf_last) {
2632		if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
2633			alu_pop = 0;
2634		else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
2635			alu_pop = 1;
2636	}
2637	alu_pop += pops;
2638	if (alu_pop == 1) {
2639		ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
2640		ctx->bc->force_add_cf = 1;
2641	} else if (alu_pop == 2) {
2642		ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
2643		ctx->bc->force_add_cf = 1;
2644	} else {
2645		r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2646		ctx->bc->cf_last->pop_count = pops;
2647		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
2648	}
2649	return 0;
2650}
2651
2652static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2653{
2654	switch(reason) {
2655	case FC_PUSH_VPM:
2656		ctx->bc->callstack[ctx->bc->call_sp].current--;
2657		break;
2658	case FC_PUSH_WQM:
2659	case FC_LOOP:
2660		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2661		break;
2662	case FC_REP:
2663		/* TOODO : for 16 vp asic should -= 2; */
2664		ctx->bc->callstack[ctx->bc->call_sp].current --;
2665		break;
2666	}
2667}
2668
2669static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2670{
2671	if (check_max_only) {
2672		int diff;
2673		switch (reason) {
2674		case FC_PUSH_VPM:
2675			diff = 1;
2676			break;
2677		case FC_PUSH_WQM:
2678			diff = 4;
2679			break;
2680		default:
2681			assert(0);
2682			diff = 0;
2683		}
2684		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2685		    ctx->bc->callstack[ctx->bc->call_sp].max) {
2686			ctx->bc->callstack[ctx->bc->call_sp].max =
2687				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2688		}
2689		return;
2690	}
2691	switch (reason) {
2692	case FC_PUSH_VPM:
2693		ctx->bc->callstack[ctx->bc->call_sp].current++;
2694		break;
2695	case FC_PUSH_WQM:
2696	case FC_LOOP:
2697		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2698		break;
2699	case FC_REP:
2700		ctx->bc->callstack[ctx->bc->call_sp].current++;
2701		break;
2702	}
2703
2704	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2705	    ctx->bc->callstack[ctx->bc->call_sp].max) {
2706		ctx->bc->callstack[ctx->bc->call_sp].max =
2707			ctx->bc->callstack[ctx->bc->call_sp].current;
2708	}
2709}
2710
2711static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2712{
2713	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2714
2715	sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2716						sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2717	sp->mid[sp->num_mid] = ctx->bc->cf_last;
2718	sp->num_mid++;
2719}
2720
2721static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2722{
2723	ctx->bc->fc_sp++;
2724	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2725	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2726}
2727
2728static void fc_poplevel(struct r600_shader_ctx *ctx)
2729{
2730	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2731	if (sp->mid) {
2732		free(sp->mid);
2733		sp->mid = NULL;
2734	}
2735	sp->num_mid = 0;
2736	sp->start = NULL;
2737	sp->type = 0;
2738	ctx->bc->fc_sp--;
2739}
2740
/*
 * Everything between this #if 0 and the matching #endif is compiled out.
 * It is unfinished scaffolding for RETURN handling and flag-driven loop
 * exits; several helpers are empty stubs.
 */
#if 0
/* Append a RETURN CF instruction; always reports success. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/* Append a JUMP CF instruction with the given pop count; `offset` is not used yet. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: emits nothing and returns 0. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

/* Sketch: test the flag, jump, clear the flag, pop ifidx + 1 levels, return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Sketch: test the flag, append the CF instruction named by the current
 * instruction's r600_opcode with pop_count 1, record it on level fc_sp for
 * later patching, then pop one level.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2788
2789static int tgsi_if(struct r600_shader_ctx *ctx)
2790{
2791	emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2792
2793	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2794
2795	fc_pushlevel(ctx, FC_IF);
2796
2797	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2798	return 0;
2799}
2800
2801static int tgsi_else(struct r600_shader_ctx *ctx)
2802{
2803	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2804	ctx->bc->cf_last->pop_count = 1;
2805
2806	fc_set_mid(ctx, ctx->bc->fc_sp);
2807	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2808	return 0;
2809}
2810
2811static int tgsi_endif(struct r600_shader_ctx *ctx)
2812{
2813	pops(ctx, 1);
2814	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2815		R600_ERR("if/endif unbalanced in shader\n");
2816		return -1;
2817	}
2818
2819	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2820		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2821		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2822	} else {
2823		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2824	}
2825	fc_poplevel(ctx);
2826
2827	callstack_decrease_current(ctx, FC_PUSH_VPM);
2828	return 0;
2829}
2830
2831static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2832{
2833	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2834
2835	fc_pushlevel(ctx, FC_LOOP);
2836
2837	/* check stack depth */
2838	callstack_check_depth(ctx, FC_LOOP, 0);
2839	return 0;
2840}
2841
2842static int tgsi_endloop(struct r600_shader_ctx *ctx)
2843{
2844	int i;
2845
2846	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
2847
2848	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2849		R600_ERR("loop/endloop in shader code are not paired.\n");
2850		return -EINVAL;
2851	}
2852
2853	/* fixup loop pointers - from r600isa
2854	   LOOP END points to CF after LOOP START,
2855	   LOOP START point to CF after LOOP END
2856	   BRK/CONT point to LOOP END CF
2857	*/
2858	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2859
2860	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2861
2862	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2863		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2864	}
2865	/* TODO add LOOPRET support */
2866	fc_poplevel(ctx);
2867	callstack_decrease_current(ctx, FC_LOOP);
2868	return 0;
2869}
2870
2871static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2872{
2873	unsigned int fscp;
2874
2875	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2876	{
2877		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2878			break;
2879	}
2880
2881	if (fscp == 0) {
2882		R600_ERR("Break not inside loop/endloop pair\n");
2883		return -EINVAL;
2884	}
2885
2886	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2887	ctx->bc->cf_last->pop_count = 1;
2888
2889	fc_set_mid(ctx, fscp);
2890
2891	pops(ctx, 1);
2892	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2893	return 0;
2894}
2895
/*
 * TGSI opcode translation table for r600-class hardware.
 *
 * Each entry lists, in order: the TGSI opcode (or a bare number for an
 * unassigned slot marked "gap"), a flag that is 1 only for the OP3 entry
 * (MAD), the hardware ALU / texture / CF opcode, and the handler that emits
 * the bytecode. Opcodes with no translation point at tgsi_unsupported.
 *
 * NOTE(review): the numeric gap entries suggest the table is indexed
 * directly by TGSI opcode value, so entries must stay in opcode order with
 * no holes -- confirm against the lookup site before reordering.
 */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},

	/* FIXME:
	 * For state trackers other than OpenGL, we'll want to use
	 * _RECIP_IEEE instead.
	 */
	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},

	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
	/* gap */
	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_TXD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
	/* gap */
	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
	/* gap */
	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
	/* gap */
	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
};
3059
3060static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
3061	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3062	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3063	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3064	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
3065	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
3066	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3067	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3068	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3069	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3070	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3071	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3072	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3073	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3074	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3075	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3076	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3077	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3078	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3079	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3080	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3081	/* gap */
3082	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3083	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3084	/* gap */
3085	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3086	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3087	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3088	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3089	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3090	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3091	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3092	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3093	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3094	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3095	/* gap */
3096	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3097	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3098	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3099	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3100	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3101	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3102	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3103	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3104	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3105	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3106	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3107	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3108	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3109	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3110	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3111	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3112	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3113	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3114	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3115	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3116	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3117	{TGSI_OPCODE_TXD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3118	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3119	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3120	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3121	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3122	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3123	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3124	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3125	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3126	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3127	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3128	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3129	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3130	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3131	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3132	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3133	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3134	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3135	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3136	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3137	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3138	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3139	/* gap */
3140	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3141	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3142	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3143	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3144	/* gap */
3145	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3146	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3147	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3148	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3149	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3150	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3151	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3152	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
3153	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3154	/* gap */
3155	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3156	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3157	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3158	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3159	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3160	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3161	{TGSI_OPCODE_TXF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3162	{TGSI_OPCODE_TXQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3163	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3164	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3165	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3166	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3167	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3168	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3169	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3170	/* gap */
3171	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3172	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3173	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3174	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3175	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3176	/* gap */
3177	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3178	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3179	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3180	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3181	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3182	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3183	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3184	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3185	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3186	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3187	/* gap */
3188	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3189	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3190	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3191	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3192	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3193	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3194	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3195	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3196	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3197	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3198	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3199	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3200	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3201	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3202	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3203	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3204	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3205	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3206	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3207	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3208	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3209	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3210	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3211	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3212	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3213	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3214	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3215	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3216};
3217