r600_shader.c revision 7be5455796facbe35cf1f1bdbefa83759b2e3b58
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "pipe/p_shader_tokens.h"
24#include "tgsi/tgsi_parse.h"
25#include "tgsi/tgsi_scan.h"
26#include "tgsi/tgsi_dump.h"
27#include "util/u_format.h"
28#include "r600_pipe.h"
29#include "r600_asm.h"
30#include "r600_sq.h"
31#include "r600_opcodes.h"
32#include "r600d.h"
33#include <stdio.h>
34#include <errno.h>
35
36static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
37{
38	struct r600_pipe_state *rstate = &shader->rstate;
39	struct r600_shader *rshader = &shader->shader;
40	unsigned spi_vs_out_id[10];
41	unsigned i, tmp;
42
43	/* clear previous register */
44	rstate->nregs = 0;
45
46	/* so far never got proper semantic id from tgsi */
47	/* FIXME better to move this in config things so they get emited
48	 * only one time per cs
49	 */
50	for (i = 0; i < 10; i++) {
51		spi_vs_out_id[i] = 0;
52	}
53	for (i = 0; i < 32; i++) {
54		tmp = i << ((i & 3) * 8);
55		spi_vs_out_id[i / 4] |= tmp;
56	}
57	for (i = 0; i < 10; i++) {
58		r600_pipe_state_add_reg(rstate,
59					R_028614_SPI_VS_OUT_ID_0 + i * 4,
60					spi_vs_out_id[i], 0xFFFFFFFF, NULL);
61	}
62
63	r600_pipe_state_add_reg(rstate,
64			R_0286C4_SPI_VS_OUT_CONFIG,
65			S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
66			0xFFFFFFFF, NULL);
67	r600_pipe_state_add_reg(rstate,
68			R_028868_SQ_PGM_RESOURCES_VS,
69			S_028868_NUM_GPRS(rshader->bc.ngpr) |
70			S_028868_STACK_SIZE(rshader->bc.nstack),
71			0xFFFFFFFF, NULL);
72	r600_pipe_state_add_reg(rstate,
73			R_0288D0_SQ_PGM_CF_OFFSET_VS,
74			0x00000000, 0xFFFFFFFF, NULL);
75	r600_pipe_state_add_reg(rstate,
76			R_028858_SQ_PGM_START_VS,
77			r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
78
79	r600_pipe_state_add_reg(rstate,
80				R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
81				0xFFFFFFFF, NULL);
82
83}
84
85int r600_find_vs_semantic_index(struct r600_shader *vs,
86				struct r600_shader *ps, int id)
87{
88	struct r600_shader_io *input = &ps->input[id];
89
90	for (int i = 0; i < vs->noutput; i++) {
91		if (input->name == vs->output[i].name &&
92			input->sid == vs->output[i].sid) {
93			return i - 1;
94		}
95	}
96	return 0;
97}
98
99static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
100{
101	struct r600_pipe_state *rstate = &shader->rstate;
102	struct r600_shader *rshader = &shader->shader;
103	unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
104	int pos_index = -1, face_index = -1;
105
106	rstate->nregs = 0;
107
108	for (i = 0; i < rshader->ninput; i++) {
109		if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
110			pos_index = i;
111		if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
112			face_index = i;
113	}
114
115	for (i = 0; i < rshader->noutput; i++) {
116		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
117			r600_pipe_state_add_reg(rstate,
118						R_02880C_DB_SHADER_CONTROL,
119						S_02880C_Z_EXPORT_ENABLE(1),
120						S_02880C_Z_EXPORT_ENABLE(1), NULL);
121		if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
122			r600_pipe_state_add_reg(rstate,
123						R_02880C_DB_SHADER_CONTROL,
124						S_02880C_STENCIL_REF_EXPORT_ENABLE(1),
125						S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL);
126	}
127
128	exports_ps = 0;
129	num_cout = 0;
130	for (i = 0; i < rshader->noutput; i++) {
131		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
132			exports_ps |= 1;
133		else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
134			num_cout++;
135		}
136	}
137	exports_ps |= S_028854_EXPORT_COLORS(num_cout);
138	if (!exports_ps) {
139		/* always at least export 1 component per pixel */
140		exports_ps = 2;
141	}
142
143	spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
144				S_0286CC_PERSP_GRADIENT_ENA(1);
145	spi_input_z = 0;
146	if (pos_index != -1) {
147		spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
148					S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
149					S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
150					S_0286CC_BARYC_SAMPLE_CNTL(1));
151		spi_input_z |= 1;
152	}
153
154	spi_ps_in_control_1 = 0;
155	if (face_index != -1) {
156		spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
157			S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
158	}
159
160	r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
161	r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
162	r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
163	r600_pipe_state_add_reg(rstate,
164				R_028840_SQ_PGM_START_PS,
165				r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
166	r600_pipe_state_add_reg(rstate,
167				R_028850_SQ_PGM_RESOURCES_PS,
168				S_028868_NUM_GPRS(rshader->bc.ngpr) |
169				S_028868_STACK_SIZE(rshader->bc.nstack),
170				0xFFFFFFFF, NULL);
171	r600_pipe_state_add_reg(rstate,
172				R_028854_SQ_PGM_EXPORTS_PS,
173				exports_ps, 0xFFFFFFFF, NULL);
174	r600_pipe_state_add_reg(rstate,
175				R_0288CC_SQ_PGM_CF_OFFSET_PS,
176				0x00000000, 0xFFFFFFFF, NULL);
177
178	if (rshader->uses_kill) {
179		/* only set some bits here, the other bits are set in the dsa state */
180		r600_pipe_state_add_reg(rstate,
181					R_02880C_DB_SHADER_CONTROL,
182					S_02880C_KILL_ENABLE(1),
183					S_02880C_KILL_ENABLE(1), NULL);
184	}
185	r600_pipe_state_add_reg(rstate,
186				R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
187				0xFFFFFFFF, NULL);
188}
189
190int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
191{
192	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
193	struct r600_shader *rshader = &shader->shader;
194	void *ptr;
195
196	/* copy new shader */
197	if (shader->bo == NULL) {
198		shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
199		if (shader->bo == NULL) {
200			return -ENOMEM;
201		}
202		ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
203		memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
204		r600_bo_unmap(rctx->radeon, shader->bo);
205	}
206	/* build state */
207	switch (rshader->processor_type) {
208	case TGSI_PROCESSOR_VERTEX:
209		if (rshader->family >= CHIP_CEDAR) {
210			evergreen_pipe_shader_vs(ctx, shader);
211		} else {
212			r600_pipe_shader_vs(ctx, shader);
213		}
214		break;
215	case TGSI_PROCESSOR_FRAGMENT:
216		if (rshader->family >= CHIP_CEDAR) {
217			evergreen_pipe_shader_ps(ctx, shader);
218		} else {
219			r600_pipe_shader_ps(ctx, shader);
220		}
221		break;
222	default:
223		return -EINVAL;
224	}
225	return 0;
226}
227
228int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
229int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
230{
231	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
232	int r;
233
234//fprintf(stderr, "--------------------------------------------------------------\n");
235//tgsi_dump(tokens, 0);
236	shader->shader.family = r600_get_family(rctx->radeon);
237	r = r600_shader_from_tgsi(tokens, &shader->shader);
238	if (r) {
239		R600_ERR("translation from TGSI failed !\n");
240		return r;
241	}
242	r = r600_bc_build(&shader->shader.bc);
243	if (r) {
244		R600_ERR("building bytecode failed !\n");
245		return r;
246	}
247//r600_bc_dump(&shader->shader.bc);
248//fprintf(stderr, "______________________________________________________________\n");
249	return r600_pipe_shader(ctx, shader);
250}
251
252void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
253{
254	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
255
256	r600_bo_reference(rctx->radeon, &shader->bo, NULL);
257	r600_bc_clear(&shader->shader.bc);
258}
259
260/*
261 * tgsi -> r600 shader
262 */
struct r600_shader_tgsi_instruction;

/* Working state used while translating one TGSI shader to r600 bytecode. */
struct r600_shader_ctx {
	/* result of tgsi_scan_shader() on the token stream */
	struct tgsi_shader_info			info;
	/* TGSI parser state; FullToken holds the token being translated */
	struct tgsi_parse_context		parse;
	const struct tgsi_token			*tokens;
	/* processor type taken from the TGSI header */
	unsigned				type;
	/* per register file base that is added to a TGSI register index */
	unsigned				file_offset[TGSI_FILE_COUNT];
	/* first register available for driver temporaries */
	unsigned				temp_reg;
	/* table entry of the instruction being translated */
	struct r600_shader_tgsi_instruction	*inst_info;
	struct r600_bc				*bc;
	struct r600_shader			*shader;
	/* literal values handed to r600_bc_add_literal() after each instruction */
	u32					value[4];
	/* TGSI immediates seen so far, four dwords per immediate */
	u32					*literals;
	/* number of immediates stored in literals */
	u32					nliterals;
	/* driver temporaries handed out for the current instruction */
	u32					max_driver_temp_used;
	/* needed for evergreen interpolation */
	boolean                                 input_centroid;
	boolean                                 input_linear;
	boolean                                 input_perspective;
	/* GPR count computed by evergreen_gpr_count() */
	int					num_interp_gpr;
};
285
/* One entry of the TGSI opcode translation tables (indexed by TGSI opcode). */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry describes */
	unsigned	tgsi_opcode;
	/* NOTE(review): presumably non-zero for three-source hardware ops — confirm against the tables */
	unsigned	is_op3;
	/* hardware opcode the handler writes into the ALU instruction */
	unsigned	r600_opcode;
	/* handler that emits bytecode for the instruction; returns 0 or a negative errno */
	int (*process)(struct r600_shader_ctx *ctx);
};
292
/* Opcode tables for r600/r700 and evergreen; r600_shader_from_tgsi() indexes them by TGSI opcode. */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[];
/* Forward declaration; the definition is further down the file. */
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
295
296static int tgsi_is_supported(struct r600_shader_ctx *ctx)
297{
298	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
299	int j;
300
301	if (i->Instruction.NumDstRegs > 1) {
302		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
303		return -EINVAL;
304	}
305	if (i->Instruction.Predicate) {
306		R600_ERR("predicate unsupported\n");
307		return -EINVAL;
308	}
309#if 0
310	if (i->Instruction.Label) {
311		R600_ERR("label unsupported\n");
312		return -EINVAL;
313	}
314#endif
315	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
316		if (i->Src[j].Register.Dimension) {
317			R600_ERR("unsupported src %d (dimension %d)\n", j,
318				 i->Src[j].Register.Dimension);
319			return -EINVAL;
320		}
321	}
322	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
323		if (i->Dst[j].Register.Dimension) {
324			R600_ERR("unsupported dst (dimension)\n");
325			return -EINVAL;
326		}
327	}
328	return 0;
329}
330
331static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
332{
333	int i, r;
334	struct r600_bc_alu alu;
335	int gpr = 0, base_chan = 0;
336	int ij_index = 0;
337
338	if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
339		ij_index = 0;
340		if (ctx->shader->input[input].centroid)
341			ij_index++;
342	} else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
343		ij_index = 0;
344		/* if we have perspective add one */
345		if (ctx->input_perspective)  {
346			ij_index++;
347			/* if we have perspective centroid */
348			if (ctx->input_centroid)
349				ij_index++;
350		}
351		if (ctx->shader->input[input].centroid)
352			ij_index++;
353	}
354
355	/* work out gpr and base_chan from index */
356	gpr = ij_index / 2;
357	base_chan = (2 * (ij_index % 2)) + 1;
358
359	for (i = 0; i < 8; i++) {
360		memset(&alu, 0, sizeof(struct r600_bc_alu));
361
362		if (i < 4)
363			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
364		else
365			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
366
367		if ((i > 1) && (i < 6)) {
368			alu.dst.sel = ctx->shader->input[input].gpr;
369			alu.dst.write = 1;
370		}
371
372		alu.dst.chan = i % 4;
373
374		alu.src[0].sel = gpr;
375		alu.src[0].chan = (base_chan - (i % 2));
376
377		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
378
379		alu.bank_swizzle_force = SQ_ALU_VEC_210;
380		if ((i % 4) == 3)
381			alu.last = 1;
382		r = r600_bc_add_alu(ctx->bc, &alu);
383		if (r)
384			return r;
385	}
386	return 0;
387}
388
389
390static int tgsi_declaration(struct r600_shader_ctx *ctx)
391{
392	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
393	unsigned i;
394
395	switch (d->Declaration.File) {
396	case TGSI_FILE_INPUT:
397		i = ctx->shader->ninput++;
398		ctx->shader->input[i].name = d->Semantic.Name;
399		ctx->shader->input[i].sid = d->Semantic.Index;
400		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
401		ctx->shader->input[i].centroid = d->Declaration.Centroid;
402		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
403		if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
404			/* turn input into interpolate on EG */
405			if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
406				if (ctx->shader->input[i].interpolate > 0) {
407					ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
408					evergreen_interp_alu(ctx, i);
409				}
410			}
411		}
412		break;
413	case TGSI_FILE_OUTPUT:
414		i = ctx->shader->noutput++;
415		ctx->shader->output[i].name = d->Semantic.Name;
416		ctx->shader->output[i].sid = d->Semantic.Index;
417		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
418		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
419		break;
420	case TGSI_FILE_CONSTANT:
421	case TGSI_FILE_TEMPORARY:
422	case TGSI_FILE_SAMPLER:
423	case TGSI_FILE_ADDRESS:
424		break;
425	default:
426		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
427		return -EINVAL;
428	}
429	return 0;
430}
431
432static int r600_get_temp(struct r600_shader_ctx *ctx)
433{
434	return ctx->temp_reg + ctx->max_driver_temp_used++;
435}
436
437/*
438 * for evergreen we need to scan the shader to find the number of GPRs we need to
439 * reserve for interpolation.
440 *
441 * we need to know if we are going to emit
442 * any centroid inputs
443 * if perspective and linear are required
444*/
445static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
446{
447	int i;
448	int num_baryc;
449
450	ctx->input_linear = FALSE;
451	ctx->input_perspective = FALSE;
452	ctx->input_centroid = FALSE;
453	ctx->num_interp_gpr = 1;
454
455	/* any centroid inputs */
456	for (i = 0; i < ctx->info.num_inputs; i++) {
457		/* skip position/face */
458		if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
459		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
460			continue;
461		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
462			ctx->input_linear = TRUE;
463		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
464			ctx->input_perspective = TRUE;
465		if (ctx->info.input_centroid[i])
466			ctx->input_centroid = TRUE;
467	}
468
469	num_baryc = 0;
470	/* ignoring sample for now */
471	if (ctx->input_perspective)
472		num_baryc++;
473	if (ctx->input_linear)
474		num_baryc++;
475	if (ctx->input_centroid)
476		num_baryc *= 2;
477
478	ctx->num_interp_gpr += (num_baryc + 1) >> 1;
479
480	/* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
481	return ctx->num_interp_gpr;
482}
483
484int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
485{
486	struct tgsi_full_immediate *immediate;
487	struct r600_shader_ctx ctx;
488	struct r600_bc_output output[32];
489	unsigned output_done, noutput;
490	unsigned opcode;
491	int i, r = 0, pos0;
492
493	ctx.bc = &shader->bc;
494	ctx.shader = shader;
495	r = r600_bc_init(ctx.bc, shader->family);
496	if (r)
497		return r;
498	ctx.tokens = tokens;
499	tgsi_scan_shader(tokens, &ctx.info);
500	tgsi_parse_init(&ctx.parse, tokens);
501	ctx.type = ctx.parse.FullHeader.Processor.Processor;
502	shader->processor_type = ctx.type;
503	ctx.bc->type = shader->processor_type;
504
505	/* register allocations */
506	/* Values [0,127] correspond to GPR[0..127].
507	 * Values [128,159] correspond to constant buffer bank 0
508	 * Values [160,191] correspond to constant buffer bank 1
509	 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
510	 * Values [256,287] correspond to constant buffer bank 2 (EG)
511	 * Values [288,319] correspond to constant buffer bank 3 (EG)
512	 * Other special values are shown in the list below.
513	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
514	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
515	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
516	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
517	 * 248	SQ_ALU_SRC_0: special constant 0.0.
518	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
519	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
520	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
521	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
522	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
523	 * 254	SQ_ALU_SRC_PV: previous vector result.
524	 * 255	SQ_ALU_SRC_PS: previous scalar result.
525	 */
526	for (i = 0; i < TGSI_FILE_COUNT; i++) {
527		ctx.file_offset[i] = 0;
528	}
529	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
530		ctx.file_offset[TGSI_FILE_INPUT] = 1;
531		if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
532			r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
533		} else {
534			r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
535		}
536	}
537	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) {
538		ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
539	}
540	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
541						ctx.info.file_count[TGSI_FILE_INPUT];
542	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
543						ctx.info.file_count[TGSI_FILE_OUTPUT];
544
545	/* Outside the GPR range. This will be translated to one of the
546	 * kcache banks later. */
547	ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
548
549	ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
550	ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
551			ctx.info.file_count[TGSI_FILE_TEMPORARY];
552
553	ctx.nliterals = 0;
554	ctx.literals = NULL;
555
556	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
557		tgsi_parse_token(&ctx.parse);
558		switch (ctx.parse.FullToken.Token.Type) {
559		case TGSI_TOKEN_TYPE_IMMEDIATE:
560			immediate = &ctx.parse.FullToken.FullImmediate;
561			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
562			if(ctx.literals == NULL) {
563				r = -ENOMEM;
564				goto out_err;
565			}
566			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
567			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
568			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
569			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
570			ctx.nliterals++;
571			break;
572		case TGSI_TOKEN_TYPE_DECLARATION:
573			r = tgsi_declaration(&ctx);
574			if (r)
575				goto out_err;
576			break;
577		case TGSI_TOKEN_TYPE_INSTRUCTION:
578			r = tgsi_is_supported(&ctx);
579			if (r)
580				goto out_err;
581			ctx.max_driver_temp_used = 0;
582			/* reserve first tmp for everyone */
583			r600_get_temp(&ctx);
584			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
585			if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
586				ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
587			else
588				ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
589			r = ctx.inst_info->process(&ctx);
590			if (r)
591				goto out_err;
592			r = r600_bc_add_literal(ctx.bc, ctx.value);
593			if (r)
594				goto out_err;
595			break;
596		case TGSI_TOKEN_TYPE_PROPERTY:
597			break;
598		default:
599			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
600			r = -EINVAL;
601			goto out_err;
602		}
603	}
604	/* export output */
605	noutput = shader->noutput;
606	for (i = 0, pos0 = 0; i < noutput; i++) {
607		memset(&output[i], 0, sizeof(struct r600_bc_output));
608		output[i].gpr = shader->output[i].gpr;
609		output[i].elem_size = 3;
610		output[i].swizzle_x = 0;
611		output[i].swizzle_y = 1;
612		output[i].swizzle_z = 2;
613		output[i].swizzle_w = 3;
614		output[i].barrier = 1;
615		output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
616		output[i].array_base = i - pos0;
617		output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
618		switch (ctx.type) {
619		case TGSI_PROCESSOR_VERTEX:
620			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
621				output[i].array_base = 60;
622				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
623				/* position doesn't count in array_base */
624				pos0++;
625			}
626			if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
627				output[i].array_base = 61;
628				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
629				/* position doesn't count in array_base */
630				pos0++;
631			}
632			break;
633		case TGSI_PROCESSOR_FRAGMENT:
634			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
635				output[i].array_base = shader->output[i].sid;
636				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
637			} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
638				output[i].array_base = 61;
639				output[i].swizzle_x = 2;
640				output[i].swizzle_y = 7;
641				output[i].swizzle_z = output[i].swizzle_w = 7;
642				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
643			} else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
644				output[i].array_base = 61;
645				output[i].swizzle_x = 7;
646				output[i].swizzle_y = 1;
647				output[i].swizzle_z = output[i].swizzle_w = 7;
648				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
649			} else {
650				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
651				r = -EINVAL;
652				goto out_err;
653			}
654			break;
655		default:
656			R600_ERR("unsupported processor type %d\n", ctx.type);
657			r = -EINVAL;
658			goto out_err;
659		}
660	}
661	/* add fake param output for vertex shader if no param is exported */
662	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
663		for (i = 0, pos0 = 0; i < noutput; i++) {
664			if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
665				pos0 = 1;
666				break;
667			}
668		}
669		if (!pos0) {
670			memset(&output[i], 0, sizeof(struct r600_bc_output));
671			output[i].gpr = 0;
672			output[i].elem_size = 3;
673			output[i].swizzle_x = 0;
674			output[i].swizzle_y = 1;
675			output[i].swizzle_z = 2;
676			output[i].swizzle_w = 3;
677			output[i].barrier = 1;
678			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
679			output[i].array_base = 0;
680			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
681			noutput++;
682		}
683	}
684	/* add fake pixel export */
685	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
686		memset(&output[0], 0, sizeof(struct r600_bc_output));
687		output[0].gpr = 0;
688		output[0].elem_size = 3;
689		output[0].swizzle_x = 7;
690		output[0].swizzle_y = 7;
691		output[0].swizzle_z = 7;
692		output[0].swizzle_w = 7;
693		output[0].barrier = 1;
694		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
695		output[0].array_base = 0;
696		output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
697		noutput++;
698	}
699	/* set export done on last export of each type */
700	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
701		if (i == (noutput - 1)) {
702			output[i].end_of_program = 1;
703		}
704		if (!(output_done & (1 << output[i].type))) {
705			output_done |= (1 << output[i].type);
706			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
707		}
708	}
709	/* add output to bytecode */
710	for (i = 0; i < noutput; i++) {
711		r = r600_bc_add_output(ctx.bc, &output[i]);
712		if (r)
713			goto out_err;
714	}
715	free(ctx.literals);
716	tgsi_parse_free(&ctx.parse);
717	return 0;
718out_err:
719	free(ctx.literals);
720	tgsi_parse_free(&ctx.parse);
721	return r;
722}
723
724static int tgsi_unsupported(struct r600_shader_ctx *ctx)
725{
726	R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
727	return -EINVAL;
728}
729
/* Handler that emits nothing and always succeeds. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;
	return 0;
}
734
735static int tgsi_src(struct r600_shader_ctx *ctx,
736			const struct tgsi_full_src_register *tgsi_src,
737			struct r600_bc_alu_src *r600_src)
738{
739	int index;
740	memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
741	r600_src->sel = tgsi_src->Register.Index;
742	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
743		r600_src->sel = 0;
744		index = tgsi_src->Register.Index;
745		ctx->value[0] = ctx->literals[index * 4 + 0];
746		ctx->value[1] = ctx->literals[index * 4 + 1];
747		ctx->value[2] = ctx->literals[index * 4 + 2];
748		ctx->value[3] = ctx->literals[index * 4 + 3];
749	}
750	if (tgsi_src->Register.Indirect)
751		r600_src->rel = V_SQ_REL_RELATIVE;
752	r600_src->neg = tgsi_src->Register.Negate;
753	r600_src->abs = tgsi_src->Register.Absolute;
754	r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
755	return 0;
756}
757
758static int tgsi_dst(struct r600_shader_ctx *ctx,
759			const struct tgsi_full_dst_register *tgsi_dst,
760			unsigned swizzle,
761			struct r600_bc_alu_dst *r600_dst)
762{
763	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
764
765	r600_dst->sel = tgsi_dst->Register.Index;
766	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
767	r600_dst->chan = swizzle;
768	r600_dst->write = 1;
769	if (tgsi_dst->Register.Indirect)
770		r600_dst->rel = V_SQ_REL_RELATIVE;
771	if (inst->Instruction.Saturate) {
772		r600_dst->clamp = 1;
773	}
774	return 0;
775}
776
777static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
778{
779	switch (swizzle) {
780	case 0:
781		return tgsi_src->Register.SwizzleX;
782	case 1:
783		return tgsi_src->Register.SwizzleY;
784	case 2:
785		return tgsi_src->Register.SwizzleZ;
786	case 3:
787		return tgsi_src->Register.SwizzleW;
788	default:
789		return 0;
790	}
791}
792
793static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
794{
795	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
796	struct r600_bc_alu alu;
797	int i, j, k, nconst, r;
798
799	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
800		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
801			nconst++;
802		}
803		r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
804		if (r) {
805			return r;
806		}
807	}
808	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
809		if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
810			int treg = r600_get_temp(ctx);
811			for (k = 0; k < 4; k++) {
812				memset(&alu, 0, sizeof(struct r600_bc_alu));
813				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
814				alu.src[0].sel = r600_src[i].sel;
815				alu.src[0].chan = k;
816				alu.src[0].rel = r600_src[i].rel;
817				alu.dst.sel = treg;
818				alu.dst.chan = k;
819				alu.dst.write = 1;
820				if (k == 3)
821					alu.last = 1;
822				r = r600_bc_add_alu(ctx->bc, &alu);
823				if (r)
824					return r;
825			}
826			r600_src[i].sel = treg;
827			r600_src[i].rel =0;
828			j--;
829		}
830	}
831	return 0;
832}
833
834/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
835static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
836{
837	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
838	struct r600_bc_alu alu;
839	int i, j, k, nliteral, r;
840
841	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
842		if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
843			nliteral++;
844		}
845	}
846	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
847		if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
848			int treg = r600_get_temp(ctx);
849			for (k = 0; k < 4; k++) {
850				memset(&alu, 0, sizeof(struct r600_bc_alu));
851				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
852				alu.src[0].sel = r600_src[i].sel;
853				alu.src[0].chan = k;
854				alu.dst.sel = treg;
855				alu.dst.chan = k;
856				alu.dst.write = 1;
857				if (k == 3)
858					alu.last = 1;
859				r = r600_bc_add_alu(ctx->bc, &alu);
860				if (r)
861					return r;
862			}
863			r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]);
864			if (r)
865				return r;
866			r600_src[i].sel = treg;
867			j--;
868		}
869	}
870	return 0;
871}
872
873static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
874{
875	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
876	struct r600_bc_alu_src r600_src[3];
877	struct r600_bc_alu alu;
878	int i, j, r;
879	int lasti = 0;
880
881	for (i = 0; i < 4; i++) {
882		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
883			lasti = i;
884		}
885	}
886
887	r = tgsi_split_constant(ctx, r600_src);
888	if (r)
889		return r;
890	r = tgsi_split_literal_constant(ctx, r600_src);
891	if (r)
892		return r;
893	for (i = 0; i < lasti + 1; i++) {
894		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
895			continue;
896
897		memset(&alu, 0, sizeof(struct r600_bc_alu));
898		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
899		if (r)
900			return r;
901
902		alu.inst = ctx->inst_info->r600_opcode;
903		if (!swap) {
904			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
905				alu.src[j] = r600_src[j];
906				alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
907			}
908		} else {
909			alu.src[0] = r600_src[1];
910			alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
911
912			alu.src[1] = r600_src[0];
913			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
914		}
915		/* handle some special cases */
916		switch (ctx->inst_info->tgsi_opcode) {
917		case TGSI_OPCODE_SUB:
918			alu.src[1].neg = 1;
919			break;
920		case TGSI_OPCODE_ABS:
921			alu.src[0].abs = 1;
922			break;
923		default:
924			break;
925		}
926		if (i == lasti) {
927			alu.last = 1;
928		}
929		r = r600_bc_add_alu(ctx->bc, &alu);
930		if (r)
931			return r;
932	}
933	return 0;
934}
935
/* Per-channel ALU op with the sources in TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
940
/* Per-channel ALU op with the first two sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
945
/*
 * r600 - trunc to -PI..PI range
 * r700 - normalize by dividing by 2PI
 * see fdo bug 27901
 *
 * Prepare the argument of a trig instruction in ctx->temp_reg.x using
 * three ALU instructions:
 *   tmp.x = src0 * 1/(2*PI) + 0.5
 *   tmp.x = fract(tmp.x)
 *   tmp.x = tmp.x * 2*PI - PI     (CHIPREV_R600)
 *   tmp.x = tmp.x * 1.0  - 0.5    (all other chips)
 *
 * r600_src[] is filled with the converted source operands on the way.
 * Returns 0 on success or the first error reported by a helper.
 */
static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
			   struct r600_bc_alu_src r600_src[3])
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	int r, src0_chan;
	uint32_t lit_vals[4];
	struct r600_bc_alu alu;

	memset(lit_vals, 0, 4*4);
	r = tgsi_split_constant(ctx, r600_src);
	if (r)
		return r;
	r = tgsi_split_literal_constant(ctx, r600_src);
	if (r)
		return r;

	/* channel of source 0 selected by its X swizzle */
	src0_chan = tgsi_chan(&inst->Src[0], 0);

	/* We are going to feed two literals to the MAD below,
	 * which means that if the first operand is a literal as well,
	 * we need to copy its value manually.
	 */
	if (r600_src[0].sel == V_SQ_ALU_SRC_LITERAL) {
		unsigned index = inst->Src[0].Register.Index;

		/* slots 0 and 1 hold the MAD constants, so the operand goes in slot 2 */
		lit_vals[2] = ctx->literals[index * 4 + src0_chan];
		src0_chan = 2;
	}

	lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
	lit_vals[1] = fui(0.5f);

	/* tmp.x = src0 * lit[0] + lit[1] */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
	alu.is_op3 = 1;

	alu.dst.chan = 0;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;

	alu.src[0] = r600_src[0];
	alu.src[0].chan = src0_chan;

	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[1].chan = 0;
	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[2].chan = 1;
	alu.last = 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	r = r600_bc_add_literal(ctx->bc, lit_vals);
	if (r)
		return r;

	/* tmp.x = fract(tmp.x) */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);

	alu.dst.chan = 0;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;

	alu.src[0].sel = ctx->temp_reg;
	alu.src[0].chan = 0;
	alu.last = 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	/* scale and bias of the final MAD depend on the chip revision */
	if (ctx->bc->chiprev == CHIPREV_R600) {
		lit_vals[0] = fui(3.1415926535897f * 2.0f);
		lit_vals[1] = fui(-3.1415926535897f);
	} else {
		lit_vals[0] = fui(1.0f);
		lit_vals[1] = fui(-0.5f);
	}

	/* tmp.x = tmp.x * lit[0] + lit[1] */
	memset(&alu, 0, sizeof(struct r600_bc_alu));
	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
	alu.is_op3 = 1;

	alu.dst.chan = 0;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;

	alu.src[0].sel = ctx->temp_reg;
	alu.src[0].chan = 0;

	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[1].chan = 0;
	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[2].chan = 1;
	alu.last = 1;
	r = r600_bc_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	r = r600_bc_add_literal(ctx->bc, lit_vals);
	if (r)
		return r;
	return 0;
}
1052
1053static int tgsi_trig(struct r600_shader_ctx *ctx)
1054{
1055	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1056	struct r600_bc_alu_src r600_src[3];
1057	struct r600_bc_alu alu;
1058	int i, r;
1059	int lasti = 0;
1060
1061	r = tgsi_setup_trig(ctx, r600_src);
1062	if (r)
1063		return r;
1064
1065	memset(&alu, 0, sizeof(struct r600_bc_alu));
1066	alu.inst = ctx->inst_info->r600_opcode;
1067	alu.dst.chan = 0;
1068	alu.dst.sel = ctx->temp_reg;
1069	alu.dst.write = 1;
1070
1071	alu.src[0].sel = ctx->temp_reg;
1072	alu.src[0].chan = 0;
1073	alu.last = 1;
1074	r = r600_bc_add_alu(ctx->bc, &alu);
1075	if (r)
1076		return r;
1077
1078	/* replicate result */
1079	for (i = 0; i < 4; i++) {
1080		if (inst->Dst[0].Register.WriteMask & (1 << i))
1081			lasti = i;
1082	}
1083	for (i = 0; i < lasti + 1; i++) {
1084		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1085			continue;
1086
1087		memset(&alu, 0, sizeof(struct r600_bc_alu));
1088		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1089
1090		alu.src[0].sel = ctx->temp_reg;
1091		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1092		if (r)
1093			return r;
1094		if (i == lasti)
1095			alu.last = 1;
1096		r = r600_bc_add_alu(ctx->bc, &alu);
1097		if (r)
1098			return r;
1099	}
1100	return 0;
1101}
1102
1103static int tgsi_scs(struct r600_shader_ctx *ctx)
1104{
1105	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1106	struct r600_bc_alu_src r600_src[3];
1107	struct r600_bc_alu alu;
1108	int r;
1109
1110	/* We'll only need the trig stuff if we are going to write to the
1111	 * X or Y components of the destination vector.
1112	 */
1113	if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1114		r = tgsi_setup_trig(ctx, r600_src);
1115		if (r)
1116			return r;
1117	}
1118
1119	/* dst.x = COS */
1120	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1121		memset(&alu, 0, sizeof(struct r600_bc_alu));
1122		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1123		r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1124		if (r)
1125			return r;
1126
1127		alu.src[0].sel = ctx->temp_reg;
1128		alu.src[0].chan = 0;
1129		alu.last = 1;
1130		r = r600_bc_add_alu(ctx->bc, &alu);
1131		if (r)
1132			return r;
1133	}
1134
1135	/* dst.y = SIN */
1136	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1137		memset(&alu, 0, sizeof(struct r600_bc_alu));
1138		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1139		r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1140		if (r)
1141			return r;
1142
1143		alu.src[0].sel = ctx->temp_reg;
1144		alu.src[0].chan = 0;
1145		alu.last = 1;
1146		r = r600_bc_add_alu(ctx->bc, &alu);
1147		if (r)
1148			return r;
1149	}
1150
1151	/* dst.z = 0.0; */
1152	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1153		memset(&alu, 0, sizeof(struct r600_bc_alu));
1154
1155		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1156
1157		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1158		if (r)
1159			return r;
1160
1161		alu.src[0].sel = V_SQ_ALU_SRC_0;
1162		alu.src[0].chan = 0;
1163
1164		alu.last = 1;
1165
1166		r = r600_bc_add_alu(ctx->bc, &alu);
1167		if (r)
1168			return r;
1169
1170		r = r600_bc_add_literal(ctx->bc, ctx->value);
1171		if (r)
1172			return r;
1173	}
1174
1175	/* dst.w = 1.0; */
1176	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1177		memset(&alu, 0, sizeof(struct r600_bc_alu));
1178
1179		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1180
1181		r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1182		if (r)
1183			return r;
1184
1185		alu.src[0].sel = V_SQ_ALU_SRC_1;
1186		alu.src[0].chan = 0;
1187
1188		alu.last = 1;
1189
1190		r = r600_bc_add_alu(ctx->bc, &alu);
1191		if (r)
1192			return r;
1193
1194		r = r600_bc_add_literal(ctx->bc, ctx->value);
1195		if (r)
1196			return r;
1197	}
1198
1199	return 0;
1200}
1201
1202static int tgsi_kill(struct r600_shader_ctx *ctx)
1203{
1204	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1205	struct r600_bc_alu alu;
1206	int i, r;
1207
1208	for (i = 0; i < 4; i++) {
1209		memset(&alu, 0, sizeof(struct r600_bc_alu));
1210		alu.inst = ctx->inst_info->r600_opcode;
1211
1212		alu.dst.chan = i;
1213
1214		alu.src[0].sel = V_SQ_ALU_SRC_0;
1215
1216		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1217			alu.src[1].sel = V_SQ_ALU_SRC_1;
1218			alu.src[1].neg = 1;
1219		} else {
1220			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1221			if (r)
1222				return r;
1223			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1224		}
1225		if (i == 3) {
1226			alu.last = 1;
1227		}
1228		r = r600_bc_add_alu(ctx->bc, &alu);
1229		if (r)
1230			return r;
1231	}
1232	r = r600_bc_add_literal(ctx->bc, ctx->value);
1233	if (r)
1234		return r;
1235
1236	/* kill must be last in ALU */
1237	ctx->bc->force_add_cf = 1;
1238	ctx->shader->uses_kill = TRUE;
1239	return 0;
1240}
1241
1242static int tgsi_lit(struct r600_shader_ctx *ctx)
1243{
1244	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1245	struct r600_bc_alu alu;
1246	struct r600_bc_alu_src r600_src[3];
1247	int r;
1248
1249	r = tgsi_split_constant(ctx, r600_src);
1250	if (r)
1251		return r;
1252	r = tgsi_split_literal_constant(ctx, r600_src);
1253	if (r)
1254		return r;
1255
1256	/* dst.x, <- 1.0  */
1257	memset(&alu, 0, sizeof(struct r600_bc_alu));
1258	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1259	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
1260	alu.src[0].chan = 0;
1261	r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1262	if (r)
1263		return r;
1264	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1265	r = r600_bc_add_alu(ctx->bc, &alu);
1266	if (r)
1267		return r;
1268
1269	/* dst.y = max(src.x, 0.0) */
1270	memset(&alu, 0, sizeof(struct r600_bc_alu));
1271	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1272	alu.src[0] = r600_src[0];
1273	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1274	alu.src[1].chan = 0;
1275	r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1276	if (r)
1277		return r;
1278	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1279	r = r600_bc_add_alu(ctx->bc, &alu);
1280	if (r)
1281		return r;
1282
1283	/* dst.w, <- 1.0  */
1284	memset(&alu, 0, sizeof(struct r600_bc_alu));
1285	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1286	alu.src[0].sel  = V_SQ_ALU_SRC_1;
1287	alu.src[0].chan = 0;
1288	r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1289	if (r)
1290		return r;
1291	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1292	alu.last = 1;
1293	r = r600_bc_add_alu(ctx->bc, &alu);
1294	if (r)
1295		return r;
1296
1297	r = r600_bc_add_literal(ctx->bc, ctx->value);
1298	if (r)
1299		return r;
1300
1301	if (inst->Dst[0].Register.WriteMask & (1 << 2))
1302	{
1303		int chan;
1304		int sel;
1305
1306		/* dst.z = log(src.y) */
1307		memset(&alu, 0, sizeof(struct r600_bc_alu));
1308		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1309		alu.src[0] = r600_src[0];
1310		alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1311		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1312		if (r)
1313			return r;
1314		alu.last = 1;
1315		r = r600_bc_add_alu(ctx->bc, &alu);
1316		if (r)
1317			return r;
1318
1319		r = r600_bc_add_literal(ctx->bc, ctx->value);
1320		if (r)
1321			return r;
1322
1323		chan = alu.dst.chan;
1324		sel = alu.dst.sel;
1325
1326		/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1327		memset(&alu, 0, sizeof(struct r600_bc_alu));
1328		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1329		alu.src[0] = r600_src[0];
1330		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1331		alu.src[1].sel  = sel;
1332		alu.src[1].chan = chan;
1333
1334		alu.src[2] = r600_src[0];
1335		alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
1336		alu.dst.sel = ctx->temp_reg;
1337		alu.dst.chan = 0;
1338		alu.dst.write = 1;
1339		alu.is_op3 = 1;
1340		alu.last = 1;
1341		r = r600_bc_add_alu(ctx->bc, &alu);
1342		if (r)
1343			return r;
1344
1345		r = r600_bc_add_literal(ctx->bc, ctx->value);
1346		if (r)
1347			return r;
1348		/* dst.z = exp(tmp.x) */
1349		memset(&alu, 0, sizeof(struct r600_bc_alu));
1350		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1351		alu.src[0].sel = ctx->temp_reg;
1352		alu.src[0].chan = 0;
1353		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1354		if (r)
1355			return r;
1356		alu.last = 1;
1357		r = r600_bc_add_alu(ctx->bc, &alu);
1358		if (r)
1359			return r;
1360	}
1361	return 0;
1362}
1363
1364static int tgsi_rsq(struct r600_shader_ctx *ctx)
1365{
1366	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1367	struct r600_bc_alu alu;
1368	int i, r;
1369
1370	memset(&alu, 0, sizeof(struct r600_bc_alu));
1371
1372	/* FIXME:
1373	 * For state trackers other than OpenGL, we'll want to use
1374	 * _RECIPSQRT_IEEE instead.
1375	 */
1376	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1377
1378	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1379		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1380		if (r)
1381			return r;
1382		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1383		alu.src[i].abs = 1;
1384	}
1385	alu.dst.sel = ctx->temp_reg;
1386	alu.dst.write = 1;
1387	alu.last = 1;
1388	r = r600_bc_add_alu(ctx->bc, &alu);
1389	if (r)
1390		return r;
1391	r = r600_bc_add_literal(ctx->bc, ctx->value);
1392	if (r)
1393		return r;
1394	/* replicate result */
1395	return tgsi_helper_tempx_replicate(ctx);
1396}
1397
1398static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1399{
1400	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1401	struct r600_bc_alu alu;
1402	int i, r;
1403
1404	for (i = 0; i < 4; i++) {
1405		memset(&alu, 0, sizeof(struct r600_bc_alu));
1406		alu.src[0].sel = ctx->temp_reg;
1407		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1408		alu.dst.chan = i;
1409		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1410		if (r)
1411			return r;
1412		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1413		if (i == 3)
1414			alu.last = 1;
1415		r = r600_bc_add_alu(ctx->bc, &alu);
1416		if (r)
1417			return r;
1418	}
1419	return 0;
1420}
1421
1422static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1423{
1424	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1425	struct r600_bc_alu alu;
1426	int i, r;
1427
1428	memset(&alu, 0, sizeof(struct r600_bc_alu));
1429	alu.inst = ctx->inst_info->r600_opcode;
1430	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1431		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1432		if (r)
1433			return r;
1434		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1435	}
1436	alu.dst.sel = ctx->temp_reg;
1437	alu.dst.write = 1;
1438	alu.last = 1;
1439	r = r600_bc_add_alu(ctx->bc, &alu);
1440	if (r)
1441		return r;
1442	r = r600_bc_add_literal(ctx->bc, ctx->value);
1443	if (r)
1444		return r;
1445	/* replicate result */
1446	return tgsi_helper_tempx_replicate(ctx);
1447}
1448
1449static int tgsi_pow(struct r600_shader_ctx *ctx)
1450{
1451	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1452	struct r600_bc_alu alu;
1453	int r;
1454
1455	/* LOG2(a) */
1456	memset(&alu, 0, sizeof(struct r600_bc_alu));
1457	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1458	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1459	if (r)
1460		return r;
1461	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1462	alu.dst.sel = ctx->temp_reg;
1463	alu.dst.write = 1;
1464	alu.last = 1;
1465	r = r600_bc_add_alu(ctx->bc, &alu);
1466	if (r)
1467		return r;
1468	r = r600_bc_add_literal(ctx->bc,ctx->value);
1469	if (r)
1470		return r;
1471	/* b * LOG2(a) */
1472	memset(&alu, 0, sizeof(struct r600_bc_alu));
1473	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1474	r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1475	if (r)
1476		return r;
1477	alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1478	alu.src[1].sel = ctx->temp_reg;
1479	alu.dst.sel = ctx->temp_reg;
1480	alu.dst.write = 1;
1481	alu.last = 1;
1482	r = r600_bc_add_alu(ctx->bc, &alu);
1483	if (r)
1484		return r;
1485	r = r600_bc_add_literal(ctx->bc,ctx->value);
1486	if (r)
1487		return r;
1488	/* POW(a,b) = EXP2(b * LOG2(a))*/
1489	memset(&alu, 0, sizeof(struct r600_bc_alu));
1490	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1491	alu.src[0].sel = ctx->temp_reg;
1492	alu.dst.sel = ctx->temp_reg;
1493	alu.dst.write = 1;
1494	alu.last = 1;
1495	r = r600_bc_add_alu(ctx->bc, &alu);
1496	if (r)
1497		return r;
1498	r = r600_bc_add_literal(ctx->bc,ctx->value);
1499	if (r)
1500		return r;
1501	return tgsi_helper_tempx_replicate(ctx);
1502}
1503
1504static int tgsi_ssg(struct r600_shader_ctx *ctx)
1505{
1506	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1507	struct r600_bc_alu alu;
1508	struct r600_bc_alu_src r600_src[3];
1509	int i, r;
1510
1511	r = tgsi_split_constant(ctx, r600_src);
1512	if (r)
1513		return r;
1514	r = tgsi_split_literal_constant(ctx, r600_src);
1515	if (r)
1516		return r;
1517
1518	/* tmp = (src > 0 ? 1 : src) */
1519	for (i = 0; i < 4; i++) {
1520		memset(&alu, 0, sizeof(struct r600_bc_alu));
1521		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1522		alu.is_op3 = 1;
1523
1524		alu.dst.sel = ctx->temp_reg;
1525		alu.dst.chan = i;
1526
1527		alu.src[0] = r600_src[0];
1528		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1529
1530		alu.src[1].sel = V_SQ_ALU_SRC_1;
1531
1532		alu.src[2] = r600_src[0];
1533		alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1534		if (i == 3)
1535			alu.last = 1;
1536		r = r600_bc_add_alu(ctx->bc, &alu);
1537		if (r)
1538			return r;
1539	}
1540	r = r600_bc_add_literal(ctx->bc, ctx->value);
1541	if (r)
1542		return r;
1543
1544	/* dst = (-tmp > 0 ? -1 : tmp) */
1545	for (i = 0; i < 4; i++) {
1546		memset(&alu, 0, sizeof(struct r600_bc_alu));
1547		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1548		alu.is_op3 = 1;
1549		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1550		if (r)
1551			return r;
1552
1553		alu.src[0].sel = ctx->temp_reg;
1554		alu.src[0].chan = i;
1555		alu.src[0].neg = 1;
1556
1557		alu.src[1].sel = V_SQ_ALU_SRC_1;
1558		alu.src[1].neg = 1;
1559
1560		alu.src[2].sel = ctx->temp_reg;
1561		alu.src[2].chan = i;
1562
1563		if (i == 3)
1564			alu.last = 1;
1565		r = r600_bc_add_alu(ctx->bc, &alu);
1566		if (r)
1567			return r;
1568	}
1569	return 0;
1570}
1571
1572static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1573{
1574	struct r600_bc_alu alu;
1575	int i, r;
1576
1577	r = r600_bc_add_literal(ctx->bc, ctx->value);
1578	if (r)
1579		return r;
1580	for (i = 0; i < 4; i++) {
1581		memset(&alu, 0, sizeof(struct r600_bc_alu));
1582		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1583			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1584			alu.dst.chan = i;
1585		} else {
1586			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1587			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1588			if (r)
1589				return r;
1590			alu.src[0].sel = ctx->temp_reg;
1591			alu.src[0].chan = i;
1592		}
1593		if (i == 3) {
1594			alu.last = 1;
1595		}
1596		r = r600_bc_add_alu(ctx->bc, &alu);
1597		if (r)
1598			return r;
1599	}
1600	return 0;
1601}
1602
1603static int tgsi_op3(struct r600_shader_ctx *ctx)
1604{
1605	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1606	struct r600_bc_alu_src r600_src[3];
1607	struct r600_bc_alu alu;
1608	int i, j, r;
1609	int lasti = 0;
1610
1611	for (i = 0; i < 4; i++) {
1612		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
1613			lasti = i;
1614		}
1615	}
1616
1617	r = tgsi_split_constant(ctx, r600_src);
1618	if (r)
1619		return r;
1620	r = tgsi_split_literal_constant(ctx, r600_src);
1621	if (r)
1622		return r;
1623	for (i = 0; i < lasti + 1; i++) {
1624		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1625			continue;
1626
1627		memset(&alu, 0, sizeof(struct r600_bc_alu));
1628		alu.inst = ctx->inst_info->r600_opcode;
1629		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1630			alu.src[j] = r600_src[j];
1631			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1632		}
1633
1634		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1635		if (r)
1636			return r;
1637
1638		alu.dst.chan = i;
1639		alu.dst.write = 1;
1640		alu.is_op3 = 1;
1641		if (i == lasti) {
1642			alu.last = 1;
1643		}
1644		r = r600_bc_add_alu(ctx->bc, &alu);
1645		if (r)
1646			return r;
1647	}
1648	return 0;
1649}
1650
1651static int tgsi_dp(struct r600_shader_ctx *ctx)
1652{
1653	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1654	struct r600_bc_alu_src r600_src[3];
1655	struct r600_bc_alu alu;
1656	int i, j, r;
1657
1658	r = tgsi_split_constant(ctx, r600_src);
1659	if (r)
1660		return r;
1661	r = tgsi_split_literal_constant(ctx, r600_src);
1662	if (r)
1663		return r;
1664	for (i = 0; i < 4; i++) {
1665		memset(&alu, 0, sizeof(struct r600_bc_alu));
1666		alu.inst = ctx->inst_info->r600_opcode;
1667		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1668			alu.src[j] = r600_src[j];
1669			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1670		}
1671		if(inst->Dst[0].Register.WriteMask & (1 << i)) {
1672			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1673			if (r)
1674				return r;
1675		} else {
1676			alu.dst.sel = ctx->temp_reg;
1677		}
1678		alu.dst.chan = i;
1679		alu.dst.write = 1;
1680		/* handle some special cases */
1681		switch (ctx->inst_info->tgsi_opcode) {
1682		case TGSI_OPCODE_DP2:
1683			if (i > 1) {
1684				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1685				alu.src[0].chan = alu.src[1].chan = 0;
1686			}
1687			break;
1688		case TGSI_OPCODE_DP3:
1689			if (i > 2) {
1690				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1691				alu.src[0].chan = alu.src[1].chan = 0;
1692			}
1693			break;
1694		case TGSI_OPCODE_DPH:
1695			if (i == 3) {
1696				alu.src[0].sel = V_SQ_ALU_SRC_1;
1697				alu.src[0].chan = 0;
1698				alu.src[0].neg = 0;
1699			}
1700			break;
1701		default:
1702			break;
1703		}
1704		if (i == 3) {
1705			alu.last = 1;
1706		}
1707		r = r600_bc_add_alu(ctx->bc, &alu);
1708		if (r)
1709			return r;
1710	}
1711	return 0;
1712}
1713
1714static int tgsi_tex(struct r600_shader_ctx *ctx)
1715{
1716	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1717	struct r600_bc_tex tex;
1718	struct r600_bc_alu alu;
1719	unsigned src_gpr;
1720	int r, i;
1721	int opcode;
1722	boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY;
1723	uint32_t lit_vals[4];
1724
1725	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1726
1727	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1728		/* Add perspective divide */
1729		memset(&alu, 0, sizeof(struct r600_bc_alu));
1730		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1731		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1732		if (r)
1733			return r;
1734
1735		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1736		alu.dst.sel = ctx->temp_reg;
1737		alu.dst.chan = 3;
1738		alu.last = 1;
1739		alu.dst.write = 1;
1740		r = r600_bc_add_alu(ctx->bc, &alu);
1741		if (r)
1742			return r;
1743
1744		for (i = 0; i < 3; i++) {
1745			memset(&alu, 0, sizeof(struct r600_bc_alu));
1746			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1747			alu.src[0].sel = ctx->temp_reg;
1748			alu.src[0].chan = 3;
1749			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1750			if (r)
1751				return r;
1752			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1753			alu.dst.sel = ctx->temp_reg;
1754			alu.dst.chan = i;
1755			alu.dst.write = 1;
1756			r = r600_bc_add_alu(ctx->bc, &alu);
1757			if (r)
1758				return r;
1759		}
1760		memset(&alu, 0, sizeof(struct r600_bc_alu));
1761		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1762		alu.src[0].sel = V_SQ_ALU_SRC_1;
1763		alu.src[0].chan = 0;
1764		alu.dst.sel = ctx->temp_reg;
1765		alu.dst.chan = 3;
1766		alu.last = 1;
1767		alu.dst.write = 1;
1768		r = r600_bc_add_alu(ctx->bc, &alu);
1769		if (r)
1770			return r;
1771		src_not_temp = FALSE;
1772		src_gpr = ctx->temp_reg;
1773	}
1774
1775	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1776		int src_chan, src2_chan;
1777
1778		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1779		for (i = 0; i < 4; i++) {
1780			memset(&alu, 0, sizeof(struct r600_bc_alu));
1781			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1782			switch (i) {
1783			case 0:
1784				src_chan = 2;
1785				src2_chan = 1;
1786				break;
1787			case 1:
1788				src_chan = 2;
1789				src2_chan = 0;
1790				break;
1791			case 2:
1792				src_chan = 0;
1793				src2_chan = 2;
1794				break;
1795			case 3:
1796				src_chan = 1;
1797				src2_chan = 2;
1798				break;
1799			default:
1800				assert(0);
1801				src_chan = 0;
1802				src2_chan = 0;
1803				break;
1804			}
1805			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1806			if (r)
1807				return r;
1808			alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
1809			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1810			if (r)
1811				return r;
1812			alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
1813			alu.dst.sel = ctx->temp_reg;
1814			alu.dst.chan = i;
1815			if (i == 3)
1816				alu.last = 1;
1817			alu.dst.write = 1;
1818			r = r600_bc_add_alu(ctx->bc, &alu);
1819			if (r)
1820				return r;
1821		}
1822
1823		/* tmp1.z = RCP_e(|tmp1.z|) */
1824		memset(&alu, 0, sizeof(struct r600_bc_alu));
1825		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1826		alu.src[0].sel = ctx->temp_reg;
1827		alu.src[0].chan = 2;
1828		alu.src[0].abs = 1;
1829		alu.dst.sel = ctx->temp_reg;
1830		alu.dst.chan = 2;
1831		alu.dst.write = 1;
1832		alu.last = 1;
1833		r = r600_bc_add_alu(ctx->bc, &alu);
1834		if (r)
1835			return r;
1836
1837		/* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
1838		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
1839		 * muladd has no writemask, have to use another temp
1840		 */
1841		memset(&alu, 0, sizeof(struct r600_bc_alu));
1842		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1843		alu.is_op3 = 1;
1844
1845		alu.src[0].sel = ctx->temp_reg;
1846		alu.src[0].chan = 0;
1847		alu.src[1].sel = ctx->temp_reg;
1848		alu.src[1].chan = 2;
1849
1850		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1851		alu.src[2].chan = 0;
1852
1853		alu.dst.sel = ctx->temp_reg;
1854		alu.dst.chan = 0;
1855		alu.dst.write = 1;
1856
1857		r = r600_bc_add_alu(ctx->bc, &alu);
1858		if (r)
1859			return r;
1860
1861		memset(&alu, 0, sizeof(struct r600_bc_alu));
1862		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1863		alu.is_op3 = 1;
1864
1865		alu.src[0].sel = ctx->temp_reg;
1866		alu.src[0].chan = 1;
1867		alu.src[1].sel = ctx->temp_reg;
1868		alu.src[1].chan = 2;
1869
1870		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1871		alu.src[2].chan = 0;
1872
1873		alu.dst.sel = ctx->temp_reg;
1874		alu.dst.chan = 1;
1875		alu.dst.write = 1;
1876
1877		alu.last = 1;
1878		r = r600_bc_add_alu(ctx->bc, &alu);
1879		if (r)
1880			return r;
1881
1882		lit_vals[0] = fui(1.5f);
1883
1884		r = r600_bc_add_literal(ctx->bc, lit_vals);
1885		if (r)
1886			return r;
1887		src_not_temp = FALSE;
1888		src_gpr = ctx->temp_reg;
1889	}
1890
1891	if (src_not_temp) {
1892		for (i = 0; i < 4; i++) {
1893			memset(&alu, 0, sizeof(struct r600_bc_alu));
1894			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1895			alu.src[0].sel = src_gpr;
1896			alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1897			alu.dst.sel = ctx->temp_reg;
1898			alu.dst.chan = i;
1899			if (i == 3)
1900				alu.last = 1;
1901			alu.dst.write = 1;
1902			r = r600_bc_add_alu(ctx->bc, &alu);
1903			if (r)
1904				return r;
1905		}
1906		src_gpr = ctx->temp_reg;
1907	}
1908
1909	opcode = ctx->inst_info->r600_opcode;
1910	if (opcode == SQ_TEX_INST_SAMPLE &&
1911	    (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
1912		opcode = SQ_TEX_INST_SAMPLE_C;
1913
1914	memset(&tex, 0, sizeof(struct r600_bc_tex));
1915	tex.inst = opcode;
1916	tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1917	tex.resource_id = tex.sampler_id;
1918	tex.src_gpr = src_gpr;
1919	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1920	tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
1921	tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
1922	tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
1923	tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
1924	tex.src_sel_x = 0;
1925	tex.src_sel_y = 1;
1926	tex.src_sel_z = 2;
1927	tex.src_sel_w = 3;
1928
1929	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1930		tex.src_sel_x = 1;
1931		tex.src_sel_y = 0;
1932		tex.src_sel_z = 3;
1933		tex.src_sel_w = 1;
1934	}
1935
1936	if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1937		tex.coord_type_x = 1;
1938		tex.coord_type_y = 1;
1939		tex.coord_type_z = 1;
1940		tex.coord_type_w = 1;
1941	}
1942
1943	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
1944		tex.src_sel_w = 2;
1945
1946	r = r600_bc_add_tex(ctx->bc, &tex);
1947	if (r)
1948		return r;
1949
1950	/* add shadow ambient support  - gallium doesn't do it yet */
1951	return 0;
1952}
1953
1954static int tgsi_lrp(struct r600_shader_ctx *ctx)
1955{
1956	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1957	struct r600_bc_alu_src r600_src[3];
1958	struct r600_bc_alu alu;
1959	unsigned i;
1960	int r;
1961
1962	r = tgsi_split_constant(ctx, r600_src);
1963	if (r)
1964		return r;
1965	r = tgsi_split_literal_constant(ctx, r600_src);
1966	if (r)
1967		return r;
1968	/* 1 - src0 */
1969	for (i = 0; i < 4; i++) {
1970		memset(&alu, 0, sizeof(struct r600_bc_alu));
1971		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1972		alu.src[0].sel = V_SQ_ALU_SRC_1;
1973		alu.src[0].chan = 0;
1974		alu.src[1] = r600_src[0];
1975		alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1976		alu.src[1].neg = 1;
1977		alu.dst.sel = ctx->temp_reg;
1978		alu.dst.chan = i;
1979		if (i == 3) {
1980			alu.last = 1;
1981		}
1982		alu.dst.write = 1;
1983		r = r600_bc_add_alu(ctx->bc, &alu);
1984		if (r)
1985			return r;
1986	}
1987	r = r600_bc_add_literal(ctx->bc, ctx->value);
1988	if (r)
1989		return r;
1990
1991	/* (1 - src0) * src2 */
1992	for (i = 0; i < 4; i++) {
1993		memset(&alu, 0, sizeof(struct r600_bc_alu));
1994		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1995		alu.src[0].sel = ctx->temp_reg;
1996		alu.src[0].chan = i;
1997		alu.src[1] = r600_src[2];
1998		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1999		alu.dst.sel = ctx->temp_reg;
2000		alu.dst.chan = i;
2001		if (i == 3) {
2002			alu.last = 1;
2003		}
2004		alu.dst.write = 1;
2005		r = r600_bc_add_alu(ctx->bc, &alu);
2006		if (r)
2007			return r;
2008	}
2009	r = r600_bc_add_literal(ctx->bc, ctx->value);
2010	if (r)
2011		return r;
2012
2013	/* src0 * src1 + (1 - src0) * src2 */
2014	for (i = 0; i < 4; i++) {
2015		memset(&alu, 0, sizeof(struct r600_bc_alu));
2016		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2017		alu.is_op3 = 1;
2018		alu.src[0] = r600_src[0];
2019		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2020		alu.src[1] = r600_src[1];
2021		alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2022		alu.src[2].sel = ctx->temp_reg;
2023		alu.src[2].chan = i;
2024		alu.dst.sel = ctx->temp_reg;
2025		alu.dst.chan = i;
2026		if (i == 3) {
2027			alu.last = 1;
2028		}
2029		r = r600_bc_add_alu(ctx->bc, &alu);
2030		if (r)
2031			return r;
2032	}
2033	return tgsi_helper_copy(ctx, inst);
2034}
2035
2036static int tgsi_cmp(struct r600_shader_ctx *ctx)
2037{
2038	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2039	struct r600_bc_alu_src r600_src[3];
2040	struct r600_bc_alu alu;
2041	int i, r;
2042	int lasti = 0;
2043
2044	for (i = 0; i < 4; i++) {
2045		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
2046			lasti = i;
2047		}
2048	}
2049
2050	r = tgsi_split_constant(ctx, r600_src);
2051	if (r)
2052		return r;
2053	r = tgsi_split_literal_constant(ctx, r600_src);
2054	if (r)
2055		return r;
2056
2057	for (i = 0; i < lasti + 1; i++) {
2058		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2059			continue;
2060
2061		memset(&alu, 0, sizeof(struct r600_bc_alu));
2062		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
2063		alu.src[0] = r600_src[0];
2064		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2065
2066		alu.src[1] = r600_src[2];
2067		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
2068
2069		alu.src[2] = r600_src[1];
2070		alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
2071
2072		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2073		if (r)
2074			return r;
2075
2076		alu.dst.chan = i;
2077		alu.dst.write = 1;
2078		alu.is_op3 = 1;
2079		if (i == lasti)
2080			alu.last = 1;
2081		r = r600_bc_add_alu(ctx->bc, &alu);
2082		if (r)
2083			return r;
2084	}
2085	return 0;
2086}
2087
2088static int tgsi_xpd(struct r600_shader_ctx *ctx)
2089{
2090	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2091	struct r600_bc_alu_src r600_src[3];
2092	struct r600_bc_alu alu;
2093	uint32_t use_temp = 0;
2094	int i, r;
2095
2096	if (inst->Dst[0].Register.WriteMask != 0xf)
2097		use_temp = 1;
2098
2099	r = tgsi_split_constant(ctx, r600_src);
2100	if (r)
2101		return r;
2102	r = tgsi_split_literal_constant(ctx, r600_src);
2103	if (r)
2104		return r;
2105
2106	for (i = 0; i < 4; i++) {
2107		memset(&alu, 0, sizeof(struct r600_bc_alu));
2108		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2109
2110		alu.src[0] = r600_src[0];
2111		switch (i) {
2112		case 0:
2113			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2114			break;
2115		case 1:
2116			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2117			break;
2118		case 2:
2119			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2120			break;
2121		case 3:
2122			alu.src[0].sel = V_SQ_ALU_SRC_0;
2123			alu.src[0].chan = i;
2124		}
2125
2126		alu.src[1] = r600_src[1];
2127		switch (i) {
2128		case 0:
2129			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2130			break;
2131		case 1:
2132			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2133			break;
2134		case 2:
2135			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2136			break;
2137		case 3:
2138			alu.src[1].sel = V_SQ_ALU_SRC_0;
2139			alu.src[1].chan = i;
2140		}
2141
2142		alu.dst.sel = ctx->temp_reg;
2143		alu.dst.chan = i;
2144		alu.dst.write = 1;
2145
2146		if (i == 3)
2147			alu.last = 1;
2148		r = r600_bc_add_alu(ctx->bc, &alu);
2149		if (r)
2150			return r;
2151
2152		r = r600_bc_add_literal(ctx->bc, ctx->value);
2153		if (r)
2154			return r;
2155	}
2156
2157	for (i = 0; i < 4; i++) {
2158		memset(&alu, 0, sizeof(struct r600_bc_alu));
2159		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2160
2161		alu.src[0] = r600_src[0];
2162		switch (i) {
2163		case 0:
2164			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2165			break;
2166		case 1:
2167			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2168			break;
2169		case 2:
2170			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2171			break;
2172		case 3:
2173			alu.src[0].sel = V_SQ_ALU_SRC_0;
2174			alu.src[0].chan = i;
2175		}
2176
2177		alu.src[1] = r600_src[1];
2178		switch (i) {
2179		case 0:
2180			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2181			break;
2182		case 1:
2183			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2184			break;
2185		case 2:
2186			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2187			break;
2188		case 3:
2189			alu.src[1].sel = V_SQ_ALU_SRC_0;
2190			alu.src[1].chan = i;
2191		}
2192
2193		alu.src[2].sel = ctx->temp_reg;
2194		alu.src[2].neg = 1;
2195		alu.src[2].chan = i;
2196
2197		if (use_temp)
2198			alu.dst.sel = ctx->temp_reg;
2199		else {
2200			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2201			if (r)
2202				return r;
2203		}
2204		alu.dst.chan = i;
2205		alu.dst.write = 1;
2206		alu.is_op3 = 1;
2207		if (i == 3)
2208			alu.last = 1;
2209		r = r600_bc_add_alu(ctx->bc, &alu);
2210		if (r)
2211			return r;
2212
2213		r = r600_bc_add_literal(ctx->bc, ctx->value);
2214		if (r)
2215			return r;
2216	}
2217	if (use_temp)
2218		return tgsi_helper_copy(ctx, inst);
2219	return 0;
2220}
2221
2222static int tgsi_exp(struct r600_shader_ctx *ctx)
2223{
2224	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2225	struct r600_bc_alu_src r600_src[3] = { { 0 } };
2226	struct r600_bc_alu alu;
2227	int r;
2228
2229	/* result.x = 2^floor(src); */
2230	if (inst->Dst[0].Register.WriteMask & 1) {
2231		memset(&alu, 0, sizeof(struct r600_bc_alu));
2232
2233		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2234		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2235		if (r)
2236			return r;
2237
2238		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2239
2240		alu.dst.sel = ctx->temp_reg;
2241		alu.dst.chan = 0;
2242		alu.dst.write = 1;
2243		alu.last = 1;
2244		r = r600_bc_add_alu(ctx->bc, &alu);
2245		if (r)
2246			return r;
2247
2248		r = r600_bc_add_literal(ctx->bc, ctx->value);
2249		if (r)
2250			return r;
2251
2252		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2253		alu.src[0].sel = ctx->temp_reg;
2254		alu.src[0].chan = 0;
2255
2256		alu.dst.sel = ctx->temp_reg;
2257		alu.dst.chan = 0;
2258		alu.dst.write = 1;
2259		alu.last = 1;
2260		r = r600_bc_add_alu(ctx->bc, &alu);
2261		if (r)
2262			return r;
2263
2264		r = r600_bc_add_literal(ctx->bc, ctx->value);
2265		if (r)
2266			return r;
2267	}
2268
2269	/* result.y = tmp - floor(tmp); */
2270	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2271		memset(&alu, 0, sizeof(struct r600_bc_alu));
2272
2273		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2274		alu.src[0] = r600_src[0];
2275		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2276		if (r)
2277			return r;
2278		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2279
2280		alu.dst.sel = ctx->temp_reg;
2281//		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2282//		if (r)
2283//			return r;
2284		alu.dst.write = 1;
2285		alu.dst.chan = 1;
2286
2287		alu.last = 1;
2288
2289		r = r600_bc_add_alu(ctx->bc, &alu);
2290		if (r)
2291			return r;
2292		r = r600_bc_add_literal(ctx->bc, ctx->value);
2293		if (r)
2294			return r;
2295	}
2296
2297	/* result.z = RoughApprox2ToX(tmp);*/
2298	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2299		memset(&alu, 0, sizeof(struct r600_bc_alu));
2300		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2301		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2302		if (r)
2303			return r;
2304		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2305
2306		alu.dst.sel = ctx->temp_reg;
2307		alu.dst.write = 1;
2308		alu.dst.chan = 2;
2309
2310		alu.last = 1;
2311
2312		r = r600_bc_add_alu(ctx->bc, &alu);
2313		if (r)
2314			return r;
2315		r = r600_bc_add_literal(ctx->bc, ctx->value);
2316		if (r)
2317			return r;
2318	}
2319
2320	/* result.w = 1.0;*/
2321	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2322		memset(&alu, 0, sizeof(struct r600_bc_alu));
2323
2324		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2325		alu.src[0].sel = V_SQ_ALU_SRC_1;
2326		alu.src[0].chan = 0;
2327
2328		alu.dst.sel = ctx->temp_reg;
2329		alu.dst.chan = 3;
2330		alu.dst.write = 1;
2331		alu.last = 1;
2332		r = r600_bc_add_alu(ctx->bc, &alu);
2333		if (r)
2334			return r;
2335		r = r600_bc_add_literal(ctx->bc, ctx->value);
2336		if (r)
2337			return r;
2338	}
2339	return tgsi_helper_copy(ctx, inst);
2340}
2341
2342static int tgsi_log(struct r600_shader_ctx *ctx)
2343{
2344	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2345	struct r600_bc_alu alu;
2346	int r;
2347
2348	/* result.x = floor(log2(src)); */
2349	if (inst->Dst[0].Register.WriteMask & 1) {
2350		memset(&alu, 0, sizeof(struct r600_bc_alu));
2351
2352		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2353		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2354		if (r)
2355			return r;
2356
2357		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2358
2359		alu.dst.sel = ctx->temp_reg;
2360		alu.dst.chan = 0;
2361		alu.dst.write = 1;
2362		alu.last = 1;
2363		r = r600_bc_add_alu(ctx->bc, &alu);
2364		if (r)
2365			return r;
2366
2367		r = r600_bc_add_literal(ctx->bc, ctx->value);
2368		if (r)
2369			return r;
2370
2371		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2372		alu.src[0].sel = ctx->temp_reg;
2373		alu.src[0].chan = 0;
2374
2375		alu.dst.sel = ctx->temp_reg;
2376		alu.dst.chan = 0;
2377		alu.dst.write = 1;
2378		alu.last = 1;
2379
2380		r = r600_bc_add_alu(ctx->bc, &alu);
2381		if (r)
2382			return r;
2383
2384		r = r600_bc_add_literal(ctx->bc, ctx->value);
2385		if (r)
2386			return r;
2387	}
2388
2389	/* result.y = src.x / (2 ^ floor(log2(src.x))); */
2390	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2391		memset(&alu, 0, sizeof(struct r600_bc_alu));
2392
2393		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2394		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2395		if (r)
2396			return r;
2397
2398		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2399
2400		alu.dst.sel = ctx->temp_reg;
2401		alu.dst.chan = 1;
2402		alu.dst.write = 1;
2403		alu.last = 1;
2404
2405		r = r600_bc_add_alu(ctx->bc, &alu);
2406		if (r)
2407			return r;
2408
2409		r = r600_bc_add_literal(ctx->bc, ctx->value);
2410		if (r)
2411			return r;
2412
2413		memset(&alu, 0, sizeof(struct r600_bc_alu));
2414
2415		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2416		alu.src[0].sel = ctx->temp_reg;
2417		alu.src[0].chan = 1;
2418
2419		alu.dst.sel = ctx->temp_reg;
2420		alu.dst.chan = 1;
2421		alu.dst.write = 1;
2422		alu.last = 1;
2423
2424		r = r600_bc_add_alu(ctx->bc, &alu);
2425		if (r)
2426			return r;
2427
2428		r = r600_bc_add_literal(ctx->bc, ctx->value);
2429		if (r)
2430			return r;
2431
2432		memset(&alu, 0, sizeof(struct r600_bc_alu));
2433
2434		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2435		alu.src[0].sel = ctx->temp_reg;
2436		alu.src[0].chan = 1;
2437
2438		alu.dst.sel = ctx->temp_reg;
2439		alu.dst.chan = 1;
2440		alu.dst.write = 1;
2441		alu.last = 1;
2442
2443		r = r600_bc_add_alu(ctx->bc, &alu);
2444		if (r)
2445			return r;
2446
2447		r = r600_bc_add_literal(ctx->bc, ctx->value);
2448		if (r)
2449			return r;
2450
2451		memset(&alu, 0, sizeof(struct r600_bc_alu));
2452
2453		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2454		alu.src[0].sel = ctx->temp_reg;
2455		alu.src[0].chan = 1;
2456
2457		alu.dst.sel = ctx->temp_reg;
2458		alu.dst.chan = 1;
2459		alu.dst.write = 1;
2460		alu.last = 1;
2461
2462		r = r600_bc_add_alu(ctx->bc, &alu);
2463		if (r)
2464			return r;
2465
2466		r = r600_bc_add_literal(ctx->bc, ctx->value);
2467		if (r)
2468			return r;
2469
2470		memset(&alu, 0, sizeof(struct r600_bc_alu));
2471
2472		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2473
2474		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2475		if (r)
2476			return r;
2477
2478		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2479
2480		alu.src[1].sel = ctx->temp_reg;
2481		alu.src[1].chan = 1;
2482
2483		alu.dst.sel = ctx->temp_reg;
2484		alu.dst.chan = 1;
2485		alu.dst.write = 1;
2486		alu.last = 1;
2487
2488		r = r600_bc_add_alu(ctx->bc, &alu);
2489		if (r)
2490			return r;
2491
2492		r = r600_bc_add_literal(ctx->bc, ctx->value);
2493		if (r)
2494			return r;
2495	}
2496
2497	/* result.z = log2(src);*/
2498	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2499		memset(&alu, 0, sizeof(struct r600_bc_alu));
2500
2501		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2502		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2503		if (r)
2504			return r;
2505
2506		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2507
2508		alu.dst.sel = ctx->temp_reg;
2509		alu.dst.write = 1;
2510		alu.dst.chan = 2;
2511		alu.last = 1;
2512
2513		r = r600_bc_add_alu(ctx->bc, &alu);
2514		if (r)
2515			return r;
2516
2517		r = r600_bc_add_literal(ctx->bc, ctx->value);
2518		if (r)
2519			return r;
2520	}
2521
2522	/* result.w = 1.0; */
2523	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2524		memset(&alu, 0, sizeof(struct r600_bc_alu));
2525
2526		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2527		alu.src[0].sel = V_SQ_ALU_SRC_1;
2528		alu.src[0].chan = 0;
2529
2530		alu.dst.sel = ctx->temp_reg;
2531		alu.dst.chan = 3;
2532		alu.dst.write = 1;
2533		alu.last = 1;
2534
2535		r = r600_bc_add_alu(ctx->bc, &alu);
2536		if (r)
2537			return r;
2538
2539		r = r600_bc_add_literal(ctx->bc, ctx->value);
2540		if (r)
2541			return r;
2542	}
2543
2544	return tgsi_helper_copy(ctx, inst);
2545}
2546
2547static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2548{
2549	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2550	struct r600_bc_alu alu;
2551	int r;
2552	memset(&alu, 0, sizeof(struct r600_bc_alu));
2553
2554	switch (inst->Instruction.Opcode) {
2555	case TGSI_OPCODE_ARL:
2556		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2557		break;
2558	case TGSI_OPCODE_ARR:
2559		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2560		break;
2561	default:
2562		assert(0);
2563		return -1;
2564	}
2565
2566	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2567	if (r)
2568		return r;
2569	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2570	alu.last = 1;
2571	alu.dst.chan = 0;
2572	alu.dst.sel = ctx->temp_reg;
2573	alu.dst.write = 1;
2574	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2575	if (r)
2576		return r;
2577	memset(&alu, 0, sizeof(struct r600_bc_alu));
2578	alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2579	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2580	if (r)
2581		return r;
2582	alu.src[0].sel = ctx->temp_reg;
2583	alu.src[0].chan = 0;
2584	alu.last = 1;
2585	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2586	if (r)
2587		return r;
2588	return 0;
2589}
2590static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2591{
2592	/* TODO from r600c, ar values don't persist between clauses */
2593	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2594	struct r600_bc_alu alu;
2595	int r;
2596	memset(&alu, 0, sizeof(struct r600_bc_alu));
2597
2598	switch (inst->Instruction.Opcode) {
2599	case TGSI_OPCODE_ARL:
2600		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
2601		break;
2602	case TGSI_OPCODE_ARR:
2603		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA;
2604		break;
2605	default:
2606		assert(0);
2607		return -1;
2608	}
2609
2610
2611	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2612	if (r)
2613		return r;
2614	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2615
2616	alu.last = 1;
2617
2618	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2619	if (r)
2620		return r;
2621	ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2622	return 0;
2623}
2624
2625static int tgsi_opdst(struct r600_shader_ctx *ctx)
2626{
2627	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2628	struct r600_bc_alu alu;
2629	int i, r = 0;
2630
2631	for (i = 0; i < 4; i++) {
2632		memset(&alu, 0, sizeof(struct r600_bc_alu));
2633
2634		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2635		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2636		if (r)
2637			return r;
2638
2639		if (i == 0 || i == 3) {
2640			alu.src[0].sel = V_SQ_ALU_SRC_1;
2641		} else {
2642			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2643			if (r)
2644				return r;
2645			alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2646		}
2647
2648	        if (i == 0 || i == 2) {
2649			alu.src[1].sel = V_SQ_ALU_SRC_1;
2650		} else {
2651			r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
2652			if (r)
2653				return r;
2654			alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2655		}
2656		if (i == 3)
2657			alu.last = 1;
2658		r = r600_bc_add_alu(ctx->bc, &alu);
2659		if (r)
2660			return r;
2661	}
2662	return 0;
2663}
2664
2665static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2666{
2667	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2668	struct r600_bc_alu alu;
2669	int r;
2670
2671	memset(&alu, 0, sizeof(struct r600_bc_alu));
2672	alu.inst = opcode;
2673	alu.predicate = 1;
2674
2675	alu.dst.sel = ctx->temp_reg;
2676	alu.dst.write = 1;
2677	alu.dst.chan = 0;
2678
2679	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2680	if (r)
2681		return r;
2682	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2683	alu.src[1].sel = V_SQ_ALU_SRC_0;
2684	alu.src[1].chan = 0;
2685
2686	alu.last = 1;
2687
2688	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2689	if (r)
2690		return r;
2691	return 0;
2692}
2693
2694static int pops(struct r600_shader_ctx *ctx, int pops)
2695{
2696	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2697	ctx->bc->cf_last->pop_count = pops;
2698	ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
2699	return 0;
2700}
2701
2702static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2703{
2704	switch(reason) {
2705	case FC_PUSH_VPM:
2706		ctx->bc->callstack[ctx->bc->call_sp].current--;
2707		break;
2708	case FC_PUSH_WQM:
2709	case FC_LOOP:
2710		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2711		break;
2712	case FC_REP:
2713		/* TOODO : for 16 vp asic should -= 2; */
2714		ctx->bc->callstack[ctx->bc->call_sp].current --;
2715		break;
2716	}
2717}
2718
2719static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2720{
2721	if (check_max_only) {
2722		int diff;
2723		switch (reason) {
2724		case FC_PUSH_VPM:
2725			diff = 1;
2726			break;
2727		case FC_PUSH_WQM:
2728			diff = 4;
2729			break;
2730		default:
2731			assert(0);
2732			diff = 0;
2733		}
2734		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2735		    ctx->bc->callstack[ctx->bc->call_sp].max) {
2736			ctx->bc->callstack[ctx->bc->call_sp].max =
2737				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2738		}
2739		return;
2740	}
2741	switch (reason) {
2742	case FC_PUSH_VPM:
2743		ctx->bc->callstack[ctx->bc->call_sp].current++;
2744		break;
2745	case FC_PUSH_WQM:
2746	case FC_LOOP:
2747		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2748		break;
2749	case FC_REP:
2750		ctx->bc->callstack[ctx->bc->call_sp].current++;
2751		break;
2752	}
2753
2754	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2755	    ctx->bc->callstack[ctx->bc->call_sp].max) {
2756		ctx->bc->callstack[ctx->bc->call_sp].max =
2757			ctx->bc->callstack[ctx->bc->call_sp].current;
2758	}
2759}
2760
2761static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2762{
2763	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2764
2765	sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2766						sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2767	sp->mid[sp->num_mid] = ctx->bc->cf_last;
2768	sp->num_mid++;
2769}
2770
2771static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2772{
2773	ctx->bc->fc_sp++;
2774	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2775	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2776}
2777
2778static void fc_poplevel(struct r600_shader_ctx *ctx)
2779{
2780	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2781	if (sp->mid) {
2782		free(sp->mid);
2783		sp->mid = NULL;
2784	}
2785	sp->num_mid = 0;
2786	sp->start = NULL;
2787	sp->type = 0;
2788	ctx->bc->fc_sp--;
2789}
2790
#if 0
/*
 * Disabled scaffolding for RETURN and flag-driven flow control. Several of
 * these helpers are still empty stubs; the whole group is compiled out.
 */

/* Append a CF RETURN instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	struct r600_bc *bc = ctx->bc;

	r600_bc_add_cfinst(bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/* Append a CF JUMP with the given pop count; the target is not set yet. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	struct r600_bc *bc = ctx->bc;

	r600_bc_add_cfinst(bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: emits nothing. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/* Test the flag, jump, clear the flag, pop ifidx + 1 levels and return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Test the flag, then append the current instruction's CF opcode with a pop
 * count of 1, record it on flow-control level fc_sp and emit a POP.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	struct r600_bc *bc = ctx->bc;

	emit_testflag(ctx);

	r600_bc_add_cfinst(bc, ctx->inst_info->r600_opcode);
	bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2838
2839static int tgsi_if(struct r600_shader_ctx *ctx)
2840{
2841	emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2842
2843	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2844
2845	fc_pushlevel(ctx, FC_IF);
2846
2847	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2848	return 0;
2849}
2850
2851static int tgsi_else(struct r600_shader_ctx *ctx)
2852{
2853	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2854	ctx->bc->cf_last->pop_count = 1;
2855
2856	fc_set_mid(ctx, ctx->bc->fc_sp);
2857	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2858	return 0;
2859}
2860
2861static int tgsi_endif(struct r600_shader_ctx *ctx)
2862{
2863	pops(ctx, 1);
2864	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2865		R600_ERR("if/endif unbalanced in shader\n");
2866		return -1;
2867	}
2868
2869	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2870		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2871		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2872	} else {
2873		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2874	}
2875	fc_poplevel(ctx);
2876
2877	callstack_decrease_current(ctx, FC_PUSH_VPM);
2878	return 0;
2879}
2880
2881static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2882{
2883	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2884
2885	fc_pushlevel(ctx, FC_LOOP);
2886
2887	/* check stack depth */
2888	callstack_check_depth(ctx, FC_LOOP, 0);
2889	return 0;
2890}
2891
2892static int tgsi_endloop(struct r600_shader_ctx *ctx)
2893{
2894	int i;
2895
2896	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
2897
2898	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2899		R600_ERR("loop/endloop in shader code are not paired.\n");
2900		return -EINVAL;
2901	}
2902
2903	/* fixup loop pointers - from r600isa
2904	   LOOP END points to CF after LOOP START,
2905	   LOOP START point to CF after LOOP END
2906	   BRK/CONT point to LOOP END CF
2907	*/
2908	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2909
2910	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2911
2912	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2913		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2914	}
2915	/* TODO add LOOPRET support */
2916	fc_poplevel(ctx);
2917	callstack_decrease_current(ctx, FC_LOOP);
2918	return 0;
2919}
2920
2921static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2922{
2923	unsigned int fscp;
2924
2925	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2926	{
2927		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2928			break;
2929	}
2930
2931	if (fscp == 0) {
2932		R600_ERR("Break not inside loop/endloop pair\n");
2933		return -EINVAL;
2934	}
2935
2936	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2937	ctx->bc->cf_last->pop_count = 1;
2938
2939	fc_set_mid(ctx, fscp);
2940
2941	pops(ctx, 1);
2942	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2943	return 0;
2944}
2945
2946static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
2947	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
2948	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2949	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2950
2951	/* FIXME:
2952	 * For state trackers other than OpenGL, we'll want to use
2953	 * _RECIP_IEEE instead.
2954	 */
2955	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
2956
2957	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
2958	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2959	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
2960	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2961	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2962	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2963	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2964	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2965	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2966	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2967	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2968	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2969	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2970	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2971	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2972	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2973	/* gap */
2974	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2975	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2976	/* gap */
2977	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2978	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2979	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2980	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2981	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2982	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2983	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2984	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2985	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2986	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2987	/* gap */
2988	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2989	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2990	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2991	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2992	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2993	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2994	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2995	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
2996	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2997	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2998	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2999	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3000	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3001	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3002	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3003	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3004	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3005	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3006	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3007	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3008	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3009	{TGSI_OPCODE_TXD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3010	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3011	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3012	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3013	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3014	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3015	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3016	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3017	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3018	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3019	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3020	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3021	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3022	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3023	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3024	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3025	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3026	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3027	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3028	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3029	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3030	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3031	/* gap */
3032	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3033	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3034	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3035	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3036	/* gap */
3037	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3038	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3039	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3040	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3041	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3042	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3043	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3044	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
3045	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3046	/* gap */
3047	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3048	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3049	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3050	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3051	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3052	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3053	{TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3054	{TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3055	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3056	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3057	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3058	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3059	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3060	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3061	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3062	/* gap */
3063	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3064	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3065	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3066	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3067	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3068	/* gap */
3069	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3070	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3071	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3072	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3073	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3074	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3075	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3076	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3077	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3078	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3079	/* gap */
3080	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3081	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3082	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3083	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3084	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3085	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3086	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3087	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3088	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3089	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3090	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3091	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3092	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3093	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3094	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3095	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3096	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3097	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3098	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3099	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3100	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3101	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3102	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3103	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3104	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3105	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3106	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3107	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3108};
3109
/*
 * TGSI opcode translation table built from the EG_-prefixed hardware
 * opcode constants ("eg" presumably stands for Evergreen - confirm).
 *
 * Each row holds four values, in this order:
 *   1. the TGSI opcode, or a bare number where the TGSI numbering has a gap;
 *   2. a 0/1 flag - the only row set to 1 is MAD, which is also the only
 *      row using an OP3 constant, so this presumably marks three-source
 *      ALU instructions (field name not visible here - confirm);
 *   3. the hardware instruction constant for the row: ALU (EG_V_SQ_ALU_*),
 *      texture (SQ_TEX_INST_*) or control flow (EG_V_SQ_CF_*);
 *   4. the handler function for the opcode.
 *
 * Every bare-number placeholder row sits at the zero-based row position
 * equal to its number, which suggests the table is indexed directly by
 * TGSI opcode value. The lookup code is outside this excerpt, so treat
 * that as an assumption, and keep the row order and row count intact when
 * editing.
 *
 * Rows whose handler is tgsi_unsupported all carry the NOP constant;
 * judging by the handler's name these opcodes have no translation yet
 * (its body is not visible here).
 */
3110static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
3111	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3112	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3113	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3114	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
3115	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
3116	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3117	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3118	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3119	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	/* DP3, DP4, DPH and DP2 all share the DOT4 constant and the tgsi_dp handler. */
3120	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3121	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3122	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3123	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3124	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
	/*
	 * SLT is listed with SETGT (and SLE further down with SETGE) through
	 * tgsi_op2_swap; presumably that handler exchanges the two sources -
	 * confirm against its definition.
	 */
3125	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3126	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3127	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
	/*
	 * SUB carries the same ADD constant as TGSI_OPCODE_ADD; the negation of
	 * the second source is presumably applied inside tgsi_op2 - confirm.
	 */
3128	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3129	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3130	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3131	/* gap */
3132	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3133	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3134	/* gap */
3135	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3136	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3137	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3138	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3139	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3140	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3141	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3142	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3143	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3144	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3145	/* gap */
3146	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/*
	 * ABS is listed with the plain MOV constant; the absolute-value source
	 * modifier is presumably set inside tgsi_op2 - confirm.
	 */
3147	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3148	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3149	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3150	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
	/* Rows handled by tgsi_tex carry SQ_TEX_INST_* constants instead of ALU ones. */
3151	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3152	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3153	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3154	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3155	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3156	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3157	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3158	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3159	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3160	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3161	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3162	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3163	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3164	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3165	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3166	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3167	{TGSI_OPCODE_TXD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3168	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3169	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3170	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3171	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3172	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3173	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3174	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3175	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3176	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3177	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3178	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3179	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3180	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3181	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
	/*
	 * NOTE(review): TXB here and TXL below both use SQ_TEX_INST_SAMPLE_L.
	 * Confirm that TXB is not meant to use a separate bias-sampling
	 * instruction.
	 */
3182	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3183	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3184	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3185	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3186	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	/* BRK (and CONT further down) carry control-flow constants and share tgsi_loop_brk_cont. */
3187	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3188	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3189	/* gap */
3190	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3191	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3192	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3193	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3194	/* gap */
3195	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3196	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3197	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3198	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3199	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3200	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3201	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3202	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
3203	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3204	/* gap */
3205	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3206	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3207	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3208	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3209	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3210	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3211	{TGSI_OPCODE_TXF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3212	{TGSI_OPCODE_TXQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3213	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3214	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3215	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3216	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3217	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3218	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3219	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3220	/* gap */
3221	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3222	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3223	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3224	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3225	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3226	/* gap */
3227	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3228	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3229	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3230	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3231	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3232	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3233	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3234	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3235	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3236	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3237	/* gap */
3238	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3239	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3240	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3241	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3242	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3243	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3244	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3245	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3246	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3247	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3248	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3249	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3250	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3251	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3252	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3253	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3254	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3255	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3256	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3257	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3258	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3259	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3260	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3261	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3262	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3263	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3264	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3265	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3266};
3267