r600_shader.c revision e973221538d5edfad62abedf5b37a4fb774d71fc
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "pipe/p_shader_tokens.h"
24#include "tgsi/tgsi_parse.h"
25#include "tgsi/tgsi_scan.h"
26#include "tgsi/tgsi_dump.h"
27#include "util/u_format.h"
28#include "r600_pipe.h"
29#include "r600_asm.h"
30#include "r600_sq.h"
31#include "r600_opcodes.h"
32#include "r600d.h"
33#include <stdio.h>
34#include <errno.h>
35
36static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
37{
38	struct r600_pipe_state *rstate = &shader->rstate;
39	struct r600_shader *rshader = &shader->shader;
40	unsigned spi_vs_out_id[10];
41	unsigned i, tmp;
42
43	/* clear previous register */
44	rstate->nregs = 0;
45
46	/* so far never got proper semantic id from tgsi */
47	for (i = 0; i < 10; i++) {
48		spi_vs_out_id[i] = 0;
49	}
50	for (i = 0; i < 32; i++) {
51		tmp = i << ((i & 3) * 8);
52		spi_vs_out_id[i / 4] |= tmp;
53	}
54	for (i = 0; i < 10; i++) {
55		r600_pipe_state_add_reg(rstate,
56					R_028614_SPI_VS_OUT_ID_0 + i * 4,
57					spi_vs_out_id[i], 0xFFFFFFFF, NULL);
58	}
59
60	r600_pipe_state_add_reg(rstate,
61			R_0286C4_SPI_VS_OUT_CONFIG,
62			S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
63			0xFFFFFFFF, NULL);
64	r600_pipe_state_add_reg(rstate,
65			R_028868_SQ_PGM_RESOURCES_VS,
66			S_028868_NUM_GPRS(rshader->bc.ngpr) |
67			S_028868_STACK_SIZE(rshader->bc.nstack),
68			0xFFFFFFFF, NULL);
69	r600_pipe_state_add_reg(rstate,
70			R_0288A4_SQ_PGM_RESOURCES_FS,
71			0x00000000, 0xFFFFFFFF, NULL);
72	r600_pipe_state_add_reg(rstate,
73			R_0288D0_SQ_PGM_CF_OFFSET_VS,
74			0x00000000, 0xFFFFFFFF, NULL);
75	r600_pipe_state_add_reg(rstate,
76			R_0288DC_SQ_PGM_CF_OFFSET_FS,
77			0x00000000, 0xFFFFFFFF, NULL);
78	r600_pipe_state_add_reg(rstate,
79			R_028858_SQ_PGM_START_VS,
80			0x00000000, 0xFFFFFFFF, shader->bo);
81	r600_pipe_state_add_reg(rstate,
82			R_028894_SQ_PGM_START_FS,
83			0x00000000, 0xFFFFFFFF, shader->bo);
84}
85
86int r600_find_vs_semantic_index(struct r600_shader *vs,
87				struct r600_shader *ps, int id)
88{
89	struct r600_shader_io *input = &ps->input[id];
90
91	for (int i = 0; i < vs->noutput; i++) {
92		if (input->name == vs->output[i].name &&
93			input->sid == vs->output[i].sid) {
94			return i - 1;
95		}
96	}
97	return 0;
98}
99
100static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
101{
102	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
103	struct r600_pipe_state *rstate = &shader->rstate;
104	struct r600_shader *rshader = &shader->shader;
105	unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z;
106	boolean have_pos = FALSE, have_face = FALSE;
107
108	/* clear previous register */
109	rstate->nregs = 0;
110
111	for (i = 0; i < rshader->ninput; i++) {
112		tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
113		tmp |= S_028644_SEL_CENTROID(1);
114		if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
115			have_pos = TRUE;
116		if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
117		    rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
118		    rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
119			tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
120		}
121		if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
122			have_face = TRUE;
123		if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
124			rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
125			tmp |= S_028644_PT_SPRITE_TEX(1);
126		}
127		r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
128	}
129	for (i = 0; i < rshader->noutput; i++) {
130		r600_pipe_state_add_reg(rstate,
131				R_02880C_DB_SHADER_CONTROL,
132				S_02880C_Z_EXPORT_ENABLE(1),
133				S_02880C_Z_EXPORT_ENABLE(1), NULL);
134	}
135
136	exports_ps = 0;
137	num_cout = 0;
138	for (i = 0; i < rshader->noutput; i++) {
139		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
140			exports_ps |= 1;
141		else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
142			num_cout++;
143		}
144	}
145	exports_ps |= S_028854_EXPORT_COLORS(num_cout);
146	if (!exports_ps) {
147		/* always at least export 1 component per pixel */
148		exports_ps = 2;
149	}
150
151	spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
152				S_0286CC_PERSP_GRADIENT_ENA(1);
153	spi_input_z = 0;
154	if (have_pos) {
155		spi_ps_in_control_0 |=  S_0286CC_POSITION_ENA(1) |
156					S_0286CC_BARYC_SAMPLE_CNTL(1);
157		spi_input_z |= 1;
158	}
159	r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
160	r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, S_0286D0_FRONT_FACE_ENA(have_face), 0xFFFFFFFF, NULL);
161	r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
162	r600_pipe_state_add_reg(rstate,
163				R_028840_SQ_PGM_START_PS,
164				0x00000000, 0xFFFFFFFF, shader->bo);
165	r600_pipe_state_add_reg(rstate,
166				R_028850_SQ_PGM_RESOURCES_PS,
167				S_028868_NUM_GPRS(rshader->bc.ngpr) |
168				S_028868_STACK_SIZE(rshader->bc.nstack),
169				0xFFFFFFFF, NULL);
170	r600_pipe_state_add_reg(rstate,
171				R_028854_SQ_PGM_EXPORTS_PS,
172				exports_ps, 0xFFFFFFFF, NULL);
173	r600_pipe_state_add_reg(rstate,
174				R_0288CC_SQ_PGM_CF_OFFSET_PS,
175				0x00000000, 0xFFFFFFFF, NULL);
176
177	if (rshader->uses_kill) {
178		/* only set some bits here, the other bits are set in the dsa state */
179		r600_pipe_state_add_reg(rstate,
180					R_02880C_DB_SHADER_CONTROL,
181					S_02880C_KILL_ENABLE(1),
182					S_02880C_KILL_ENABLE(1), NULL);
183	}
184}
185
186static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
187{
188	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
189	struct r600_shader *rshader = &shader->shader;
190	void *ptr;
191
192	/* copy new shader */
193	if (shader->bo == NULL) {
194		shader->bo = radeon_ws_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0);
195		if (shader->bo == NULL) {
196			return -ENOMEM;
197		}
198		ptr = radeon_ws_bo_map(rctx->radeon, shader->bo, 0, NULL);
199		memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
200		radeon_ws_bo_unmap(rctx->radeon, shader->bo);
201	}
202	/* build state */
203	rshader->flat_shade = rctx->flatshade;
204	switch (rshader->processor_type) {
205	case TGSI_PROCESSOR_VERTEX:
206		if (rshader->family >= CHIP_CEDAR) {
207			evergreen_pipe_shader_vs(ctx, shader);
208		} else {
209			r600_pipe_shader_vs(ctx, shader);
210		}
211		break;
212	case TGSI_PROCESSOR_FRAGMENT:
213		if (rshader->family >= CHIP_CEDAR) {
214			evergreen_pipe_shader_ps(ctx, shader);
215		} else {
216			r600_pipe_shader_ps(ctx, shader);
217		}
218		break;
219	default:
220		return -EINVAL;
221	}
222	r600_context_pipe_state_set(&rctx->ctx, &shader->rstate);
223	return 0;
224}
225
226static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader)
227{
228	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
229	struct r600_shader *shader = &rshader->shader;
230	const struct util_format_description *desc;
231	enum pipe_format resource_format[160];
232	unsigned i, nresources = 0;
233	struct r600_bc *bc = &shader->bc;
234	struct r600_bc_cf *cf;
235	struct r600_bc_vtx *vtx;
236
237	if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
238		return 0;
239	if (!memcmp(&rshader->vertex_elements, rctx->vertex_elements, sizeof(struct r600_vertex_element))) {
240		return 0;
241	}
242	rshader->vertex_elements = *rctx->vertex_elements;
243	for (i = 0; i < rctx->vertex_elements->count; i++) {
244		resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
245	}
246	radeon_ws_bo_reference(rctx->radeon, &rshader->bo, NULL);
247	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
248		switch (cf->inst) {
249		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
250		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
251			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
252				desc = util_format_description(resource_format[vtx->buffer_id]);
253				if (desc == NULL) {
254					R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
255					return -EINVAL;
256				}
257				vtx->dst_sel_x = desc->swizzle[0];
258				vtx->dst_sel_y = desc->swizzle[1];
259				vtx->dst_sel_z = desc->swizzle[2];
260				vtx->dst_sel_w = desc->swizzle[3];
261			}
262			break;
263		default:
264			break;
265		}
266	}
267	return r600_bc_build(&shader->bc);
268}
269
270int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader)
271{
272	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
273	int r;
274
275	if (shader == NULL)
276		return -EINVAL;
277	/* there should be enough input */
278	if (rctx->vertex_elements->count < shader->shader.bc.nresource) {
279		R600_ERR("%d resources provided, expecting %d\n",
280			rctx->vertex_elements->count, shader->shader.bc.nresource);
281		return -EINVAL;
282	}
283	r = r600_shader_update(ctx, shader);
284	if (r)
285		return r;
286	return r600_pipe_shader(ctx, shader);
287}
288
289int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
290int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
291{
292	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
293	int r;
294
295//fprintf(stderr, "--------------------------------------------------------------\n");
296//tgsi_dump(tokens, 0);
297	shader->shader.family = r600_get_family(rctx->radeon);
298	r = r600_shader_from_tgsi(tokens, &shader->shader);
299	if (r) {
300		R600_ERR("translation from TGSI failed !\n");
301		return r;
302	}
303	r = r600_bc_build(&shader->shader.bc);
304	if (r) {
305		R600_ERR("building bytecode failed !\n");
306		return r;
307	}
308//fprintf(stderr, "______________________________________________________________\n");
309	return 0;
310}
311
312/*
313 * tgsi -> r600 shader
314 */
315struct r600_shader_tgsi_instruction;
316
317struct r600_shader_ctx {
318	struct tgsi_shader_info			info;
319	struct tgsi_parse_context		parse;
320	const struct tgsi_token			*tokens;
321	unsigned				type;
322	unsigned				file_offset[TGSI_FILE_COUNT];
323	unsigned				temp_reg;
324	struct r600_shader_tgsi_instruction	*inst_info;
325	struct r600_bc				*bc;
326	struct r600_shader			*shader;
327	u32					value[4];
328	u32					*literals;
329	u32					nliterals;
330	u32					max_driver_temp_used;
331};
332
/* One row of the opcode translation tables, indexed by TGSI opcode. */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this row describes */
	unsigned tgsi_opcode;
	unsigned is_op3;
	/* hardware opcode the handlers place in the emitted ALU instruction */
	unsigned r600_opcode;
	/* handler emitting the bytecode; returns 0 or a negative errno */
	int (*process)(struct r600_shader_ctx *ctx);
};

/* translation tables; the evergreen one is used when bc->chiprev == 2 */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
342
343static int tgsi_is_supported(struct r600_shader_ctx *ctx)
344{
345	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
346	int j;
347
348	if (i->Instruction.NumDstRegs > 1) {
349		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
350		return -EINVAL;
351	}
352	if (i->Instruction.Predicate) {
353		R600_ERR("predicate unsupported\n");
354		return -EINVAL;
355	}
356#if 0
357	if (i->Instruction.Label) {
358		R600_ERR("label unsupported\n");
359		return -EINVAL;
360	}
361#endif
362	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
363		if (i->Src[j].Register.Dimension ||
364			i->Src[j].Register.Absolute) {
365			R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j,
366				 i->Src[j].Register.Dimension,
367				 i->Src[j].Register.Absolute);
368			return -EINVAL;
369		}
370	}
371	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
372		if (i->Dst[j].Register.Dimension) {
373			R600_ERR("unsupported dst (dimension)\n");
374			return -EINVAL;
375		}
376	}
377	return 0;
378}
379
380static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int gpr)
381{
382	int i, r;
383	struct r600_bc_alu alu;
384
385	for (i = 0; i < 8; i++) {
386		memset(&alu, 0, sizeof(struct r600_bc_alu));
387
388		if (i < 4)
389			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
390		else
391			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
392
393		if ((i > 1) && (i < 6)) {
394			alu.dst.sel = ctx->shader->input[gpr].gpr;
395			alu.dst.write = 1;
396		}
397
398		alu.dst.chan = i % 4;
399		alu.src[0].chan = (1 - (i % 2));
400		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + gpr;
401
402		alu.bank_swizzle_force = SQ_ALU_VEC_210;
403		if ((i % 4) == 3)
404			alu.last = 1;
405		r = r600_bc_add_alu(ctx->bc, &alu);
406		if (r)
407			return r;
408	}
409	return 0;
410}
411
412
413static int tgsi_declaration(struct r600_shader_ctx *ctx)
414{
415	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
416	struct r600_bc_vtx vtx;
417	unsigned i;
418	int r;
419
420	switch (d->Declaration.File) {
421	case TGSI_FILE_INPUT:
422		i = ctx->shader->ninput++;
423		ctx->shader->input[i].name = d->Semantic.Name;
424		ctx->shader->input[i].sid = d->Semantic.Index;
425		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
426		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
427		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
428			/* turn input into fetch */
429			memset(&vtx, 0, sizeof(struct r600_bc_vtx));
430			vtx.inst = 0;
431			vtx.fetch_type = 0;
432			vtx.buffer_id = i;
433			/* register containing the index into the buffer */
434			vtx.src_gpr = 0;
435			vtx.src_sel_x = 0;
436			vtx.mega_fetch_count = 0x1F;
437			vtx.dst_gpr = ctx->shader->input[i].gpr;
438			vtx.dst_sel_x = 0;
439			vtx.dst_sel_y = 1;
440			vtx.dst_sel_z = 2;
441			vtx.dst_sel_w = 3;
442			vtx.use_const_fields = 1;
443			r = r600_bc_add_vtx(ctx->bc, &vtx);
444			if (r)
445				return r;
446		}
447		if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == 2) {
448			/* turn input into interpolate on EG */
449			evergreen_interp_alu(ctx, i);
450		}
451		break;
452	case TGSI_FILE_OUTPUT:
453		i = ctx->shader->noutput++;
454		ctx->shader->output[i].name = d->Semantic.Name;
455		ctx->shader->output[i].sid = d->Semantic.Index;
456		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
457		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
458		break;
459	case TGSI_FILE_CONSTANT:
460	case TGSI_FILE_TEMPORARY:
461	case TGSI_FILE_SAMPLER:
462	case TGSI_FILE_ADDRESS:
463		break;
464	default:
465		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
466		return -EINVAL;
467	}
468	return 0;
469}
470
471static int r600_get_temp(struct r600_shader_ctx *ctx)
472{
473	return ctx->temp_reg + ctx->max_driver_temp_used++;
474}
475
476int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
477{
478	struct tgsi_full_immediate *immediate;
479	struct r600_shader_ctx ctx;
480	struct r600_bc_output output[32];
481	unsigned output_done, noutput;
482	unsigned opcode;
483	int i, r = 0, pos0;
484
485	ctx.bc = &shader->bc;
486	ctx.shader = shader;
487	r = r600_bc_init(ctx.bc, shader->family);
488	if (r)
489		return r;
490	ctx.bc->use_mem_constant = shader->use_mem_constant;
491	ctx.tokens = tokens;
492	tgsi_scan_shader(tokens, &ctx.info);
493	tgsi_parse_init(&ctx.parse, tokens);
494	ctx.type = ctx.parse.FullHeader.Processor.Processor;
495	shader->processor_type = ctx.type;
496
497	/* register allocations */
498	/* Values [0,127] correspond to GPR[0..127].
499	 * Values [128,159] correspond to constant buffer bank 0
500	 * Values [160,191] correspond to constant buffer bank 1
501	 * Values [256,511] correspond to cfile constants c[0..255].
502	 * Other special values are shown in the list below.
503	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
504	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
505	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
506	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
507	 * 248	SQ_ALU_SRC_0: special constant 0.0.
508	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
509	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
510	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
511	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
512	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
513	 * 254	SQ_ALU_SRC_PV: previous vector result.
514	 * 255	SQ_ALU_SRC_PS: previous scalar result.
515	 */
516	for (i = 0; i < TGSI_FILE_COUNT; i++) {
517		ctx.file_offset[i] = 0;
518	}
519	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
520		ctx.file_offset[TGSI_FILE_INPUT] = 1;
521	}
522	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
523						ctx.info.file_count[TGSI_FILE_INPUT];
524	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
525						ctx.info.file_count[TGSI_FILE_OUTPUT];
526	if (ctx.shader->use_mem_constant)
527		ctx.file_offset[TGSI_FILE_CONSTANT] = 128;
528	else
529		ctx.file_offset[TGSI_FILE_CONSTANT] = 256;
530
531	ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
532	ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
533			ctx.info.file_count[TGSI_FILE_TEMPORARY];
534
535	ctx.nliterals = 0;
536	ctx.literals = NULL;
537
538	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
539		tgsi_parse_token(&ctx.parse);
540		switch (ctx.parse.FullToken.Token.Type) {
541		case TGSI_TOKEN_TYPE_IMMEDIATE:
542			immediate = &ctx.parse.FullToken.FullImmediate;
543			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
544			if(ctx.literals == NULL) {
545				r = -ENOMEM;
546				goto out_err;
547			}
548			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
549			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
550			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
551			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
552			ctx.nliterals++;
553			break;
554		case TGSI_TOKEN_TYPE_DECLARATION:
555			r = tgsi_declaration(&ctx);
556			if (r)
557				goto out_err;
558			break;
559		case TGSI_TOKEN_TYPE_INSTRUCTION:
560			r = tgsi_is_supported(&ctx);
561			if (r)
562				goto out_err;
563			ctx.max_driver_temp_used = 0;
564			/* reserve first tmp for everyone */
565			r600_get_temp(&ctx);
566			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
567			if (ctx.bc->chiprev == 2)
568				ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
569			else
570				ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
571			r = ctx.inst_info->process(&ctx);
572			if (r)
573				goto out_err;
574			r = r600_bc_add_literal(ctx.bc, ctx.value);
575			if (r)
576				goto out_err;
577			break;
578		default:
579			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
580			r = -EINVAL;
581			goto out_err;
582		}
583	}
584	/* export output */
585	noutput = shader->noutput;
586	for (i = 0, pos0 = 0; i < noutput; i++) {
587		memset(&output[i], 0, sizeof(struct r600_bc_output));
588		output[i].gpr = shader->output[i].gpr;
589		output[i].elem_size = 3;
590		output[i].swizzle_x = 0;
591		output[i].swizzle_y = 1;
592		output[i].swizzle_z = 2;
593		output[i].swizzle_w = 3;
594		output[i].barrier = 1;
595		output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
596		output[i].array_base = i - pos0;
597		output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
598		switch (ctx.type) {
599		case TGSI_PROCESSOR_VERTEX:
600			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
601				output[i].array_base = 60;
602				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
603				/* position doesn't count in array_base */
604				pos0++;
605			}
606			if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
607				output[i].array_base = 61;
608				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
609				/* position doesn't count in array_base */
610				pos0++;
611			}
612			break;
613		case TGSI_PROCESSOR_FRAGMENT:
614			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
615				output[i].array_base = shader->output[i].sid;
616				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
617			} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
618				output[i].array_base = 61;
619				output[i].swizzle_x = 2;
620				output[i].swizzle_y = output[i].swizzle_z = output[i].swizzle_w = 7;
621				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
622			} else {
623				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
624				r = -EINVAL;
625				goto out_err;
626			}
627			break;
628		default:
629			R600_ERR("unsupported processor type %d\n", ctx.type);
630			r = -EINVAL;
631			goto out_err;
632		}
633	}
634	/* add fake param output for vertex shader if no param is exported */
635	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
636		for (i = 0, pos0 = 0; i < noutput; i++) {
637			if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
638				pos0 = 1;
639				break;
640			}
641		}
642		if (!pos0) {
643			memset(&output[i], 0, sizeof(struct r600_bc_output));
644			output[i].gpr = 0;
645			output[i].elem_size = 3;
646			output[i].swizzle_x = 0;
647			output[i].swizzle_y = 1;
648			output[i].swizzle_z = 2;
649			output[i].swizzle_w = 3;
650			output[i].barrier = 1;
651			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
652			output[i].array_base = 0;
653			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
654			noutput++;
655		}
656	}
657	/* add fake pixel export */
658	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
659		memset(&output[0], 0, sizeof(struct r600_bc_output));
660		output[0].gpr = 0;
661		output[0].elem_size = 3;
662		output[0].swizzle_x = 7;
663		output[0].swizzle_y = 7;
664		output[0].swizzle_z = 7;
665		output[0].swizzle_w = 7;
666		output[0].barrier = 1;
667		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
668		output[0].array_base = 0;
669		output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
670		noutput++;
671	}
672	/* set export done on last export of each type */
673	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
674		if (i == (noutput - 1)) {
675			output[i].end_of_program = 1;
676		}
677		if (!(output_done & (1 << output[i].type))) {
678			output_done |= (1 << output[i].type);
679			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
680		}
681	}
682	/* add output to bytecode */
683	for (i = 0; i < noutput; i++) {
684		r = r600_bc_add_output(ctx.bc, &output[i]);
685		if (r)
686			goto out_err;
687	}
688	free(ctx.literals);
689	tgsi_parse_free(&ctx.parse);
690	return 0;
691out_err:
692	free(ctx.literals);
693	tgsi_parse_free(&ctx.parse);
694	return r;
695}
696
697static int tgsi_unsupported(struct r600_shader_ctx *ctx)
698{
699	R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
700	return -EINVAL;
701}
702
/* Handler that emits nothing and always succeeds. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;
	return 0;
}
707
708static int tgsi_src(struct r600_shader_ctx *ctx,
709			const struct tgsi_full_src_register *tgsi_src,
710			struct r600_bc_alu_src *r600_src)
711{
712	int index;
713	memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
714	r600_src->sel = tgsi_src->Register.Index;
715	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
716		r600_src->sel = 0;
717		index = tgsi_src->Register.Index;
718		ctx->value[0] = ctx->literals[index * 4 + 0];
719		ctx->value[1] = ctx->literals[index * 4 + 1];
720		ctx->value[2] = ctx->literals[index * 4 + 2];
721		ctx->value[3] = ctx->literals[index * 4 + 3];
722	}
723	if (tgsi_src->Register.Indirect)
724		r600_src->rel = V_SQ_REL_RELATIVE;
725	r600_src->neg = tgsi_src->Register.Negate;
726	r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
727	return 0;
728}
729
730static int tgsi_dst(struct r600_shader_ctx *ctx,
731			const struct tgsi_full_dst_register *tgsi_dst,
732			unsigned swizzle,
733			struct r600_bc_alu_dst *r600_dst)
734{
735	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
736
737	r600_dst->sel = tgsi_dst->Register.Index;
738	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
739	r600_dst->chan = swizzle;
740	r600_dst->write = 1;
741	if (tgsi_dst->Register.Indirect)
742		r600_dst->rel = V_SQ_REL_RELATIVE;
743	if (inst->Instruction.Saturate) {
744		r600_dst->clamp = 1;
745	}
746	return 0;
747}
748
749static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
750{
751	switch (swizzle) {
752	case 0:
753		return tgsi_src->Register.SwizzleX;
754	case 1:
755		return tgsi_src->Register.SwizzleY;
756	case 2:
757		return tgsi_src->Register.SwizzleZ;
758	case 3:
759		return tgsi_src->Register.SwizzleW;
760	default:
761		return 0;
762	}
763}
764
765static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
766{
767	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
768	struct r600_bc_alu alu;
769	int i, j, k, nconst, r;
770
771	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
772		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
773			nconst++;
774		}
775		r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
776		if (r) {
777			return r;
778		}
779	}
780	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
781		if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
782			int treg = r600_get_temp(ctx);
783			for (k = 0; k < 4; k++) {
784				memset(&alu, 0, sizeof(struct r600_bc_alu));
785				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
786				alu.src[0].sel = r600_src[i].sel;
787				alu.src[0].chan = k;
788				alu.dst.sel = treg;
789				alu.dst.chan = k;
790				alu.dst.write = 1;
791				if (k == 3)
792					alu.last = 1;
793				r = r600_bc_add_alu(ctx->bc, &alu);
794				if (r)
795					return r;
796			}
797			r600_src[i].sel = treg;
798			j--;
799		}
800	}
801	return 0;
802}
803
804/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
805static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
806{
807	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
808	struct r600_bc_alu alu;
809	int i, j, k, nliteral, r, index;
810
811	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
812		if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
813			nliteral++;
814		}
815	}
816	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
817		if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
818			int treg = r600_get_temp(ctx);
819			for (k = 0; k < 4; k++) {
820				memset(&alu, 0, sizeof(struct r600_bc_alu));
821				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
822				alu.src[0].sel = r600_src[i].sel;
823				alu.src[0].chan = k;
824				alu.dst.sel = treg;
825				alu.dst.chan = k;
826				alu.dst.write = 1;
827				if (k == 3)
828					alu.last = 1;
829				r = r600_bc_add_alu(ctx->bc, &alu);
830				if (r)
831					return r;
832			}
833			r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]);
834			if (r)
835				return r;
836			r600_src[i].sel = treg;
837			j--;
838		}
839	}
840	return 0;
841}
842
843static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
844{
845	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
846	struct r600_bc_alu_src r600_src[3];
847	struct r600_bc_alu alu;
848	int i, j, r;
849	int lasti = 0;
850
851	for (i = 0; i < 4; i++) {
852		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
853			lasti = i;
854		}
855	}
856
857	r = tgsi_split_constant(ctx, r600_src);
858	if (r)
859		return r;
860	r = tgsi_split_literal_constant(ctx, r600_src);
861	if (r)
862		return r;
863	for (i = 0; i < lasti + 1; i++) {
864		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
865			continue;
866
867		memset(&alu, 0, sizeof(struct r600_bc_alu));
868		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
869		if (r)
870			return r;
871
872		alu.inst = ctx->inst_info->r600_opcode;
873		if (!swap) {
874			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
875				alu.src[j] = r600_src[j];
876				alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
877			}
878		} else {
879			alu.src[0] = r600_src[1];
880			alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
881
882			alu.src[1] = r600_src[0];
883			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
884		}
885		/* handle some special cases */
886		switch (ctx->inst_info->tgsi_opcode) {
887		case TGSI_OPCODE_SUB:
888			alu.src[1].neg = 1;
889			break;
890		case TGSI_OPCODE_ABS:
891			alu.src[0].abs = 1;
892			break;
893		default:
894			break;
895		}
896		if (i == lasti) {
897			alu.last = 1;
898		}
899		r = r600_bc_add_alu(ctx->bc, &alu);
900		if (r)
901			return r;
902	}
903	return 0;
904}
905
/* Per-channel ALU translation with the sources in TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
910
/* Per-channel ALU translation with the first two sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
915
916/*
917 * r600 - trunc to -PI..PI range
918 * r700 - normalize by dividing by 2PI
919 * see fdo bug 27901
920 */
921static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
922			   struct r600_bc_alu_src r600_src[3])
923{
924	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
925	int r;
926	uint32_t lit_vals[4];
927	struct r600_bc_alu alu;
928
929	memset(lit_vals, 0, 4*4);
930	r = tgsi_split_constant(ctx, r600_src);
931	if (r)
932		return r;
933	r = tgsi_split_literal_constant(ctx, r600_src);
934	if (r)
935		return r;
936
937	r = tgsi_split_literal_constant(ctx, r600_src);
938	if (r)
939		return r;
940
941	lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
942	lit_vals[1] = fui(0.5f);
943
944	memset(&alu, 0, sizeof(struct r600_bc_alu));
945	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
946	alu.is_op3 = 1;
947
948	alu.dst.chan = 0;
949	alu.dst.sel = ctx->temp_reg;
950	alu.dst.write = 1;
951
952	alu.src[0] = r600_src[0];
953	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
954
955	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
956	alu.src[1].chan = 0;
957	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
958	alu.src[2].chan = 1;
959	alu.last = 1;
960	r = r600_bc_add_alu(ctx->bc, &alu);
961	if (r)
962		return r;
963	r = r600_bc_add_literal(ctx->bc, lit_vals);
964	if (r)
965		return r;
966
967	memset(&alu, 0, sizeof(struct r600_bc_alu));
968	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
969
970	alu.dst.chan = 0;
971	alu.dst.sel = ctx->temp_reg;
972	alu.dst.write = 1;
973
974	alu.src[0].sel = ctx->temp_reg;
975	alu.src[0].chan = 0;
976	alu.last = 1;
977	r = r600_bc_add_alu(ctx->bc, &alu);
978	if (r)
979		return r;
980
981	if (ctx->bc->chiprev == 0) {
982		lit_vals[0] = fui(3.1415926535897f * 2.0f);
983		lit_vals[1] = fui(-3.1415926535897f);
984	} else {
985		lit_vals[0] = fui(1.0f);
986		lit_vals[1] = fui(-0.5f);
987	}
988
989	memset(&alu, 0, sizeof(struct r600_bc_alu));
990	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
991	alu.is_op3 = 1;
992
993	alu.dst.chan = 0;
994	alu.dst.sel = ctx->temp_reg;
995	alu.dst.write = 1;
996
997	alu.src[0].sel = ctx->temp_reg;
998	alu.src[0].chan = 0;
999
1000	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1001	alu.src[1].chan = 0;
1002	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1003	alu.src[2].chan = 1;
1004	alu.last = 1;
1005	r = r600_bc_add_alu(ctx->bc, &alu);
1006	if (r)
1007		return r;
1008	r = r600_bc_add_literal(ctx->bc, lit_vals);
1009	if (r)
1010		return r;
1011	return 0;
1012}
1013
1014static int tgsi_trig(struct r600_shader_ctx *ctx)
1015{
1016	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1017	struct r600_bc_alu_src r600_src[3];
1018	struct r600_bc_alu alu;
1019	int i, r;
1020	int lasti = 0;
1021
1022	r = tgsi_setup_trig(ctx, r600_src);
1023	if (r)
1024		return r;
1025
1026	memset(&alu, 0, sizeof(struct r600_bc_alu));
1027	alu.inst = ctx->inst_info->r600_opcode;
1028	alu.dst.chan = 0;
1029	alu.dst.sel = ctx->temp_reg;
1030	alu.dst.write = 1;
1031
1032	alu.src[0].sel = ctx->temp_reg;
1033	alu.src[0].chan = 0;
1034	alu.last = 1;
1035	r = r600_bc_add_alu(ctx->bc, &alu);
1036	if (r)
1037		return r;
1038
1039	/* replicate result */
1040	for (i = 0; i < 4; i++) {
1041		if (inst->Dst[0].Register.WriteMask & (1 << i))
1042			lasti = i;
1043	}
1044	for (i = 0; i < lasti + 1; i++) {
1045		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1046			continue;
1047
1048		memset(&alu, 0, sizeof(struct r600_bc_alu));
1049		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1050
1051		alu.src[0].sel = ctx->temp_reg;
1052		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1053		if (r)
1054			return r;
1055		if (i == lasti)
1056			alu.last = 1;
1057		r = r600_bc_add_alu(ctx->bc, &alu);
1058		if (r)
1059			return r;
1060	}
1061	return 0;
1062}
1063
1064static int tgsi_scs(struct r600_shader_ctx *ctx)
1065{
1066	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1067	struct r600_bc_alu_src r600_src[3];
1068	struct r600_bc_alu alu;
1069	int r;
1070
1071	/* We'll only need the trig stuff if we are going to write to the
1072	 * X or Y components of the destination vector.
1073	 */
1074	if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1075		r = tgsi_setup_trig(ctx, r600_src);
1076		if (r)
1077			return r;
1078	}
1079
1080	/* dst.x = COS */
1081	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1082		memset(&alu, 0, sizeof(struct r600_bc_alu));
1083		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1084		r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1085		if (r)
1086			return r;
1087
1088		alu.src[0].sel = ctx->temp_reg;
1089		alu.src[0].chan = 0;
1090		alu.last = 1;
1091		r = r600_bc_add_alu(ctx->bc, &alu);
1092		if (r)
1093			return r;
1094	}
1095
1096	/* dst.y = SIN */
1097	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1098		memset(&alu, 0, sizeof(struct r600_bc_alu));
1099		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1100		r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1101		if (r)
1102			return r;
1103
1104		alu.src[0].sel = ctx->temp_reg;
1105		alu.src[0].chan = 0;
1106		alu.last = 1;
1107		r = r600_bc_add_alu(ctx->bc, &alu);
1108		if (r)
1109			return r;
1110	}
1111
1112	/* dst.z = 0.0; */
1113	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1114		memset(&alu, 0, sizeof(struct r600_bc_alu));
1115
1116		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1117
1118		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1119		if (r)
1120			return r;
1121
1122		alu.src[0].sel = V_SQ_ALU_SRC_0;
1123		alu.src[0].chan = 0;
1124
1125		alu.last = 1;
1126
1127		r = r600_bc_add_alu(ctx->bc, &alu);
1128		if (r)
1129			return r;
1130
1131		r = r600_bc_add_literal(ctx->bc, ctx->value);
1132		if (r)
1133			return r;
1134	}
1135
1136	/* dst.w = 1.0; */
1137	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1138		memset(&alu, 0, sizeof(struct r600_bc_alu));
1139
1140		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1141
1142		r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1143		if (r)
1144			return r;
1145
1146		alu.src[0].sel = V_SQ_ALU_SRC_1;
1147		alu.src[0].chan = 0;
1148
1149		alu.last = 1;
1150
1151		r = r600_bc_add_alu(ctx->bc, &alu);
1152		if (r)
1153			return r;
1154
1155		r = r600_bc_add_literal(ctx->bc, ctx->value);
1156		if (r)
1157			return r;
1158	}
1159
1160	return 0;
1161}
1162
1163static int tgsi_kill(struct r600_shader_ctx *ctx)
1164{
1165	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1166	struct r600_bc_alu alu;
1167	int i, r;
1168
1169	for (i = 0; i < 4; i++) {
1170		memset(&alu, 0, sizeof(struct r600_bc_alu));
1171		alu.inst = ctx->inst_info->r600_opcode;
1172
1173		alu.dst.chan = i;
1174
1175		alu.src[0].sel = V_SQ_ALU_SRC_0;
1176
1177		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1178			alu.src[1].sel = V_SQ_ALU_SRC_1;
1179			alu.src[1].neg = 1;
1180		} else {
1181			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1182			if (r)
1183				return r;
1184			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1185		}
1186		if (i == 3) {
1187			alu.last = 1;
1188		}
1189		r = r600_bc_add_alu(ctx->bc, &alu);
1190		if (r)
1191			return r;
1192	}
1193	r = r600_bc_add_literal(ctx->bc, ctx->value);
1194	if (r)
1195		return r;
1196
1197	/* kill must be last in ALU */
1198	ctx->bc->force_add_cf = 1;
1199	ctx->shader->uses_kill = TRUE;
1200	return 0;
1201}
1202
1203static int tgsi_lit(struct r600_shader_ctx *ctx)
1204{
1205	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1206	struct r600_bc_alu alu;
1207	struct r600_bc_alu_src r600_src[3];
1208	int r;
1209
1210	r = tgsi_split_constant(ctx, r600_src);
1211	if (r)
1212		return r;
1213	r = tgsi_split_literal_constant(ctx, r600_src);
1214	if (r)
1215		return r;
1216
1217	/* dst.x, <- 1.0  */
1218	memset(&alu, 0, sizeof(struct r600_bc_alu));
1219	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1220	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
1221	alu.src[0].chan = 0;
1222	r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1223	if (r)
1224		return r;
1225	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1226	r = r600_bc_add_alu(ctx->bc, &alu);
1227	if (r)
1228		return r;
1229
1230	/* dst.y = max(src.x, 0.0) */
1231	memset(&alu, 0, sizeof(struct r600_bc_alu));
1232	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1233	alu.src[0] = r600_src[0];
1234	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1235	alu.src[1].chan = 0;
1236	r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1237	if (r)
1238		return r;
1239	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1240	r = r600_bc_add_alu(ctx->bc, &alu);
1241	if (r)
1242		return r;
1243
1244	/* dst.w, <- 1.0  */
1245	memset(&alu, 0, sizeof(struct r600_bc_alu));
1246	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1247	alu.src[0].sel  = V_SQ_ALU_SRC_1;
1248	alu.src[0].chan = 0;
1249	r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1250	if (r)
1251		return r;
1252	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1253	alu.last = 1;
1254	r = r600_bc_add_alu(ctx->bc, &alu);
1255	if (r)
1256		return r;
1257
1258	r = r600_bc_add_literal(ctx->bc, ctx->value);
1259	if (r)
1260		return r;
1261
1262	if (inst->Dst[0].Register.WriteMask & (1 << 2))
1263	{
1264		int chan;
1265		int sel;
1266
1267		/* dst.z = log(src.y) */
1268		memset(&alu, 0, sizeof(struct r600_bc_alu));
1269		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1270		alu.src[0] = r600_src[0];
1271		alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1272		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1273		if (r)
1274			return r;
1275		alu.last = 1;
1276		r = r600_bc_add_alu(ctx->bc, &alu);
1277		if (r)
1278			return r;
1279
1280		r = r600_bc_add_literal(ctx->bc, ctx->value);
1281		if (r)
1282			return r;
1283
1284		chan = alu.dst.chan;
1285		sel = alu.dst.sel;
1286
1287		/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1288		memset(&alu, 0, sizeof(struct r600_bc_alu));
1289		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1290		alu.src[0] = r600_src[0];
1291		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1292		alu.src[1].sel  = sel;
1293		alu.src[1].chan = chan;
1294
1295		alu.src[2] = r600_src[0];
1296		alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
1297		alu.dst.sel = ctx->temp_reg;
1298		alu.dst.chan = 0;
1299		alu.dst.write = 1;
1300		alu.is_op3 = 1;
1301		alu.last = 1;
1302		r = r600_bc_add_alu(ctx->bc, &alu);
1303		if (r)
1304			return r;
1305
1306		r = r600_bc_add_literal(ctx->bc, ctx->value);
1307		if (r)
1308			return r;
1309		/* dst.z = exp(tmp.x) */
1310		memset(&alu, 0, sizeof(struct r600_bc_alu));
1311		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1312		alu.src[0].sel = ctx->temp_reg;
1313		alu.src[0].chan = 0;
1314		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1315		if (r)
1316			return r;
1317		alu.last = 1;
1318		r = r600_bc_add_alu(ctx->bc, &alu);
1319		if (r)
1320			return r;
1321	}
1322	return 0;
1323}
1324
1325static int tgsi_rsq(struct r600_shader_ctx *ctx)
1326{
1327	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1328	struct r600_bc_alu alu;
1329	int i, r;
1330
1331	memset(&alu, 0, sizeof(struct r600_bc_alu));
1332
1333	/* FIXME:
1334	 * For state trackers other than OpenGL, we'll want to use
1335	 * _RECIPSQRT_IEEE instead.
1336	 */
1337	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1338
1339	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1340		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1341		if (r)
1342			return r;
1343		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1344		alu.src[i].abs = 1;
1345	}
1346	alu.dst.sel = ctx->temp_reg;
1347	alu.dst.write = 1;
1348	alu.last = 1;
1349	r = r600_bc_add_alu(ctx->bc, &alu);
1350	if (r)
1351		return r;
1352	r = r600_bc_add_literal(ctx->bc, ctx->value);
1353	if (r)
1354		return r;
1355	/* replicate result */
1356	return tgsi_helper_tempx_replicate(ctx);
1357}
1358
1359static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1360{
1361	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1362	struct r600_bc_alu alu;
1363	int i, r;
1364
1365	for (i = 0; i < 4; i++) {
1366		memset(&alu, 0, sizeof(struct r600_bc_alu));
1367		alu.src[0].sel = ctx->temp_reg;
1368		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1369		alu.dst.chan = i;
1370		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1371		if (r)
1372			return r;
1373		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1374		if (i == 3)
1375			alu.last = 1;
1376		r = r600_bc_add_alu(ctx->bc, &alu);
1377		if (r)
1378			return r;
1379	}
1380	return 0;
1381}
1382
1383static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1384{
1385	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1386	struct r600_bc_alu alu;
1387	int i, r;
1388
1389	memset(&alu, 0, sizeof(struct r600_bc_alu));
1390	alu.inst = ctx->inst_info->r600_opcode;
1391	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1392		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1393		if (r)
1394			return r;
1395		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1396	}
1397	alu.dst.sel = ctx->temp_reg;
1398	alu.dst.write = 1;
1399	alu.last = 1;
1400	r = r600_bc_add_alu(ctx->bc, &alu);
1401	if (r)
1402		return r;
1403	r = r600_bc_add_literal(ctx->bc, ctx->value);
1404	if (r)
1405		return r;
1406	/* replicate result */
1407	return tgsi_helper_tempx_replicate(ctx);
1408}
1409
1410static int tgsi_pow(struct r600_shader_ctx *ctx)
1411{
1412	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1413	struct r600_bc_alu alu;
1414	int r;
1415
1416	/* LOG2(a) */
1417	memset(&alu, 0, sizeof(struct r600_bc_alu));
1418	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1419	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1420	if (r)
1421		return r;
1422	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1423	alu.dst.sel = ctx->temp_reg;
1424	alu.dst.write = 1;
1425	alu.last = 1;
1426	r = r600_bc_add_alu(ctx->bc, &alu);
1427	if (r)
1428		return r;
1429	r = r600_bc_add_literal(ctx->bc,ctx->value);
1430	if (r)
1431		return r;
1432	/* b * LOG2(a) */
1433	memset(&alu, 0, sizeof(struct r600_bc_alu));
1434	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE);
1435	r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1436	if (r)
1437		return r;
1438	alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1439	alu.src[1].sel = ctx->temp_reg;
1440	alu.dst.sel = ctx->temp_reg;
1441	alu.dst.write = 1;
1442	alu.last = 1;
1443	r = r600_bc_add_alu(ctx->bc, &alu);
1444	if (r)
1445		return r;
1446	r = r600_bc_add_literal(ctx->bc,ctx->value);
1447	if (r)
1448		return r;
1449	/* POW(a,b) = EXP2(b * LOG2(a))*/
1450	memset(&alu, 0, sizeof(struct r600_bc_alu));
1451	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1452	alu.src[0].sel = ctx->temp_reg;
1453	alu.dst.sel = ctx->temp_reg;
1454	alu.dst.write = 1;
1455	alu.last = 1;
1456	r = r600_bc_add_alu(ctx->bc, &alu);
1457	if (r)
1458		return r;
1459	r = r600_bc_add_literal(ctx->bc,ctx->value);
1460	if (r)
1461		return r;
1462	return tgsi_helper_tempx_replicate(ctx);
1463}
1464
1465static int tgsi_ssg(struct r600_shader_ctx *ctx)
1466{
1467	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1468	struct r600_bc_alu alu;
1469	struct r600_bc_alu_src r600_src[3];
1470	int i, r;
1471
1472	r = tgsi_split_constant(ctx, r600_src);
1473	if (r)
1474		return r;
1475	r = tgsi_split_literal_constant(ctx, r600_src);
1476	if (r)
1477		return r;
1478
1479	/* tmp = (src > 0 ? 1 : src) */
1480	for (i = 0; i < 4; i++) {
1481		memset(&alu, 0, sizeof(struct r600_bc_alu));
1482		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1483		alu.is_op3 = 1;
1484
1485		alu.dst.sel = ctx->temp_reg;
1486		alu.dst.chan = i;
1487
1488		alu.src[0] = r600_src[0];
1489		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1490
1491		alu.src[1].sel = V_SQ_ALU_SRC_1;
1492
1493		alu.src[2] = r600_src[0];
1494		alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1495		if (i == 3)
1496			alu.last = 1;
1497		r = r600_bc_add_alu(ctx->bc, &alu);
1498		if (r)
1499			return r;
1500	}
1501	r = r600_bc_add_literal(ctx->bc, ctx->value);
1502	if (r)
1503		return r;
1504
1505	/* dst = (-tmp > 0 ? -1 : tmp) */
1506	for (i = 0; i < 4; i++) {
1507		memset(&alu, 0, sizeof(struct r600_bc_alu));
1508		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1509		alu.is_op3 = 1;
1510		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1511		if (r)
1512			return r;
1513
1514		alu.src[0].sel = ctx->temp_reg;
1515		alu.src[0].chan = i;
1516		alu.src[0].neg = 1;
1517
1518		alu.src[1].sel = V_SQ_ALU_SRC_1;
1519		alu.src[1].neg = 1;
1520
1521		alu.src[2].sel = ctx->temp_reg;
1522		alu.src[2].chan = i;
1523
1524		if (i == 3)
1525			alu.last = 1;
1526		r = r600_bc_add_alu(ctx->bc, &alu);
1527		if (r)
1528			return r;
1529	}
1530	return 0;
1531}
1532
1533static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1534{
1535	struct r600_bc_alu alu;
1536	int i, r;
1537
1538	r = r600_bc_add_literal(ctx->bc, ctx->value);
1539	if (r)
1540		return r;
1541	for (i = 0; i < 4; i++) {
1542		memset(&alu, 0, sizeof(struct r600_bc_alu));
1543		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1544			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1545			alu.dst.chan = i;
1546		} else {
1547			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1548			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1549			if (r)
1550				return r;
1551			alu.src[0].sel = ctx->temp_reg;
1552			alu.src[0].chan = i;
1553		}
1554		if (i == 3) {
1555			alu.last = 1;
1556		}
1557		r = r600_bc_add_alu(ctx->bc, &alu);
1558		if (r)
1559			return r;
1560	}
1561	return 0;
1562}
1563
1564static int tgsi_op3(struct r600_shader_ctx *ctx)
1565{
1566	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1567	struct r600_bc_alu_src r600_src[3];
1568	struct r600_bc_alu alu;
1569	int i, j, r;
1570
1571	r = tgsi_split_constant(ctx, r600_src);
1572	if (r)
1573		return r;
1574	r = tgsi_split_literal_constant(ctx, r600_src);
1575	if (r)
1576		return r;
1577	/* do it in 2 step as op3 doesn't support writemask */
1578	for (i = 0; i < 4; i++) {
1579		memset(&alu, 0, sizeof(struct r600_bc_alu));
1580		alu.inst = ctx->inst_info->r600_opcode;
1581		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1582			alu.src[j] = r600_src[j];
1583			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1584		}
1585		alu.dst.sel = ctx->temp_reg;
1586		alu.dst.chan = i;
1587		alu.dst.write = 1;
1588		alu.is_op3 = 1;
1589		if (i == 3) {
1590			alu.last = 1;
1591		}
1592		r = r600_bc_add_alu(ctx->bc, &alu);
1593		if (r)
1594			return r;
1595	}
1596	return tgsi_helper_copy(ctx, inst);
1597}
1598
1599static int tgsi_dp(struct r600_shader_ctx *ctx)
1600{
1601	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1602	struct r600_bc_alu_src r600_src[3];
1603	struct r600_bc_alu alu;
1604	int i, j, r;
1605
1606	r = tgsi_split_constant(ctx, r600_src);
1607	if (r)
1608		return r;
1609	r = tgsi_split_literal_constant(ctx, r600_src);
1610	if (r)
1611		return r;
1612	for (i = 0; i < 4; i++) {
1613		memset(&alu, 0, sizeof(struct r600_bc_alu));
1614		alu.inst = ctx->inst_info->r600_opcode;
1615		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1616			alu.src[j] = r600_src[j];
1617			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1618		}
1619		alu.dst.sel = ctx->temp_reg;
1620		alu.dst.chan = i;
1621		alu.dst.write = 1;
1622		/* handle some special cases */
1623		switch (ctx->inst_info->tgsi_opcode) {
1624		case TGSI_OPCODE_DP2:
1625			if (i > 1) {
1626				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1627				alu.src[0].chan = alu.src[1].chan = 0;
1628			}
1629			break;
1630		case TGSI_OPCODE_DP3:
1631			if (i > 2) {
1632				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1633				alu.src[0].chan = alu.src[1].chan = 0;
1634			}
1635			break;
1636		case TGSI_OPCODE_DPH:
1637			if (i == 3) {
1638				alu.src[0].sel = V_SQ_ALU_SRC_1;
1639				alu.src[0].chan = 0;
1640				alu.src[0].neg = 0;
1641			}
1642			break;
1643		default:
1644			break;
1645		}
1646		if (i == 3) {
1647			alu.last = 1;
1648		}
1649		r = r600_bc_add_alu(ctx->bc, &alu);
1650		if (r)
1651			return r;
1652	}
1653	return tgsi_helper_copy(ctx, inst);
1654}
1655
1656static int tgsi_tex(struct r600_shader_ctx *ctx)
1657{
1658	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1659	struct r600_bc_tex tex;
1660	struct r600_bc_alu alu;
1661	unsigned src_gpr;
1662	int r, i;
1663	int opcode;
1664	boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY;
1665	uint32_t lit_vals[4];
1666
1667	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1668
1669	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1670		/* Add perspective divide */
1671		memset(&alu, 0, sizeof(struct r600_bc_alu));
1672		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1673		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1674		if (r)
1675			return r;
1676
1677		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1678		alu.dst.sel = ctx->temp_reg;
1679		alu.dst.chan = 3;
1680		alu.last = 1;
1681		alu.dst.write = 1;
1682		r = r600_bc_add_alu(ctx->bc, &alu);
1683		if (r)
1684			return r;
1685
1686		for (i = 0; i < 3; i++) {
1687			memset(&alu, 0, sizeof(struct r600_bc_alu));
1688			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1689			alu.src[0].sel = ctx->temp_reg;
1690			alu.src[0].chan = 3;
1691			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1692			if (r)
1693				return r;
1694			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1695			alu.dst.sel = ctx->temp_reg;
1696			alu.dst.chan = i;
1697			alu.dst.write = 1;
1698			r = r600_bc_add_alu(ctx->bc, &alu);
1699			if (r)
1700				return r;
1701		}
1702		memset(&alu, 0, sizeof(struct r600_bc_alu));
1703		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1704		alu.src[0].sel = V_SQ_ALU_SRC_1;
1705		alu.src[0].chan = 0;
1706		alu.dst.sel = ctx->temp_reg;
1707		alu.dst.chan = 3;
1708		alu.last = 1;
1709		alu.dst.write = 1;
1710		r = r600_bc_add_alu(ctx->bc, &alu);
1711		if (r)
1712			return r;
1713		src_not_temp = FALSE;
1714		src_gpr = ctx->temp_reg;
1715	}
1716
1717	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1718		int src_chan, src2_chan;
1719
1720		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1721		for (i = 0; i < 4; i++) {
1722			memset(&alu, 0, sizeof(struct r600_bc_alu));
1723			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1724			switch (i) {
1725			case 0:
1726				src_chan = 2;
1727				src2_chan = 1;
1728				break;
1729			case 1:
1730				src_chan = 2;
1731				src2_chan = 0;
1732				break;
1733			case 2:
1734				src_chan = 0;
1735				src2_chan = 2;
1736				break;
1737			case 3:
1738				src_chan = 1;
1739				src2_chan = 2;
1740				break;
1741			default:
1742				assert(0);
1743				src_chan = 0;
1744				src2_chan = 0;
1745				break;
1746			}
1747			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1748			if (r)
1749				return r;
1750			alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
1751			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1752			if (r)
1753				return r;
1754			alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
1755			alu.dst.sel = ctx->temp_reg;
1756			alu.dst.chan = i;
1757			if (i == 3)
1758				alu.last = 1;
1759			alu.dst.write = 1;
1760			r = r600_bc_add_alu(ctx->bc, &alu);
1761			if (r)
1762				return r;
1763		}
1764
1765		/* tmp1.z = RCP_e(|tmp1.z|) */
1766		memset(&alu, 0, sizeof(struct r600_bc_alu));
1767		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1768		alu.src[0].sel = ctx->temp_reg;
1769		alu.src[0].chan = 2;
1770		alu.src[0].abs = 1;
1771		alu.dst.sel = ctx->temp_reg;
1772		alu.dst.chan = 2;
1773		alu.dst.write = 1;
1774		alu.last = 1;
1775		r = r600_bc_add_alu(ctx->bc, &alu);
1776		if (r)
1777			return r;
1778
1779		/* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
1780		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
1781		 * muladd has no writemask, have to use another temp
1782		 */
1783		memset(&alu, 0, sizeof(struct r600_bc_alu));
1784		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1785		alu.is_op3 = 1;
1786
1787		alu.src[0].sel = ctx->temp_reg;
1788		alu.src[0].chan = 0;
1789		alu.src[1].sel = ctx->temp_reg;
1790		alu.src[1].chan = 2;
1791
1792		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1793		alu.src[2].chan = 0;
1794
1795		alu.dst.sel = ctx->temp_reg;
1796		alu.dst.chan = 0;
1797		alu.dst.write = 1;
1798
1799		r = r600_bc_add_alu(ctx->bc, &alu);
1800		if (r)
1801			return r;
1802
1803		memset(&alu, 0, sizeof(struct r600_bc_alu));
1804		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1805		alu.is_op3 = 1;
1806
1807		alu.src[0].sel = ctx->temp_reg;
1808		alu.src[0].chan = 1;
1809		alu.src[1].sel = ctx->temp_reg;
1810		alu.src[1].chan = 2;
1811
1812		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1813		alu.src[2].chan = 0;
1814
1815		alu.dst.sel = ctx->temp_reg;
1816		alu.dst.chan = 1;
1817		alu.dst.write = 1;
1818
1819		alu.last = 1;
1820		r = r600_bc_add_alu(ctx->bc, &alu);
1821		if (r)
1822			return r;
1823
1824		lit_vals[0] = fui(1.5f);
1825
1826		r = r600_bc_add_literal(ctx->bc, lit_vals);
1827		if (r)
1828			return r;
1829		src_not_temp = FALSE;
1830		src_gpr = ctx->temp_reg;
1831	}
1832
1833	if (src_not_temp) {
1834		for (i = 0; i < 4; i++) {
1835			memset(&alu, 0, sizeof(struct r600_bc_alu));
1836			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1837			alu.src[0].sel = src_gpr;
1838			alu.src[0].chan = i;
1839			alu.dst.sel = ctx->temp_reg;
1840			alu.dst.chan = i;
1841			if (i == 3)
1842				alu.last = 1;
1843			alu.dst.write = 1;
1844			r = r600_bc_add_alu(ctx->bc, &alu);
1845			if (r)
1846				return r;
1847		}
1848		src_gpr = ctx->temp_reg;
1849	}
1850
1851	opcode = ctx->inst_info->r600_opcode;
1852	if (opcode == SQ_TEX_INST_SAMPLE &&
1853	    (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
1854		opcode = SQ_TEX_INST_SAMPLE_C;
1855
1856	memset(&tex, 0, sizeof(struct r600_bc_tex));
1857	tex.inst = opcode;
1858	tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1859	tex.sampler_id = tex.resource_id;
1860	tex.src_gpr = src_gpr;
1861	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1862	tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
1863	tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
1864	tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
1865	tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
1866	tex.src_sel_x = 0;
1867	tex.src_sel_y = 1;
1868	tex.src_sel_z = 2;
1869	tex.src_sel_w = 3;
1870
1871	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1872		tex.src_sel_x = 1;
1873		tex.src_sel_y = 0;
1874		tex.src_sel_z = 3;
1875		tex.src_sel_w = 1;
1876	}
1877
1878	if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1879		tex.coord_type_x = 1;
1880		tex.coord_type_y = 1;
1881		tex.coord_type_z = 1;
1882		tex.coord_type_w = 1;
1883	}
1884
1885	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
1886		tex.src_sel_w = 2;
1887
1888	r = r600_bc_add_tex(ctx->bc, &tex);
1889	if (r)
1890		return r;
1891
1892	/* add shadow ambient support  - gallium doesn't do it yet */
1893	return 0;
1894
1895}
1896
1897static int tgsi_lrp(struct r600_shader_ctx *ctx)
1898{
1899	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1900	struct r600_bc_alu_src r600_src[3];
1901	struct r600_bc_alu alu;
1902	unsigned i;
1903	int r;
1904
1905	r = tgsi_split_constant(ctx, r600_src);
1906	if (r)
1907		return r;
1908	r = tgsi_split_literal_constant(ctx, r600_src);
1909	if (r)
1910		return r;
1911	/* 1 - src0 */
1912	for (i = 0; i < 4; i++) {
1913		memset(&alu, 0, sizeof(struct r600_bc_alu));
1914		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1915		alu.src[0].sel = V_SQ_ALU_SRC_1;
1916		alu.src[0].chan = 0;
1917		alu.src[1] = r600_src[0];
1918		alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1919		alu.src[1].neg = 1;
1920		alu.dst.sel = ctx->temp_reg;
1921		alu.dst.chan = i;
1922		if (i == 3) {
1923			alu.last = 1;
1924		}
1925		alu.dst.write = 1;
1926		r = r600_bc_add_alu(ctx->bc, &alu);
1927		if (r)
1928			return r;
1929	}
1930	r = r600_bc_add_literal(ctx->bc, ctx->value);
1931	if (r)
1932		return r;
1933
1934	/* (1 - src0) * src2 */
1935	for (i = 0; i < 4; i++) {
1936		memset(&alu, 0, sizeof(struct r600_bc_alu));
1937		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1938		alu.src[0].sel = ctx->temp_reg;
1939		alu.src[0].chan = i;
1940		alu.src[1] = r600_src[2];
1941		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1942		alu.dst.sel = ctx->temp_reg;
1943		alu.dst.chan = i;
1944		if (i == 3) {
1945			alu.last = 1;
1946		}
1947		alu.dst.write = 1;
1948		r = r600_bc_add_alu(ctx->bc, &alu);
1949		if (r)
1950			return r;
1951	}
1952	r = r600_bc_add_literal(ctx->bc, ctx->value);
1953	if (r)
1954		return r;
1955
1956	/* src0 * src1 + (1 - src0) * src2 */
1957	for (i = 0; i < 4; i++) {
1958		memset(&alu, 0, sizeof(struct r600_bc_alu));
1959		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1960		alu.is_op3 = 1;
1961		alu.src[0] = r600_src[0];
1962		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1963		alu.src[1] = r600_src[1];
1964		alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
1965		alu.src[2].sel = ctx->temp_reg;
1966		alu.src[2].chan = i;
1967		alu.dst.sel = ctx->temp_reg;
1968		alu.dst.chan = i;
1969		if (i == 3) {
1970			alu.last = 1;
1971		}
1972		r = r600_bc_add_alu(ctx->bc, &alu);
1973		if (r)
1974			return r;
1975	}
1976	return tgsi_helper_copy(ctx, inst);
1977}
1978
1979static int tgsi_cmp(struct r600_shader_ctx *ctx)
1980{
1981	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1982	struct r600_bc_alu_src r600_src[3];
1983	struct r600_bc_alu alu;
1984	int use_temp = 0;
1985	int i, r;
1986
1987	r = tgsi_split_constant(ctx, r600_src);
1988	if (r)
1989		return r;
1990	r = tgsi_split_literal_constant(ctx, r600_src);
1991	if (r)
1992		return r;
1993
1994	if (inst->Dst[0].Register.WriteMask != 0xf)
1995		use_temp = 1;
1996
1997	for (i = 0; i < 4; i++) {
1998		memset(&alu, 0, sizeof(struct r600_bc_alu));
1999		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
2000		alu.src[0] = r600_src[0];
2001		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2002
2003		alu.src[1] = r600_src[2];
2004		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
2005
2006		alu.src[2] = r600_src[1];
2007		alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
2008
2009		if (use_temp)
2010			alu.dst.sel = ctx->temp_reg;
2011		else {
2012			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2013			if (r)
2014				return r;
2015		}
2016		alu.dst.chan = i;
2017		alu.dst.write = 1;
2018		alu.is_op3 = 1;
2019		if (i == 3)
2020			alu.last = 1;
2021		r = r600_bc_add_alu(ctx->bc, &alu);
2022		if (r)
2023			return r;
2024	}
2025	if (use_temp)
2026		return tgsi_helper_copy(ctx, inst);
2027	return 0;
2028}
2029
2030static int tgsi_xpd(struct r600_shader_ctx *ctx)
2031{
2032	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2033	struct r600_bc_alu_src r600_src[3];
2034	struct r600_bc_alu alu;
2035	uint32_t use_temp = 0;
2036	int i, r;
2037
2038	if (inst->Dst[0].Register.WriteMask != 0xf)
2039		use_temp = 1;
2040
2041	r = tgsi_split_constant(ctx, r600_src);
2042	if (r)
2043		return r;
2044	r = tgsi_split_literal_constant(ctx, r600_src);
2045	if (r)
2046		return r;
2047
2048	for (i = 0; i < 4; i++) {
2049		memset(&alu, 0, sizeof(struct r600_bc_alu));
2050		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2051
2052		alu.src[0] = r600_src[0];
2053		switch (i) {
2054		case 0:
2055			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2056			break;
2057		case 1:
2058			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2059			break;
2060		case 2:
2061			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2062			break;
2063		case 3:
2064			alu.src[0].sel = V_SQ_ALU_SRC_0;
2065			alu.src[0].chan = i;
2066		}
2067
2068		alu.src[1] = r600_src[1];
2069		switch (i) {
2070		case 0:
2071			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2072			break;
2073		case 1:
2074			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2075			break;
2076		case 2:
2077			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2078			break;
2079		case 3:
2080			alu.src[1].sel = V_SQ_ALU_SRC_0;
2081			alu.src[1].chan = i;
2082		}
2083
2084		alu.dst.sel = ctx->temp_reg;
2085		alu.dst.chan = i;
2086		alu.dst.write = 1;
2087
2088		if (i == 3)
2089			alu.last = 1;
2090		r = r600_bc_add_alu(ctx->bc, &alu);
2091		if (r)
2092			return r;
2093
2094		r = r600_bc_add_literal(ctx->bc, ctx->value);
2095		if (r)
2096			return r;
2097	}
2098
2099	for (i = 0; i < 4; i++) {
2100		memset(&alu, 0, sizeof(struct r600_bc_alu));
2101		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2102
2103		alu.src[0] = r600_src[0];
2104		switch (i) {
2105		case 0:
2106			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2107			break;
2108		case 1:
2109			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2110			break;
2111		case 2:
2112			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2113			break;
2114		case 3:
2115			alu.src[0].sel = V_SQ_ALU_SRC_0;
2116			alu.src[0].chan = i;
2117		}
2118
2119		alu.src[1] = r600_src[1];
2120		switch (i) {
2121		case 0:
2122			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2123			break;
2124		case 1:
2125			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2126			break;
2127		case 2:
2128			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2129			break;
2130		case 3:
2131			alu.src[1].sel = V_SQ_ALU_SRC_0;
2132			alu.src[1].chan = i;
2133		}
2134
2135		alu.src[2].sel = ctx->temp_reg;
2136		alu.src[2].neg = 1;
2137		alu.src[2].chan = i;
2138
2139		if (use_temp)
2140			alu.dst.sel = ctx->temp_reg;
2141		else {
2142			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2143			if (r)
2144				return r;
2145		}
2146		alu.dst.chan = i;
2147		alu.dst.write = 1;
2148		alu.is_op3 = 1;
2149		if (i == 3)
2150			alu.last = 1;
2151		r = r600_bc_add_alu(ctx->bc, &alu);
2152		if (r)
2153			return r;
2154
2155		r = r600_bc_add_literal(ctx->bc, ctx->value);
2156		if (r)
2157			return r;
2158	}
2159	if (use_temp)
2160		return tgsi_helper_copy(ctx, inst);
2161	return 0;
2162}
2163
2164static int tgsi_exp(struct r600_shader_ctx *ctx)
2165{
2166	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2167	struct r600_bc_alu_src r600_src[3];
2168	struct r600_bc_alu alu;
2169	int r;
2170
2171	/* result.x = 2^floor(src); */
2172	if (inst->Dst[0].Register.WriteMask & 1) {
2173		memset(&alu, 0, sizeof(struct r600_bc_alu));
2174
2175		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2176		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2177		if (r)
2178			return r;
2179
2180		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2181
2182		alu.dst.sel = ctx->temp_reg;
2183		alu.dst.chan = 0;
2184		alu.dst.write = 1;
2185		alu.last = 1;
2186		r = r600_bc_add_alu(ctx->bc, &alu);
2187		if (r)
2188			return r;
2189
2190		r = r600_bc_add_literal(ctx->bc, ctx->value);
2191		if (r)
2192			return r;
2193
2194		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2195		alu.src[0].sel = ctx->temp_reg;
2196		alu.src[0].chan = 0;
2197
2198		alu.dst.sel = ctx->temp_reg;
2199		alu.dst.chan = 0;
2200		alu.dst.write = 1;
2201		alu.last = 1;
2202		r = r600_bc_add_alu(ctx->bc, &alu);
2203		if (r)
2204			return r;
2205
2206		r = r600_bc_add_literal(ctx->bc, ctx->value);
2207		if (r)
2208			return r;
2209	}
2210
2211	/* result.y = tmp - floor(tmp); */
2212	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2213		memset(&alu, 0, sizeof(struct r600_bc_alu));
2214
2215		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2216		alu.src[0] = r600_src[0];
2217		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2218		if (r)
2219			return r;
2220		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2221
2222		alu.dst.sel = ctx->temp_reg;
2223//		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2224//		if (r)
2225//			return r;
2226		alu.dst.write = 1;
2227		alu.dst.chan = 1;
2228
2229		alu.last = 1;
2230
2231		r = r600_bc_add_alu(ctx->bc, &alu);
2232		if (r)
2233			return r;
2234		r = r600_bc_add_literal(ctx->bc, ctx->value);
2235		if (r)
2236			return r;
2237	}
2238
2239	/* result.z = RoughApprox2ToX(tmp);*/
2240	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2241		memset(&alu, 0, sizeof(struct r600_bc_alu));
2242		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2243		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2244		if (r)
2245			return r;
2246		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2247
2248		alu.dst.sel = ctx->temp_reg;
2249		alu.dst.write = 1;
2250		alu.dst.chan = 2;
2251
2252		alu.last = 1;
2253
2254		r = r600_bc_add_alu(ctx->bc, &alu);
2255		if (r)
2256			return r;
2257		r = r600_bc_add_literal(ctx->bc, ctx->value);
2258		if (r)
2259			return r;
2260	}
2261
2262	/* result.w = 1.0;*/
2263	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2264		memset(&alu, 0, sizeof(struct r600_bc_alu));
2265
2266		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2267		alu.src[0].sel = V_SQ_ALU_SRC_1;
2268		alu.src[0].chan = 0;
2269
2270		alu.dst.sel = ctx->temp_reg;
2271		alu.dst.chan = 3;
2272		alu.dst.write = 1;
2273		alu.last = 1;
2274		r = r600_bc_add_alu(ctx->bc, &alu);
2275		if (r)
2276			return r;
2277		r = r600_bc_add_literal(ctx->bc, ctx->value);
2278		if (r)
2279			return r;
2280	}
2281	return tgsi_helper_copy(ctx, inst);
2282}
2283
2284static int tgsi_log(struct r600_shader_ctx *ctx)
2285{
2286	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2287	struct r600_bc_alu alu;
2288	int r;
2289
2290	/* result.x = floor(log2(src)); */
2291	if (inst->Dst[0].Register.WriteMask & 1) {
2292		memset(&alu, 0, sizeof(struct r600_bc_alu));
2293
2294		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2295		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2296		if (r)
2297			return r;
2298
2299		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2300
2301		alu.dst.sel = ctx->temp_reg;
2302		alu.dst.chan = 0;
2303		alu.dst.write = 1;
2304		alu.last = 1;
2305		r = r600_bc_add_alu(ctx->bc, &alu);
2306		if (r)
2307			return r;
2308
2309		r = r600_bc_add_literal(ctx->bc, ctx->value);
2310		if (r)
2311			return r;
2312
2313		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2314		alu.src[0].sel = ctx->temp_reg;
2315		alu.src[0].chan = 0;
2316
2317		alu.dst.sel = ctx->temp_reg;
2318		alu.dst.chan = 0;
2319		alu.dst.write = 1;
2320		alu.last = 1;
2321
2322		r = r600_bc_add_alu(ctx->bc, &alu);
2323		if (r)
2324			return r;
2325
2326		r = r600_bc_add_literal(ctx->bc, ctx->value);
2327		if (r)
2328			return r;
2329	}
2330
2331	/* result.y = src.x / (2 ^ floor(log2(src.x))); */
2332	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2333		memset(&alu, 0, sizeof(struct r600_bc_alu));
2334
2335		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2336		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2337		if (r)
2338			return r;
2339
2340		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2341
2342		alu.dst.sel = ctx->temp_reg;
2343		alu.dst.chan = 1;
2344		alu.dst.write = 1;
2345		alu.last = 1;
2346
2347		r = r600_bc_add_alu(ctx->bc, &alu);
2348		if (r)
2349			return r;
2350
2351		r = r600_bc_add_literal(ctx->bc, ctx->value);
2352		if (r)
2353			return r;
2354
2355		memset(&alu, 0, sizeof(struct r600_bc_alu));
2356
2357		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2358		alu.src[0].sel = ctx->temp_reg;
2359		alu.src[0].chan = 1;
2360
2361		alu.dst.sel = ctx->temp_reg;
2362		alu.dst.chan = 1;
2363		alu.dst.write = 1;
2364		alu.last = 1;
2365
2366		r = r600_bc_add_alu(ctx->bc, &alu);
2367		if (r)
2368			return r;
2369
2370		r = r600_bc_add_literal(ctx->bc, ctx->value);
2371		if (r)
2372			return r;
2373
2374		memset(&alu, 0, sizeof(struct r600_bc_alu));
2375
2376		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2377		alu.src[0].sel = ctx->temp_reg;
2378		alu.src[0].chan = 1;
2379
2380		alu.dst.sel = ctx->temp_reg;
2381		alu.dst.chan = 1;
2382		alu.dst.write = 1;
2383		alu.last = 1;
2384
2385		r = r600_bc_add_alu(ctx->bc, &alu);
2386		if (r)
2387			return r;
2388
2389		r = r600_bc_add_literal(ctx->bc, ctx->value);
2390		if (r)
2391			return r;
2392
2393		memset(&alu, 0, sizeof(struct r600_bc_alu));
2394
2395		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2396		alu.src[0].sel = ctx->temp_reg;
2397		alu.src[0].chan = 1;
2398
2399		alu.dst.sel = ctx->temp_reg;
2400		alu.dst.chan = 1;
2401		alu.dst.write = 1;
2402		alu.last = 1;
2403
2404		r = r600_bc_add_alu(ctx->bc, &alu);
2405		if (r)
2406			return r;
2407
2408		r = r600_bc_add_literal(ctx->bc, ctx->value);
2409		if (r)
2410			return r;
2411
2412		memset(&alu, 0, sizeof(struct r600_bc_alu));
2413
2414		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2415
2416		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2417		if (r)
2418			return r;
2419
2420		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2421
2422		alu.src[1].sel = ctx->temp_reg;
2423		alu.src[1].chan = 1;
2424
2425		alu.dst.sel = ctx->temp_reg;
2426		alu.dst.chan = 1;
2427		alu.dst.write = 1;
2428		alu.last = 1;
2429
2430		r = r600_bc_add_alu(ctx->bc, &alu);
2431		if (r)
2432			return r;
2433
2434		r = r600_bc_add_literal(ctx->bc, ctx->value);
2435		if (r)
2436			return r;
2437	}
2438
2439	/* result.z = log2(src);*/
2440	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2441		memset(&alu, 0, sizeof(struct r600_bc_alu));
2442
2443		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2444		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2445		if (r)
2446			return r;
2447
2448		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2449
2450		alu.dst.sel = ctx->temp_reg;
2451		alu.dst.write = 1;
2452		alu.dst.chan = 2;
2453		alu.last = 1;
2454
2455		r = r600_bc_add_alu(ctx->bc, &alu);
2456		if (r)
2457			return r;
2458
2459		r = r600_bc_add_literal(ctx->bc, ctx->value);
2460		if (r)
2461			return r;
2462	}
2463
2464	/* result.w = 1.0; */
2465	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2466		memset(&alu, 0, sizeof(struct r600_bc_alu));
2467
2468		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2469		alu.src[0].sel = V_SQ_ALU_SRC_1;
2470		alu.src[0].chan = 0;
2471
2472		alu.dst.sel = ctx->temp_reg;
2473		alu.dst.chan = 3;
2474		alu.dst.write = 1;
2475		alu.last = 1;
2476
2477		r = r600_bc_add_alu(ctx->bc, &alu);
2478		if (r)
2479			return r;
2480
2481		r = r600_bc_add_literal(ctx->bc, ctx->value);
2482		if (r)
2483			return r;
2484	}
2485
2486	return tgsi_helper_copy(ctx, inst);
2487}
2488
2489/* r6/7 only for now */
2490static int tgsi_arl(struct r600_shader_ctx *ctx)
2491{
2492	/* TODO from r600c, ar values don't persist between clauses */
2493	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2494	struct r600_bc_alu alu;
2495	int r;
2496	memset(&alu, 0, sizeof(struct r600_bc_alu));
2497
2498	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
2499
2500	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2501	if (r)
2502		return r;
2503	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2504
2505	alu.last = 1;
2506
2507	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2508	if (r)
2509		return r;
2510	ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2511	return 0;
2512}
2513
2514static int tgsi_opdst(struct r600_shader_ctx *ctx)
2515{
2516	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2517	struct r600_bc_alu alu;
2518	int i, r = 0;
2519
2520	for (i = 0; i < 4; i++) {
2521		memset(&alu, 0, sizeof(struct r600_bc_alu));
2522
2523		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2524		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2525		if (r)
2526			return r;
2527
2528	        if (i == 0 || i == 3) {
2529			alu.src[0].sel = V_SQ_ALU_SRC_1;
2530		} else {
2531			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2532			if (r)
2533				return r;
2534			alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2535		}
2536
2537	        if (i == 0 || i == 2) {
2538			alu.src[1].sel = V_SQ_ALU_SRC_1;
2539		} else {
2540			r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
2541			if (r)
2542				return r;
2543			alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2544		}
2545		if (i == 3)
2546			alu.last = 1;
2547		r = r600_bc_add_alu(ctx->bc, &alu);
2548		if (r)
2549			return r;
2550	}
2551	return 0;
2552}
2553
2554static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2555{
2556	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2557	struct r600_bc_alu alu;
2558	int r;
2559
2560	memset(&alu, 0, sizeof(struct r600_bc_alu));
2561	alu.inst = opcode;
2562	alu.predicate = 1;
2563
2564	alu.dst.sel = ctx->temp_reg;
2565	alu.dst.write = 1;
2566	alu.dst.chan = 0;
2567
2568	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2569	if (r)
2570		return r;
2571	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2572	alu.src[1].sel = V_SQ_ALU_SRC_0;
2573	alu.src[1].chan = 0;
2574
2575	alu.last = 1;
2576
2577	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2578	if (r)
2579		return r;
2580	return 0;
2581}
2582
2583static int pops(struct r600_shader_ctx *ctx, int pops)
2584{
2585	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2586	ctx->bc->cf_last->pop_count = pops;
2587	return 0;
2588}
2589
2590static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2591{
2592	switch(reason) {
2593	case FC_PUSH_VPM:
2594		ctx->bc->callstack[ctx->bc->call_sp].current--;
2595		break;
2596	case FC_PUSH_WQM:
2597	case FC_LOOP:
2598		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2599		break;
2600	case FC_REP:
2601		/* TOODO : for 16 vp asic should -= 2; */
2602		ctx->bc->callstack[ctx->bc->call_sp].current --;
2603		break;
2604	}
2605}
2606
2607static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2608{
2609	if (check_max_only) {
2610		int diff;
2611		switch (reason) {
2612		case FC_PUSH_VPM:
2613			diff = 1;
2614			break;
2615		case FC_PUSH_WQM:
2616			diff = 4;
2617			break;
2618		default:
2619			assert(0);
2620			diff = 0;
2621		}
2622		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2623		    ctx->bc->callstack[ctx->bc->call_sp].max) {
2624			ctx->bc->callstack[ctx->bc->call_sp].max =
2625				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2626		}
2627		return;
2628	}
2629	switch (reason) {
2630	case FC_PUSH_VPM:
2631		ctx->bc->callstack[ctx->bc->call_sp].current++;
2632		break;
2633	case FC_PUSH_WQM:
2634	case FC_LOOP:
2635		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2636		break;
2637	case FC_REP:
2638		ctx->bc->callstack[ctx->bc->call_sp].current++;
2639		break;
2640	}
2641
2642	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2643	    ctx->bc->callstack[ctx->bc->call_sp].max) {
2644		ctx->bc->callstack[ctx->bc->call_sp].max =
2645			ctx->bc->callstack[ctx->bc->call_sp].current;
2646	}
2647}
2648
2649static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2650{
2651	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2652
2653	sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2654						sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2655	sp->mid[sp->num_mid] = ctx->bc->cf_last;
2656	sp->num_mid++;
2657}
2658
2659static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2660{
2661	ctx->bc->fc_sp++;
2662	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2663	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2664}
2665
2666static void fc_poplevel(struct r600_shader_ctx *ctx)
2667{
2668	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2669	if (sp->mid) {
2670		free(sp->mid);
2671		sp->mid = NULL;
2672	}
2673	sp->num_mid = 0;
2674	sp->start = NULL;
2675	sp->type = 0;
2676	ctx->bc->fc_sp--;
2677}
2678
#if 0
/*
 * Everything up to the matching #endif is compiled out.  These are
 * unfinished helpers for RETURN / jump / flag handling; several of them are
 * empty stubs.
 */

/* Append a CF RETURN instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/* Append a CF JUMP with the given pop count; the target is not filled in. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: does nothing yet. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: does nothing yet. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/* Test the flag, then jump, reset the flag, pop ifidx + 1 levels and return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Test the flag, append the current instruction's CF opcode with a pop
 * count of 1, record it as a `mid` entry of level fc_sp, and pop once.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2726
2727static int tgsi_if(struct r600_shader_ctx *ctx)
2728{
2729	emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2730
2731	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2732
2733	fc_pushlevel(ctx, FC_IF);
2734
2735	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2736	return 0;
2737}
2738
2739static int tgsi_else(struct r600_shader_ctx *ctx)
2740{
2741	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2742	ctx->bc->cf_last->pop_count = 1;
2743
2744	fc_set_mid(ctx, ctx->bc->fc_sp);
2745	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2746	return 0;
2747}
2748
2749static int tgsi_endif(struct r600_shader_ctx *ctx)
2750{
2751	pops(ctx, 1);
2752	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2753		R600_ERR("if/endif unbalanced in shader\n");
2754		return -1;
2755	}
2756
2757	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2758		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2759		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2760	} else {
2761		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2762	}
2763	fc_poplevel(ctx);
2764
2765	callstack_decrease_current(ctx, FC_PUSH_VPM);
2766	return 0;
2767}
2768
2769static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2770{
2771	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2772
2773	fc_pushlevel(ctx, FC_LOOP);
2774
2775	/* check stack depth */
2776	callstack_check_depth(ctx, FC_LOOP, 0);
2777	return 0;
2778}
2779
2780static int tgsi_endloop(struct r600_shader_ctx *ctx)
2781{
2782	int i;
2783
2784	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
2785
2786	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2787		R600_ERR("loop/endloop in shader code are not paired.\n");
2788		return -EINVAL;
2789	}
2790
2791	/* fixup loop pointers - from r600isa
2792	   LOOP END points to CF after LOOP START,
2793	   LOOP START point to CF after LOOP END
2794	   BRK/CONT point to LOOP END CF
2795	*/
2796	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2797
2798	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2799
2800	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2801		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2802	}
2803	/* TODO add LOOPRET support */
2804	fc_poplevel(ctx);
2805	callstack_decrease_current(ctx, FC_LOOP);
2806	return 0;
2807}
2808
2809static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2810{
2811	unsigned int fscp;
2812
2813	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2814	{
2815		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2816			break;
2817	}
2818
2819	if (fscp == 0) {
2820		R600_ERR("Break not inside loop/endloop pair\n");
2821		return -EINVAL;
2822	}
2823
2824	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2825	ctx->bc->cf_last->pop_count = 1;
2826
2827	fc_set_mid(ctx, fscp);
2828
2829	pops(ctx, 1);
2830	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2831	return 0;
2832}
2833
2834static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
2835	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl},
2836	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2837	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2838
2839	/* FIXME:
2840	 * For state trackers other than OpenGL, we'll want to use
2841	 * _RECIP_IEEE instead.
2842	 */
2843	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
2844
2845	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
2846	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2847	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
2848	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2849	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2850	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2851	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2852	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2853	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2854	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2855	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2856	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2857	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2858	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2859	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2860	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2861	/* gap */
2862	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2863	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2864	/* gap */
2865	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2866	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2867	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2868	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2869	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2870	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2871	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2872	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2873	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2874	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2875	/* gap */
2876	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2877	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2878	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2879	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2880	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2881	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2882	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2883	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
2884	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2885	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2886	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2887	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2888	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2889	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2890	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2891	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2892	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2893	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2894	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2895	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2896	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2897	{TGSI_OPCODE_TXD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2898	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2899	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2900	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2901	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2902	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2903	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2904	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2905	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2906	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2907	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2908	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2909	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2910	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2911	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
2912	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2913	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2914	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2915	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2916	{TGSI_OPCODE_TXL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2917	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2918	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2919	/* gap */
2920	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2921	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2922	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2923	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2924	/* gap */
2925	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2926	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2927	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2928	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2929	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2930	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2931	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2932	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
2933	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2934	/* gap */
2935	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2936	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2937	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2938	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2939	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2940	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2941	{TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2942	{TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2943	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2944	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2945	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2946	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2947	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2948	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2949	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2950	/* gap */
2951	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2952	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2953	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2954	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2955	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2956	/* gap */
2957	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2958	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2959	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2960	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2961	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2962	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2963	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2964	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2965	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
2966	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
2967	/* gap */
2968	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2969	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2970	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2971	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2972	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2973	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2974	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2975	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2976	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2977	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2978	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2979	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2980	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2981	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2982	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2983	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2984	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2985	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2986	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2987	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2988	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2989	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2990	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2991	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2992	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2993	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2994	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2995	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2996};
2997
/*
 * TGSI opcode translation table for the "eg" code path. The EG_ prefix on the
 * ALU/CF opcode constants suggests Evergreen-class hardware -- confirm against
 * the code that chooses between this table and the preceding table in this
 * file.
 *
 * Each row holds, in order:
 *   - the TGSI opcode the row describes,
 *   - a flag that is 1 only for the MAD row, which is also the only row
 *     carrying an OP3 opcode (presumably an "is three-operand ALU op" marker
 *     -- verify against the struct definition),
 *   - the hardware instruction value given to the handler: an ALU OP2/OP3
 *     opcode, an SQ_TEX_INST_* value for the texture and derivative rows, or
 *     a CF instruction for BRK and CONT,
 *   - the handler function for the opcode.
 *
 * Rows appear to be listed in ascending opcode value, and values without a
 * named TGSI opcode are filled with literal-number placeholder rows (the
 * "gap" comments). That layout looks like the table is indexed directly by
 * opcode value, so rows should not be reordered, inserted, or removed without
 * checking the lookup site -- TODO confirm.
 *
 * Every row handled by tgsi_unsupported carries the OP2 NOP opcode. Several
 * rows with dedicated handlers (LIT, EXP, DST, LRP, POW, XPD, SSG, CMP, SCS
 * and the IF/ELSE/ENDIF/loop/END rows) also carry NOP.
 */
2998static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
2999	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3000	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3001	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3002	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
3003	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
3004	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3005	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3006	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3007	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	/* DP3 and DP4 (and DPH, DP2 further down) all share the DOT4 opcode and the tgsi_dp handler. */
3008	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3009	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3010	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3011	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3012	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
	/* NOTE(review): SLT uses SETGT via tgsi_op2_swap; the handler name suggests the operands are swapped -- confirm. */
3013	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3014	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3015	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
	/* SUB carries the same ADD opcode as the ADD row; NOTE(review): the negation is presumably applied inside tgsi_op2 -- confirm. */
3016	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3017	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3018	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3019	/* gap */
3020	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3021	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3022	/* gap */
3023	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3024	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3025	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3026	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3027	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3028	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3029	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3030	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3031	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3032	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3033	/* gap */
3034	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* ABS carries the plain MOV opcode; NOTE(review): the absolute-value step is presumably handled inside tgsi_op2 -- confirm. */
3035	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3036	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3037	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3038	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
	/* DDX/DDY are routed through the texture handler with gradient-fetch instruction values. */
3039	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3040	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3041	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3042	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3043	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3044	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3045	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3046	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3047	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3048	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3049	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3050	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
	/* NOTE(review): SLE uses SETGE via tgsi_op2_swap, mirroring the SLT row -- confirm the swap semantics. */
3051	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3052	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3053	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* TEX and TXP both carry SQ_TEX_INST_SAMPLE; any difference between them must come from tgsi_tex itself. */
3054	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3055	{TGSI_OPCODE_TXD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3056	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3057	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3058	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3059	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3060	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3061	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3062	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3063	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3064	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3065	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3066	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3067	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3068	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3069	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
	/* NOTE(review): TXB (LOD-bias sample in TGSI) is mapped to SAMPLE_L, whose name reads like explicit LOD, while TXL below is unsupported -- confirm this opcode choice. */
3070	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3071	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3072	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3073	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3074	{TGSI_OPCODE_TXL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* BRK (and CONT further down) carry CF-level loop instructions rather than ALU opcodes. */
3075	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3076	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3077	/* gap */
3078	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3079	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3080	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3081	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3082	/* gap */
3083	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3084	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3085	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3086	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3087	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3088	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3089	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3090	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
3091	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3092	/* gap */
3093	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3094	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3095	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3096	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3097	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3098	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3099	{TGSI_OPCODE_TXF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3100	{TGSI_OPCODE_TXQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3101	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3102	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3103	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3104	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3105	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3106	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3107	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3108	/* gap */
3109	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3110	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3111	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3112	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* NOTE(review): the TGSI NOP opcode itself is routed to tgsi_unsupported here -- confirm that is intended. */
3113	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3114	/* gap */
3115	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3116	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3117	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3118	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3119	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3120	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3121	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3122	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3123	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3124	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3125	/* gap */
3126	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* From here to the end of the table every row (integer ops, switch/case, LAST) is handled by tgsi_unsupported. */
3127	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3128	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3129	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3130	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3131	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3132	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3133	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3134	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3135	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3136	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3137	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3138	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3139	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3140	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3141	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3142	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3143	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3144	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3145	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3146	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3147	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3148	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3149	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3150	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3151	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3152	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3153	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3154};
3155