r600_shader.c revision ef419599d9b18de2a9077c5f0a7f02bfc11d1762
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "pipe/p_shader_tokens.h"
24#include "tgsi/tgsi_parse.h"
25#include "tgsi/tgsi_scan.h"
26#include "tgsi/tgsi_dump.h"
27#include "util/u_format.h"
28#include "r600_screen.h"
29#include "r600_context.h"
30#include "r600_shader.h"
31#include "r600_asm.h"
32#include "r600_sq.h"
33#include "r600_opcodes.h"
34#include "r600d.h"
35#include <stdio.h>
36#include <errno.h>
37
38
struct r600_shader_tgsi_instruction;

/*
 * Working state shared by all the helpers that translate one TGSI shader
 * into r600 bytecode.  It is set up by r600_shader_from_tgsi() and lives
 * only for the duration of that call.
 */
struct r600_shader_ctx {
	/* summary of the shader as filled in by tgsi_scan_shader() */
	struct tgsi_shader_info			info;
	/* token iterator; FullToken holds the token currently being translated */
	struct tgsi_parse_context		parse;
	/* TGSI token stream handed to r600_shader_from_tgsi() */
	const struct tgsi_token			*tokens;
	/* processor type read from the parsed shader header */
	unsigned				type;
	/* per TGSI register file: base added to a TGSI index to get the r600 sel */
	unsigned				file_offset[TGSI_FILE_COUNT];
	/* first register following the TGSI temporaries; base of the driver temps */
	unsigned				temp_reg;
	/* opcode table entry of the instruction currently being translated */
	struct r600_shader_tgsi_instruction	*inst_info;
	/* bytecode under construction (points at shader->bc) */
	struct r600_bc				*bc;
	/* shader being filled in */
	struct r600_shader			*shader;
	/* the four values of the immediate most recently translated by tgsi_src() */
	u32					value[4];
	/* all immediates seen so far, four u32 per immediate */
	u32					*literals;
	/* number of immediates (groups of four u32) stored in literals */
	u32					nliterals;
	/* driver temporaries handed out by r600_get_temp() for the current instruction */
	u32					max_driver_temp_used;
};
56
/*
 * One entry of the TGSI opcode translation tables: describes how a single
 * TGSI opcode is turned into r600 bytecode.
 */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry stands for (reported by tgsi_unsupported()) */
	unsigned	tgsi_opcode;
	/* presumably non-zero when r600_opcode is a three-operand ALU opcode -- TODO confirm */
	unsigned	is_op3;
	/* hardware opcode copied into the emitted ALU instruction by the handlers */
	unsigned	r600_opcode;
	/* handler emitting the bytecode; returns 0 on success or a negative errno */
	int (*process)(struct r600_shader_ctx *ctx);
};
63
/*
 * Translation tables, indexed by TGSI opcode; r600_shader_from_tgsi() picks
 * the eg_ table when the bytecode chiprev is 2 and the r600_ table otherwise.
 * Both tables, like the helper declared below, are defined later in the file.
 */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
66
67static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader)
68{
69	struct r600_context *rctx = r600_context(ctx);
70	const struct util_format_description *desc;
71	enum pipe_format resource_format[160];
72	unsigned i, nresources = 0;
73	struct r600_bc *bc = &shader->bc;
74	struct r600_bc_cf *cf;
75	struct r600_bc_vtx *vtx;
76
77	if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
78		return 0;
79	for (i = 0; i < rctx->vertex_elements->count; i++) {
80		resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
81	}
82	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
83		switch (cf->inst) {
84		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
85		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
86			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
87				desc = util_format_description(resource_format[vtx->buffer_id]);
88				if (desc == NULL) {
89					R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
90					return -EINVAL;
91				}
92				vtx->dst_sel_x = desc->swizzle[0];
93				vtx->dst_sel_y = desc->swizzle[1];
94				vtx->dst_sel_z = desc->swizzle[2];
95				vtx->dst_sel_w = desc->swizzle[3];
96			}
97			break;
98		default:
99			break;
100		}
101	}
102	return r600_bc_build(&shader->bc);
103}
104
105int r600_pipe_shader_create(struct pipe_context *ctx,
106			struct r600_context_state *rpshader,
107			const struct tgsi_token *tokens)
108{
109	struct r600_screen *rscreen = r600_screen(ctx->screen);
110	int r;
111
112//fprintf(stderr, "--------------------------------------------------------------\n");
113//tgsi_dump(tokens, 0);
114	if (rpshader == NULL)
115		return -ENOMEM;
116	rpshader->shader.family = radeon_get_family(rscreen->rw);
117	rpshader->shader.use_mem_constant = rscreen->use_mem_constant;
118	r = r600_shader_from_tgsi(tokens, &rpshader->shader);
119	if (r) {
120		R600_ERR("translation from TGSI failed !\n");
121		return r;
122	}
123	r = r600_bc_build(&rpshader->shader.bc);
124	if (r) {
125		R600_ERR("building bytecode failed !\n");
126		return r;
127	}
128//fprintf(stderr, "______________________________________________________________\n");
129	return 0;
130}
131
132static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader)
133{
134	struct r600_context *rctx = r600_context(ctx);
135	struct radeon_state *state;
136
137	state = &rpshader->rstate[0];
138	radeon_state_fini(&rpshader->rstate[0]);
139
140	return rctx->vtbl->vs_shader(rctx, rpshader, state);
141}
142
143static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader)
144{
145	struct r600_context *rctx = r600_context(ctx);
146	struct radeon_state *state;
147
148	state = &rpshader->rstate[0];
149	radeon_state_fini(state);
150
151	return rctx->vtbl->ps_shader(rctx, rpshader, state);
152}
153
154static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader)
155{
156	struct r600_screen *rscreen = r600_screen(ctx->screen);
157	struct r600_context *rctx = r600_context(ctx);
158	struct r600_shader *rshader = &rpshader->shader;
159	int r;
160	void *data;
161
162	/* copy new shader */
163	radeon_ws_bo_reference(rscreen->rw, &rpshader->bo, NULL);
164	rpshader->bo = NULL;
165	rpshader->bo = radeon_ws_bo(rscreen->rw, rshader->bc.ndw * 4,
166				    4096, 0);
167	if (rpshader->bo == NULL) {
168		return -ENOMEM;
169	}
170	data = radeon_ws_bo_map(rscreen->rw, rpshader->bo, 0, rctx);
171	memcpy(data, rshader->bc.bytecode, rshader->bc.ndw * 4);
172	radeon_ws_bo_unmap(rscreen->rw, rpshader->bo);
173	/* build state */
174	rshader->flat_shade = rctx->flat_shade;
175	switch (rshader->processor_type) {
176	case TGSI_PROCESSOR_VERTEX:
177		r = r600_pipe_shader_vs(ctx, rpshader);
178		break;
179	case TGSI_PROCESSOR_FRAGMENT:
180		r = r600_pipe_shader_ps(ctx, rpshader);
181		break;
182	default:
183		r = -EINVAL;
184		break;
185	}
186	return r;
187}
188
189int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader)
190{
191	struct r600_context *rctx = r600_context(ctx);
192	int r;
193
194	if (rpshader == NULL)
195		return -EINVAL;
196	/* there should be enough input */
197	if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) {
198		R600_ERR("%d resources provided, expecting %d\n",
199			rctx->vertex_elements->count, rpshader->shader.bc.nresource);
200		return -EINVAL;
201	}
202	r = r600_shader_update(ctx, &rpshader->shader);
203	if (r)
204		return r;
205	return r600_pipe_shader(ctx, rpshader);
206}
207
208static int tgsi_is_supported(struct r600_shader_ctx *ctx)
209{
210	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
211	int j;
212
213	if (i->Instruction.NumDstRegs > 1) {
214		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
215		return -EINVAL;
216	}
217	if (i->Instruction.Predicate) {
218		R600_ERR("predicate unsupported\n");
219		return -EINVAL;
220	}
221#if 0
222	if (i->Instruction.Label) {
223		R600_ERR("label unsupported\n");
224		return -EINVAL;
225	}
226#endif
227	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
228		if (i->Src[j].Register.Dimension ||
229			i->Src[j].Register.Absolute) {
230			R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j,
231				 i->Src[j].Register.Dimension,
232				 i->Src[j].Register.Absolute);
233			return -EINVAL;
234		}
235	}
236	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
237		if (i->Dst[j].Register.Dimension) {
238			R600_ERR("unsupported dst (dimension)\n");
239			return -EINVAL;
240		}
241	}
242	return 0;
243}
244
245static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int gpr)
246{
247	int i, r;
248	struct r600_bc_alu alu;
249
250	for (i = 0; i < 8; i++) {
251		memset(&alu, 0, sizeof(struct r600_bc_alu));
252
253		if (i < 4)
254			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
255		else
256			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
257
258		if ((i > 1) && (i < 6)) {
259			alu.dst.sel = ctx->shader->input[gpr].gpr;
260			alu.dst.write = 1;
261		}
262
263		alu.dst.chan = i % 4;
264		alu.src[0].chan = (1 - (i % 2));
265		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + gpr;
266
267		alu.bank_swizzle_force = SQ_ALU_VEC_210;
268		if ((i % 4) == 3)
269			alu.last = 1;
270		r = r600_bc_add_alu(ctx->bc, &alu);
271		if (r)
272			return r;
273	}
274	return 0;
275}
276
277
278static int tgsi_declaration(struct r600_shader_ctx *ctx)
279{
280	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
281	struct r600_bc_vtx vtx;
282	unsigned i;
283	int r;
284
285	switch (d->Declaration.File) {
286	case TGSI_FILE_INPUT:
287		i = ctx->shader->ninput++;
288		ctx->shader->input[i].name = d->Semantic.Name;
289		ctx->shader->input[i].sid = d->Semantic.Index;
290		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
291		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
292		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
293			/* turn input into fetch */
294			memset(&vtx, 0, sizeof(struct r600_bc_vtx));
295			vtx.inst = 0;
296			vtx.fetch_type = 0;
297			vtx.buffer_id = i;
298			/* register containing the index into the buffer */
299			vtx.src_gpr = 0;
300			vtx.src_sel_x = 0;
301			vtx.mega_fetch_count = 0x1F;
302			vtx.dst_gpr = ctx->shader->input[i].gpr;
303			vtx.dst_sel_x = 0;
304			vtx.dst_sel_y = 1;
305			vtx.dst_sel_z = 2;
306			vtx.dst_sel_w = 3;
307			r = r600_bc_add_vtx(ctx->bc, &vtx);
308			if (r)
309				return r;
310		}
311		if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == 2) {
312			/* turn input into interpolate on EG */
313			evergreen_interp_alu(ctx, i);
314		}
315		break;
316	case TGSI_FILE_OUTPUT:
317		i = ctx->shader->noutput++;
318		ctx->shader->output[i].name = d->Semantic.Name;
319		ctx->shader->output[i].sid = d->Semantic.Index;
320		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
321		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
322		break;
323	case TGSI_FILE_CONSTANT:
324	case TGSI_FILE_TEMPORARY:
325	case TGSI_FILE_SAMPLER:
326	case TGSI_FILE_ADDRESS:
327		break;
328	default:
329		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
330		return -EINVAL;
331	}
332	return 0;
333}
334
335static int r600_get_temp(struct r600_shader_ctx *ctx)
336{
337	return ctx->temp_reg + ctx->max_driver_temp_used++;
338}
339
340int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
341{
342	struct tgsi_full_immediate *immediate;
343	struct r600_shader_ctx ctx;
344	struct r600_bc_output output[32];
345	unsigned output_done, noutput;
346	unsigned opcode;
347	int i, r = 0, pos0;
348
349	ctx.bc = &shader->bc;
350	ctx.shader = shader;
351	r = r600_bc_init(ctx.bc, shader->family);
352	if (r)
353		return r;
354	ctx.bc->use_mem_constant = shader->use_mem_constant;
355	ctx.tokens = tokens;
356	tgsi_scan_shader(tokens, &ctx.info);
357	tgsi_parse_init(&ctx.parse, tokens);
358	ctx.type = ctx.parse.FullHeader.Processor.Processor;
359	shader->processor_type = ctx.type;
360
361	/* register allocations */
362	/* Values [0,127] correspond to GPR[0..127].
363	 * Values [128,159] correspond to constant buffer bank 0
364	 * Values [160,191] correspond to constant buffer bank 1
365	 * Values [256,511] correspond to cfile constants c[0..255].
366	 * Other special values are shown in the list below.
367	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
368	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
369	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
370	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
371	 * 248	SQ_ALU_SRC_0: special constant 0.0.
372	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
373	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
374	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
375	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
376	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
377	 * 254	SQ_ALU_SRC_PV: previous vector result.
378	 * 255	SQ_ALU_SRC_PS: previous scalar result.
379	 */
380	for (i = 0; i < TGSI_FILE_COUNT; i++) {
381		ctx.file_offset[i] = 0;
382	}
383	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
384		ctx.file_offset[TGSI_FILE_INPUT] = 1;
385	}
386	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
387						ctx.info.file_count[TGSI_FILE_INPUT];
388	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
389						ctx.info.file_count[TGSI_FILE_OUTPUT];
390	if (ctx.shader->use_mem_constant)
391		ctx.file_offset[TGSI_FILE_CONSTANT] = 128;
392	else
393		ctx.file_offset[TGSI_FILE_CONSTANT] = 256;
394
395	ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
396	ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
397			ctx.info.file_count[TGSI_FILE_TEMPORARY];
398
399	ctx.nliterals = 0;
400	ctx.literals = NULL;
401
402	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
403		tgsi_parse_token(&ctx.parse);
404		switch (ctx.parse.FullToken.Token.Type) {
405		case TGSI_TOKEN_TYPE_IMMEDIATE:
406			immediate = &ctx.parse.FullToken.FullImmediate;
407			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
408			if(ctx.literals == NULL) {
409				r = -ENOMEM;
410				goto out_err;
411			}
412			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
413			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
414			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
415			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
416			ctx.nliterals++;
417			break;
418		case TGSI_TOKEN_TYPE_DECLARATION:
419			r = tgsi_declaration(&ctx);
420			if (r)
421				goto out_err;
422			break;
423		case TGSI_TOKEN_TYPE_INSTRUCTION:
424			r = tgsi_is_supported(&ctx);
425			if (r)
426				goto out_err;
427			ctx.max_driver_temp_used = 0;
428			/* reserve first tmp for everyone */
429			r600_get_temp(&ctx);
430			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
431			if (ctx.bc->chiprev == 2)
432				ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
433			else
434				ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
435			r = ctx.inst_info->process(&ctx);
436			if (r)
437				goto out_err;
438			r = r600_bc_add_literal(ctx.bc, ctx.value);
439			if (r)
440				goto out_err;
441			break;
442		default:
443			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
444			r = -EINVAL;
445			goto out_err;
446		}
447	}
448	/* export output */
449	noutput = shader->noutput;
450	for (i = 0, pos0 = 0; i < noutput; i++) {
451		memset(&output[i], 0, sizeof(struct r600_bc_output));
452		output[i].gpr = shader->output[i].gpr;
453		output[i].elem_size = 3;
454		output[i].swizzle_x = 0;
455		output[i].swizzle_y = 1;
456		output[i].swizzle_z = 2;
457		output[i].swizzle_w = 3;
458		output[i].barrier = 1;
459		output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
460		output[i].array_base = i - pos0;
461		output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
462		switch (ctx.type) {
463		case TGSI_PROCESSOR_VERTEX:
464			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
465				output[i].array_base = 60;
466				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
467				/* position doesn't count in array_base */
468				pos0++;
469			}
470			if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
471				output[i].array_base = 61;
472				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
473				/* position doesn't count in array_base */
474				pos0++;
475			}
476			break;
477		case TGSI_PROCESSOR_FRAGMENT:
478			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
479				output[i].array_base = shader->output[i].sid;
480				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
481			} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
482				output[i].array_base = 61;
483				output[i].swizzle_x = 2;
484				output[i].swizzle_y = output[i].swizzle_z = output[i].swizzle_w = 7;
485				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
486			} else {
487				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
488				r = -EINVAL;
489				goto out_err;
490			}
491			break;
492		default:
493			R600_ERR("unsupported processor type %d\n", ctx.type);
494			r = -EINVAL;
495			goto out_err;
496		}
497	}
498	/* add fake param output for vertex shader if no param is exported */
499	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
500		for (i = 0, pos0 = 0; i < noutput; i++) {
501			if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
502				pos0 = 1;
503				break;
504			}
505		}
506		if (!pos0) {
507			memset(&output[i], 0, sizeof(struct r600_bc_output));
508			output[i].gpr = 0;
509			output[i].elem_size = 3;
510			output[i].swizzle_x = 0;
511			output[i].swizzle_y = 1;
512			output[i].swizzle_z = 2;
513			output[i].swizzle_w = 3;
514			output[i].barrier = 1;
515			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
516			output[i].array_base = 0;
517			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
518			noutput++;
519		}
520	}
521	/* add fake pixel export */
522	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
523		memset(&output[0], 0, sizeof(struct r600_bc_output));
524		output[0].gpr = 0;
525		output[0].elem_size = 3;
526		output[0].swizzle_x = 7;
527		output[0].swizzle_y = 7;
528		output[0].swizzle_z = 7;
529		output[0].swizzle_w = 7;
530		output[0].barrier = 1;
531		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
532		output[0].array_base = 0;
533		output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
534		noutput++;
535	}
536	/* set export done on last export of each type */
537	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
538		if (i == (noutput - 1)) {
539			output[i].end_of_program = 1;
540		}
541		if (!(output_done & (1 << output[i].type))) {
542			output_done |= (1 << output[i].type);
543			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
544		}
545	}
546	/* add output to bytecode */
547	for (i = 0; i < noutput; i++) {
548		r = r600_bc_add_output(ctx.bc, &output[i]);
549		if (r)
550			goto out_err;
551	}
552	free(ctx.literals);
553	tgsi_parse_free(&ctx.parse);
554	return 0;
555out_err:
556	free(ctx.literals);
557	tgsi_parse_free(&ctx.parse);
558	return r;
559}
560
561static int tgsi_unsupported(struct r600_shader_ctx *ctx)
562{
563	R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
564	return -EINVAL;
565}
566
/*
 * Handler that emits nothing and always succeeds.
 */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* the context is not needed */
	return 0;
}
571
572static int tgsi_src(struct r600_shader_ctx *ctx,
573			const struct tgsi_full_src_register *tgsi_src,
574			struct r600_bc_alu_src *r600_src)
575{
576	int index;
577	memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
578	r600_src->sel = tgsi_src->Register.Index;
579	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
580		r600_src->sel = 0;
581		index = tgsi_src->Register.Index;
582		ctx->value[0] = ctx->literals[index * 4 + 0];
583		ctx->value[1] = ctx->literals[index * 4 + 1];
584		ctx->value[2] = ctx->literals[index * 4 + 2];
585		ctx->value[3] = ctx->literals[index * 4 + 3];
586	}
587	if (tgsi_src->Register.Indirect)
588		r600_src->rel = V_SQ_REL_RELATIVE;
589	r600_src->neg = tgsi_src->Register.Negate;
590	r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
591	return 0;
592}
593
594static int tgsi_dst(struct r600_shader_ctx *ctx,
595			const struct tgsi_full_dst_register *tgsi_dst,
596			unsigned swizzle,
597			struct r600_bc_alu_dst *r600_dst)
598{
599	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
600
601	r600_dst->sel = tgsi_dst->Register.Index;
602	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
603	r600_dst->chan = swizzle;
604	r600_dst->write = 1;
605	if (tgsi_dst->Register.Indirect)
606		r600_dst->rel = V_SQ_REL_RELATIVE;
607	if (inst->Instruction.Saturate) {
608		r600_dst->clamp = 1;
609	}
610	return 0;
611}
612
613static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
614{
615	switch (swizzle) {
616	case 0:
617		return tgsi_src->Register.SwizzleX;
618	case 1:
619		return tgsi_src->Register.SwizzleY;
620	case 2:
621		return tgsi_src->Register.SwizzleZ;
622	case 3:
623		return tgsi_src->Register.SwizzleW;
624	default:
625		return 0;
626	}
627}
628
629static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
630{
631	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
632	struct r600_bc_alu alu;
633	int i, j, k, nconst, r;
634
635	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
636		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
637			nconst++;
638		}
639		r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
640		if (r) {
641			return r;
642		}
643	}
644	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
645		if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) {
646			int treg = r600_get_temp(ctx);
647			for (k = 0; k < 4; k++) {
648				memset(&alu, 0, sizeof(struct r600_bc_alu));
649				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
650				alu.src[0].sel = r600_src[j].sel;
651				alu.src[0].chan = k;
652				alu.dst.sel = treg;
653				alu.dst.chan = k;
654				alu.dst.write = 1;
655				if (k == 3)
656					alu.last = 1;
657				r = r600_bc_add_alu(ctx->bc, &alu);
658				if (r)
659					return r;
660			}
661			r600_src[j].sel = treg;
662			j--;
663		}
664	}
665	return 0;
666}
667
668/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
669static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
670{
671	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
672	struct r600_bc_alu alu;
673	int i, j, k, nliteral, r;
674
675	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
676		if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
677			nliteral++;
678		}
679	}
680	for (i = 0, j = 0; i < inst->Instruction.NumSrcRegs; i++) {
681		if (inst->Src[j].Register.File == TGSI_FILE_IMMEDIATE) {
682			int treg = r600_get_temp(ctx);
683			for (k = 0; k < 4; k++) {
684				memset(&alu, 0, sizeof(struct r600_bc_alu));
685				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
686				alu.src[0].sel = r600_src[j].sel;
687				alu.src[0].chan = k;
688				alu.dst.sel = treg;
689				alu.dst.chan = k;
690				alu.dst.write = 1;
691				if (k == 3)
692					alu.last = 1;
693				r = r600_bc_add_alu(ctx->bc, &alu);
694				if (r)
695					return r;
696			}
697			r = r600_bc_add_literal(ctx->bc, ctx->value);
698			if (r)
699				return r;
700			r600_src[j].sel = treg;
701			j++;
702		}
703	}
704	return 0;
705}
706
707static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
708{
709	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
710	struct r600_bc_alu_src r600_src[3];
711	struct r600_bc_alu alu;
712	int i, j, r;
713	int lasti = 0;
714
715	for (i = 0; i < 4; i++) {
716		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
717			lasti = i;
718		}
719	}
720
721	r = tgsi_split_constant(ctx, r600_src);
722	if (r)
723		return r;
724	for (i = 0; i < lasti + 1; i++) {
725		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
726			continue;
727
728		memset(&alu, 0, sizeof(struct r600_bc_alu));
729		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
730		if (r)
731			return r;
732
733		alu.inst = ctx->inst_info->r600_opcode;
734		if (!swap) {
735			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
736				alu.src[j] = r600_src[j];
737				alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
738			}
739		} else {
740			alu.src[0] = r600_src[1];
741			alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
742
743			alu.src[1] = r600_src[0];
744			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
745		}
746		/* handle some special cases */
747		switch (ctx->inst_info->tgsi_opcode) {
748		case TGSI_OPCODE_SUB:
749			alu.src[1].neg = 1;
750			break;
751		case TGSI_OPCODE_ABS:
752			alu.src[0].abs = 1;
753			break;
754		default:
755			break;
756		}
757		if (i == lasti) {
758			alu.last = 1;
759		}
760		r = r600_bc_add_alu(ctx->bc, &alu);
761		if (r)
762			return r;
763	}
764	return 0;
765}
766
/*
 * Per-channel translation with the sources in TGSI order.
 */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap = 0;

	return tgsi_op2_s(ctx, swap);
}
771
/*
 * Per-channel translation with the first two sources exchanged.
 */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap = 1;

	return tgsi_op2_s(ctx, swap);
}
776
777/*
778 * r600 - trunc to -PI..PI range
779 * r700 - normalize by dividing by 2PI
780 * see fdo bug 27901
781 */
782static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
783			   struct r600_bc_alu_src r600_src[3])
784{
785	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
786	int r;
787	uint32_t lit_vals[4];
788	struct r600_bc_alu alu;
789
790	memset(lit_vals, 0, 4*4);
791	r = tgsi_split_constant(ctx, r600_src);
792	if (r)
793		return r;
794
795	r = tgsi_split_literal_constant(ctx, r600_src);
796	if (r)
797		return r;
798
799	lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
800	lit_vals[1] = fui(0.5f);
801
802	memset(&alu, 0, sizeof(struct r600_bc_alu));
803	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
804	alu.is_op3 = 1;
805
806	alu.dst.chan = 0;
807	alu.dst.sel = ctx->temp_reg;
808	alu.dst.write = 1;
809
810	alu.src[0] = r600_src[0];
811	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
812
813	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
814	alu.src[1].chan = 0;
815	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
816	alu.src[2].chan = 1;
817	alu.last = 1;
818	r = r600_bc_add_alu(ctx->bc, &alu);
819	if (r)
820		return r;
821	r = r600_bc_add_literal(ctx->bc, lit_vals);
822	if (r)
823		return r;
824
825	memset(&alu, 0, sizeof(struct r600_bc_alu));
826	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
827
828	alu.dst.chan = 0;
829	alu.dst.sel = ctx->temp_reg;
830	alu.dst.write = 1;
831
832	alu.src[0].sel = ctx->temp_reg;
833	alu.src[0].chan = 0;
834	alu.last = 1;
835	r = r600_bc_add_alu(ctx->bc, &alu);
836	if (r)
837		return r;
838
839	if (ctx->bc->chiprev == 0) {
840		lit_vals[0] = fui(3.1415926535897f * 2.0f);
841		lit_vals[1] = fui(-3.1415926535897f);
842	} else {
843		lit_vals[0] = fui(1.0f);
844		lit_vals[1] = fui(-0.5f);
845	}
846
847	memset(&alu, 0, sizeof(struct r600_bc_alu));
848	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
849	alu.is_op3 = 1;
850
851	alu.dst.chan = 0;
852	alu.dst.sel = ctx->temp_reg;
853	alu.dst.write = 1;
854
855	alu.src[0].sel = ctx->temp_reg;
856	alu.src[0].chan = 0;
857
858	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
859	alu.src[1].chan = 0;
860	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
861	alu.src[2].chan = 1;
862	alu.last = 1;
863	r = r600_bc_add_alu(ctx->bc, &alu);
864	if (r)
865		return r;
866	r = r600_bc_add_literal(ctx->bc, lit_vals);
867	if (r)
868		return r;
869	return 0;
870}
871
872static int tgsi_trig(struct r600_shader_ctx *ctx)
873{
874	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
875	struct r600_bc_alu_src r600_src[3];
876	struct r600_bc_alu alu;
877	int i, r;
878	int lasti = 0;
879
880	r = tgsi_setup_trig(ctx, r600_src);
881	if (r)
882		return r;
883
884	memset(&alu, 0, sizeof(struct r600_bc_alu));
885	alu.inst = ctx->inst_info->r600_opcode;
886	alu.dst.chan = 0;
887	alu.dst.sel = ctx->temp_reg;
888	alu.dst.write = 1;
889
890	alu.src[0].sel = ctx->temp_reg;
891	alu.src[0].chan = 0;
892	alu.last = 1;
893	r = r600_bc_add_alu(ctx->bc, &alu);
894	if (r)
895		return r;
896
897	/* replicate result */
898	for (i = 0; i < 4; i++) {
899		if (inst->Dst[0].Register.WriteMask & (1 << i))
900			lasti = i;
901	}
902	for (i = 0; i < lasti + 1; i++) {
903		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
904			continue;
905
906		memset(&alu, 0, sizeof(struct r600_bc_alu));
907		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
908
909		alu.src[0].sel = ctx->temp_reg;
910		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
911		if (r)
912			return r;
913		if (i == lasti)
914			alu.last = 1;
915		r = r600_bc_add_alu(ctx->bc, &alu);
916		if (r)
917			return r;
918	}
919	return 0;
920}
921
922static int tgsi_scs(struct r600_shader_ctx *ctx)
923{
924	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
925	struct r600_bc_alu_src r600_src[3];
926	struct r600_bc_alu alu;
927	int r;
928
929	/* We'll only need the trig stuff if we are going to write to the
930	 * X or Y components of the destination vector.
931	 */
932	if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
933		r = tgsi_setup_trig(ctx, r600_src);
934		if (r)
935			return r;
936	}
937
938	/* dst.x = COS */
939	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
940		memset(&alu, 0, sizeof(struct r600_bc_alu));
941		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
942		r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
943		if (r)
944			return r;
945
946		alu.src[0].sel = ctx->temp_reg;
947		alu.src[0].chan = 0;
948		alu.last = 1;
949		r = r600_bc_add_alu(ctx->bc, &alu);
950		if (r)
951			return r;
952	}
953
954	/* dst.y = SIN */
955	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
956		memset(&alu, 0, sizeof(struct r600_bc_alu));
957		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
958		r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
959		if (r)
960			return r;
961
962		alu.src[0].sel = ctx->temp_reg;
963		alu.src[0].chan = 0;
964		alu.last = 1;
965		r = r600_bc_add_alu(ctx->bc, &alu);
966		if (r)
967			return r;
968	}
969
970	/* dst.z = 0.0; */
971	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
972		fprintf(stderr, "writing z\n");
973		memset(&alu, 0, sizeof(struct r600_bc_alu));
974
975		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
976
977		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
978		if (r)
979			return r;
980
981		alu.src[0].sel = V_SQ_ALU_SRC_0;
982		alu.src[0].chan = 0;
983
984		alu.last = 1;
985
986		r = r600_bc_add_alu(ctx->bc, &alu);
987		if (r)
988			return r;
989
990		r = r600_bc_add_literal(ctx->bc, ctx->value);
991		if (r)
992			return r;
993	}
994
995	/* dst.w = 1.0; */
996	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
997		memset(&alu, 0, sizeof(struct r600_bc_alu));
998
999		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1000
1001		r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1002		if (r)
1003			return r;
1004
1005		alu.src[0].sel = V_SQ_ALU_SRC_1;
1006		alu.src[0].chan = 0;
1007
1008		alu.last = 1;
1009
1010		r = r600_bc_add_alu(ctx->bc, &alu);
1011		if (r)
1012			return r;
1013
1014		r = r600_bc_add_literal(ctx->bc, ctx->value);
1015		if (r)
1016			return r;
1017	}
1018
1019	return 0;
1020}
1021
1022static int tgsi_kill(struct r600_shader_ctx *ctx)
1023{
1024	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1025	struct r600_bc_alu alu;
1026	int i, r;
1027
1028	for (i = 0; i < 4; i++) {
1029		memset(&alu, 0, sizeof(struct r600_bc_alu));
1030		alu.inst = ctx->inst_info->r600_opcode;
1031
1032		alu.dst.chan = i;
1033
1034		alu.src[0].sel = V_SQ_ALU_SRC_0;
1035
1036		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1037			alu.src[1].sel = V_SQ_ALU_SRC_1;
1038			alu.src[1].neg = 1;
1039		} else {
1040			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1041			if (r)
1042				return r;
1043			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1044		}
1045		if (i == 3) {
1046			alu.last = 1;
1047		}
1048		r = r600_bc_add_alu(ctx->bc, &alu);
1049		if (r)
1050			return r;
1051	}
1052	r = r600_bc_add_literal(ctx->bc, ctx->value);
1053	if (r)
1054		return r;
1055
1056	/* kill must be last in ALU */
1057	ctx->bc->force_add_cf = 1;
1058	ctx->shader->uses_kill = TRUE;
1059	return 0;
1060}
1061
1062static int tgsi_lit(struct r600_shader_ctx *ctx)
1063{
1064	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1065	struct r600_bc_alu alu;
1066	struct r600_bc_alu_src r600_src[3];
1067	int r;
1068
1069	r = tgsi_split_constant(ctx, r600_src);
1070	if (r)
1071		return r;
1072	r = tgsi_split_literal_constant(ctx, r600_src);
1073	if (r)
1074		return r;
1075
1076	/* dst.x, <- 1.0  */
1077	memset(&alu, 0, sizeof(struct r600_bc_alu));
1078	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1079	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
1080	alu.src[0].chan = 0;
1081	r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1082	if (r)
1083		return r;
1084	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1085	r = r600_bc_add_alu(ctx->bc, &alu);
1086	if (r)
1087		return r;
1088
1089	/* dst.y = max(src.x, 0.0) */
1090	memset(&alu, 0, sizeof(struct r600_bc_alu));
1091	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1092	alu.src[0] = r600_src[0];
1093	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1094	alu.src[1].chan = 0;
1095	r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1096	if (r)
1097		return r;
1098	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1099	r = r600_bc_add_alu(ctx->bc, &alu);
1100	if (r)
1101		return r;
1102
1103	/* dst.w, <- 1.0  */
1104	memset(&alu, 0, sizeof(struct r600_bc_alu));
1105	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1106	alu.src[0].sel  = V_SQ_ALU_SRC_1;
1107	alu.src[0].chan = 0;
1108	r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1109	if (r)
1110		return r;
1111	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1112	alu.last = 1;
1113	r = r600_bc_add_alu(ctx->bc, &alu);
1114	if (r)
1115		return r;
1116
1117	r = r600_bc_add_literal(ctx->bc, ctx->value);
1118	if (r)
1119		return r;
1120
1121	if (inst->Dst[0].Register.WriteMask & (1 << 2))
1122	{
1123		int chan;
1124		int sel;
1125
1126		/* dst.z = log(src.y) */
1127		memset(&alu, 0, sizeof(struct r600_bc_alu));
1128		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1129		alu.src[0] = r600_src[0];
1130		alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1131		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1132		if (r)
1133			return r;
1134		alu.last = 1;
1135		r = r600_bc_add_alu(ctx->bc, &alu);
1136		if (r)
1137			return r;
1138
1139		r = r600_bc_add_literal(ctx->bc, ctx->value);
1140		if (r)
1141			return r;
1142
1143		chan = alu.dst.chan;
1144		sel = alu.dst.sel;
1145
1146		/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1147		memset(&alu, 0, sizeof(struct r600_bc_alu));
1148		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1149		alu.src[0] = r600_src[0];
1150		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1151		alu.src[1].sel  = sel;
1152		alu.src[1].chan = chan;
1153
1154		alu.src[2] = r600_src[0];
1155		alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
1156		alu.dst.sel = ctx->temp_reg;
1157		alu.dst.chan = 0;
1158		alu.dst.write = 1;
1159		alu.is_op3 = 1;
1160		alu.last = 1;
1161		r = r600_bc_add_alu(ctx->bc, &alu);
1162		if (r)
1163			return r;
1164
1165		r = r600_bc_add_literal(ctx->bc, ctx->value);
1166		if (r)
1167			return r;
1168		/* dst.z = exp(tmp.x) */
1169		memset(&alu, 0, sizeof(struct r600_bc_alu));
1170		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1171		alu.src[0].sel = ctx->temp_reg;
1172		alu.src[0].chan = 0;
1173		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1174		if (r)
1175			return r;
1176		alu.last = 1;
1177		r = r600_bc_add_alu(ctx->bc, &alu);
1178		if (r)
1179			return r;
1180	}
1181	return 0;
1182}
1183
1184static int tgsi_rsq(struct r600_shader_ctx *ctx)
1185{
1186	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1187	struct r600_bc_alu alu;
1188	int i, r;
1189
1190	memset(&alu, 0, sizeof(struct r600_bc_alu));
1191
1192	/* FIXME:
1193	 * For state trackers other than OpenGL, we'll want to use
1194	 * _RECIPSQRT_IEEE instead.
1195	 */
1196	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1197
1198	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1199		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1200		if (r)
1201			return r;
1202		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1203		alu.src[i].abs = 1;
1204	}
1205	alu.dst.sel = ctx->temp_reg;
1206	alu.dst.write = 1;
1207	alu.last = 1;
1208	r = r600_bc_add_alu(ctx->bc, &alu);
1209	if (r)
1210		return r;
1211	r = r600_bc_add_literal(ctx->bc, ctx->value);
1212	if (r)
1213		return r;
1214	/* replicate result */
1215	return tgsi_helper_tempx_replicate(ctx);
1216}
1217
1218static int tgsi_trans(struct r600_shader_ctx *ctx)
1219{
1220	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1221	struct r600_bc_alu alu;
1222	int i, j, r;
1223
1224	for (i = 0; i < 4; i++) {
1225		memset(&alu, 0, sizeof(struct r600_bc_alu));
1226		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
1227			alu.inst = ctx->inst_info->r600_opcode;
1228			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1229				r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]);
1230				if (r)
1231					return r;
1232				alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1233			}
1234			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1235			if (r)
1236				return r;
1237			alu.last = 1;
1238			r = r600_bc_add_alu(ctx->bc, &alu);
1239			if (r)
1240				return r;
1241		}
1242	}
1243	return 0;
1244}
1245
1246static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1247{
1248	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1249	struct r600_bc_alu alu;
1250	int i, r;
1251
1252	for (i = 0; i < 4; i++) {
1253		memset(&alu, 0, sizeof(struct r600_bc_alu));
1254		alu.src[0].sel = ctx->temp_reg;
1255		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1256		alu.dst.chan = i;
1257		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1258		if (r)
1259			return r;
1260		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1261		if (i == 3)
1262			alu.last = 1;
1263		r = r600_bc_add_alu(ctx->bc, &alu);
1264		if (r)
1265			return r;
1266	}
1267	return 0;
1268}
1269
1270static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1271{
1272	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1273	struct r600_bc_alu alu;
1274	int i, r;
1275
1276	memset(&alu, 0, sizeof(struct r600_bc_alu));
1277	alu.inst = ctx->inst_info->r600_opcode;
1278	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1279		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1280		if (r)
1281			return r;
1282		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1283	}
1284	alu.dst.sel = ctx->temp_reg;
1285	alu.dst.write = 1;
1286	alu.last = 1;
1287	r = r600_bc_add_alu(ctx->bc, &alu);
1288	if (r)
1289		return r;
1290	r = r600_bc_add_literal(ctx->bc, ctx->value);
1291	if (r)
1292		return r;
1293	/* replicate result */
1294	return tgsi_helper_tempx_replicate(ctx);
1295}
1296
1297static int tgsi_pow(struct r600_shader_ctx *ctx)
1298{
1299	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1300	struct r600_bc_alu alu;
1301	int r;
1302
1303	/* LOG2(a) */
1304	memset(&alu, 0, sizeof(struct r600_bc_alu));
1305	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1306	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1307	if (r)
1308		return r;
1309	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1310	alu.dst.sel = ctx->temp_reg;
1311	alu.dst.write = 1;
1312	alu.last = 1;
1313	r = r600_bc_add_alu(ctx->bc, &alu);
1314	if (r)
1315		return r;
1316	r = r600_bc_add_literal(ctx->bc,ctx->value);
1317	if (r)
1318		return r;
1319	/* b * LOG2(a) */
1320	memset(&alu, 0, sizeof(struct r600_bc_alu));
1321	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE);
1322	r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1323	if (r)
1324		return r;
1325	alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1326	alu.src[1].sel = ctx->temp_reg;
1327	alu.dst.sel = ctx->temp_reg;
1328	alu.dst.write = 1;
1329	alu.last = 1;
1330	r = r600_bc_add_alu(ctx->bc, &alu);
1331	if (r)
1332		return r;
1333	r = r600_bc_add_literal(ctx->bc,ctx->value);
1334	if (r)
1335		return r;
1336	/* POW(a,b) = EXP2(b * LOG2(a))*/
1337	memset(&alu, 0, sizeof(struct r600_bc_alu));
1338	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1339	alu.src[0].sel = ctx->temp_reg;
1340	alu.dst.sel = ctx->temp_reg;
1341	alu.dst.write = 1;
1342	alu.last = 1;
1343	r = r600_bc_add_alu(ctx->bc, &alu);
1344	if (r)
1345		return r;
1346	r = r600_bc_add_literal(ctx->bc,ctx->value);
1347	if (r)
1348		return r;
1349	return tgsi_helper_tempx_replicate(ctx);
1350}
1351
1352static int tgsi_ssg(struct r600_shader_ctx *ctx)
1353{
1354	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1355	struct r600_bc_alu alu;
1356	struct r600_bc_alu_src r600_src[3];
1357	int i, r;
1358
1359	r = tgsi_split_constant(ctx, r600_src);
1360	if (r)
1361		return r;
1362
1363	/* tmp = (src > 0 ? 1 : src) */
1364	for (i = 0; i < 4; i++) {
1365		memset(&alu, 0, sizeof(struct r600_bc_alu));
1366		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1367		alu.is_op3 = 1;
1368
1369		alu.dst.sel = ctx->temp_reg;
1370		alu.dst.chan = i;
1371
1372		alu.src[0] = r600_src[0];
1373		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1374
1375		alu.src[1].sel = V_SQ_ALU_SRC_1;
1376
1377		alu.src[2] = r600_src[0];
1378		alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1379		if (i == 3)
1380			alu.last = 1;
1381		r = r600_bc_add_alu(ctx->bc, &alu);
1382		if (r)
1383			return r;
1384	}
1385	r = r600_bc_add_literal(ctx->bc, ctx->value);
1386	if (r)
1387		return r;
1388
1389	/* dst = (-tmp > 0 ? -1 : tmp) */
1390	for (i = 0; i < 4; i++) {
1391		memset(&alu, 0, sizeof(struct r600_bc_alu));
1392		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1393		alu.is_op3 = 1;
1394		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1395		if (r)
1396			return r;
1397
1398		alu.src[0].sel = ctx->temp_reg;
1399		alu.src[0].chan = i;
1400		alu.src[0].neg = 1;
1401
1402		alu.src[1].sel = V_SQ_ALU_SRC_1;
1403		alu.src[1].neg = 1;
1404
1405		alu.src[2].sel = ctx->temp_reg;
1406		alu.src[2].chan = i;
1407
1408		if (i == 3)
1409			alu.last = 1;
1410		r = r600_bc_add_alu(ctx->bc, &alu);
1411		if (r)
1412			return r;
1413	}
1414	return 0;
1415}
1416
1417static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1418{
1419	struct r600_bc_alu alu;
1420	int i, r;
1421
1422	r = r600_bc_add_literal(ctx->bc, ctx->value);
1423	if (r)
1424		return r;
1425	for (i = 0; i < 4; i++) {
1426		memset(&alu, 0, sizeof(struct r600_bc_alu));
1427		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1428			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1429			alu.dst.chan = i;
1430		} else {
1431			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1432			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1433			if (r)
1434				return r;
1435			alu.src[0].sel = ctx->temp_reg;
1436			alu.src[0].chan = i;
1437		}
1438		if (i == 3) {
1439			alu.last = 1;
1440		}
1441		r = r600_bc_add_alu(ctx->bc, &alu);
1442		if (r)
1443			return r;
1444	}
1445	return 0;
1446}
1447
1448static int tgsi_op3(struct r600_shader_ctx *ctx)
1449{
1450	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1451	struct r600_bc_alu_src r600_src[3];
1452	struct r600_bc_alu alu;
1453	int i, j, r;
1454
1455	r = tgsi_split_constant(ctx, r600_src);
1456	if (r)
1457		return r;
1458	/* do it in 2 step as op3 doesn't support writemask */
1459	for (i = 0; i < 4; i++) {
1460		memset(&alu, 0, sizeof(struct r600_bc_alu));
1461		alu.inst = ctx->inst_info->r600_opcode;
1462		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1463			alu.src[j] = r600_src[j];
1464			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1465		}
1466		alu.dst.sel = ctx->temp_reg;
1467		alu.dst.chan = i;
1468		alu.dst.write = 1;
1469		alu.is_op3 = 1;
1470		if (i == 3) {
1471			alu.last = 1;
1472		}
1473		r = r600_bc_add_alu(ctx->bc, &alu);
1474		if (r)
1475			return r;
1476	}
1477	return tgsi_helper_copy(ctx, inst);
1478}
1479
1480static int tgsi_dp(struct r600_shader_ctx *ctx)
1481{
1482	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1483	struct r600_bc_alu_src r600_src[3];
1484	struct r600_bc_alu alu;
1485	int i, j, r;
1486
1487	r = tgsi_split_constant(ctx, r600_src);
1488	if (r)
1489		return r;
1490	for (i = 0; i < 4; i++) {
1491		memset(&alu, 0, sizeof(struct r600_bc_alu));
1492		alu.inst = ctx->inst_info->r600_opcode;
1493		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1494			alu.src[j] = r600_src[j];
1495			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1496		}
1497		alu.dst.sel = ctx->temp_reg;
1498		alu.dst.chan = i;
1499		alu.dst.write = 1;
1500		/* handle some special cases */
1501		switch (ctx->inst_info->tgsi_opcode) {
1502		case TGSI_OPCODE_DP2:
1503			if (i > 1) {
1504				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1505				alu.src[0].chan = alu.src[1].chan = 0;
1506			}
1507			break;
1508		case TGSI_OPCODE_DP3:
1509			if (i > 2) {
1510				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1511				alu.src[0].chan = alu.src[1].chan = 0;
1512			}
1513			break;
1514		case TGSI_OPCODE_DPH:
1515			if (i == 3) {
1516				alu.src[0].sel = V_SQ_ALU_SRC_1;
1517				alu.src[0].chan = 0;
1518				alu.src[0].neg = 0;
1519			}
1520			break;
1521		default:
1522			break;
1523		}
1524		if (i == 3) {
1525			alu.last = 1;
1526		}
1527		r = r600_bc_add_alu(ctx->bc, &alu);
1528		if (r)
1529			return r;
1530	}
1531	return tgsi_helper_copy(ctx, inst);
1532}
1533
1534static int tgsi_tex(struct r600_shader_ctx *ctx)
1535{
1536	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1537	struct r600_bc_tex tex;
1538	struct r600_bc_alu alu;
1539	unsigned src_gpr;
1540	int r, i;
1541	int opcode;
1542	boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY;
1543	uint32_t lit_vals[4];
1544
1545	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1546
1547	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1548		/* Add perspective divide */
1549		memset(&alu, 0, sizeof(struct r600_bc_alu));
1550		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1551		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1552		if (r)
1553			return r;
1554
1555		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1556		alu.dst.sel = ctx->temp_reg;
1557		alu.dst.chan = 3;
1558		alu.last = 1;
1559		alu.dst.write = 1;
1560		r = r600_bc_add_alu(ctx->bc, &alu);
1561		if (r)
1562			return r;
1563
1564		for (i = 0; i < 3; i++) {
1565			memset(&alu, 0, sizeof(struct r600_bc_alu));
1566			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1567			alu.src[0].sel = ctx->temp_reg;
1568			alu.src[0].chan = 3;
1569			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1570			if (r)
1571				return r;
1572			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1573			alu.dst.sel = ctx->temp_reg;
1574			alu.dst.chan = i;
1575			alu.dst.write = 1;
1576			r = r600_bc_add_alu(ctx->bc, &alu);
1577			if (r)
1578				return r;
1579		}
1580		memset(&alu, 0, sizeof(struct r600_bc_alu));
1581		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1582		alu.src[0].sel = V_SQ_ALU_SRC_1;
1583		alu.src[0].chan = 0;
1584		alu.dst.sel = ctx->temp_reg;
1585		alu.dst.chan = 3;
1586		alu.last = 1;
1587		alu.dst.write = 1;
1588		r = r600_bc_add_alu(ctx->bc, &alu);
1589		if (r)
1590			return r;
1591		src_not_temp = FALSE;
1592		src_gpr = ctx->temp_reg;
1593	}
1594
1595	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1596		int src_chan, src2_chan;
1597
1598		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1599		for (i = 0; i < 4; i++) {
1600			memset(&alu, 0, sizeof(struct r600_bc_alu));
1601			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1602			switch (i) {
1603			case 0:
1604				src_chan = 2;
1605				src2_chan = 1;
1606				break;
1607			case 1:
1608				src_chan = 2;
1609				src2_chan = 0;
1610				break;
1611			case 2:
1612				src_chan = 0;
1613				src2_chan = 2;
1614				break;
1615			case 3:
1616				src_chan = 1;
1617				src2_chan = 2;
1618				break;
1619			}
1620			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1621			if (r)
1622				return r;
1623			alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
1624			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1625			if (r)
1626				return r;
1627			alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
1628			alu.dst.sel = ctx->temp_reg;
1629			alu.dst.chan = i;
1630			if (i == 3)
1631				alu.last = 1;
1632			alu.dst.write = 1;
1633			r = r600_bc_add_alu(ctx->bc, &alu);
1634			if (r)
1635				return r;
1636		}
1637
1638		/* tmp1.z = RCP_e(|tmp1.z|) */
1639		memset(&alu, 0, sizeof(struct r600_bc_alu));
1640		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1641		alu.src[0].sel = ctx->temp_reg;
1642		alu.src[0].chan = 2;
1643		alu.src[0].abs = 1;
1644		alu.dst.sel = ctx->temp_reg;
1645		alu.dst.chan = 2;
1646		alu.dst.write = 1;
1647		alu.last = 1;
1648		r = r600_bc_add_alu(ctx->bc, &alu);
1649		if (r)
1650			return r;
1651
1652		/* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
1653		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
1654		 * muladd has no writemask, have to use another temp
1655		 */
1656		memset(&alu, 0, sizeof(struct r600_bc_alu));
1657		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1658		alu.is_op3 = 1;
1659
1660		alu.src[0].sel = ctx->temp_reg;
1661		alu.src[0].chan = 0;
1662		alu.src[1].sel = ctx->temp_reg;
1663		alu.src[1].chan = 2;
1664
1665		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1666		alu.src[2].chan = 0;
1667
1668		alu.dst.sel = ctx->temp_reg;
1669		alu.dst.chan = 0;
1670		alu.dst.write = 1;
1671
1672		r = r600_bc_add_alu(ctx->bc, &alu);
1673		if (r)
1674			return r;
1675
1676		memset(&alu, 0, sizeof(struct r600_bc_alu));
1677		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1678		alu.is_op3 = 1;
1679
1680		alu.src[0].sel = ctx->temp_reg;
1681		alu.src[0].chan = 1;
1682		alu.src[1].sel = ctx->temp_reg;
1683		alu.src[1].chan = 2;
1684
1685		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1686		alu.src[2].chan = 0;
1687
1688		alu.dst.sel = ctx->temp_reg;
1689		alu.dst.chan = 1;
1690		alu.dst.write = 1;
1691
1692		alu.last = 1;
1693		r = r600_bc_add_alu(ctx->bc, &alu);
1694		if (r)
1695			return r;
1696
1697		lit_vals[0] = fui(1.5f);
1698
1699		r = r600_bc_add_literal(ctx->bc, lit_vals);
1700		if (r)
1701			return r;
1702		src_not_temp = FALSE;
1703		src_gpr = ctx->temp_reg;
1704	}
1705
1706	if (src_not_temp) {
1707		for (i = 0; i < 4; i++) {
1708			memset(&alu, 0, sizeof(struct r600_bc_alu));
1709			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1710			alu.src[0].sel = src_gpr;
1711			alu.src[0].chan = i;
1712			alu.dst.sel = ctx->temp_reg;
1713			alu.dst.chan = i;
1714			if (i == 3)
1715				alu.last = 1;
1716			alu.dst.write = 1;
1717			r = r600_bc_add_alu(ctx->bc, &alu);
1718			if (r)
1719				return r;
1720		}
1721		src_gpr = ctx->temp_reg;
1722	}
1723
1724	opcode = ctx->inst_info->r600_opcode;
1725	if (opcode == SQ_TEX_INST_SAMPLE &&
1726	    (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
1727		opcode = SQ_TEX_INST_SAMPLE_C;
1728
1729	memset(&tex, 0, sizeof(struct r600_bc_tex));
1730	tex.inst = opcode;
1731	tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1732	tex.sampler_id = tex.resource_id;
1733	tex.src_gpr = src_gpr;
1734	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1735	tex.dst_sel_x = 0;
1736	tex.dst_sel_y = 1;
1737	tex.dst_sel_z = 2;
1738	tex.dst_sel_w = 3;
1739	tex.src_sel_x = 0;
1740	tex.src_sel_y = 1;
1741	tex.src_sel_z = 2;
1742	tex.src_sel_w = 3;
1743
1744	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1745		tex.src_sel_x = 1;
1746		tex.src_sel_y = 0;
1747		tex.src_sel_z = 3;
1748		tex.src_sel_w = 1;
1749	}
1750
1751	if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1752		tex.coord_type_x = 1;
1753		tex.coord_type_y = 1;
1754		tex.coord_type_z = 1;
1755		tex.coord_type_w = 1;
1756	}
1757
1758	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
1759		tex.src_sel_w = 2;
1760
1761	r = r600_bc_add_tex(ctx->bc, &tex);
1762	if (r)
1763		return r;
1764
1765	/* add shadow ambient support  - gallium doesn't do it yet */
1766	return 0;
1767
1768}
1769
1770static int tgsi_lrp(struct r600_shader_ctx *ctx)
1771{
1772	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1773	struct r600_bc_alu_src r600_src[3];
1774	struct r600_bc_alu alu;
1775	unsigned i;
1776	int r;
1777
1778	r = tgsi_split_constant(ctx, r600_src);
1779	if (r)
1780		return r;
1781	/* 1 - src0 */
1782	for (i = 0; i < 4; i++) {
1783		memset(&alu, 0, sizeof(struct r600_bc_alu));
1784		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1785		alu.src[0].sel = V_SQ_ALU_SRC_1;
1786		alu.src[0].chan = 0;
1787		alu.src[1] = r600_src[0];
1788		alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1789		alu.src[1].neg = 1;
1790		alu.dst.sel = ctx->temp_reg;
1791		alu.dst.chan = i;
1792		if (i == 3) {
1793			alu.last = 1;
1794		}
1795		alu.dst.write = 1;
1796		r = r600_bc_add_alu(ctx->bc, &alu);
1797		if (r)
1798			return r;
1799	}
1800	r = r600_bc_add_literal(ctx->bc, ctx->value);
1801	if (r)
1802		return r;
1803
1804	/* (1 - src0) * src2 */
1805	for (i = 0; i < 4; i++) {
1806		memset(&alu, 0, sizeof(struct r600_bc_alu));
1807		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1808		alu.src[0].sel = ctx->temp_reg;
1809		alu.src[0].chan = i;
1810		alu.src[1] = r600_src[2];
1811		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1812		alu.dst.sel = ctx->temp_reg;
1813		alu.dst.chan = i;
1814		if (i == 3) {
1815			alu.last = 1;
1816		}
1817		alu.dst.write = 1;
1818		r = r600_bc_add_alu(ctx->bc, &alu);
1819		if (r)
1820			return r;
1821	}
1822	r = r600_bc_add_literal(ctx->bc, ctx->value);
1823	if (r)
1824		return r;
1825
1826	/* src0 * src1 + (1 - src0) * src2 */
1827	for (i = 0; i < 4; i++) {
1828		memset(&alu, 0, sizeof(struct r600_bc_alu));
1829		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1830		alu.is_op3 = 1;
1831		alu.src[0] = r600_src[0];
1832		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1833		alu.src[1] = r600_src[1];
1834		alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
1835		alu.src[2].sel = ctx->temp_reg;
1836		alu.src[2].chan = i;
1837		alu.dst.sel = ctx->temp_reg;
1838		alu.dst.chan = i;
1839		if (i == 3) {
1840			alu.last = 1;
1841		}
1842		r = r600_bc_add_alu(ctx->bc, &alu);
1843		if (r)
1844			return r;
1845	}
1846	return tgsi_helper_copy(ctx, inst);
1847}
1848
1849static int tgsi_cmp(struct r600_shader_ctx *ctx)
1850{
1851	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1852	struct r600_bc_alu_src r600_src[3];
1853	struct r600_bc_alu alu;
1854	int use_temp = 0;
1855	int i, r;
1856
1857	r = tgsi_split_constant(ctx, r600_src);
1858	if (r)
1859		return r;
1860
1861	if (inst->Dst[0].Register.WriteMask != 0xf)
1862		use_temp = 1;
1863
1864	for (i = 0; i < 4; i++) {
1865		memset(&alu, 0, sizeof(struct r600_bc_alu));
1866		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
1867		alu.src[0] = r600_src[0];
1868		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1869
1870		alu.src[1] = r600_src[2];
1871		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1872
1873		alu.src[2] = r600_src[1];
1874		alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
1875
1876		if (use_temp)
1877			alu.dst.sel = ctx->temp_reg;
1878		else {
1879			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1880			if (r)
1881				return r;
1882		}
1883		alu.dst.chan = i;
1884		alu.dst.write = 1;
1885		alu.is_op3 = 1;
1886		if (i == 3)
1887			alu.last = 1;
1888		r = r600_bc_add_alu(ctx->bc, &alu);
1889		if (r)
1890			return r;
1891	}
1892	if (use_temp)
1893		return tgsi_helper_copy(ctx, inst);
1894	return 0;
1895}
1896
1897static int tgsi_xpd(struct r600_shader_ctx *ctx)
1898{
1899	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1900	struct r600_bc_alu_src r600_src[3];
1901	struct r600_bc_alu alu;
1902	uint32_t use_temp = 0;
1903	int i, r;
1904
1905	if (inst->Dst[0].Register.WriteMask != 0xf)
1906		use_temp = 1;
1907
1908	r = tgsi_split_constant(ctx, r600_src);
1909	if (r)
1910		return r;
1911
1912	for (i = 0; i < 4; i++) {
1913		memset(&alu, 0, sizeof(struct r600_bc_alu));
1914		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1915
1916		alu.src[0] = r600_src[0];
1917		switch (i) {
1918		case 0:
1919			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
1920			break;
1921		case 1:
1922			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1923			break;
1924		case 2:
1925			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1926			break;
1927		case 3:
1928			alu.src[0].sel = V_SQ_ALU_SRC_0;
1929			alu.src[0].chan = i;
1930		}
1931
1932		alu.src[1] = r600_src[1];
1933		switch (i) {
1934		case 0:
1935			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
1936			break;
1937		case 1:
1938			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
1939			break;
1940		case 2:
1941			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
1942			break;
1943		case 3:
1944			alu.src[1].sel = V_SQ_ALU_SRC_0;
1945			alu.src[1].chan = i;
1946		}
1947
1948		alu.dst.sel = ctx->temp_reg;
1949		alu.dst.chan = i;
1950		alu.dst.write = 1;
1951
1952		if (i == 3)
1953			alu.last = 1;
1954		r = r600_bc_add_alu(ctx->bc, &alu);
1955		if (r)
1956			return r;
1957
1958		r = r600_bc_add_literal(ctx->bc, ctx->value);
1959		if (r)
1960			return r;
1961	}
1962
1963	for (i = 0; i < 4; i++) {
1964		memset(&alu, 0, sizeof(struct r600_bc_alu));
1965		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1966
1967		alu.src[0] = r600_src[0];
1968		switch (i) {
1969		case 0:
1970			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1971			break;
1972		case 1:
1973			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
1974			break;
1975		case 2:
1976			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1977			break;
1978		case 3:
1979			alu.src[0].sel = V_SQ_ALU_SRC_0;
1980			alu.src[0].chan = i;
1981		}
1982
1983		alu.src[1] = r600_src[1];
1984		switch (i) {
1985		case 0:
1986			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
1987			break;
1988		case 1:
1989			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
1990			break;
1991		case 2:
1992			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
1993			break;
1994		case 3:
1995			alu.src[1].sel = V_SQ_ALU_SRC_0;
1996			alu.src[1].chan = i;
1997		}
1998
1999		alu.src[2].sel = ctx->temp_reg;
2000		alu.src[2].neg = 1;
2001		alu.src[2].chan = i;
2002
2003		if (use_temp)
2004			alu.dst.sel = ctx->temp_reg;
2005		else {
2006			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2007			if (r)
2008				return r;
2009		}
2010		alu.dst.chan = i;
2011		alu.dst.write = 1;
2012		alu.is_op3 = 1;
2013		if (i == 3)
2014			alu.last = 1;
2015		r = r600_bc_add_alu(ctx->bc, &alu);
2016		if (r)
2017			return r;
2018
2019		r = r600_bc_add_literal(ctx->bc, ctx->value);
2020		if (r)
2021			return r;
2022	}
2023	if (use_temp)
2024		return tgsi_helper_copy(ctx, inst);
2025	return 0;
2026}
2027
2028static int tgsi_exp(struct r600_shader_ctx *ctx)
2029{
2030	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2031	struct r600_bc_alu_src r600_src[3];
2032	struct r600_bc_alu alu;
2033	int r;
2034
2035	/* result.x = 2^floor(src); */
2036	if (inst->Dst[0].Register.WriteMask & 1) {
2037		memset(&alu, 0, sizeof(struct r600_bc_alu));
2038
2039		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2040		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2041		if (r)
2042			return r;
2043
2044		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2045
2046		alu.dst.sel = ctx->temp_reg;
2047		alu.dst.chan = 0;
2048		alu.dst.write = 1;
2049		alu.last = 1;
2050		r = r600_bc_add_alu(ctx->bc, &alu);
2051		if (r)
2052			return r;
2053
2054		r = r600_bc_add_literal(ctx->bc, ctx->value);
2055		if (r)
2056			return r;
2057
2058		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2059		alu.src[0].sel = ctx->temp_reg;
2060		alu.src[0].chan = 0;
2061
2062		alu.dst.sel = ctx->temp_reg;
2063		alu.dst.chan = 0;
2064		alu.dst.write = 1;
2065		alu.last = 1;
2066		r = r600_bc_add_alu(ctx->bc, &alu);
2067		if (r)
2068			return r;
2069
2070		r = r600_bc_add_literal(ctx->bc, ctx->value);
2071		if (r)
2072			return r;
2073	}
2074
2075	/* result.y = tmp - floor(tmp); */
2076	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2077		memset(&alu, 0, sizeof(struct r600_bc_alu));
2078
2079		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2080		alu.src[0] = r600_src[0];
2081		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2082		if (r)
2083			return r;
2084		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2085
2086		alu.dst.sel = ctx->temp_reg;
2087//		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2088//		if (r)
2089//			return r;
2090		alu.dst.write = 1;
2091		alu.dst.chan = 1;
2092
2093		alu.last = 1;
2094
2095		r = r600_bc_add_alu(ctx->bc, &alu);
2096		if (r)
2097			return r;
2098		r = r600_bc_add_literal(ctx->bc, ctx->value);
2099		if (r)
2100			return r;
2101	}
2102
2103	/* result.z = RoughApprox2ToX(tmp);*/
2104	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2105		memset(&alu, 0, sizeof(struct r600_bc_alu));
2106		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2107		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2108		if (r)
2109			return r;
2110		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2111
2112		alu.dst.sel = ctx->temp_reg;
2113		alu.dst.write = 1;
2114		alu.dst.chan = 2;
2115
2116		alu.last = 1;
2117
2118		r = r600_bc_add_alu(ctx->bc, &alu);
2119		if (r)
2120			return r;
2121		r = r600_bc_add_literal(ctx->bc, ctx->value);
2122		if (r)
2123			return r;
2124	}
2125
2126	/* result.w = 1.0;*/
2127	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2128		memset(&alu, 0, sizeof(struct r600_bc_alu));
2129
2130		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2131		alu.src[0].sel = V_SQ_ALU_SRC_1;
2132		alu.src[0].chan = 0;
2133
2134		alu.dst.sel = ctx->temp_reg;
2135		alu.dst.chan = 3;
2136		alu.dst.write = 1;
2137		alu.last = 1;
2138		r = r600_bc_add_alu(ctx->bc, &alu);
2139		if (r)
2140			return r;
2141		r = r600_bc_add_literal(ctx->bc, ctx->value);
2142		if (r)
2143			return r;
2144	}
2145	return tgsi_helper_copy(ctx, inst);
2146}
2147
2148static int tgsi_log(struct r600_shader_ctx *ctx)
2149{
2150	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2151	struct r600_bc_alu alu;
2152	int r;
2153
2154	/* result.x = floor(log2(src)); */
2155	if (inst->Dst[0].Register.WriteMask & 1) {
2156		memset(&alu, 0, sizeof(struct r600_bc_alu));
2157
2158		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2159		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2160		if (r)
2161			return r;
2162
2163		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2164
2165		alu.dst.sel = ctx->temp_reg;
2166		alu.dst.chan = 0;
2167		alu.dst.write = 1;
2168		alu.last = 1;
2169		r = r600_bc_add_alu(ctx->bc, &alu);
2170		if (r)
2171			return r;
2172
2173		r = r600_bc_add_literal(ctx->bc, ctx->value);
2174		if (r)
2175			return r;
2176
2177		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2178		alu.src[0].sel = ctx->temp_reg;
2179		alu.src[0].chan = 0;
2180
2181		alu.dst.sel = ctx->temp_reg;
2182		alu.dst.chan = 0;
2183		alu.dst.write = 1;
2184		alu.last = 1;
2185
2186		r = r600_bc_add_alu(ctx->bc, &alu);
2187		if (r)
2188			return r;
2189
2190		r = r600_bc_add_literal(ctx->bc, ctx->value);
2191		if (r)
2192			return r;
2193	}
2194
2195	/* result.y = src.x / (2 ^ floor(log2(src.x))); */
2196	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2197		memset(&alu, 0, sizeof(struct r600_bc_alu));
2198
2199		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2200		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2201		if (r)
2202			return r;
2203
2204		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2205
2206		alu.dst.sel = ctx->temp_reg;
2207		alu.dst.chan = 1;
2208		alu.dst.write = 1;
2209		alu.last = 1;
2210
2211		r = r600_bc_add_alu(ctx->bc, &alu);
2212		if (r)
2213			return r;
2214
2215		r = r600_bc_add_literal(ctx->bc, ctx->value);
2216		if (r)
2217			return r;
2218
2219		memset(&alu, 0, sizeof(struct r600_bc_alu));
2220
2221		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2222		alu.src[0].sel = ctx->temp_reg;
2223		alu.src[0].chan = 1;
2224
2225		alu.dst.sel = ctx->temp_reg;
2226		alu.dst.chan = 1;
2227		alu.dst.write = 1;
2228		alu.last = 1;
2229
2230		r = r600_bc_add_alu(ctx->bc, &alu);
2231		if (r)
2232			return r;
2233
2234		r = r600_bc_add_literal(ctx->bc, ctx->value);
2235		if (r)
2236			return r;
2237
2238		memset(&alu, 0, sizeof(struct r600_bc_alu));
2239
2240		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2241		alu.src[0].sel = ctx->temp_reg;
2242		alu.src[0].chan = 1;
2243
2244		alu.dst.sel = ctx->temp_reg;
2245		alu.dst.chan = 1;
2246		alu.dst.write = 1;
2247		alu.last = 1;
2248
2249		r = r600_bc_add_alu(ctx->bc, &alu);
2250		if (r)
2251			return r;
2252
2253		r = r600_bc_add_literal(ctx->bc, ctx->value);
2254		if (r)
2255			return r;
2256
2257		memset(&alu, 0, sizeof(struct r600_bc_alu));
2258
2259		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2260		alu.src[0].sel = ctx->temp_reg;
2261		alu.src[0].chan = 1;
2262
2263		alu.dst.sel = ctx->temp_reg;
2264		alu.dst.chan = 1;
2265		alu.dst.write = 1;
2266		alu.last = 1;
2267
2268		r = r600_bc_add_alu(ctx->bc, &alu);
2269		if (r)
2270			return r;
2271
2272		r = r600_bc_add_literal(ctx->bc, ctx->value);
2273		if (r)
2274			return r;
2275
2276		memset(&alu, 0, sizeof(struct r600_bc_alu));
2277
2278		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2279
2280		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2281		if (r)
2282			return r;
2283
2284		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2285
2286		alu.src[1].sel = ctx->temp_reg;
2287		alu.src[1].chan = 1;
2288
2289		alu.dst.sel = ctx->temp_reg;
2290		alu.dst.chan = 1;
2291		alu.dst.write = 1;
2292		alu.last = 1;
2293
2294		r = r600_bc_add_alu(ctx->bc, &alu);
2295		if (r)
2296			return r;
2297
2298		r = r600_bc_add_literal(ctx->bc, ctx->value);
2299		if (r)
2300			return r;
2301	}
2302
2303	/* result.z = log2(src);*/
2304	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2305		memset(&alu, 0, sizeof(struct r600_bc_alu));
2306
2307		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2308		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2309		if (r)
2310			return r;
2311
2312		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2313
2314		alu.dst.sel = ctx->temp_reg;
2315		alu.dst.write = 1;
2316		alu.dst.chan = 2;
2317		alu.last = 1;
2318
2319		r = r600_bc_add_alu(ctx->bc, &alu);
2320		if (r)
2321			return r;
2322
2323		r = r600_bc_add_literal(ctx->bc, ctx->value);
2324		if (r)
2325			return r;
2326	}
2327
2328	/* result.w = 1.0; */
2329	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2330		memset(&alu, 0, sizeof(struct r600_bc_alu));
2331
2332		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2333		alu.src[0].sel = V_SQ_ALU_SRC_1;
2334		alu.src[0].chan = 0;
2335
2336		alu.dst.sel = ctx->temp_reg;
2337		alu.dst.chan = 3;
2338		alu.dst.write = 1;
2339		alu.last = 1;
2340
2341		r = r600_bc_add_alu(ctx->bc, &alu);
2342		if (r)
2343			return r;
2344
2345		r = r600_bc_add_literal(ctx->bc, ctx->value);
2346		if (r)
2347			return r;
2348	}
2349
2350	return tgsi_helper_copy(ctx, inst);
2351}
2352
2353/* r6/7 only for now */
2354static int tgsi_arl(struct r600_shader_ctx *ctx)
2355{
2356	/* TODO from r600c, ar values don't persist between clauses */
2357	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2358	struct r600_bc_alu alu;
2359	int r;
2360	memset(&alu, 0, sizeof(struct r600_bc_alu));
2361
2362	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
2363
2364	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2365	if (r)
2366		return r;
2367	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2368
2369	alu.last = 1;
2370
2371	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2372	if (r)
2373		return r;
2374	ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2375	return 0;
2376}
2377
2378static int tgsi_opdst(struct r600_shader_ctx *ctx)
2379{
2380	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2381	struct r600_bc_alu alu;
2382	int i, r = 0;
2383
2384	for (i = 0; i < 4; i++) {
2385		memset(&alu, 0, sizeof(struct r600_bc_alu));
2386
2387		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2388		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2389		if (r)
2390			return r;
2391
2392	        if (i == 0 || i == 3) {
2393			alu.src[0].sel = V_SQ_ALU_SRC_1;
2394		} else {
2395			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2396			if (r)
2397				return r;
2398			alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2399		}
2400
2401	        if (i == 0 || i == 2) {
2402			alu.src[1].sel = V_SQ_ALU_SRC_1;
2403		} else {
2404			r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
2405			if (r)
2406				return r;
2407			alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2408		}
2409		if (i == 3)
2410			alu.last = 1;
2411		r = r600_bc_add_alu(ctx->bc, &alu);
2412		if (r)
2413			return r;
2414	}
2415	return 0;
2416}
2417
2418static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2419{
2420	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2421	struct r600_bc_alu alu;
2422	int r;
2423
2424	memset(&alu, 0, sizeof(struct r600_bc_alu));
2425	alu.inst = opcode;
2426	alu.predicate = 1;
2427
2428	alu.dst.sel = ctx->temp_reg;
2429	alu.dst.write = 1;
2430	alu.dst.chan = 0;
2431
2432	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2433	if (r)
2434		return r;
2435	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2436	alu.src[1].sel = V_SQ_ALU_SRC_0;
2437	alu.src[1].chan = 0;
2438
2439	alu.last = 1;
2440
2441	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2442	if (r)
2443		return r;
2444	return 0;
2445}
2446
2447static int pops(struct r600_shader_ctx *ctx, int pops)
2448{
2449	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2450	ctx->bc->cf_last->pop_count = pops;
2451	return 0;
2452}
2453
2454static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2455{
2456	switch(reason) {
2457	case FC_PUSH_VPM:
2458		ctx->bc->callstack[ctx->bc->call_sp].current--;
2459		break;
2460	case FC_PUSH_WQM:
2461	case FC_LOOP:
2462		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2463		break;
2464	case FC_REP:
2465		/* TOODO : for 16 vp asic should -= 2; */
2466		ctx->bc->callstack[ctx->bc->call_sp].current --;
2467		break;
2468	}
2469}
2470
2471static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2472{
2473	if (check_max_only) {
2474		int diff;
2475		switch (reason) {
2476		case FC_PUSH_VPM:
2477			diff = 1;
2478			break;
2479		case FC_PUSH_WQM:
2480			diff = 4;
2481			break;
2482		default:
2483			assert(0);
2484			diff = 0;
2485		}
2486		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2487		    ctx->bc->callstack[ctx->bc->call_sp].max) {
2488			ctx->bc->callstack[ctx->bc->call_sp].max =
2489				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2490		}
2491		return;
2492	}
2493	switch (reason) {
2494	case FC_PUSH_VPM:
2495		ctx->bc->callstack[ctx->bc->call_sp].current++;
2496		break;
2497	case FC_PUSH_WQM:
2498	case FC_LOOP:
2499		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2500		break;
2501	case FC_REP:
2502		ctx->bc->callstack[ctx->bc->call_sp].current++;
2503		break;
2504	}
2505
2506	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2507	    ctx->bc->callstack[ctx->bc->call_sp].max) {
2508		ctx->bc->callstack[ctx->bc->call_sp].max =
2509			ctx->bc->callstack[ctx->bc->call_sp].current;
2510	}
2511}
2512
2513static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2514{
2515	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2516
2517	sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2518						sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2519	sp->mid[sp->num_mid] = ctx->bc->cf_last;
2520	sp->num_mid++;
2521}
2522
2523static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2524{
2525	ctx->bc->fc_sp++;
2526	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2527	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2528}
2529
2530static void fc_poplevel(struct r600_shader_ctx *ctx)
2531{
2532	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2533	if (sp->mid) {
2534		free(sp->mid);
2535		sp->mid = NULL;
2536	}
2537	sp->num_mid = 0;
2538	sp->start = NULL;
2539	sp->type = 0;
2540	ctx->bc->fc_sp--;
2541}
2542
2543#if 0
/*
 * Everything up to the matching #endif is compiled out.  These are
 * unfinished helpers for return / loop-flag handling: emit_jump_to_offset()
 * does not set a jump target yet (see its TODO), emit_setret_in_loop_flag()
 * only returns 0 and emit_testflag() has an empty body.
 */
2544static int emit_return(struct r600_shader_ctx *ctx)
2545{
2546	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
2547	return 0;
2548}
2549
/* Adds a JUMP CF with the given pop count; the offset argument is unused. */
2550static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
2551{
2552
2553	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
2554	ctx->bc->cf_last->pop_count = pops;
2555	/* TODO work out offset */
2556	return 0;
2557}
2558
/* Stub: emits nothing. */
2559static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
2560{
2561	return 0;
2562}
2563
/* Stub: emits nothing. */
2564static void emit_testflag(struct r600_shader_ctx *ctx)
2565{
2566
2567}
2568
/* Calls the helpers above in sequence, then pops ifidx + 1 and returns. */
2569static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
2570{
2571	emit_testflag(ctx);
2572	emit_jump_to_offset(ctx, 1, 4);
2573	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
2574	pops(ctx, ifidx + 1);
2575	emit_return(ctx);
2576}
2577
/*
 * Adds the current instruction's CF opcode with pop_count 1, records it as
 * a 'mid' entry of flow-control level fc_sp, then emits a POP.
 */
2578static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
2579{
2580	emit_testflag(ctx);
2581
2582	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2583	ctx->bc->cf_last->pop_count = 1;
2584
2585	fc_set_mid(ctx, fc_sp);
2586
2587	pops(ctx, 1);
2588}
2589#endif
2590
2591static int tgsi_if(struct r600_shader_ctx *ctx)
2592{
2593	emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2594
2595	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2596
2597	fc_pushlevel(ctx, FC_IF);
2598
2599	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2600	return 0;
2601}
2602
2603static int tgsi_else(struct r600_shader_ctx *ctx)
2604{
2605	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2606	ctx->bc->cf_last->pop_count = 1;
2607
2608	fc_set_mid(ctx, ctx->bc->fc_sp);
2609	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2610	return 0;
2611}
2612
2613static int tgsi_endif(struct r600_shader_ctx *ctx)
2614{
2615	pops(ctx, 1);
2616	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2617		R600_ERR("if/endif unbalanced in shader\n");
2618		return -1;
2619	}
2620
2621	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2622		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2623		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2624	} else {
2625		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2626	}
2627	fc_poplevel(ctx);
2628
2629	callstack_decrease_current(ctx, FC_PUSH_VPM);
2630	return 0;
2631}
2632
2633static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2634{
2635	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2636
2637	fc_pushlevel(ctx, FC_LOOP);
2638
2639	/* check stack depth */
2640	callstack_check_depth(ctx, FC_LOOP, 0);
2641	return 0;
2642}
2643
2644static int tgsi_endloop(struct r600_shader_ctx *ctx)
2645{
2646	int i;
2647
2648	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
2649
2650	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2651		R600_ERR("loop/endloop in shader code are not paired.\n");
2652		return -EINVAL;
2653	}
2654
2655	/* fixup loop pointers - from r600isa
2656	   LOOP END points to CF after LOOP START,
2657	   LOOP START point to CF after LOOP END
2658	   BRK/CONT point to LOOP END CF
2659	*/
2660	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2661
2662	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2663
2664	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2665		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2666	}
2667	/* TODO add LOOPRET support */
2668	fc_poplevel(ctx);
2669	callstack_decrease_current(ctx, FC_LOOP);
2670	return 0;
2671}
2672
2673static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2674{
2675	unsigned int fscp;
2676
2677	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2678	{
2679		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2680			break;
2681	}
2682
2683	if (fscp == 0) {
2684		R600_ERR("Break not inside loop/endloop pair\n");
2685		return -EINVAL;
2686	}
2687
2688	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2689	ctx->bc->cf_last->pop_count = 1;
2690
2691	fc_set_mid(ctx, fscp);
2692
2693	pops(ctx, 1);
2694	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2695	return 0;
2696}
2697
/*
 * TGSI opcode dispatch table built from the un-prefixed (non-EG_) hardware
 * opcode constants.
 *
 * Each row lists, in order: the TGSI opcode; a flag that is 1 only on the
 * MAD row, the one row using an OP3 opcode; the hardware opcode associated
 * with the row (handlers such as tgsi_loop_brk_cont read it through
 * ctx->inst_info->r600_opcode); and the handler that emits the bytecode.
 *
 * Bare numeric rows marked "gap" stand in for TGSI opcode values that have
 * no named opcode.  NOTE(review): this suggests the array is indexed
 * directly by opcode value -- confirm against the lookup code before
 * reordering or removing rows.
 *
 * Rows routed to tgsi_unsupported have no translation yet.
 */
2698static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
2699	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl},
2700	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2701	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2702
2703	/* FIXME:
2704	 * For state trackers other than OpenGL, we'll want to use
2705	 * _RECIP_IEEE instead.
2706	 */
2707	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
2708
2709	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
2710	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2711	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
2712	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2713	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2714	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2715	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2716	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2717	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2718	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2719	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2720	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2721	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2722	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2723	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2724	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2725	/* gap */
2726	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2727	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2728	/* gap */
2729	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2730	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2731	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2732	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2733	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2734	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2735	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2736	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2737	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2738	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2739	/* gap */
2740	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2741	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2742	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2743	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2744	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2745	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2746	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2747	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
2748	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2749	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2750	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2751	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2752	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2753	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2754	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2755	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2756	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2757	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2758	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2759	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2760	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2761	{TGSI_OPCODE_TXD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2762	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2763	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2764	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2765	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2766	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2767	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2768	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2769	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2770	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2771	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2772	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2773	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2774	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2775	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
2776	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2777	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2778	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2779	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2780	{TGSI_OPCODE_TXL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2781	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2782	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2783	/* gap */
2784	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2785	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2786	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2787	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2788	/* gap */
2789	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2790	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2791	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2792	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2793	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2794	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2795	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2796	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
2797	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2798	/* gap */
2799	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2800	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2801	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2802	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2803	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2804	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2805	{TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2806	{TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2807	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2808	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2809	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2810	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2811	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2812	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2813	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2814	/* gap */
2815	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2816	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2817	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2818	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2819	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2820	/* gap */
2821	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2822	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2823	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2824	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2825	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2826	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2827	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2828	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2829	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
2830	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
2831	/* gap */
2832	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2833	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2834	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2835	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2836	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2837	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2838	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2839	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2840	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2841	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2842	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2843	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2844	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2845	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2846	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2847	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2848	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2849	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2850	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2851	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2852	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2853	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2854	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2855	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2856	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2857	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2858	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2859	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2860};
2861
2862static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
2863	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2864	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2865	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2866	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
2867	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
2868	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2869	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2870	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2871	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2872	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2873	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2874	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2875	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2876	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2877	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2878	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2879	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2880	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2881	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2882	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2883	/* gap */
2884	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2885	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2886	/* gap */
2887	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2888	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2889	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2890	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2891	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2892	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2893	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2894	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2895	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2896	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2897	/* gap */
2898	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2899	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2900	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2901	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2902	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2903	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2904	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2905	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
2906	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2907	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2908	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2909	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2910	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2911	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2912	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2913	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2914	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2915	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2916	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2917	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2918	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2919	{TGSI_OPCODE_TXD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2920	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2921	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2922	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2923	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2924	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2925	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2926	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2927	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2928	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2929	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2930	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2931	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2932	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2933	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
2934	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2935	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2936	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2937	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2938	{TGSI_OPCODE_TXL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2939	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2940	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2941	/* gap */
2942	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2943	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2944	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2945	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2946	/* gap */
2947	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2948	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2949	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2950	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2951	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2952	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2953	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2954	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
2955	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2956	/* gap */
2957	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2958	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2959	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2960	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2961	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2962	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2963	{TGSI_OPCODE_TXF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2964	{TGSI_OPCODE_TXQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2965	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2966	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2967	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2968	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2969	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2970	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2971	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2972	/* gap */
2973	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2974	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2975	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2976	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2977	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2978	/* gap */
2979	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2980	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2981	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2982	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2983	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2984	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2985	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2986	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2987	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
2988	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
2989	/* gap */
2990	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2991	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2992	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2993	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2994	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2995	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2996	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2997	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2998	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2999	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3000	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3001	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3002	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3003	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3004	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3005	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3006	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3007	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3008	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3009	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3010	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3011	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3012	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3013	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3014	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3015	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3016	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3017	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3018};
3019