r600_shader.c revision 7e5173d065f0da450cf553e3e3084a0f774919a3
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "pipe/p_shader_tokens.h"
24#include "tgsi/tgsi_parse.h"
25#include "tgsi/tgsi_scan.h"
26#include "tgsi/tgsi_dump.h"
27#include "util/u_format.h"
28#include "r600_screen.h"
29#include "r600_context.h"
30#include "r600_shader.h"
31#include "r600_asm.h"
32#include "r600_sq.h"
33#include "r600_opcodes.h"
34#include "r600d.h"
35#include <stdio.h>
36#include <errno.h>
37
38
39struct r600_shader_tgsi_instruction;
40
41struct r600_shader_ctx {
42	struct tgsi_shader_info			info;
43	struct tgsi_parse_context		parse;
44	const struct tgsi_token			*tokens;
45	unsigned				type;
46	unsigned				file_offset[TGSI_FILE_COUNT];
47	unsigned				temp_reg;
48	struct r600_shader_tgsi_instruction	*inst_info;
49	struct r600_bc				*bc;
50	struct r600_shader			*shader;
51	u32					value[4];
52	u32					*literals;
53	u32					nliterals;
54	u32					max_driver_temp_used;
55};
56
/* One entry of the TGSI opcode -> r600 translation tables. */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry describes */
	unsigned tgsi_opcode;
	/* NOTE(review): presumably flags three-operand hardware ops - confirm */
	unsigned is_op3;
	/* hardware ALU opcode copied into the emitted instruction */
	unsigned r600_opcode;
	/* translation handler; returns 0 on success or a negative errno */
	int (*process)(struct r600_shader_ctx *ctx);
};
63
64static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[];
65static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
66
67static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader)
68{
69	struct r600_context *rctx = r600_context(ctx);
70	const struct util_format_description *desc;
71	enum pipe_format resource_format[160];
72	unsigned i, nresources = 0;
73	struct r600_bc *bc = &shader->bc;
74	struct r600_bc_cf *cf;
75	struct r600_bc_vtx *vtx;
76
77	if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
78		return 0;
79	for (i = 0; i < rctx->vertex_elements->count; i++) {
80		resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
81	}
82	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
83		switch (cf->inst) {
84		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
85		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
86			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
87				desc = util_format_description(resource_format[vtx->buffer_id]);
88				if (desc == NULL) {
89					R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
90					return -EINVAL;
91				}
92				vtx->dst_sel_x = desc->swizzle[0];
93				vtx->dst_sel_y = desc->swizzle[1];
94				vtx->dst_sel_z = desc->swizzle[2];
95				vtx->dst_sel_w = desc->swizzle[3];
96			}
97			break;
98		default:
99			break;
100		}
101	}
102	return r600_bc_build(&shader->bc);
103}
104
105int r600_pipe_shader_create(struct pipe_context *ctx,
106			struct r600_context_state *rpshader,
107			const struct tgsi_token *tokens)
108{
109	struct r600_screen *rscreen = r600_screen(ctx->screen);
110	int r;
111
112//fprintf(stderr, "--------------------------------------------------------------\n");
113//tgsi_dump(tokens, 0);
114	if (rpshader == NULL)
115		return -ENOMEM;
116	rpshader->shader.family = radeon_get_family(rscreen->rw);
117	rpshader->shader.use_mem_constant = rscreen->use_mem_constant;
118	r = r600_shader_from_tgsi(tokens, &rpshader->shader);
119	if (r) {
120		R600_ERR("translation from TGSI failed !\n");
121		return r;
122	}
123	r = r600_bc_build(&rpshader->shader.bc);
124	if (r) {
125		R600_ERR("building bytecode failed !\n");
126		return r;
127	}
128//fprintf(stderr, "______________________________________________________________\n");
129	return 0;
130}
131
132static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader)
133{
134	struct r600_context *rctx = r600_context(ctx);
135	struct radeon_state *state;
136
137	state = &rpshader->rstate[0];
138	radeon_state_fini(&rpshader->rstate[0]);
139
140	return rctx->vtbl->vs_shader(rctx, rpshader, state);
141}
142
143static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader)
144{
145	struct r600_context *rctx = r600_context(ctx);
146	struct radeon_state *state;
147
148	state = &rpshader->rstate[0];
149	radeon_state_fini(state);
150
151	return rctx->vtbl->ps_shader(rctx, rpshader, state);
152}
153
154static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader)
155{
156	struct r600_screen *rscreen = r600_screen(ctx->screen);
157	struct r600_context *rctx = r600_context(ctx);
158	struct r600_shader *rshader = &rpshader->shader;
159	int r;
160	void *data;
161
162	/* copy new shader */
163	radeon_ws_bo_reference(rscreen->rw, &rpshader->bo, NULL);
164	rpshader->bo = NULL;
165	rpshader->bo = radeon_ws_bo(rscreen->rw, rshader->bc.ndw * 4,
166				    4096, 0);
167	if (rpshader->bo == NULL) {
168		return -ENOMEM;
169	}
170	data = radeon_ws_bo_map(rscreen->rw, rpshader->bo, 0, rctx);
171	memcpy(data, rshader->bc.bytecode, rshader->bc.ndw * 4);
172	radeon_ws_bo_unmap(rscreen->rw, rpshader->bo);
173	/* build state */
174	rshader->flat_shade = rctx->flat_shade;
175	switch (rshader->processor_type) {
176	case TGSI_PROCESSOR_VERTEX:
177		r = r600_pipe_shader_vs(ctx, rpshader);
178		break;
179	case TGSI_PROCESSOR_FRAGMENT:
180		r = r600_pipe_shader_ps(ctx, rpshader);
181		break;
182	default:
183		r = -EINVAL;
184		break;
185	}
186	return r;
187}
188
189int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader)
190{
191	struct r600_context *rctx = r600_context(ctx);
192	int r;
193
194	if (rpshader == NULL)
195		return -EINVAL;
196	/* there should be enough input */
197	if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) {
198		R600_ERR("%d resources provided, expecting %d\n",
199			rctx->vertex_elements->count, rpshader->shader.bc.nresource);
200		return -EINVAL;
201	}
202	r = r600_shader_update(ctx, &rpshader->shader);
203	if (r)
204		return r;
205	return r600_pipe_shader(ctx, rpshader);
206}
207
208static int tgsi_is_supported(struct r600_shader_ctx *ctx)
209{
210	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
211	int j;
212
213	if (i->Instruction.NumDstRegs > 1) {
214		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
215		return -EINVAL;
216	}
217	if (i->Instruction.Predicate) {
218		R600_ERR("predicate unsupported\n");
219		return -EINVAL;
220	}
221#if 0
222	if (i->Instruction.Label) {
223		R600_ERR("label unsupported\n");
224		return -EINVAL;
225	}
226#endif
227	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
228		if (i->Src[j].Register.Dimension ||
229			i->Src[j].Register.Absolute) {
230			R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j,
231				 i->Src[j].Register.Dimension,
232				 i->Src[j].Register.Absolute);
233			return -EINVAL;
234		}
235	}
236	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
237		if (i->Dst[j].Register.Dimension) {
238			R600_ERR("unsupported dst (dimension)\n");
239			return -EINVAL;
240		}
241	}
242	return 0;
243}
244
245static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int gpr)
246{
247	int i, r;
248	struct r600_bc_alu alu;
249
250	for (i = 0; i < 8; i++) {
251		memset(&alu, 0, sizeof(struct r600_bc_alu));
252
253		if (i < 4)
254			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
255		else
256			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
257
258		if ((i > 1) && (i < 6)) {
259			alu.dst.sel = ctx->shader->input[gpr].gpr;
260			alu.dst.write = 1;
261		}
262
263		alu.dst.chan = i % 4;
264		alu.src[0].chan = (1 - (i % 2));
265		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + gpr;
266
267		alu.bank_swizzle_force = SQ_ALU_VEC_210;
268		if ((i % 4) == 3)
269			alu.last = 1;
270		r = r600_bc_add_alu(ctx->bc, &alu);
271		if (r)
272			return r;
273	}
274	return 0;
275}
276
277
278static int tgsi_declaration(struct r600_shader_ctx *ctx)
279{
280	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
281	struct r600_bc_vtx vtx;
282	unsigned i;
283	int r;
284
285	switch (d->Declaration.File) {
286	case TGSI_FILE_INPUT:
287		i = ctx->shader->ninput++;
288		ctx->shader->input[i].name = d->Semantic.Name;
289		ctx->shader->input[i].sid = d->Semantic.Index;
290		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
291		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
292		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
293			/* turn input into fetch */
294			memset(&vtx, 0, sizeof(struct r600_bc_vtx));
295			vtx.inst = 0;
296			vtx.fetch_type = 0;
297			vtx.buffer_id = i;
298			/* register containing the index into the buffer */
299			vtx.src_gpr = 0;
300			vtx.src_sel_x = 0;
301			vtx.mega_fetch_count = 0x1F;
302			vtx.dst_gpr = ctx->shader->input[i].gpr;
303			vtx.dst_sel_x = 0;
304			vtx.dst_sel_y = 1;
305			vtx.dst_sel_z = 2;
306			vtx.dst_sel_w = 3;
307			r = r600_bc_add_vtx(ctx->bc, &vtx);
308			if (r)
309				return r;
310		}
311		if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == 2) {
312			/* turn input into interpolate on EG */
313			evergreen_interp_alu(ctx, i);
314		}
315		break;
316	case TGSI_FILE_OUTPUT:
317		i = ctx->shader->noutput++;
318		ctx->shader->output[i].name = d->Semantic.Name;
319		ctx->shader->output[i].sid = d->Semantic.Index;
320		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
321		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
322		break;
323	case TGSI_FILE_CONSTANT:
324	case TGSI_FILE_TEMPORARY:
325	case TGSI_FILE_SAMPLER:
326	case TGSI_FILE_ADDRESS:
327		break;
328	default:
329		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
330		return -EINVAL;
331	}
332	return 0;
333}
334
335static int r600_get_temp(struct r600_shader_ctx *ctx)
336{
337	return ctx->temp_reg + ctx->max_driver_temp_used++;
338}
339
340int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
341{
342	struct tgsi_full_immediate *immediate;
343	struct r600_shader_ctx ctx;
344	struct r600_bc_output output[32];
345	unsigned output_done, noutput;
346	unsigned opcode;
347	int i, r = 0, pos0;
348
349	ctx.bc = &shader->bc;
350	ctx.shader = shader;
351	r = r600_bc_init(ctx.bc, shader->family);
352	if (r)
353		return r;
354	ctx.bc->use_mem_constant = shader->use_mem_constant;
355	ctx.tokens = tokens;
356	tgsi_scan_shader(tokens, &ctx.info);
357	tgsi_parse_init(&ctx.parse, tokens);
358	ctx.type = ctx.parse.FullHeader.Processor.Processor;
359	shader->processor_type = ctx.type;
360
361	/* register allocations */
362	/* Values [0,127] correspond to GPR[0..127].
363	 * Values [128,159] correspond to constant buffer bank 0
364	 * Values [160,191] correspond to constant buffer bank 1
365	 * Values [256,511] correspond to cfile constants c[0..255].
366	 * Other special values are shown in the list below.
367	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
368	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
369	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
370	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
371	 * 248	SQ_ALU_SRC_0: special constant 0.0.
372	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
373	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
374	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
375	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
376	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
377	 * 254	SQ_ALU_SRC_PV: previous vector result.
378	 * 255	SQ_ALU_SRC_PS: previous scalar result.
379	 */
380	for (i = 0; i < TGSI_FILE_COUNT; i++) {
381		ctx.file_offset[i] = 0;
382	}
383	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
384		ctx.file_offset[TGSI_FILE_INPUT] = 1;
385	}
386	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
387						ctx.info.file_count[TGSI_FILE_INPUT];
388	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
389						ctx.info.file_count[TGSI_FILE_OUTPUT];
390	if (ctx.shader->use_mem_constant)
391		ctx.file_offset[TGSI_FILE_CONSTANT] = 128;
392	else
393		ctx.file_offset[TGSI_FILE_CONSTANT] = 256;
394
395	ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
396	ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
397			ctx.info.file_count[TGSI_FILE_TEMPORARY];
398
399	ctx.nliterals = 0;
400	ctx.literals = NULL;
401
402	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
403		tgsi_parse_token(&ctx.parse);
404		switch (ctx.parse.FullToken.Token.Type) {
405		case TGSI_TOKEN_TYPE_IMMEDIATE:
406			immediate = &ctx.parse.FullToken.FullImmediate;
407			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
408			if(ctx.literals == NULL) {
409				r = -ENOMEM;
410				goto out_err;
411			}
412			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
413			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
414			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
415			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
416			ctx.nliterals++;
417			break;
418		case TGSI_TOKEN_TYPE_DECLARATION:
419			r = tgsi_declaration(&ctx);
420			if (r)
421				goto out_err;
422			break;
423		case TGSI_TOKEN_TYPE_INSTRUCTION:
424			r = tgsi_is_supported(&ctx);
425			if (r)
426				goto out_err;
427			ctx.max_driver_temp_used = 0;
428			/* reserve first tmp for everyone */
429			r600_get_temp(&ctx);
430			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
431			if (ctx.bc->chiprev == 2)
432				ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
433			else
434				ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
435			r = ctx.inst_info->process(&ctx);
436			if (r)
437				goto out_err;
438			r = r600_bc_add_literal(ctx.bc, ctx.value);
439			if (r)
440				goto out_err;
441			break;
442		default:
443			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
444			r = -EINVAL;
445			goto out_err;
446		}
447	}
448	/* export output */
449	noutput = shader->noutput;
450	for (i = 0, pos0 = 0; i < noutput; i++) {
451		memset(&output[i], 0, sizeof(struct r600_bc_output));
452		output[i].gpr = shader->output[i].gpr;
453		output[i].elem_size = 3;
454		output[i].swizzle_x = 0;
455		output[i].swizzle_y = 1;
456		output[i].swizzle_z = 2;
457		output[i].swizzle_w = 3;
458		output[i].barrier = 1;
459		output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
460		output[i].array_base = i - pos0;
461		output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
462		switch (ctx.type) {
463		case TGSI_PROCESSOR_VERTEX:
464			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
465				output[i].array_base = 60;
466				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
467				/* position doesn't count in array_base */
468				pos0++;
469			}
470			if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
471				output[i].array_base = 61;
472				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
473				/* position doesn't count in array_base */
474				pos0++;
475			}
476			break;
477		case TGSI_PROCESSOR_FRAGMENT:
478			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
479				output[i].array_base = shader->output[i].sid;
480				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
481			} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
482				output[i].array_base = 61;
483				output[i].swizzle_x = 2;
484				output[i].swizzle_y = output[i].swizzle_z = output[i].swizzle_w = 7;
485				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
486			} else {
487				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
488				r = -EINVAL;
489				goto out_err;
490			}
491			break;
492		default:
493			R600_ERR("unsupported processor type %d\n", ctx.type);
494			r = -EINVAL;
495			goto out_err;
496		}
497	}
498	/* add fake param output for vertex shader if no param is exported */
499	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
500		for (i = 0, pos0 = 0; i < noutput; i++) {
501			if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
502				pos0 = 1;
503				break;
504			}
505		}
506		if (!pos0) {
507			memset(&output[i], 0, sizeof(struct r600_bc_output));
508			output[i].gpr = 0;
509			output[i].elem_size = 3;
510			output[i].swizzle_x = 0;
511			output[i].swizzle_y = 1;
512			output[i].swizzle_z = 2;
513			output[i].swizzle_w = 3;
514			output[i].barrier = 1;
515			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
516			output[i].array_base = 0;
517			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
518			noutput++;
519		}
520	}
521	/* add fake pixel export */
522	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
523		memset(&output[0], 0, sizeof(struct r600_bc_output));
524		output[0].gpr = 0;
525		output[0].elem_size = 3;
526		output[0].swizzle_x = 7;
527		output[0].swizzle_y = 7;
528		output[0].swizzle_z = 7;
529		output[0].swizzle_w = 7;
530		output[0].barrier = 1;
531		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
532		output[0].array_base = 0;
533		output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
534		noutput++;
535	}
536	/* set export done on last export of each type */
537	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
538		if (i == (noutput - 1)) {
539			output[i].end_of_program = 1;
540		}
541		if (!(output_done & (1 << output[i].type))) {
542			output_done |= (1 << output[i].type);
543			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
544		}
545	}
546	/* add output to bytecode */
547	for (i = 0; i < noutput; i++) {
548		r = r600_bc_add_output(ctx.bc, &output[i]);
549		if (r)
550			goto out_err;
551	}
552	free(ctx.literals);
553	tgsi_parse_free(&ctx.parse);
554	return 0;
555out_err:
556	free(ctx.literals);
557	tgsi_parse_free(&ctx.parse);
558	return r;
559}
560
561static int tgsi_unsupported(struct r600_shader_ctx *ctx)
562{
563	R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
564	return -EINVAL;
565}
566
/* Table handler that emits nothing and always succeeds. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;
	return 0;
}
571
572static int tgsi_src(struct r600_shader_ctx *ctx,
573			const struct tgsi_full_src_register *tgsi_src,
574			struct r600_bc_alu_src *r600_src)
575{
576	int index;
577	memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
578	r600_src->sel = tgsi_src->Register.Index;
579	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
580		r600_src->sel = 0;
581		index = tgsi_src->Register.Index;
582		ctx->value[0] = ctx->literals[index * 4 + 0];
583		ctx->value[1] = ctx->literals[index * 4 + 1];
584		ctx->value[2] = ctx->literals[index * 4 + 2];
585		ctx->value[3] = ctx->literals[index * 4 + 3];
586	}
587	if (tgsi_src->Register.Indirect)
588		r600_src->rel = V_SQ_REL_RELATIVE;
589	r600_src->neg = tgsi_src->Register.Negate;
590	r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
591	return 0;
592}
593
594static int tgsi_dst(struct r600_shader_ctx *ctx,
595			const struct tgsi_full_dst_register *tgsi_dst,
596			unsigned swizzle,
597			struct r600_bc_alu_dst *r600_dst)
598{
599	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
600
601	r600_dst->sel = tgsi_dst->Register.Index;
602	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
603	r600_dst->chan = swizzle;
604	r600_dst->write = 1;
605	if (tgsi_dst->Register.Indirect)
606		r600_dst->rel = V_SQ_REL_RELATIVE;
607	if (inst->Instruction.Saturate) {
608		r600_dst->clamp = 1;
609	}
610	return 0;
611}
612
613static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
614{
615	switch (swizzle) {
616	case 0:
617		return tgsi_src->Register.SwizzleX;
618	case 1:
619		return tgsi_src->Register.SwizzleY;
620	case 2:
621		return tgsi_src->Register.SwizzleZ;
622	case 3:
623		return tgsi_src->Register.SwizzleW;
624	default:
625		return 0;
626	}
627}
628
629static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
630{
631	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
632	struct r600_bc_alu alu;
633	int i, j, k, nconst, r;
634
635	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
636		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
637			nconst++;
638		}
639		r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
640		if (r) {
641			return r;
642		}
643	}
644	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
645		if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) {
646			int treg = r600_get_temp(ctx);
647			for (k = 0; k < 4; k++) {
648				memset(&alu, 0, sizeof(struct r600_bc_alu));
649				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
650				alu.src[0].sel = r600_src[j].sel;
651				alu.src[0].chan = k;
652				alu.dst.sel = treg;
653				alu.dst.chan = k;
654				alu.dst.write = 1;
655				if (k == 3)
656					alu.last = 1;
657				r = r600_bc_add_alu(ctx->bc, &alu);
658				if (r)
659					return r;
660			}
661			r600_src[j].sel = treg;
662			j--;
663		}
664	}
665	return 0;
666}
667
668/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
669static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
670{
671	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
672	struct r600_bc_alu alu;
673	int i, j, k, nliteral, r;
674
675	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
676		if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
677			nliteral++;
678		}
679	}
680	for (i = 0, j = 0; i < inst->Instruction.NumSrcRegs; i++) {
681		if (inst->Src[j].Register.File == TGSI_FILE_IMMEDIATE) {
682			int treg = r600_get_temp(ctx);
683			for (k = 0; k < 4; k++) {
684				memset(&alu, 0, sizeof(struct r600_bc_alu));
685				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
686				alu.src[0].sel = r600_src[j].sel;
687				alu.src[0].chan = k;
688				alu.dst.sel = treg;
689				alu.dst.chan = k;
690				alu.dst.write = 1;
691				if (k == 3)
692					alu.last = 1;
693				r = r600_bc_add_alu(ctx->bc, &alu);
694				if (r)
695					return r;
696			}
697			r = r600_bc_add_literal(ctx->bc, ctx->value);
698			if (r)
699				return r;
700			r600_src[j].sel = treg;
701			j++;
702		}
703	}
704	return 0;
705}
706
707static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
708{
709	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
710	struct r600_bc_alu_src r600_src[3];
711	struct r600_bc_alu alu;
712	int i, j, r;
713	int lasti = 0;
714
715	for (i = 0; i < 4; i++) {
716		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
717			lasti = i;
718		}
719	}
720
721	r = tgsi_split_constant(ctx, r600_src);
722	if (r)
723		return r;
724	for (i = 0; i < lasti + 1; i++) {
725		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
726			continue;
727
728		memset(&alu, 0, sizeof(struct r600_bc_alu));
729		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
730		if (r)
731			return r;
732
733		alu.inst = ctx->inst_info->r600_opcode;
734		if (!swap) {
735			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
736				alu.src[j] = r600_src[j];
737				alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
738			}
739		} else {
740			alu.src[0] = r600_src[1];
741			alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
742
743			alu.src[1] = r600_src[0];
744			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
745		}
746		/* handle some special cases */
747		switch (ctx->inst_info->tgsi_opcode) {
748		case TGSI_OPCODE_SUB:
749			alu.src[1].neg = 1;
750			break;
751		case TGSI_OPCODE_ABS:
752			alu.src[0].abs = 1;
753			break;
754		default:
755			break;
756		}
757		if (i == lasti) {
758			alu.last = 1;
759		}
760		r = r600_bc_add_alu(ctx->bc, &alu);
761		if (r)
762			return r;
763	}
764	return 0;
765}
766
/* Table handler: per-channel translation with sources in TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
771
/* Table handler: per-channel translation with the first two sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
776
777/*
778 * r600 - trunc to -PI..PI range
779 * r700 - normalize by dividing by 2PI
780 * see fdo bug 27901
781 */
782static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
783			   struct r600_bc_alu_src r600_src[3])
784{
785	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
786	int r;
787	uint32_t lit_vals[4];
788	struct r600_bc_alu alu;
789
790	memset(lit_vals, 0, 4*4);
791	r = tgsi_split_constant(ctx, r600_src);
792	if (r)
793		return r;
794
795	r = tgsi_split_literal_constant(ctx, r600_src);
796	if (r)
797		return r;
798
799	lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
800	lit_vals[1] = fui(0.5f);
801
802	memset(&alu, 0, sizeof(struct r600_bc_alu));
803	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
804	alu.is_op3 = 1;
805
806	alu.dst.chan = 0;
807	alu.dst.sel = ctx->temp_reg;
808	alu.dst.write = 1;
809
810	alu.src[0] = r600_src[0];
811	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
812
813	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
814	alu.src[1].chan = 0;
815	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
816	alu.src[2].chan = 1;
817	alu.last = 1;
818	r = r600_bc_add_alu(ctx->bc, &alu);
819	if (r)
820		return r;
821	r = r600_bc_add_literal(ctx->bc, lit_vals);
822	if (r)
823		return r;
824
825	memset(&alu, 0, sizeof(struct r600_bc_alu));
826	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
827
828	alu.dst.chan = 0;
829	alu.dst.sel = ctx->temp_reg;
830	alu.dst.write = 1;
831
832	alu.src[0].sel = ctx->temp_reg;
833	alu.src[0].chan = 0;
834	alu.last = 1;
835	r = r600_bc_add_alu(ctx->bc, &alu);
836	if (r)
837		return r;
838
839	if (ctx->bc->chiprev == 0) {
840		lit_vals[0] = fui(3.1415926535897f * 2.0f);
841		lit_vals[1] = fui(-3.1415926535897f);
842	} else {
843		lit_vals[0] = fui(1.0f);
844		lit_vals[1] = fui(-0.5f);
845	}
846
847	memset(&alu, 0, sizeof(struct r600_bc_alu));
848	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
849	alu.is_op3 = 1;
850
851	alu.dst.chan = 0;
852	alu.dst.sel = ctx->temp_reg;
853	alu.dst.write = 1;
854
855	alu.src[0].sel = ctx->temp_reg;
856	alu.src[0].chan = 0;
857
858	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
859	alu.src[1].chan = 0;
860	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
861	alu.src[2].chan = 1;
862	alu.last = 1;
863	r = r600_bc_add_alu(ctx->bc, &alu);
864	if (r)
865		return r;
866	r = r600_bc_add_literal(ctx->bc, lit_vals);
867	if (r)
868		return r;
869	return 0;
870}
871
872static int tgsi_trig(struct r600_shader_ctx *ctx)
873{
874	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
875	struct r600_bc_alu_src r600_src[3];
876	struct r600_bc_alu alu;
877	int i, r;
878	int lasti = 0;
879
880	r = tgsi_setup_trig(ctx, r600_src);
881	if (r)
882		return r;
883
884	memset(&alu, 0, sizeof(struct r600_bc_alu));
885	alu.inst = ctx->inst_info->r600_opcode;
886	alu.dst.chan = 0;
887	alu.dst.sel = ctx->temp_reg;
888	alu.dst.write = 1;
889
890	alu.src[0].sel = ctx->temp_reg;
891	alu.src[0].chan = 0;
892	alu.last = 1;
893	r = r600_bc_add_alu(ctx->bc, &alu);
894	if (r)
895		return r;
896
897	/* replicate result */
898	for (i = 0; i < 4; i++) {
899		if (inst->Dst[0].Register.WriteMask & (1 << i))
900			lasti = i;
901	}
902	for (i = 0; i < lasti + 1; i++) {
903		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
904			continue;
905
906		memset(&alu, 0, sizeof(struct r600_bc_alu));
907		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
908
909		alu.src[0].sel = ctx->temp_reg;
910		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
911		if (r)
912			return r;
913		if (i == lasti)
914			alu.last = 1;
915		r = r600_bc_add_alu(ctx->bc, &alu);
916		if (r)
917			return r;
918	}
919	return 0;
920}
921
922static int tgsi_scs(struct r600_shader_ctx *ctx)
923{
924	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
925	struct r600_bc_alu_src r600_src[3];
926	struct r600_bc_alu alu;
927	int r;
928
929	r = tgsi_setup_trig(ctx, r600_src);
930	if (r)
931		return r;
932
933
934	/* dst.x = COS */
935	memset(&alu, 0, sizeof(struct r600_bc_alu));
936	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
937	r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
938	if (r)
939		return r;
940
941	alu.src[0].sel = ctx->temp_reg;
942	alu.src[0].chan = 0;
943	alu.last = 1;
944	r = r600_bc_add_alu(ctx->bc, &alu);
945	if (r)
946		return r;
947
948	/* dst.y = SIN */
949	memset(&alu, 0, sizeof(struct r600_bc_alu));
950	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
951	r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
952	if (r)
953		return r;
954
955	alu.src[0].sel = ctx->temp_reg;
956	alu.src[0].chan = 0;
957	alu.last = 1;
958	r = r600_bc_add_alu(ctx->bc, &alu);
959	if (r)
960		return r;
961	return 0;
962}
963
964static int tgsi_kill(struct r600_shader_ctx *ctx)
965{
966	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
967	struct r600_bc_alu alu;
968	int i, r;
969
970	for (i = 0; i < 4; i++) {
971		memset(&alu, 0, sizeof(struct r600_bc_alu));
972		alu.inst = ctx->inst_info->r600_opcode;
973
974		alu.dst.chan = i;
975
976		alu.src[0].sel = V_SQ_ALU_SRC_0;
977
978		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
979			alu.src[1].sel = V_SQ_ALU_SRC_1;
980			alu.src[1].neg = 1;
981		} else {
982			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
983			if (r)
984				return r;
985			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
986		}
987		if (i == 3) {
988			alu.last = 1;
989		}
990		r = r600_bc_add_alu(ctx->bc, &alu);
991		if (r)
992			return r;
993	}
994	r = r600_bc_add_literal(ctx->bc, ctx->value);
995	if (r)
996		return r;
997
998	/* kill must be last in ALU */
999	ctx->bc->force_add_cf = 1;
1000	ctx->shader->uses_kill = TRUE;
1001	return 0;
1002}
1003
1004static int tgsi_lit(struct r600_shader_ctx *ctx)
1005{
1006	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1007	struct r600_bc_alu alu;
1008	struct r600_bc_alu_src r600_src[3];
1009	int r;
1010
1011	r = tgsi_split_constant(ctx, r600_src);
1012	if (r)
1013		return r;
1014	r = tgsi_split_literal_constant(ctx, r600_src);
1015	if (r)
1016		return r;
1017
1018	/* dst.x, <- 1.0  */
1019	memset(&alu, 0, sizeof(struct r600_bc_alu));
1020	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1021	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
1022	alu.src[0].chan = 0;
1023	r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1024	if (r)
1025		return r;
1026	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1027	r = r600_bc_add_alu(ctx->bc, &alu);
1028	if (r)
1029		return r;
1030
1031	/* dst.y = max(src.x, 0.0) */
1032	memset(&alu, 0, sizeof(struct r600_bc_alu));
1033	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1034	alu.src[0] = r600_src[0];
1035	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1036	alu.src[1].chan = 0;
1037	r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1038	if (r)
1039		return r;
1040	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1041	r = r600_bc_add_alu(ctx->bc, &alu);
1042	if (r)
1043		return r;
1044
1045	/* dst.w, <- 1.0  */
1046	memset(&alu, 0, sizeof(struct r600_bc_alu));
1047	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1048	alu.src[0].sel  = V_SQ_ALU_SRC_1;
1049	alu.src[0].chan = 0;
1050	r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1051	if (r)
1052		return r;
1053	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1054	alu.last = 1;
1055	r = r600_bc_add_alu(ctx->bc, &alu);
1056	if (r)
1057		return r;
1058
1059	r = r600_bc_add_literal(ctx->bc, ctx->value);
1060	if (r)
1061		return r;
1062
1063	if (inst->Dst[0].Register.WriteMask & (1 << 2))
1064	{
1065		int chan;
1066		int sel;
1067
1068		/* dst.z = log(src.y) */
1069		memset(&alu, 0, sizeof(struct r600_bc_alu));
1070		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1071		alu.src[0] = r600_src[0];
1072		alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1073		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1074		if (r)
1075			return r;
1076		alu.last = 1;
1077		r = r600_bc_add_alu(ctx->bc, &alu);
1078		if (r)
1079			return r;
1080
1081		r = r600_bc_add_literal(ctx->bc, ctx->value);
1082		if (r)
1083			return r;
1084
1085		chan = alu.dst.chan;
1086		sel = alu.dst.sel;
1087
1088		/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1089		memset(&alu, 0, sizeof(struct r600_bc_alu));
1090		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1091		alu.src[0] = r600_src[0];
1092		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1093		alu.src[1].sel  = sel;
1094		alu.src[1].chan = chan;
1095
1096		alu.src[2] = r600_src[0];
1097		alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
1098		alu.dst.sel = ctx->temp_reg;
1099		alu.dst.chan = 0;
1100		alu.dst.write = 1;
1101		alu.is_op3 = 1;
1102		alu.last = 1;
1103		r = r600_bc_add_alu(ctx->bc, &alu);
1104		if (r)
1105			return r;
1106
1107		r = r600_bc_add_literal(ctx->bc, ctx->value);
1108		if (r)
1109			return r;
1110		/* dst.z = exp(tmp.x) */
1111		memset(&alu, 0, sizeof(struct r600_bc_alu));
1112		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1113		alu.src[0].sel = ctx->temp_reg;
1114		alu.src[0].chan = 0;
1115		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1116		if (r)
1117			return r;
1118		alu.last = 1;
1119		r = r600_bc_add_alu(ctx->bc, &alu);
1120		if (r)
1121			return r;
1122	}
1123	return 0;
1124}
1125
1126static int tgsi_rsq(struct r600_shader_ctx *ctx)
1127{
1128	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1129	struct r600_bc_alu alu;
1130	int i, r;
1131
1132	memset(&alu, 0, sizeof(struct r600_bc_alu));
1133
1134	/* FIXME:
1135	 * For state trackers other than OpenGL, we'll want to use
1136	 * _RECIPSQRT_IEEE instead.
1137	 */
1138	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1139
1140	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1141		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1142		if (r)
1143			return r;
1144		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1145		alu.src[i].abs = 1;
1146	}
1147	alu.dst.sel = ctx->temp_reg;
1148	alu.dst.write = 1;
1149	alu.last = 1;
1150	r = r600_bc_add_alu(ctx->bc, &alu);
1151	if (r)
1152		return r;
1153	r = r600_bc_add_literal(ctx->bc, ctx->value);
1154	if (r)
1155		return r;
1156	/* replicate result */
1157	return tgsi_helper_tempx_replicate(ctx);
1158}
1159
1160static int tgsi_trans(struct r600_shader_ctx *ctx)
1161{
1162	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1163	struct r600_bc_alu alu;
1164	int i, j, r;
1165
1166	for (i = 0; i < 4; i++) {
1167		memset(&alu, 0, sizeof(struct r600_bc_alu));
1168		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
1169			alu.inst = ctx->inst_info->r600_opcode;
1170			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1171				r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]);
1172				if (r)
1173					return r;
1174				alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1175			}
1176			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1177			if (r)
1178				return r;
1179			alu.last = 1;
1180			r = r600_bc_add_alu(ctx->bc, &alu);
1181			if (r)
1182				return r;
1183		}
1184	}
1185	return 0;
1186}
1187
1188static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1189{
1190	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1191	struct r600_bc_alu alu;
1192	int i, r;
1193
1194	for (i = 0; i < 4; i++) {
1195		memset(&alu, 0, sizeof(struct r600_bc_alu));
1196		alu.src[0].sel = ctx->temp_reg;
1197		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1198		alu.dst.chan = i;
1199		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1200		if (r)
1201			return r;
1202		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1203		if (i == 3)
1204			alu.last = 1;
1205		r = r600_bc_add_alu(ctx->bc, &alu);
1206		if (r)
1207			return r;
1208	}
1209	return 0;
1210}
1211
1212static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1213{
1214	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1215	struct r600_bc_alu alu;
1216	int i, r;
1217
1218	memset(&alu, 0, sizeof(struct r600_bc_alu));
1219	alu.inst = ctx->inst_info->r600_opcode;
1220	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1221		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1222		if (r)
1223			return r;
1224		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1225	}
1226	alu.dst.sel = ctx->temp_reg;
1227	alu.dst.write = 1;
1228	alu.last = 1;
1229	r = r600_bc_add_alu(ctx->bc, &alu);
1230	if (r)
1231		return r;
1232	r = r600_bc_add_literal(ctx->bc, ctx->value);
1233	if (r)
1234		return r;
1235	/* replicate result */
1236	return tgsi_helper_tempx_replicate(ctx);
1237}
1238
1239static int tgsi_pow(struct r600_shader_ctx *ctx)
1240{
1241	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1242	struct r600_bc_alu alu;
1243	int r;
1244
1245	/* LOG2(a) */
1246	memset(&alu, 0, sizeof(struct r600_bc_alu));
1247	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1248	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1249	if (r)
1250		return r;
1251	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1252	alu.dst.sel = ctx->temp_reg;
1253	alu.dst.write = 1;
1254	alu.last = 1;
1255	r = r600_bc_add_alu(ctx->bc, &alu);
1256	if (r)
1257		return r;
1258	r = r600_bc_add_literal(ctx->bc,ctx->value);
1259	if (r)
1260		return r;
1261	/* b * LOG2(a) */
1262	memset(&alu, 0, sizeof(struct r600_bc_alu));
1263	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE);
1264	r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1265	if (r)
1266		return r;
1267	alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1268	alu.src[1].sel = ctx->temp_reg;
1269	alu.dst.sel = ctx->temp_reg;
1270	alu.dst.write = 1;
1271	alu.last = 1;
1272	r = r600_bc_add_alu(ctx->bc, &alu);
1273	if (r)
1274		return r;
1275	r = r600_bc_add_literal(ctx->bc,ctx->value);
1276	if (r)
1277		return r;
1278	/* POW(a,b) = EXP2(b * LOG2(a))*/
1279	memset(&alu, 0, sizeof(struct r600_bc_alu));
1280	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1281	alu.src[0].sel = ctx->temp_reg;
1282	alu.dst.sel = ctx->temp_reg;
1283	alu.dst.write = 1;
1284	alu.last = 1;
1285	r = r600_bc_add_alu(ctx->bc, &alu);
1286	if (r)
1287		return r;
1288	r = r600_bc_add_literal(ctx->bc,ctx->value);
1289	if (r)
1290		return r;
1291	return tgsi_helper_tempx_replicate(ctx);
1292}
1293
1294static int tgsi_ssg(struct r600_shader_ctx *ctx)
1295{
1296	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1297	struct r600_bc_alu alu;
1298	struct r600_bc_alu_src r600_src[3];
1299	int i, r;
1300
1301	r = tgsi_split_constant(ctx, r600_src);
1302	if (r)
1303		return r;
1304
1305	/* tmp = (src > 0 ? 1 : src) */
1306	for (i = 0; i < 4; i++) {
1307		memset(&alu, 0, sizeof(struct r600_bc_alu));
1308		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1309		alu.is_op3 = 1;
1310
1311		alu.dst.sel = ctx->temp_reg;
1312		alu.dst.chan = i;
1313
1314		alu.src[0] = r600_src[0];
1315		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1316
1317		alu.src[1].sel = V_SQ_ALU_SRC_1;
1318
1319		alu.src[2] = r600_src[0];
1320		alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1321		if (i == 3)
1322			alu.last = 1;
1323		r = r600_bc_add_alu(ctx->bc, &alu);
1324		if (r)
1325			return r;
1326	}
1327	r = r600_bc_add_literal(ctx->bc, ctx->value);
1328	if (r)
1329		return r;
1330
1331	/* dst = (-tmp > 0 ? -1 : tmp) */
1332	for (i = 0; i < 4; i++) {
1333		memset(&alu, 0, sizeof(struct r600_bc_alu));
1334		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1335		alu.is_op3 = 1;
1336		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1337		if (r)
1338			return r;
1339
1340		alu.src[0].sel = ctx->temp_reg;
1341		alu.src[0].chan = i;
1342		alu.src[0].neg = 1;
1343
1344		alu.src[1].sel = V_SQ_ALU_SRC_1;
1345		alu.src[1].neg = 1;
1346
1347		alu.src[2].sel = ctx->temp_reg;
1348		alu.src[2].chan = i;
1349
1350		if (i == 3)
1351			alu.last = 1;
1352		r = r600_bc_add_alu(ctx->bc, &alu);
1353		if (r)
1354			return r;
1355	}
1356	return 0;
1357}
1358
1359static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1360{
1361	struct r600_bc_alu alu;
1362	int i, r;
1363
1364	r = r600_bc_add_literal(ctx->bc, ctx->value);
1365	if (r)
1366		return r;
1367	for (i = 0; i < 4; i++) {
1368		memset(&alu, 0, sizeof(struct r600_bc_alu));
1369		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1370			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1371			alu.dst.chan = i;
1372		} else {
1373			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1374			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1375			if (r)
1376				return r;
1377			alu.src[0].sel = ctx->temp_reg;
1378			alu.src[0].chan = i;
1379		}
1380		if (i == 3) {
1381			alu.last = 1;
1382		}
1383		r = r600_bc_add_alu(ctx->bc, &alu);
1384		if (r)
1385			return r;
1386	}
1387	return 0;
1388}
1389
1390static int tgsi_op3(struct r600_shader_ctx *ctx)
1391{
1392	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1393	struct r600_bc_alu_src r600_src[3];
1394	struct r600_bc_alu alu;
1395	int i, j, r;
1396
1397	r = tgsi_split_constant(ctx, r600_src);
1398	if (r)
1399		return r;
1400	/* do it in 2 step as op3 doesn't support writemask */
1401	for (i = 0; i < 4; i++) {
1402		memset(&alu, 0, sizeof(struct r600_bc_alu));
1403		alu.inst = ctx->inst_info->r600_opcode;
1404		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1405			alu.src[j] = r600_src[j];
1406			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1407		}
1408		alu.dst.sel = ctx->temp_reg;
1409		alu.dst.chan = i;
1410		alu.dst.write = 1;
1411		alu.is_op3 = 1;
1412		if (i == 3) {
1413			alu.last = 1;
1414		}
1415		r = r600_bc_add_alu(ctx->bc, &alu);
1416		if (r)
1417			return r;
1418	}
1419	return tgsi_helper_copy(ctx, inst);
1420}
1421
1422static int tgsi_dp(struct r600_shader_ctx *ctx)
1423{
1424	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1425	struct r600_bc_alu_src r600_src[3];
1426	struct r600_bc_alu alu;
1427	int i, j, r;
1428
1429	r = tgsi_split_constant(ctx, r600_src);
1430	if (r)
1431		return r;
1432	for (i = 0; i < 4; i++) {
1433		memset(&alu, 0, sizeof(struct r600_bc_alu));
1434		alu.inst = ctx->inst_info->r600_opcode;
1435		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1436			alu.src[j] = r600_src[j];
1437			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1438		}
1439		alu.dst.sel = ctx->temp_reg;
1440		alu.dst.chan = i;
1441		alu.dst.write = 1;
1442		/* handle some special cases */
1443		switch (ctx->inst_info->tgsi_opcode) {
1444		case TGSI_OPCODE_DP2:
1445			if (i > 1) {
1446				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1447				alu.src[0].chan = alu.src[1].chan = 0;
1448			}
1449			break;
1450		case TGSI_OPCODE_DP3:
1451			if (i > 2) {
1452				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1453				alu.src[0].chan = alu.src[1].chan = 0;
1454			}
1455			break;
1456		case TGSI_OPCODE_DPH:
1457			if (i == 3) {
1458				alu.src[0].sel = V_SQ_ALU_SRC_1;
1459				alu.src[0].chan = 0;
1460				alu.src[0].neg = 0;
1461			}
1462			break;
1463		default:
1464			break;
1465		}
1466		if (i == 3) {
1467			alu.last = 1;
1468		}
1469		r = r600_bc_add_alu(ctx->bc, &alu);
1470		if (r)
1471			return r;
1472	}
1473	return tgsi_helper_copy(ctx, inst);
1474}
1475
1476static int tgsi_tex(struct r600_shader_ctx *ctx)
1477{
1478	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1479	struct r600_bc_tex tex;
1480	struct r600_bc_alu alu;
1481	unsigned src_gpr;
1482	int r, i;
1483	int opcode;
1484	boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY;
1485	uint32_t lit_vals[4];
1486
1487	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1488
1489	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1490		/* Add perspective divide */
1491		memset(&alu, 0, sizeof(struct r600_bc_alu));
1492		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1493		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1494		if (r)
1495			return r;
1496
1497		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1498		alu.dst.sel = ctx->temp_reg;
1499		alu.dst.chan = 3;
1500		alu.last = 1;
1501		alu.dst.write = 1;
1502		r = r600_bc_add_alu(ctx->bc, &alu);
1503		if (r)
1504			return r;
1505
1506		for (i = 0; i < 3; i++) {
1507			memset(&alu, 0, sizeof(struct r600_bc_alu));
1508			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1509			alu.src[0].sel = ctx->temp_reg;
1510			alu.src[0].chan = 3;
1511			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1512			if (r)
1513				return r;
1514			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1515			alu.dst.sel = ctx->temp_reg;
1516			alu.dst.chan = i;
1517			alu.dst.write = 1;
1518			r = r600_bc_add_alu(ctx->bc, &alu);
1519			if (r)
1520				return r;
1521		}
1522		memset(&alu, 0, sizeof(struct r600_bc_alu));
1523		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1524		alu.src[0].sel = V_SQ_ALU_SRC_1;
1525		alu.src[0].chan = 0;
1526		alu.dst.sel = ctx->temp_reg;
1527		alu.dst.chan = 3;
1528		alu.last = 1;
1529		alu.dst.write = 1;
1530		r = r600_bc_add_alu(ctx->bc, &alu);
1531		if (r)
1532			return r;
1533		src_not_temp = FALSE;
1534		src_gpr = ctx->temp_reg;
1535	}
1536
1537	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1538		int src_chan, src2_chan;
1539
1540		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1541		for (i = 0; i < 4; i++) {
1542			memset(&alu, 0, sizeof(struct r600_bc_alu));
1543			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1544			switch (i) {
1545			case 0:
1546				src_chan = 2;
1547				src2_chan = 1;
1548				break;
1549			case 1:
1550				src_chan = 2;
1551				src2_chan = 0;
1552				break;
1553			case 2:
1554				src_chan = 0;
1555				src2_chan = 2;
1556				break;
1557			case 3:
1558				src_chan = 1;
1559				src2_chan = 2;
1560				break;
1561			}
1562			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1563			if (r)
1564				return r;
1565			alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
1566			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1567			if (r)
1568				return r;
1569			alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
1570			alu.dst.sel = ctx->temp_reg;
1571			alu.dst.chan = i;
1572			if (i == 3)
1573				alu.last = 1;
1574			alu.dst.write = 1;
1575			r = r600_bc_add_alu(ctx->bc, &alu);
1576			if (r)
1577				return r;
1578		}
1579
1580		/* tmp1.z = RCP_e(|tmp1.z|) */
1581		memset(&alu, 0, sizeof(struct r600_bc_alu));
1582		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1583		alu.src[0].sel = ctx->temp_reg;
1584		alu.src[0].chan = 2;
1585		alu.src[0].abs = 1;
1586		alu.dst.sel = ctx->temp_reg;
1587		alu.dst.chan = 2;
1588		alu.dst.write = 1;
1589		alu.last = 1;
1590		r = r600_bc_add_alu(ctx->bc, &alu);
1591		if (r)
1592			return r;
1593
1594		/* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
1595		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
1596		 * muladd has no writemask, have to use another temp
1597		 */
1598		memset(&alu, 0, sizeof(struct r600_bc_alu));
1599		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1600		alu.is_op3 = 1;
1601
1602		alu.src[0].sel = ctx->temp_reg;
1603		alu.src[0].chan = 0;
1604		alu.src[1].sel = ctx->temp_reg;
1605		alu.src[1].chan = 2;
1606
1607		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1608		alu.src[2].chan = 0;
1609
1610		alu.dst.sel = ctx->temp_reg;
1611		alu.dst.chan = 0;
1612		alu.dst.write = 1;
1613
1614		r = r600_bc_add_alu(ctx->bc, &alu);
1615		if (r)
1616			return r;
1617
1618		memset(&alu, 0, sizeof(struct r600_bc_alu));
1619		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1620		alu.is_op3 = 1;
1621
1622		alu.src[0].sel = ctx->temp_reg;
1623		alu.src[0].chan = 1;
1624		alu.src[1].sel = ctx->temp_reg;
1625		alu.src[1].chan = 2;
1626
1627		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1628		alu.src[2].chan = 0;
1629
1630		alu.dst.sel = ctx->temp_reg;
1631		alu.dst.chan = 1;
1632		alu.dst.write = 1;
1633
1634		alu.last = 1;
1635		r = r600_bc_add_alu(ctx->bc, &alu);
1636		if (r)
1637			return r;
1638
1639		lit_vals[0] = fui(1.5f);
1640
1641		r = r600_bc_add_literal(ctx->bc, lit_vals);
1642		if (r)
1643			return r;
1644		src_not_temp = FALSE;
1645		src_gpr = ctx->temp_reg;
1646	}
1647
1648	if (src_not_temp) {
1649		for (i = 0; i < 4; i++) {
1650			memset(&alu, 0, sizeof(struct r600_bc_alu));
1651			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1652			alu.src[0].sel = src_gpr;
1653			alu.src[0].chan = i;
1654			alu.dst.sel = ctx->temp_reg;
1655			alu.dst.chan = i;
1656			if (i == 3)
1657				alu.last = 1;
1658			alu.dst.write = 1;
1659			r = r600_bc_add_alu(ctx->bc, &alu);
1660			if (r)
1661				return r;
1662		}
1663		src_gpr = ctx->temp_reg;
1664	}
1665
1666	opcode = ctx->inst_info->r600_opcode;
1667	if (opcode == SQ_TEX_INST_SAMPLE &&
1668	    (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
1669		opcode = SQ_TEX_INST_SAMPLE_C;
1670
1671	memset(&tex, 0, sizeof(struct r600_bc_tex));
1672	tex.inst = opcode;
1673	tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1674	tex.sampler_id = tex.resource_id;
1675	tex.src_gpr = src_gpr;
1676	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1677	tex.dst_sel_x = 0;
1678	tex.dst_sel_y = 1;
1679	tex.dst_sel_z = 2;
1680	tex.dst_sel_w = 3;
1681	tex.src_sel_x = 0;
1682	tex.src_sel_y = 1;
1683	tex.src_sel_z = 2;
1684	tex.src_sel_w = 3;
1685
1686	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1687		tex.src_sel_x = 1;
1688		tex.src_sel_y = 0;
1689		tex.src_sel_z = 3;
1690		tex.src_sel_w = 1;
1691	}
1692
1693	if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1694		tex.coord_type_x = 1;
1695		tex.coord_type_y = 1;
1696		tex.coord_type_z = 1;
1697		tex.coord_type_w = 1;
1698	}
1699
1700	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
1701		tex.src_sel_w = 2;
1702
1703	r = r600_bc_add_tex(ctx->bc, &tex);
1704	if (r)
1705		return r;
1706
1707	/* add shadow ambient support  - gallium doesn't do it yet */
1708	return 0;
1709
1710}
1711
1712static int tgsi_lrp(struct r600_shader_ctx *ctx)
1713{
1714	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1715	struct r600_bc_alu_src r600_src[3];
1716	struct r600_bc_alu alu;
1717	unsigned i;
1718	int r;
1719
1720	r = tgsi_split_constant(ctx, r600_src);
1721	if (r)
1722		return r;
1723	/* 1 - src0 */
1724	for (i = 0; i < 4; i++) {
1725		memset(&alu, 0, sizeof(struct r600_bc_alu));
1726		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1727		alu.src[0].sel = V_SQ_ALU_SRC_1;
1728		alu.src[0].chan = 0;
1729		alu.src[1] = r600_src[0];
1730		alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1731		alu.src[1].neg = 1;
1732		alu.dst.sel = ctx->temp_reg;
1733		alu.dst.chan = i;
1734		if (i == 3) {
1735			alu.last = 1;
1736		}
1737		alu.dst.write = 1;
1738		r = r600_bc_add_alu(ctx->bc, &alu);
1739		if (r)
1740			return r;
1741	}
1742	r = r600_bc_add_literal(ctx->bc, ctx->value);
1743	if (r)
1744		return r;
1745
1746	/* (1 - src0) * src2 */
1747	for (i = 0; i < 4; i++) {
1748		memset(&alu, 0, sizeof(struct r600_bc_alu));
1749		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1750		alu.src[0].sel = ctx->temp_reg;
1751		alu.src[0].chan = i;
1752		alu.src[1] = r600_src[2];
1753		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1754		alu.dst.sel = ctx->temp_reg;
1755		alu.dst.chan = i;
1756		if (i == 3) {
1757			alu.last = 1;
1758		}
1759		alu.dst.write = 1;
1760		r = r600_bc_add_alu(ctx->bc, &alu);
1761		if (r)
1762			return r;
1763	}
1764	r = r600_bc_add_literal(ctx->bc, ctx->value);
1765	if (r)
1766		return r;
1767
1768	/* src0 * src1 + (1 - src0) * src2 */
1769	for (i = 0; i < 4; i++) {
1770		memset(&alu, 0, sizeof(struct r600_bc_alu));
1771		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1772		alu.is_op3 = 1;
1773		alu.src[0] = r600_src[0];
1774		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1775		alu.src[1] = r600_src[1];
1776		alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
1777		alu.src[2].sel = ctx->temp_reg;
1778		alu.src[2].chan = i;
1779		alu.dst.sel = ctx->temp_reg;
1780		alu.dst.chan = i;
1781		if (i == 3) {
1782			alu.last = 1;
1783		}
1784		r = r600_bc_add_alu(ctx->bc, &alu);
1785		if (r)
1786			return r;
1787	}
1788	return tgsi_helper_copy(ctx, inst);
1789}
1790
1791static int tgsi_cmp(struct r600_shader_ctx *ctx)
1792{
1793	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1794	struct r600_bc_alu_src r600_src[3];
1795	struct r600_bc_alu alu;
1796	int use_temp = 0;
1797	int i, r;
1798
1799	r = tgsi_split_constant(ctx, r600_src);
1800	if (r)
1801		return r;
1802
1803	if (inst->Dst[0].Register.WriteMask != 0xf)
1804		use_temp = 1;
1805
1806	for (i = 0; i < 4; i++) {
1807		memset(&alu, 0, sizeof(struct r600_bc_alu));
1808		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
1809		alu.src[0] = r600_src[0];
1810		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1811
1812		alu.src[1] = r600_src[2];
1813		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1814
1815		alu.src[2] = r600_src[1];
1816		alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
1817
1818		if (use_temp)
1819			alu.dst.sel = ctx->temp_reg;
1820		else {
1821			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1822			if (r)
1823				return r;
1824		}
1825		alu.dst.chan = i;
1826		alu.dst.write = 1;
1827		alu.is_op3 = 1;
1828		if (i == 3)
1829			alu.last = 1;
1830		r = r600_bc_add_alu(ctx->bc, &alu);
1831		if (r)
1832			return r;
1833	}
1834	if (use_temp)
1835		return tgsi_helper_copy(ctx, inst);
1836	return 0;
1837}
1838
1839static int tgsi_xpd(struct r600_shader_ctx *ctx)
1840{
1841	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1842	struct r600_bc_alu_src r600_src[3];
1843	struct r600_bc_alu alu;
1844	uint32_t use_temp = 0;
1845	int i, r;
1846
1847	if (inst->Dst[0].Register.WriteMask != 0xf)
1848		use_temp = 1;
1849
1850	r = tgsi_split_constant(ctx, r600_src);
1851	if (r)
1852		return r;
1853
1854	for (i = 0; i < 4; i++) {
1855		memset(&alu, 0, sizeof(struct r600_bc_alu));
1856		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1857
1858		alu.src[0] = r600_src[0];
1859		switch (i) {
1860		case 0:
1861			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
1862			break;
1863		case 1:
1864			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1865			break;
1866		case 2:
1867			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1868			break;
1869		case 3:
1870			alu.src[0].sel = V_SQ_ALU_SRC_0;
1871			alu.src[0].chan = i;
1872		}
1873
1874		alu.src[1] = r600_src[1];
1875		switch (i) {
1876		case 0:
1877			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
1878			break;
1879		case 1:
1880			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
1881			break;
1882		case 2:
1883			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
1884			break;
1885		case 3:
1886			alu.src[1].sel = V_SQ_ALU_SRC_0;
1887			alu.src[1].chan = i;
1888		}
1889
1890		alu.dst.sel = ctx->temp_reg;
1891		alu.dst.chan = i;
1892		alu.dst.write = 1;
1893
1894		if (i == 3)
1895			alu.last = 1;
1896		r = r600_bc_add_alu(ctx->bc, &alu);
1897		if (r)
1898			return r;
1899
1900		r = r600_bc_add_literal(ctx->bc, ctx->value);
1901		if (r)
1902			return r;
1903	}
1904
1905	for (i = 0; i < 4; i++) {
1906		memset(&alu, 0, sizeof(struct r600_bc_alu));
1907		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1908
1909		alu.src[0] = r600_src[0];
1910		switch (i) {
1911		case 0:
1912			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1913			break;
1914		case 1:
1915			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
1916			break;
1917		case 2:
1918			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1919			break;
1920		case 3:
1921			alu.src[0].sel = V_SQ_ALU_SRC_0;
1922			alu.src[0].chan = i;
1923		}
1924
1925		alu.src[1] = r600_src[1];
1926		switch (i) {
1927		case 0:
1928			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
1929			break;
1930		case 1:
1931			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
1932			break;
1933		case 2:
1934			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
1935			break;
1936		case 3:
1937			alu.src[1].sel = V_SQ_ALU_SRC_0;
1938			alu.src[1].chan = i;
1939		}
1940
1941		alu.src[2].sel = ctx->temp_reg;
1942		alu.src[2].neg = 1;
1943		alu.src[2].chan = i;
1944
1945		if (use_temp)
1946			alu.dst.sel = ctx->temp_reg;
1947		else {
1948			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1949			if (r)
1950				return r;
1951		}
1952		alu.dst.chan = i;
1953		alu.dst.write = 1;
1954		alu.is_op3 = 1;
1955		if (i == 3)
1956			alu.last = 1;
1957		r = r600_bc_add_alu(ctx->bc, &alu);
1958		if (r)
1959			return r;
1960
1961		r = r600_bc_add_literal(ctx->bc, ctx->value);
1962		if (r)
1963			return r;
1964	}
1965	if (use_temp)
1966		return tgsi_helper_copy(ctx, inst);
1967	return 0;
1968}
1969
1970static int tgsi_exp(struct r600_shader_ctx *ctx)
1971{
1972	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1973	struct r600_bc_alu_src r600_src[3];
1974	struct r600_bc_alu alu;
1975	int r;
1976
1977	/* result.x = 2^floor(src); */
1978	if (inst->Dst[0].Register.WriteMask & 1) {
1979		memset(&alu, 0, sizeof(struct r600_bc_alu));
1980
1981		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
1982		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1983		if (r)
1984			return r;
1985
1986		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1987
1988		alu.dst.sel = ctx->temp_reg;
1989		alu.dst.chan = 0;
1990		alu.dst.write = 1;
1991		alu.last = 1;
1992		r = r600_bc_add_alu(ctx->bc, &alu);
1993		if (r)
1994			return r;
1995
1996		r = r600_bc_add_literal(ctx->bc, ctx->value);
1997		if (r)
1998			return r;
1999
2000		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2001		alu.src[0].sel = ctx->temp_reg;
2002		alu.src[0].chan = 0;
2003
2004		alu.dst.sel = ctx->temp_reg;
2005		alu.dst.chan = 0;
2006		alu.dst.write = 1;
2007		alu.last = 1;
2008		r = r600_bc_add_alu(ctx->bc, &alu);
2009		if (r)
2010			return r;
2011
2012		r = r600_bc_add_literal(ctx->bc, ctx->value);
2013		if (r)
2014			return r;
2015	}
2016
2017	/* result.y = tmp - floor(tmp); */
2018	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2019		memset(&alu, 0, sizeof(struct r600_bc_alu));
2020
2021		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2022		alu.src[0] = r600_src[0];
2023		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2024		if (r)
2025			return r;
2026		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2027
2028		alu.dst.sel = ctx->temp_reg;
2029//		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2030//		if (r)
2031//			return r;
2032		alu.dst.write = 1;
2033		alu.dst.chan = 1;
2034
2035		alu.last = 1;
2036
2037		r = r600_bc_add_alu(ctx->bc, &alu);
2038		if (r)
2039			return r;
2040		r = r600_bc_add_literal(ctx->bc, ctx->value);
2041		if (r)
2042			return r;
2043	}
2044
2045	/* result.z = RoughApprox2ToX(tmp);*/
2046	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2047		memset(&alu, 0, sizeof(struct r600_bc_alu));
2048		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2049		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2050		if (r)
2051			return r;
2052		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2053
2054		alu.dst.sel = ctx->temp_reg;
2055		alu.dst.write = 1;
2056		alu.dst.chan = 2;
2057
2058		alu.last = 1;
2059
2060		r = r600_bc_add_alu(ctx->bc, &alu);
2061		if (r)
2062			return r;
2063		r = r600_bc_add_literal(ctx->bc, ctx->value);
2064		if (r)
2065			return r;
2066	}
2067
2068	/* result.w = 1.0;*/
2069	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2070		memset(&alu, 0, sizeof(struct r600_bc_alu));
2071
2072		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2073		alu.src[0].sel = V_SQ_ALU_SRC_1;
2074		alu.src[0].chan = 0;
2075
2076		alu.dst.sel = ctx->temp_reg;
2077		alu.dst.chan = 3;
2078		alu.dst.write = 1;
2079		alu.last = 1;
2080		r = r600_bc_add_alu(ctx->bc, &alu);
2081		if (r)
2082			return r;
2083		r = r600_bc_add_literal(ctx->bc, ctx->value);
2084		if (r)
2085			return r;
2086	}
2087	return tgsi_helper_copy(ctx, inst);
2088}
2089
2090static int tgsi_log(struct r600_shader_ctx *ctx)
2091{
2092	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2093	struct r600_bc_alu alu;
2094	int r;
2095
2096	/* result.x = floor(log2(src)); */
2097	if (inst->Dst[0].Register.WriteMask & 1) {
2098		memset(&alu, 0, sizeof(struct r600_bc_alu));
2099
2100		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2101		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2102		if (r)
2103			return r;
2104
2105		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2106
2107		alu.dst.sel = ctx->temp_reg;
2108		alu.dst.chan = 0;
2109		alu.dst.write = 1;
2110		alu.last = 1;
2111		r = r600_bc_add_alu(ctx->bc, &alu);
2112		if (r)
2113			return r;
2114
2115		r = r600_bc_add_literal(ctx->bc, ctx->value);
2116		if (r)
2117			return r;
2118
2119		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2120		alu.src[0].sel = ctx->temp_reg;
2121		alu.src[0].chan = 0;
2122
2123		alu.dst.sel = ctx->temp_reg;
2124		alu.dst.chan = 0;
2125		alu.dst.write = 1;
2126		alu.last = 1;
2127
2128		r = r600_bc_add_alu(ctx->bc, &alu);
2129		if (r)
2130			return r;
2131
2132		r = r600_bc_add_literal(ctx->bc, ctx->value);
2133		if (r)
2134			return r;
2135	}
2136
2137	/* result.y = src.x / (2 ^ floor(log2(src.x))); */
2138	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2139		memset(&alu, 0, sizeof(struct r600_bc_alu));
2140
2141		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2142		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2143		if (r)
2144			return r;
2145
2146		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2147
2148		alu.dst.sel = ctx->temp_reg;
2149		alu.dst.chan = 1;
2150		alu.dst.write = 1;
2151		alu.last = 1;
2152
2153		r = r600_bc_add_alu(ctx->bc, &alu);
2154		if (r)
2155			return r;
2156
2157		r = r600_bc_add_literal(ctx->bc, ctx->value);
2158		if (r)
2159			return r;
2160
2161		memset(&alu, 0, sizeof(struct r600_bc_alu));
2162
2163		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2164		alu.src[0].sel = ctx->temp_reg;
2165		alu.src[0].chan = 1;
2166
2167		alu.dst.sel = ctx->temp_reg;
2168		alu.dst.chan = 1;
2169		alu.dst.write = 1;
2170		alu.last = 1;
2171
2172		r = r600_bc_add_alu(ctx->bc, &alu);
2173		if (r)
2174			return r;
2175
2176		r = r600_bc_add_literal(ctx->bc, ctx->value);
2177		if (r)
2178			return r;
2179
2180		memset(&alu, 0, sizeof(struct r600_bc_alu));
2181
2182		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2183		alu.src[0].sel = ctx->temp_reg;
2184		alu.src[0].chan = 1;
2185
2186		alu.dst.sel = ctx->temp_reg;
2187		alu.dst.chan = 1;
2188		alu.dst.write = 1;
2189		alu.last = 1;
2190
2191		r = r600_bc_add_alu(ctx->bc, &alu);
2192		if (r)
2193			return r;
2194
2195		r = r600_bc_add_literal(ctx->bc, ctx->value);
2196		if (r)
2197			return r;
2198
2199		memset(&alu, 0, sizeof(struct r600_bc_alu));
2200
2201		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2202		alu.src[0].sel = ctx->temp_reg;
2203		alu.src[0].chan = 1;
2204
2205		alu.dst.sel = ctx->temp_reg;
2206		alu.dst.chan = 1;
2207		alu.dst.write = 1;
2208		alu.last = 1;
2209
2210		r = r600_bc_add_alu(ctx->bc, &alu);
2211		if (r)
2212			return r;
2213
2214		r = r600_bc_add_literal(ctx->bc, ctx->value);
2215		if (r)
2216			return r;
2217
2218		memset(&alu, 0, sizeof(struct r600_bc_alu));
2219
2220		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2221
2222		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2223		if (r)
2224			return r;
2225
2226		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2227
2228		alu.src[1].sel = ctx->temp_reg;
2229		alu.src[1].chan = 1;
2230
2231		alu.dst.sel = ctx->temp_reg;
2232		alu.dst.chan = 1;
2233		alu.dst.write = 1;
2234		alu.last = 1;
2235
2236		r = r600_bc_add_alu(ctx->bc, &alu);
2237		if (r)
2238			return r;
2239
2240		r = r600_bc_add_literal(ctx->bc, ctx->value);
2241		if (r)
2242			return r;
2243	}
2244
2245	/* result.z = log2(src);*/
2246	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2247		memset(&alu, 0, sizeof(struct r600_bc_alu));
2248
2249		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2250		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2251		if (r)
2252			return r;
2253
2254		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2255
2256		alu.dst.sel = ctx->temp_reg;
2257		alu.dst.write = 1;
2258		alu.dst.chan = 2;
2259		alu.last = 1;
2260
2261		r = r600_bc_add_alu(ctx->bc, &alu);
2262		if (r)
2263			return r;
2264
2265		r = r600_bc_add_literal(ctx->bc, ctx->value);
2266		if (r)
2267			return r;
2268	}
2269
2270	/* result.w = 1.0; */
2271	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2272		memset(&alu, 0, sizeof(struct r600_bc_alu));
2273
2274		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2275		alu.src[0].sel = V_SQ_ALU_SRC_1;
2276		alu.src[0].chan = 0;
2277
2278		alu.dst.sel = ctx->temp_reg;
2279		alu.dst.chan = 3;
2280		alu.dst.write = 1;
2281		alu.last = 1;
2282
2283		r = r600_bc_add_alu(ctx->bc, &alu);
2284		if (r)
2285			return r;
2286
2287		r = r600_bc_add_literal(ctx->bc, ctx->value);
2288		if (r)
2289			return r;
2290	}
2291
2292	return tgsi_helper_copy(ctx, inst);
2293}
2294
2295/* r6/7 only for now */
2296static int tgsi_arl(struct r600_shader_ctx *ctx)
2297{
2298	/* TODO from r600c, ar values don't persist between clauses */
2299	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2300	struct r600_bc_alu alu;
2301	int r;
2302	memset(&alu, 0, sizeof(struct r600_bc_alu));
2303
2304	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
2305
2306	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2307	if (r)
2308		return r;
2309	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2310
2311	alu.last = 1;
2312
2313	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2314	if (r)
2315		return r;
2316	ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2317	return 0;
2318}
2319
2320static int tgsi_opdst(struct r600_shader_ctx *ctx)
2321{
2322	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2323	struct r600_bc_alu alu;
2324	int i, r = 0;
2325
2326	for (i = 0; i < 4; i++) {
2327		memset(&alu, 0, sizeof(struct r600_bc_alu));
2328
2329		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2330		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2331		if (r)
2332			return r;
2333
2334	        if (i == 0 || i == 3) {
2335			alu.src[0].sel = V_SQ_ALU_SRC_1;
2336		} else {
2337			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2338			if (r)
2339				return r;
2340			alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2341		}
2342
2343	        if (i == 0 || i == 2) {
2344			alu.src[1].sel = V_SQ_ALU_SRC_1;
2345		} else {
2346			r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
2347			if (r)
2348				return r;
2349			alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2350		}
2351		if (i == 3)
2352			alu.last = 1;
2353		r = r600_bc_add_alu(ctx->bc, &alu);
2354		if (r)
2355			return r;
2356	}
2357	return 0;
2358}
2359
2360static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2361{
2362	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2363	struct r600_bc_alu alu;
2364	int r;
2365
2366	memset(&alu, 0, sizeof(struct r600_bc_alu));
2367	alu.inst = opcode;
2368	alu.predicate = 1;
2369
2370	alu.dst.sel = ctx->temp_reg;
2371	alu.dst.write = 1;
2372	alu.dst.chan = 0;
2373
2374	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2375	if (r)
2376		return r;
2377	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2378	alu.src[1].sel = V_SQ_ALU_SRC_0;
2379	alu.src[1].chan = 0;
2380
2381	alu.last = 1;
2382
2383	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2384	if (r)
2385		return r;
2386	return 0;
2387}
2388
2389static int pops(struct r600_shader_ctx *ctx, int pops)
2390{
2391	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2392	ctx->bc->cf_last->pop_count = pops;
2393	return 0;
2394}
2395
2396static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2397{
2398	switch(reason) {
2399	case FC_PUSH_VPM:
2400		ctx->bc->callstack[ctx->bc->call_sp].current--;
2401		break;
2402	case FC_PUSH_WQM:
2403	case FC_LOOP:
2404		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2405		break;
2406	case FC_REP:
2407		/* TOODO : for 16 vp asic should -= 2; */
2408		ctx->bc->callstack[ctx->bc->call_sp].current --;
2409		break;
2410	}
2411}
2412
2413static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2414{
2415	if (check_max_only) {
2416		int diff;
2417		switch (reason) {
2418		case FC_PUSH_VPM:
2419			diff = 1;
2420			break;
2421		case FC_PUSH_WQM:
2422			diff = 4;
2423			break;
2424		default:
2425			assert(0);
2426			diff = 0;
2427		}
2428		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2429		    ctx->bc->callstack[ctx->bc->call_sp].max) {
2430			ctx->bc->callstack[ctx->bc->call_sp].max =
2431				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2432		}
2433		return;
2434	}
2435	switch (reason) {
2436	case FC_PUSH_VPM:
2437		ctx->bc->callstack[ctx->bc->call_sp].current++;
2438		break;
2439	case FC_PUSH_WQM:
2440	case FC_LOOP:
2441		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2442		break;
2443	case FC_REP:
2444		ctx->bc->callstack[ctx->bc->call_sp].current++;
2445		break;
2446	}
2447
2448	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2449	    ctx->bc->callstack[ctx->bc->call_sp].max) {
2450		ctx->bc->callstack[ctx->bc->call_sp].max =
2451			ctx->bc->callstack[ctx->bc->call_sp].current;
2452	}
2453}
2454
2455static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2456{
2457	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2458
2459	sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2460						sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2461	sp->mid[sp->num_mid] = ctx->bc->cf_last;
2462	sp->num_mid++;
2463}
2464
2465static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2466{
2467	ctx->bc->fc_sp++;
2468	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2469	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2470}
2471
2472static void fc_poplevel(struct r600_shader_ctx *ctx)
2473{
2474	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2475	if (sp->mid) {
2476		free(sp->mid);
2477		sp->mid = NULL;
2478	}
2479	sp->num_mid = 0;
2480	sp->start = NULL;
2481	sp->type = 0;
2482	ctx->bc->fc_sp--;
2483}
2484
/*
 * Disabled (compiled out) sketches for RETURN / flag-based loop exit support.
 * Several of the helpers are empty stubs; nothing in this region is built.
 */
#if 0
/* Emit a CF RETURN instruction; the emit result is not checked. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/*
 * Emit a CF JUMP with the given pop count.  The offset argument is accepted
 * but not used yet (see TODO below).
 */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: does nothing and always returns 0. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: empty body. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

/*
 * Sequence: test flag, jump (pop 1, offset 4), set the flag from
 * V_SQ_ALU_SRC_0, pop ifidx + 1 levels, then return.
 */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Test the flag, emit the current instruction's CF opcode with pop_count 1,
 * register it in the mid list of stack entry fc_sp, then emit a POP of 1.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2532
2533static int tgsi_if(struct r600_shader_ctx *ctx)
2534{
2535	emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2536
2537	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2538
2539	fc_pushlevel(ctx, FC_IF);
2540
2541	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2542	return 0;
2543}
2544
2545static int tgsi_else(struct r600_shader_ctx *ctx)
2546{
2547	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2548	ctx->bc->cf_last->pop_count = 1;
2549
2550	fc_set_mid(ctx, ctx->bc->fc_sp);
2551	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2552	return 0;
2553}
2554
2555static int tgsi_endif(struct r600_shader_ctx *ctx)
2556{
2557	pops(ctx, 1);
2558	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2559		R600_ERR("if/endif unbalanced in shader\n");
2560		return -1;
2561	}
2562
2563	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2564		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2565		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2566	} else {
2567		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2568	}
2569	fc_poplevel(ctx);
2570
2571	callstack_decrease_current(ctx, FC_PUSH_VPM);
2572	return 0;
2573}
2574
2575static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2576{
2577	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2578
2579	fc_pushlevel(ctx, FC_LOOP);
2580
2581	/* check stack depth */
2582	callstack_check_depth(ctx, FC_LOOP, 0);
2583	return 0;
2584}
2585
2586static int tgsi_endloop(struct r600_shader_ctx *ctx)
2587{
2588	int i;
2589
2590	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
2591
2592	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2593		R600_ERR("loop/endloop in shader code are not paired.\n");
2594		return -EINVAL;
2595	}
2596
2597	/* fixup loop pointers - from r600isa
2598	   LOOP END points to CF after LOOP START,
2599	   LOOP START point to CF after LOOP END
2600	   BRK/CONT point to LOOP END CF
2601	*/
2602	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2603
2604	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2605
2606	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2607		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2608	}
2609	/* TODO add LOOPRET support */
2610	fc_poplevel(ctx);
2611	callstack_decrease_current(ctx, FC_LOOP);
2612	return 0;
2613}
2614
2615static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2616{
2617	unsigned int fscp;
2618
2619	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2620	{
2621		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2622			break;
2623	}
2624
2625	if (fscp == 0) {
2626		R600_ERR("Break not inside loop/endloop pair\n");
2627		return -EINVAL;
2628	}
2629
2630	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2631	ctx->bc->cf_last->pop_count = 1;
2632
2633	fc_set_mid(ctx, fscp);
2634
2635	pops(ctx, 1);
2636	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2637	return 0;
2638}
2639
/*
 * TGSI -> hardware translation table using the non-EG (V_SQ_* / SQ_TEX_*)
 * opcode values.
 *
 * Each entry is { TGSI opcode, flag, hardware opcode, handler }:
 *   - the flag is 1 only for MAD, whose hardware opcode is an OP3 encoding
 *     (presumably an "is three-operand" marker -- confirm against the struct
 *     definition);
 *   - the hardware opcode is what handlers read as inst_info->r600_opcode;
 *   - the handler emits the bytecode; tgsi_unsupported marks opcodes that are
 *     not implemented.
 *
 * Entries are listed in ascending TGSI opcode order, with bare numbers
 * filling the unassigned opcode values ("gap"), presumably so the table can
 * be indexed directly by opcode -- verify against the lookup code.
 */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl},
	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},

	/* FIXME:
	 * For state trackers other than OpenGL, we'll want to use
	 * _RECIP_IEEE instead.
	 */
	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},

	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
	/* gap */
	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_TXD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_TXL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
	/* gap */
	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
	/* gap */
	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
	/* gap */
	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
};
2803
2804static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
2805	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2806	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2807	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2808	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
2809	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
2810	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2811	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2812	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2813	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2814	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2815	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2816	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2817	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2818	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2819	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2820	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2821	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2822	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2823	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2824	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2825	/* gap */
2826	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2827	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2828	/* gap */
2829	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2830	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2831	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2832	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2833	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2834	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2835	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2836	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2837	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2838	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2839	/* gap */
2840	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2841	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2842	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2843	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2844	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2845	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2846	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2847	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
2848	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2849	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2850	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2851	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2852	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2853	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2854	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2855	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2856	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2857	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2858	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2859	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2860	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2861	{TGSI_OPCODE_TXD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2862	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2863	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2864	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2865	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2866	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2867	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2868	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2869	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2870	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2871	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2872	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2873	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2874	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2875	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
2876	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2877	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2878	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2879	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2880	{TGSI_OPCODE_TXL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2881	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2882	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2883	/* gap */
2884	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2885	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2886	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2887	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2888	/* gap */
2889	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2890	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2891	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2892	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2893	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2894	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2895	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2896	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
2897	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2898	/* gap */
2899	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2900	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2901	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2902	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2903	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2904	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2905	{TGSI_OPCODE_TXF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2906	{TGSI_OPCODE_TXQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2907	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2908	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2909	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2910	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2911	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2912	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2913	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2914	/* gap */
2915	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2916	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2917	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2918	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2919	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2920	/* gap */
2921	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2922	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2923	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2924	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2925	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2926	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2927	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2928	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2929	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
2930	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
2931	/* gap */
2932	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2933	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2934	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2935	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2936	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2937	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2938	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2939	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2940	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2941	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2942	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2943	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2944	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2945	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2946	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2947	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2948	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2949	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2950	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2951	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2952	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2953	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2954	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2955	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2956	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2957	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2958	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2959	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2960};
2961