r600_shader.c revision f8279fb9d82cbbbbaf8a5cc26486142c21d4d2d2
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "pipe/p_shader_tokens.h"
24#include "tgsi/tgsi_info.h"
25#include "tgsi/tgsi_parse.h"
26#include "tgsi/tgsi_scan.h"
27#include "tgsi/tgsi_dump.h"
28#include "util/u_format.h"
29#include "r600_pipe.h"
30#include "r600_asm.h"
31#include "r600_sq.h"
32#include "r600_formats.h"
33#include "r600_opcodes.h"
34#include "r600d.h"
35#include <stdio.h>
36#include <errno.h>
37#include <byteswap.h>
38
39int r600_find_vs_semantic_index(struct r600_shader *vs,
40				struct r600_shader *ps, int id)
41{
42	struct r600_shader_io *input = &ps->input[id];
43
44	for (int i = 0; i < vs->noutput; i++) {
45		if (input->name == vs->output[i].name &&
46			input->sid == vs->output[i].sid) {
47			return i - 1;
48		}
49	}
50	return 0;
51}
52
53static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
54{
55	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
56	struct r600_shader *rshader = &shader->shader;
57	uint32_t *ptr;
58	int	i;
59
60	/* copy new shader */
61	if (shader->bo == NULL) {
62		shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
63		if (shader->bo == NULL) {
64			return -ENOMEM;
65		}
66		ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
67		if (R600_BIG_ENDIAN) {
68			for (i = 0; i < rshader->bc.ndw; ++i) {
69				ptr[i] = bswap_32(rshader->bc.bytecode[i]);
70			}
71		} else {
72			memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr));
73		}
74		r600_bo_unmap(rctx->radeon, shader->bo);
75	}
76	/* build state */
77	switch (rshader->processor_type) {
78	case TGSI_PROCESSOR_VERTEX:
79		if (rshader->family >= CHIP_CEDAR) {
80			evergreen_pipe_shader_vs(ctx, shader);
81		} else {
82			r600_pipe_shader_vs(ctx, shader);
83		}
84		break;
85	case TGSI_PROCESSOR_FRAGMENT:
86		if (rshader->family >= CHIP_CEDAR) {
87			evergreen_pipe_shader_ps(ctx, shader);
88		} else {
89			r600_pipe_shader_ps(ctx, shader);
90		}
91		break;
92	default:
93		return -EINVAL;
94	}
95	return 0;
96}
97
/* Defined further down in this file; forward-declared so r600_pipe_shader_create() can call it. */
static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
99
100int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
101{
102	static int dump_shaders = -1;
103	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
104	int r;
105
106	/* Would like some magic "get_bool_option_once" routine.
107	*/
108	if (dump_shaders == -1)
109		dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
110
111	if (dump_shaders) {
112		fprintf(stderr, "--------------------------------------------------------------\n");
113		tgsi_dump(tokens, 0);
114	}
115	shader->shader.family = r600_get_family(rctx->radeon);
116	r = r600_shader_from_tgsi(tokens, &shader->shader);
117	if (r) {
118		R600_ERR("translation from TGSI failed !\n");
119		return r;
120	}
121	r = r600_bc_build(&shader->shader.bc);
122	if (r) {
123		R600_ERR("building bytecode failed !\n");
124		return r;
125	}
126	if (dump_shaders) {
127		r600_bc_dump(&shader->shader.bc);
128		fprintf(stderr, "______________________________________________________________\n");
129	}
130	return r600_pipe_shader(ctx, shader);
131}
132
133void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
134{
135	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
136
137	r600_bo_reference(rctx->radeon, &shader->bo, NULL);
138	r600_bc_clear(&shader->shader.bc);
139}
140
141/*
142 * tgsi -> r600 shader
143 */
/* Forward declaration: r600_shader_ctx below stores a pointer to this type before it is defined. */
struct r600_shader_tgsi_instruction;
145
/*
 * One source operand of the TGSI instruction being translated, in the form
 * the ALU emitters consume.  Filled in by tgsi_src() and copied into a
 * bytecode source by r600_bc_src().
 */
struct r600_shader_src {
	unsigned				sel;		/* register / constant selector */
	unsigned				swizzle[4];	/* source channel read for each of x, y, z, w */
	unsigned				neg;		/* negate modifier */
	unsigned				abs;		/* absolute-value modifier */
	unsigned				rel;		/* relative (indirect) addressing mode */
	uint32_t				value[4];	/* the four dwords of the immediate when sel is a literal */
};
154
/*
 * Working state for translating one TGSI shader; set up and owned by
 * r600_shader_from_tgsi().
 */
struct r600_shader_ctx {
	struct tgsi_shader_info			info;		/* filled by tgsi_scan_shader() */
	struct tgsi_parse_context		parse;		/* token parser state; holds the current token */
	const struct tgsi_token			*tokens;	/* the TGSI token stream being translated */
	unsigned				type;		/* processor type taken from the TGSI header */
	unsigned				file_offset[TGSI_FILE_COUNT];	/* per-file base added to a TGSI register index */
	unsigned				temp_reg;	/* first driver temporary (ar_reg + 1) */
	unsigned				ar_reg;		/* register placed right after the TGSI temporaries; address source for relative constant fetches */
	struct r600_shader_tgsi_instruction	*inst_info;	/* table entry of the instruction being processed */
	struct r600_bc				*bc;		/* bytecode being built (points into *shader) */
	struct r600_shader			*shader;	/* shader description being filled in */
	struct r600_shader_src			src[3];		/* translated sources of the current instruction */
	u32					*literals;	/* immediates seen so far, four dwords each */
	u32					nliterals;	/* number of immediates stored in literals */
	u32					max_driver_temp_used;	/* driver temporaries handed out for the current instruction */
	/* needed for evergreen interpolation */
	boolean                                 input_centroid;	/* some scanned input uses centroid sampling */
	boolean                                 input_linear;	/* some scanned input uses linear interpolation */
	boolean                                 input_perspective;	/* some scanned input uses perspective interpolation */
	int					num_interp_gpr;	/* GPR count computed by evergreen_gpr_count() */
};
176
/*
 * One entry of the per-opcode translation tables: associates a TGSI opcode
 * with the hardware opcode and the routine that emits code for it.
 */
struct r600_shader_tgsi_instruction {
	unsigned	tgsi_opcode;	/* TGSI opcode this entry describes */
	unsigned	is_op3;		/* NOTE(review): presumably marks three-source hardware opcodes -- not read in the visible code */
	unsigned	r600_opcode;	/* hardware ALU opcode the handlers copy into alu.inst */
	int (*process)(struct r600_shader_ctx *ctx);	/* emits the translation; returns 0 or a negative errno */
};
183
/*
 * Translation tables, indexed by TGSI opcode: one for the pre-evergreen
 * chips and one for evergreen.  Defined later in the file.
 */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[];
/* Helper defined later in the file. */
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
186
187static int tgsi_is_supported(struct r600_shader_ctx *ctx)
188{
189	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
190	int j;
191
192	if (i->Instruction.NumDstRegs > 1) {
193		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
194		return -EINVAL;
195	}
196	if (i->Instruction.Predicate) {
197		R600_ERR("predicate unsupported\n");
198		return -EINVAL;
199	}
200#if 0
201	if (i->Instruction.Label) {
202		R600_ERR("label unsupported\n");
203		return -EINVAL;
204	}
205#endif
206	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
207		if (i->Src[j].Register.Dimension) {
208			R600_ERR("unsupported src %d (dimension %d)\n", j,
209				 i->Src[j].Register.Dimension);
210			return -EINVAL;
211		}
212	}
213	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
214		if (i->Dst[j].Register.Dimension) {
215			R600_ERR("unsupported dst (dimension)\n");
216			return -EINVAL;
217		}
218	}
219	return 0;
220}
221
222static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
223{
224	int i, r;
225	struct r600_bc_alu alu;
226	int gpr = 0, base_chan = 0;
227	int ij_index = 0;
228
229	if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
230		ij_index = 0;
231		if (ctx->shader->input[input].centroid)
232			ij_index++;
233	} else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
234		ij_index = 0;
235		/* if we have perspective add one */
236		if (ctx->input_perspective)  {
237			ij_index++;
238			/* if we have perspective centroid */
239			if (ctx->input_centroid)
240				ij_index++;
241		}
242		if (ctx->shader->input[input].centroid)
243			ij_index++;
244	}
245
246	/* work out gpr and base_chan from index */
247	gpr = ij_index / 2;
248	base_chan = (2 * (ij_index % 2)) + 1;
249
250	for (i = 0; i < 8; i++) {
251		memset(&alu, 0, sizeof(struct r600_bc_alu));
252
253		if (i < 4)
254			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
255		else
256			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
257
258		if ((i > 1) && (i < 6)) {
259			alu.dst.sel = ctx->shader->input[input].gpr;
260			alu.dst.write = 1;
261		}
262
263		alu.dst.chan = i % 4;
264
265		alu.src[0].sel = gpr;
266		alu.src[0].chan = (base_chan - (i % 2));
267
268		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
269
270		alu.bank_swizzle_force = SQ_ALU_VEC_210;
271		if ((i % 4) == 3)
272			alu.last = 1;
273		r = r600_bc_add_alu(ctx->bc, &alu);
274		if (r)
275			return r;
276	}
277	return 0;
278}
279
280
281static int tgsi_declaration(struct r600_shader_ctx *ctx)
282{
283	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
284	unsigned i;
285	int r;
286
287	switch (d->Declaration.File) {
288	case TGSI_FILE_INPUT:
289		i = ctx->shader->ninput++;
290		ctx->shader->input[i].name = d->Semantic.Name;
291		ctx->shader->input[i].sid = d->Semantic.Index;
292		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
293		ctx->shader->input[i].centroid = d->Declaration.Centroid;
294		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
295		if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
296			/* turn input into interpolate on EG */
297			if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
298				if (ctx->shader->input[i].interpolate > 0) {
299					ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
300					evergreen_interp_alu(ctx, i);
301				}
302			}
303		}
304		break;
305	case TGSI_FILE_OUTPUT:
306		i = ctx->shader->noutput++;
307		ctx->shader->output[i].name = d->Semantic.Name;
308		ctx->shader->output[i].sid = d->Semantic.Index;
309		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
310		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
311		break;
312	case TGSI_FILE_CONSTANT:
313	case TGSI_FILE_TEMPORARY:
314	case TGSI_FILE_SAMPLER:
315	case TGSI_FILE_ADDRESS:
316		break;
317
318	case TGSI_FILE_SYSTEM_VALUE:
319		if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
320			struct r600_bc_alu alu;
321			memset(&alu, 0, sizeof(struct r600_bc_alu));
322
323			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
324			alu.src[0].sel = 0;
325			alu.src[0].chan = 3;
326
327			alu.dst.sel = 0;
328			alu.dst.chan = 3;
329			alu.dst.write = 1;
330			alu.last = 1;
331
332			if ((r = r600_bc_add_alu(ctx->bc, &alu)))
333				return r;
334			break;
335		}
336
337	default:
338		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
339		return -EINVAL;
340	}
341	return 0;
342}
343
344static int r600_get_temp(struct r600_shader_ctx *ctx)
345{
346	return ctx->temp_reg + ctx->max_driver_temp_used++;
347}
348
349/*
350 * for evergreen we need to scan the shader to find the number of GPRs we need to
351 * reserve for interpolation.
352 *
353 * we need to know if we are going to emit
354 * any centroid inputs
355 * if perspective and linear are required
356*/
357static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
358{
359	int i;
360	int num_baryc;
361
362	ctx->input_linear = FALSE;
363	ctx->input_perspective = FALSE;
364	ctx->input_centroid = FALSE;
365	ctx->num_interp_gpr = 1;
366
367	/* any centroid inputs */
368	for (i = 0; i < ctx->info.num_inputs; i++) {
369		/* skip position/face */
370		if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
371		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
372			continue;
373		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
374			ctx->input_linear = TRUE;
375		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
376			ctx->input_perspective = TRUE;
377		if (ctx->info.input_centroid[i])
378			ctx->input_centroid = TRUE;
379	}
380
381	num_baryc = 0;
382	/* ignoring sample for now */
383	if (ctx->input_perspective)
384		num_baryc++;
385	if (ctx->input_linear)
386		num_baryc++;
387	if (ctx->input_centroid)
388		num_baryc *= 2;
389
390	ctx->num_interp_gpr += (num_baryc + 1) >> 1;
391
392	/* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
393	return ctx->num_interp_gpr;
394}
395
396static void tgsi_src(struct r600_shader_ctx *ctx,
397		     const struct tgsi_full_src_register *tgsi_src,
398		     struct r600_shader_src *r600_src)
399{
400	memset(r600_src, 0, sizeof(*r600_src));
401	r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
402	r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
403	r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
404	r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
405	r600_src->neg = tgsi_src->Register.Negate;
406	r600_src->abs = tgsi_src->Register.Absolute;
407
408	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
409		int index;
410		if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
411			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
412			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
413
414			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
415			r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
416			if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
417				return;
418		}
419		index = tgsi_src->Register.Index;
420		r600_src->sel = V_SQ_ALU_SRC_LITERAL;
421		memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
422	} else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
423		/* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
424		r600_src->swizzle[0] = 3;
425		r600_src->swizzle[1] = 3;
426		r600_src->swizzle[2] = 3;
427		r600_src->swizzle[3] = 3;
428		r600_src->sel = 0;
429	} else {
430		if (tgsi_src->Register.Indirect)
431			r600_src->rel = V_SQ_REL_RELATIVE;
432		r600_src->sel = tgsi_src->Register.Index;
433		r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
434	}
435}
436
437static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
438{
439	struct r600_bc_vtx vtx;
440	unsigned int ar_reg;
441	int r;
442
443	if (offset) {
444		struct r600_bc_alu alu;
445
446		memset(&alu, 0, sizeof(alu));
447
448		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
449		alu.src[0].sel = ctx->ar_reg;
450
451		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
452		alu.src[1].value = offset;
453
454		alu.dst.sel = dst_reg;
455		alu.dst.write = 1;
456		alu.last = 1;
457
458		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
459			return r;
460
461		ar_reg = dst_reg;
462	} else {
463		ar_reg = ctx->ar_reg;
464	}
465
466	memset(&vtx, 0, sizeof(vtx));
467	vtx.fetch_type = 2;		/* VTX_FETCH_NO_INDEX_OFFSET */
468	vtx.src_gpr = ar_reg;
469	vtx.mega_fetch_count = 16;
470	vtx.dst_gpr = dst_reg;
471	vtx.dst_sel_x = 0;		/* SEL_X */
472	vtx.dst_sel_y = 1;		/* SEL_Y */
473	vtx.dst_sel_z = 2;		/* SEL_Z */
474	vtx.dst_sel_w = 3;		/* SEL_W */
475	vtx.data_format = FMT_32_32_32_32_FLOAT;
476	vtx.num_format_all = 2;		/* NUM_FORMAT_SCALED */
477	vtx.format_comp_all = 1;	/* FORMAT_COMP_SIGNED */
478	vtx.srf_mode_all = 1;		/* SRF_MODE_NO_ZERO */
479	vtx.endian = r600_endian_swap(32);
480
481	if ((r = r600_bc_add_vtx(ctx->bc, &vtx)))
482		return r;
483
484	return 0;
485}
486
487static int tgsi_split_constant(struct r600_shader_ctx *ctx)
488{
489	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
490	struct r600_bc_alu alu;
491	int i, j, k, nconst, r;
492
493	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
494		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
495			nconst++;
496		}
497		tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
498	}
499	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
500		if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
501			continue;
502		}
503
504		if (ctx->src[i].rel) {
505			int treg = r600_get_temp(ctx);
506			if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
507				return r;
508
509			ctx->src[i].sel = treg;
510			ctx->src[i].rel = 0;
511			j--;
512		} else if (j > 0) {
513			int treg = r600_get_temp(ctx);
514			for (k = 0; k < 4; k++) {
515				memset(&alu, 0, sizeof(struct r600_bc_alu));
516				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
517				alu.src[0].sel = ctx->src[i].sel;
518				alu.src[0].chan = k;
519				alu.src[0].rel = ctx->src[i].rel;
520				alu.dst.sel = treg;
521				alu.dst.chan = k;
522				alu.dst.write = 1;
523				if (k == 3)
524					alu.last = 1;
525				r = r600_bc_add_alu(ctx->bc, &alu);
526				if (r)
527					return r;
528			}
529			ctx->src[i].sel = treg;
530			ctx->src[i].rel =0;
531			j--;
532		}
533	}
534	return 0;
535}
536
537/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
538static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
539{
540	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
541	struct r600_bc_alu alu;
542	int i, j, k, nliteral, r;
543
544	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
545		if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
546			nliteral++;
547		}
548	}
549	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
550		if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
551			int treg = r600_get_temp(ctx);
552			for (k = 0; k < 4; k++) {
553				memset(&alu, 0, sizeof(struct r600_bc_alu));
554				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
555				alu.src[0].sel = ctx->src[i].sel;
556				alu.src[0].chan = k;
557				alu.src[0].value = ctx->src[i].value[k];
558				alu.dst.sel = treg;
559				alu.dst.chan = k;
560				alu.dst.write = 1;
561				if (k == 3)
562					alu.last = 1;
563				r = r600_bc_add_alu(ctx->bc, &alu);
564				if (r)
565					return r;
566			}
567			ctx->src[i].sel = treg;
568			j--;
569		}
570	}
571	return 0;
572}
573
574static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
575{
576	struct tgsi_full_immediate *immediate;
577	struct tgsi_full_property *property;
578	struct r600_shader_ctx ctx;
579	struct r600_bc_output output[32];
580	unsigned output_done, noutput;
581	unsigned opcode;
582	int i, r = 0, pos0;
583
584	ctx.bc = &shader->bc;
585	ctx.shader = shader;
586	r = r600_bc_init(ctx.bc, shader->family);
587	if (r)
588		return r;
589	ctx.tokens = tokens;
590	tgsi_scan_shader(tokens, &ctx.info);
591	tgsi_parse_init(&ctx.parse, tokens);
592	ctx.type = ctx.parse.FullHeader.Processor.Processor;
593	shader->processor_type = ctx.type;
594	ctx.bc->type = shader->processor_type;
595
596	/* register allocations */
597	/* Values [0,127] correspond to GPR[0..127].
598	 * Values [128,159] correspond to constant buffer bank 0
599	 * Values [160,191] correspond to constant buffer bank 1
600	 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
601	 * Values [256,287] correspond to constant buffer bank 2 (EG)
602	 * Values [288,319] correspond to constant buffer bank 3 (EG)
603	 * Other special values are shown in the list below.
604	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
605	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
606	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
607	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
608	 * 248	SQ_ALU_SRC_0: special constant 0.0.
609	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
610	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
611	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
612	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
613	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
614	 * 254	SQ_ALU_SRC_PV: previous vector result.
615	 * 255	SQ_ALU_SRC_PS: previous scalar result.
616	 */
617	for (i = 0; i < TGSI_FILE_COUNT; i++) {
618		ctx.file_offset[i] = 0;
619	}
620	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
621		ctx.file_offset[TGSI_FILE_INPUT] = 1;
622		if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
623			r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
624		} else {
625			r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
626		}
627	}
628	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) {
629		ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
630	}
631	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
632						ctx.info.file_count[TGSI_FILE_INPUT];
633	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
634						ctx.info.file_count[TGSI_FILE_OUTPUT];
635
636	/* Outside the GPR range. This will be translated to one of the
637	 * kcache banks later. */
638	ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
639
640	ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
641	ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
642			ctx.info.file_count[TGSI_FILE_TEMPORARY];
643	ctx.temp_reg = ctx.ar_reg + 1;
644
645	ctx.nliterals = 0;
646	ctx.literals = NULL;
647	shader->fs_write_all = FALSE;
648	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
649		tgsi_parse_token(&ctx.parse);
650		switch (ctx.parse.FullToken.Token.Type) {
651		case TGSI_TOKEN_TYPE_IMMEDIATE:
652			immediate = &ctx.parse.FullToken.FullImmediate;
653			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
654			if(ctx.literals == NULL) {
655				r = -ENOMEM;
656				goto out_err;
657			}
658			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
659			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
660			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
661			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
662			ctx.nliterals++;
663			break;
664		case TGSI_TOKEN_TYPE_DECLARATION:
665			r = tgsi_declaration(&ctx);
666			if (r)
667				goto out_err;
668			break;
669		case TGSI_TOKEN_TYPE_INSTRUCTION:
670			r = tgsi_is_supported(&ctx);
671			if (r)
672				goto out_err;
673			ctx.max_driver_temp_used = 0;
674			/* reserve first tmp for everyone */
675			r600_get_temp(&ctx);
676
677			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
678			if ((r = tgsi_split_constant(&ctx)))
679				goto out_err;
680			if ((r = tgsi_split_literal_constant(&ctx)))
681				goto out_err;
682			if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
683				ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
684			else
685				ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
686			r = ctx.inst_info->process(&ctx);
687			if (r)
688				goto out_err;
689			break;
690		case TGSI_TOKEN_TYPE_PROPERTY:
691			property = &ctx.parse.FullToken.FullProperty;
692			if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
693				if (property->u[0].Data == 1)
694					shader->fs_write_all = TRUE;
695			}
696			break;
697		default:
698			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
699			r = -EINVAL;
700			goto out_err;
701		}
702	}
703	/* export output */
704	noutput = shader->noutput;
705	for (i = 0, pos0 = 0; i < noutput; i++) {
706		memset(&output[i], 0, sizeof(struct r600_bc_output));
707		output[i].gpr = shader->output[i].gpr;
708		output[i].elem_size = 3;
709		output[i].swizzle_x = 0;
710		output[i].swizzle_y = 1;
711		output[i].swizzle_z = 2;
712		output[i].swizzle_w = 3;
713		output[i].burst_count = 1;
714		output[i].barrier = 1;
715		output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
716		output[i].array_base = i - pos0;
717		output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
718		switch (ctx.type) {
719		case TGSI_PROCESSOR_VERTEX:
720			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
721				output[i].array_base = 60;
722				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
723				/* position doesn't count in array_base */
724				pos0++;
725			}
726			if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
727				output[i].array_base = 61;
728				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
729				/* position doesn't count in array_base */
730				pos0++;
731			}
732			break;
733		case TGSI_PROCESSOR_FRAGMENT:
734			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
735				output[i].array_base = shader->output[i].sid;
736				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
737			} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
738				output[i].array_base = 61;
739				output[i].swizzle_x = 2;
740				output[i].swizzle_y = 7;
741				output[i].swizzle_z = output[i].swizzle_w = 7;
742				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
743			} else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
744				output[i].array_base = 61;
745				output[i].swizzle_x = 7;
746				output[i].swizzle_y = 1;
747				output[i].swizzle_z = output[i].swizzle_w = 7;
748				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
749			} else {
750				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
751				r = -EINVAL;
752				goto out_err;
753			}
754			break;
755		default:
756			R600_ERR("unsupported processor type %d\n", ctx.type);
757			r = -EINVAL;
758			goto out_err;
759		}
760	}
761	/* add fake param output for vertex shader if no param is exported */
762	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
763		for (i = 0, pos0 = 0; i < noutput; i++) {
764			if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
765				pos0 = 1;
766				break;
767			}
768		}
769		if (!pos0) {
770			memset(&output[i], 0, sizeof(struct r600_bc_output));
771			output[i].gpr = 0;
772			output[i].elem_size = 3;
773			output[i].swizzle_x = 0;
774			output[i].swizzle_y = 1;
775			output[i].swizzle_z = 2;
776			output[i].swizzle_w = 3;
777			output[i].burst_count = 1;
778			output[i].barrier = 1;
779			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
780			output[i].array_base = 0;
781			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
782			noutput++;
783		}
784	}
785	/* add fake pixel export */
786	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
787		memset(&output[0], 0, sizeof(struct r600_bc_output));
788		output[0].gpr = 0;
789		output[0].elem_size = 3;
790		output[0].swizzle_x = 7;
791		output[0].swizzle_y = 7;
792		output[0].swizzle_z = 7;
793		output[0].swizzle_w = 7;
794		output[0].burst_count = 1;
795		output[0].barrier = 1;
796		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
797		output[0].array_base = 0;
798		output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
799		noutput++;
800	}
801	/* set export done on last export of each type */
802	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
803		if (i == (noutput - 1)) {
804			output[i].end_of_program = 1;
805		}
806		if (!(output_done & (1 << output[i].type))) {
807			output_done |= (1 << output[i].type);
808			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
809		}
810	}
811	/* add output to bytecode */
812	for (i = 0; i < noutput; i++) {
813		r = r600_bc_add_output(ctx.bc, &output[i]);
814		if (r)
815			goto out_err;
816	}
817	free(ctx.literals);
818	tgsi_parse_free(&ctx.parse);
819	return 0;
820out_err:
821	free(ctx.literals);
822	tgsi_parse_free(&ctx.parse);
823	return r;
824}
825
826static int tgsi_unsupported(struct r600_shader_ctx *ctx)
827{
828	R600_ERR("%s tgsi opcode unsupported\n",
829		 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
830	return -EINVAL;
831}
832
/* Handler that emits nothing and always succeeds. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* nothing to translate */
	return 0;
}
837
838static void r600_bc_src(struct r600_bc_alu_src *bc_src,
839			const struct r600_shader_src *shader_src,
840			unsigned chan)
841{
842	bc_src->sel = shader_src->sel;
843	bc_src->chan = shader_src->swizzle[chan];
844	bc_src->neg = shader_src->neg;
845	bc_src->abs = shader_src->abs;
846	bc_src->rel = shader_src->rel;
847	bc_src->value = shader_src->value[bc_src->chan];
848}
849
850static void tgsi_dst(struct r600_shader_ctx *ctx,
851		     const struct tgsi_full_dst_register *tgsi_dst,
852		     unsigned swizzle,
853		     struct r600_bc_alu_dst *r600_dst)
854{
855	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
856
857	r600_dst->sel = tgsi_dst->Register.Index;
858	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
859	r600_dst->chan = swizzle;
860	r600_dst->write = 1;
861	if (tgsi_dst->Register.Indirect)
862		r600_dst->rel = V_SQ_REL_RELATIVE;
863	if (inst->Instruction.Saturate) {
864		r600_dst->clamp = 1;
865	}
866}
867
/*
 * Return the index (0..3) of the highest channel enabled in `writemask`.
 * Bits above bit 3 are ignored; an empty mask yields 0.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan from w down to y; x (or nothing) is the fallback */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1 << chan))
			return chan;
	}
	return 0;
}
879
880static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
881{
882	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
883	struct r600_bc_alu alu;
884	int i, j, r;
885	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
886
887	for (i = 0; i < lasti + 1; i++) {
888		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
889			continue;
890
891		memset(&alu, 0, sizeof(struct r600_bc_alu));
892		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
893
894		alu.inst = ctx->inst_info->r600_opcode;
895		if (!swap) {
896			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
897				r600_bc_src(&alu.src[j], &ctx->src[j], i);
898			}
899		} else {
900			r600_bc_src(&alu.src[0], &ctx->src[1], i);
901			r600_bc_src(&alu.src[1], &ctx->src[0], i);
902		}
903		/* handle some special cases */
904		switch (ctx->inst_info->tgsi_opcode) {
905		case TGSI_OPCODE_SUB:
906			alu.src[1].neg = 1;
907			break;
908		case TGSI_OPCODE_ABS:
909			alu.src[0].abs = 1;
910			if (alu.src[0].neg)
911			  alu.src[0].neg = 0;
912			break;
913		default:
914			break;
915		}
916		if (i == lasti) {
917			alu.last = 1;
918		}
919		r = r600_bc_add_alu(ctx->bc, &alu);
920		if (r)
921			return r;
922	}
923	return 0;
924}
925
/* Component-wise translation with the sources in their TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
930
/* Component-wise translation with the first two sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
935
936/*
937 * r600 - trunc to -PI..PI range
938 * r700 - normalize by dividing by 2PI
939 * see fdo bug 27901
940 */
941static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
942{
943	static float half_inv_pi = 1.0 /(3.1415926535 * 2);
944	static float double_pi = 3.1415926535 * 2;
945	static float neg_pi = -3.1415926535;
946
947	int r;
948	struct r600_bc_alu alu;
949
950	memset(&alu, 0, sizeof(struct r600_bc_alu));
951	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
952	alu.is_op3 = 1;
953
954	alu.dst.chan = 0;
955	alu.dst.sel = ctx->temp_reg;
956	alu.dst.write = 1;
957
958	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
959
960	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
961	alu.src[1].chan = 0;
962	alu.src[1].value = *(uint32_t *)&half_inv_pi;
963	alu.src[2].sel = V_SQ_ALU_SRC_0_5;
964	alu.src[2].chan = 0;
965	alu.last = 1;
966	r = r600_bc_add_alu(ctx->bc, &alu);
967	if (r)
968		return r;
969
970	memset(&alu, 0, sizeof(struct r600_bc_alu));
971	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
972
973	alu.dst.chan = 0;
974	alu.dst.sel = ctx->temp_reg;
975	alu.dst.write = 1;
976
977	alu.src[0].sel = ctx->temp_reg;
978	alu.src[0].chan = 0;
979	alu.last = 1;
980	r = r600_bc_add_alu(ctx->bc, &alu);
981	if (r)
982		return r;
983
984	memset(&alu, 0, sizeof(struct r600_bc_alu));
985	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
986	alu.is_op3 = 1;
987
988	alu.dst.chan = 0;
989	alu.dst.sel = ctx->temp_reg;
990	alu.dst.write = 1;
991
992	alu.src[0].sel = ctx->temp_reg;
993	alu.src[0].chan = 0;
994
995	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
996	alu.src[1].chan = 0;
997	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
998	alu.src[2].chan = 0;
999
1000	if (ctx->bc->chiprev == CHIPREV_R600) {
1001		alu.src[1].value = *(uint32_t *)&double_pi;
1002		alu.src[2].value = *(uint32_t *)&neg_pi;
1003	} else {
1004		alu.src[1].sel = V_SQ_ALU_SRC_1;
1005		alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1006		alu.src[2].neg = 1;
1007	}
1008
1009	alu.last = 1;
1010	r = r600_bc_add_alu(ctx->bc, &alu);
1011	if (r)
1012		return r;
1013	return 0;
1014}
1015
1016static int tgsi_trig(struct r600_shader_ctx *ctx)
1017{
1018	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1019	struct r600_bc_alu alu;
1020	int i, r;
1021	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1022
1023	r = tgsi_setup_trig(ctx);
1024	if (r)
1025		return r;
1026
1027	memset(&alu, 0, sizeof(struct r600_bc_alu));
1028	alu.inst = ctx->inst_info->r600_opcode;
1029	alu.dst.chan = 0;
1030	alu.dst.sel = ctx->temp_reg;
1031	alu.dst.write = 1;
1032
1033	alu.src[0].sel = ctx->temp_reg;
1034	alu.src[0].chan = 0;
1035	alu.last = 1;
1036	r = r600_bc_add_alu(ctx->bc, &alu);
1037	if (r)
1038		return r;
1039
1040	/* replicate result */
1041	for (i = 0; i < lasti + 1; i++) {
1042		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1043			continue;
1044
1045		memset(&alu, 0, sizeof(struct r600_bc_alu));
1046		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1047
1048		alu.src[0].sel = ctx->temp_reg;
1049		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1050		if (i == lasti)
1051			alu.last = 1;
1052		r = r600_bc_add_alu(ctx->bc, &alu);
1053		if (r)
1054			return r;
1055	}
1056	return 0;
1057}
1058
1059static int tgsi_scs(struct r600_shader_ctx *ctx)
1060{
1061	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1062	struct r600_bc_alu alu;
1063	int r;
1064
1065	/* We'll only need the trig stuff if we are going to write to the
1066	 * X or Y components of the destination vector.
1067	 */
1068	if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1069		r = tgsi_setup_trig(ctx);
1070		if (r)
1071			return r;
1072	}
1073
1074	/* dst.x = COS */
1075	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1076		memset(&alu, 0, sizeof(struct r600_bc_alu));
1077		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1078		tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1079
1080		alu.src[0].sel = ctx->temp_reg;
1081		alu.src[0].chan = 0;
1082		alu.last = 1;
1083		r = r600_bc_add_alu(ctx->bc, &alu);
1084		if (r)
1085			return r;
1086	}
1087
1088	/* dst.y = SIN */
1089	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1090		memset(&alu, 0, sizeof(struct r600_bc_alu));
1091		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1092		tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1093
1094		alu.src[0].sel = ctx->temp_reg;
1095		alu.src[0].chan = 0;
1096		alu.last = 1;
1097		r = r600_bc_add_alu(ctx->bc, &alu);
1098		if (r)
1099			return r;
1100	}
1101
1102	/* dst.z = 0.0; */
1103	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1104		memset(&alu, 0, sizeof(struct r600_bc_alu));
1105
1106		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1107
1108		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1109
1110		alu.src[0].sel = V_SQ_ALU_SRC_0;
1111		alu.src[0].chan = 0;
1112
1113		alu.last = 1;
1114
1115		r = r600_bc_add_alu(ctx->bc, &alu);
1116		if (r)
1117			return r;
1118	}
1119
1120	/* dst.w = 1.0; */
1121	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1122		memset(&alu, 0, sizeof(struct r600_bc_alu));
1123
1124		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1125
1126		tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1127
1128		alu.src[0].sel = V_SQ_ALU_SRC_1;
1129		alu.src[0].chan = 0;
1130
1131		alu.last = 1;
1132
1133		r = r600_bc_add_alu(ctx->bc, &alu);
1134		if (r)
1135			return r;
1136	}
1137
1138	return 0;
1139}
1140
1141static int tgsi_kill(struct r600_shader_ctx *ctx)
1142{
1143	struct r600_bc_alu alu;
1144	int i, r;
1145
1146	for (i = 0; i < 4; i++) {
1147		memset(&alu, 0, sizeof(struct r600_bc_alu));
1148		alu.inst = ctx->inst_info->r600_opcode;
1149
1150		alu.dst.chan = i;
1151
1152		alu.src[0].sel = V_SQ_ALU_SRC_0;
1153
1154		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1155			alu.src[1].sel = V_SQ_ALU_SRC_1;
1156			alu.src[1].neg = 1;
1157		} else {
1158			r600_bc_src(&alu.src[1], &ctx->src[0], i);
1159		}
1160		if (i == 3) {
1161			alu.last = 1;
1162		}
1163		r = r600_bc_add_alu(ctx->bc, &alu);
1164		if (r)
1165			return r;
1166	}
1167
1168	/* kill must be last in ALU */
1169	ctx->bc->force_add_cf = 1;
1170	ctx->shader->uses_kill = TRUE;
1171	return 0;
1172}
1173
1174static int tgsi_lit(struct r600_shader_ctx *ctx)
1175{
1176	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1177	struct r600_bc_alu alu;
1178	int r;
1179
1180	/* dst.x, <- 1.0  */
1181	memset(&alu, 0, sizeof(struct r600_bc_alu));
1182	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1183	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
1184	alu.src[0].chan = 0;
1185	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1186	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1187	r = r600_bc_add_alu(ctx->bc, &alu);
1188	if (r)
1189		return r;
1190
1191	/* dst.y = max(src.x, 0.0) */
1192	memset(&alu, 0, sizeof(struct r600_bc_alu));
1193	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1194	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1195	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1196	alu.src[1].chan = 0;
1197	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1198	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1199	r = r600_bc_add_alu(ctx->bc, &alu);
1200	if (r)
1201		return r;
1202
1203	/* dst.w, <- 1.0  */
1204	memset(&alu, 0, sizeof(struct r600_bc_alu));
1205	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1206	alu.src[0].sel  = V_SQ_ALU_SRC_1;
1207	alu.src[0].chan = 0;
1208	tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1209	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1210	alu.last = 1;
1211	r = r600_bc_add_alu(ctx->bc, &alu);
1212	if (r)
1213		return r;
1214
1215	if (inst->Dst[0].Register.WriteMask & (1 << 2))
1216	{
1217		int chan;
1218		int sel;
1219
1220		/* dst.z = log(src.y) */
1221		memset(&alu, 0, sizeof(struct r600_bc_alu));
1222		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1223		r600_bc_src(&alu.src[0], &ctx->src[0], 1);
1224		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1225		alu.last = 1;
1226		r = r600_bc_add_alu(ctx->bc, &alu);
1227		if (r)
1228			return r;
1229
1230		chan = alu.dst.chan;
1231		sel = alu.dst.sel;
1232
1233		/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1234		memset(&alu, 0, sizeof(struct r600_bc_alu));
1235		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1236		r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1237		alu.src[1].sel  = sel;
1238		alu.src[1].chan = chan;
1239
1240		r600_bc_src(&alu.src[2], &ctx->src[0], 0);
1241		alu.dst.sel = ctx->temp_reg;
1242		alu.dst.chan = 0;
1243		alu.dst.write = 1;
1244		alu.is_op3 = 1;
1245		alu.last = 1;
1246		r = r600_bc_add_alu(ctx->bc, &alu);
1247		if (r)
1248			return r;
1249
1250		/* dst.z = exp(tmp.x) */
1251		memset(&alu, 0, sizeof(struct r600_bc_alu));
1252		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1253		alu.src[0].sel = ctx->temp_reg;
1254		alu.src[0].chan = 0;
1255		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1256		alu.last = 1;
1257		r = r600_bc_add_alu(ctx->bc, &alu);
1258		if (r)
1259			return r;
1260	}
1261	return 0;
1262}
1263
1264static int tgsi_rsq(struct r600_shader_ctx *ctx)
1265{
1266	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1267	struct r600_bc_alu alu;
1268	int i, r;
1269
1270	memset(&alu, 0, sizeof(struct r600_bc_alu));
1271
1272	/* FIXME:
1273	 * For state trackers other than OpenGL, we'll want to use
1274	 * _RECIPSQRT_IEEE instead.
1275	 */
1276	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1277
1278	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1279		r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1280		alu.src[i].abs = 1;
1281	}
1282	alu.dst.sel = ctx->temp_reg;
1283	alu.dst.write = 1;
1284	alu.last = 1;
1285	r = r600_bc_add_alu(ctx->bc, &alu);
1286	if (r)
1287		return r;
1288	/* replicate result */
1289	return tgsi_helper_tempx_replicate(ctx);
1290}
1291
1292static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1293{
1294	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1295	struct r600_bc_alu alu;
1296	int i, r;
1297
1298	for (i = 0; i < 4; i++) {
1299		memset(&alu, 0, sizeof(struct r600_bc_alu));
1300		alu.src[0].sel = ctx->temp_reg;
1301		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1302		alu.dst.chan = i;
1303		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1304		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1305		if (i == 3)
1306			alu.last = 1;
1307		r = r600_bc_add_alu(ctx->bc, &alu);
1308		if (r)
1309			return r;
1310	}
1311	return 0;
1312}
1313
1314static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1315{
1316	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1317	struct r600_bc_alu alu;
1318	int i, r;
1319
1320	memset(&alu, 0, sizeof(struct r600_bc_alu));
1321	alu.inst = ctx->inst_info->r600_opcode;
1322	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1323		r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1324	}
1325	alu.dst.sel = ctx->temp_reg;
1326	alu.dst.write = 1;
1327	alu.last = 1;
1328	r = r600_bc_add_alu(ctx->bc, &alu);
1329	if (r)
1330		return r;
1331	/* replicate result */
1332	return tgsi_helper_tempx_replicate(ctx);
1333}
1334
1335static int tgsi_pow(struct r600_shader_ctx *ctx)
1336{
1337	struct r600_bc_alu alu;
1338	int r;
1339
1340	/* LOG2(a) */
1341	memset(&alu, 0, sizeof(struct r600_bc_alu));
1342	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1343	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1344	alu.dst.sel = ctx->temp_reg;
1345	alu.dst.write = 1;
1346	alu.last = 1;
1347	r = r600_bc_add_alu(ctx->bc, &alu);
1348	if (r)
1349		return r;
1350	/* b * LOG2(a) */
1351	memset(&alu, 0, sizeof(struct r600_bc_alu));
1352	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1353	r600_bc_src(&alu.src[0], &ctx->src[1], 0);
1354	alu.src[1].sel = ctx->temp_reg;
1355	alu.dst.sel = ctx->temp_reg;
1356	alu.dst.write = 1;
1357	alu.last = 1;
1358	r = r600_bc_add_alu(ctx->bc, &alu);
1359	if (r)
1360		return r;
1361	/* POW(a,b) = EXP2(b * LOG2(a))*/
1362	memset(&alu, 0, sizeof(struct r600_bc_alu));
1363	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1364	alu.src[0].sel = ctx->temp_reg;
1365	alu.dst.sel = ctx->temp_reg;
1366	alu.dst.write = 1;
1367	alu.last = 1;
1368	r = r600_bc_add_alu(ctx->bc, &alu);
1369	if (r)
1370		return r;
1371	return tgsi_helper_tempx_replicate(ctx);
1372}
1373
1374static int tgsi_ssg(struct r600_shader_ctx *ctx)
1375{
1376	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1377	struct r600_bc_alu alu;
1378	int i, r;
1379
1380	/* tmp = (src > 0 ? 1 : src) */
1381	for (i = 0; i < 4; i++) {
1382		memset(&alu, 0, sizeof(struct r600_bc_alu));
1383		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1384		alu.is_op3 = 1;
1385
1386		alu.dst.sel = ctx->temp_reg;
1387		alu.dst.chan = i;
1388
1389		r600_bc_src(&alu.src[0], &ctx->src[0], i);
1390		alu.src[1].sel = V_SQ_ALU_SRC_1;
1391		r600_bc_src(&alu.src[2], &ctx->src[0], i);
1392
1393		if (i == 3)
1394			alu.last = 1;
1395		r = r600_bc_add_alu(ctx->bc, &alu);
1396		if (r)
1397			return r;
1398	}
1399
1400	/* dst = (-tmp > 0 ? -1 : tmp) */
1401	for (i = 0; i < 4; i++) {
1402		memset(&alu, 0, sizeof(struct r600_bc_alu));
1403		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1404		alu.is_op3 = 1;
1405		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1406
1407		alu.src[0].sel = ctx->temp_reg;
1408		alu.src[0].chan = i;
1409		alu.src[0].neg = 1;
1410
1411		alu.src[1].sel = V_SQ_ALU_SRC_1;
1412		alu.src[1].neg = 1;
1413
1414		alu.src[2].sel = ctx->temp_reg;
1415		alu.src[2].chan = i;
1416
1417		if (i == 3)
1418			alu.last = 1;
1419		r = r600_bc_add_alu(ctx->bc, &alu);
1420		if (r)
1421			return r;
1422	}
1423	return 0;
1424}
1425
1426static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1427{
1428	struct r600_bc_alu alu;
1429	int i, r;
1430
1431	for (i = 0; i < 4; i++) {
1432		memset(&alu, 0, sizeof(struct r600_bc_alu));
1433		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1434			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1435			alu.dst.chan = i;
1436		} else {
1437			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1438			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1439			alu.src[0].sel = ctx->temp_reg;
1440			alu.src[0].chan = i;
1441		}
1442		if (i == 3) {
1443			alu.last = 1;
1444		}
1445		r = r600_bc_add_alu(ctx->bc, &alu);
1446		if (r)
1447			return r;
1448	}
1449	return 0;
1450}
1451
1452static int tgsi_op3(struct r600_shader_ctx *ctx)
1453{
1454	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1455	struct r600_bc_alu alu;
1456	int i, j, r;
1457	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1458
1459	for (i = 0; i < lasti + 1; i++) {
1460		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1461			continue;
1462
1463		memset(&alu, 0, sizeof(struct r600_bc_alu));
1464		alu.inst = ctx->inst_info->r600_opcode;
1465		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1466			r600_bc_src(&alu.src[j], &ctx->src[j], i);
1467		}
1468
1469		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1470		alu.dst.chan = i;
1471		alu.dst.write = 1;
1472		alu.is_op3 = 1;
1473		if (i == lasti) {
1474			alu.last = 1;
1475		}
1476		r = r600_bc_add_alu(ctx->bc, &alu);
1477		if (r)
1478			return r;
1479	}
1480	return 0;
1481}
1482
1483static int tgsi_dp(struct r600_shader_ctx *ctx)
1484{
1485	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1486	struct r600_bc_alu alu;
1487	int i, j, r;
1488
1489	for (i = 0; i < 4; i++) {
1490		memset(&alu, 0, sizeof(struct r600_bc_alu));
1491		alu.inst = ctx->inst_info->r600_opcode;
1492		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1493			r600_bc_src(&alu.src[j], &ctx->src[j], i);
1494		}
1495
1496		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1497		alu.dst.chan = i;
1498		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1499		/* handle some special cases */
1500		switch (ctx->inst_info->tgsi_opcode) {
1501		case TGSI_OPCODE_DP2:
1502			if (i > 1) {
1503				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1504				alu.src[0].chan = alu.src[1].chan = 0;
1505			}
1506			break;
1507		case TGSI_OPCODE_DP3:
1508			if (i > 2) {
1509				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1510				alu.src[0].chan = alu.src[1].chan = 0;
1511			}
1512			break;
1513		case TGSI_OPCODE_DPH:
1514			if (i == 3) {
1515				alu.src[0].sel = V_SQ_ALU_SRC_1;
1516				alu.src[0].chan = 0;
1517				alu.src[0].neg = 0;
1518			}
1519			break;
1520		default:
1521			break;
1522		}
1523		if (i == 3) {
1524			alu.last = 1;
1525		}
1526		r = r600_bc_add_alu(ctx->bc, &alu);
1527		if (r)
1528			return r;
1529	}
1530	return 0;
1531}
1532
1533static int tgsi_tex(struct r600_shader_ctx *ctx)
1534{
1535	static float one_point_five = 1.5f;
1536	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1537	struct r600_bc_tex tex;
1538	struct r600_bc_alu alu;
1539	unsigned src_gpr;
1540	int r, i;
1541	int opcode;
1542	/* Texture fetch instructions can only use gprs as source.
1543	 * Also they cannot negate the source or take the absolute value */
1544	const boolean src_requires_loading =
1545		(inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&
1546		inst->Src[0].Register.File != TGSI_FILE_INPUT) ||
1547		ctx->src[0].neg || ctx->src[0].abs;
1548	boolean src_loaded = FALSE;
1549
1550	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1551
1552	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1553		/* Add perspective divide */
1554		memset(&alu, 0, sizeof(struct r600_bc_alu));
1555		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1556		r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1557
1558		alu.dst.sel = ctx->temp_reg;
1559		alu.dst.chan = 3;
1560		alu.last = 1;
1561		alu.dst.write = 1;
1562		r = r600_bc_add_alu(ctx->bc, &alu);
1563		if (r)
1564			return r;
1565
1566		for (i = 0; i < 3; i++) {
1567			memset(&alu, 0, sizeof(struct r600_bc_alu));
1568			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1569			alu.src[0].sel = ctx->temp_reg;
1570			alu.src[0].chan = 3;
1571			r600_bc_src(&alu.src[1], &ctx->src[0], i);
1572			alu.dst.sel = ctx->temp_reg;
1573			alu.dst.chan = i;
1574			alu.dst.write = 1;
1575			r = r600_bc_add_alu(ctx->bc, &alu);
1576			if (r)
1577				return r;
1578		}
1579		memset(&alu, 0, sizeof(struct r600_bc_alu));
1580		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1581		alu.src[0].sel = V_SQ_ALU_SRC_1;
1582		alu.src[0].chan = 0;
1583		alu.dst.sel = ctx->temp_reg;
1584		alu.dst.chan = 3;
1585		alu.last = 1;
1586		alu.dst.write = 1;
1587		r = r600_bc_add_alu(ctx->bc, &alu);
1588		if (r)
1589			return r;
1590		src_loaded = TRUE;
1591		src_gpr = ctx->temp_reg;
1592	}
1593
1594	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1595		static const unsigned src0_swizzle[] = {2, 2, 0, 1};
1596		static const unsigned src1_swizzle[] = {1, 0, 2, 2};
1597
1598		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1599		for (i = 0; i < 4; i++) {
1600			memset(&alu, 0, sizeof(struct r600_bc_alu));
1601			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1602			r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
1603			r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
1604			alu.dst.sel = ctx->temp_reg;
1605			alu.dst.chan = i;
1606			if (i == 3)
1607				alu.last = 1;
1608			alu.dst.write = 1;
1609			r = r600_bc_add_alu(ctx->bc, &alu);
1610			if (r)
1611				return r;
1612		}
1613
1614		/* tmp1.z = RCP_e(|tmp1.z|) */
1615		memset(&alu, 0, sizeof(struct r600_bc_alu));
1616		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1617		alu.src[0].sel = ctx->temp_reg;
1618		alu.src[0].chan = 2;
1619		alu.src[0].abs = 1;
1620		alu.dst.sel = ctx->temp_reg;
1621		alu.dst.chan = 2;
1622		alu.dst.write = 1;
1623		alu.last = 1;
1624		r = r600_bc_add_alu(ctx->bc, &alu);
1625		if (r)
1626			return r;
1627
1628		/* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
1629		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
1630		 * muladd has no writemask, have to use another temp
1631		 */
1632		memset(&alu, 0, sizeof(struct r600_bc_alu));
1633		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1634		alu.is_op3 = 1;
1635
1636		alu.src[0].sel = ctx->temp_reg;
1637		alu.src[0].chan = 0;
1638		alu.src[1].sel = ctx->temp_reg;
1639		alu.src[1].chan = 2;
1640
1641		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1642		alu.src[2].chan = 0;
1643		alu.src[2].value = *(uint32_t *)&one_point_five;
1644
1645		alu.dst.sel = ctx->temp_reg;
1646		alu.dst.chan = 0;
1647		alu.dst.write = 1;
1648
1649		r = r600_bc_add_alu(ctx->bc, &alu);
1650		if (r)
1651			return r;
1652
1653		memset(&alu, 0, sizeof(struct r600_bc_alu));
1654		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1655		alu.is_op3 = 1;
1656
1657		alu.src[0].sel = ctx->temp_reg;
1658		alu.src[0].chan = 1;
1659		alu.src[1].sel = ctx->temp_reg;
1660		alu.src[1].chan = 2;
1661
1662		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1663		alu.src[2].chan = 0;
1664		alu.src[2].value = *(uint32_t *)&one_point_five;
1665
1666		alu.dst.sel = ctx->temp_reg;
1667		alu.dst.chan = 1;
1668		alu.dst.write = 1;
1669
1670		alu.last = 1;
1671		r = r600_bc_add_alu(ctx->bc, &alu);
1672		if (r)
1673			return r;
1674
1675		src_loaded = TRUE;
1676		src_gpr = ctx->temp_reg;
1677	}
1678
1679	if (src_requires_loading && !src_loaded) {
1680		for (i = 0; i < 4; i++) {
1681			memset(&alu, 0, sizeof(struct r600_bc_alu));
1682			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1683			r600_bc_src(&alu.src[0], &ctx->src[0], i);
1684			alu.dst.sel = ctx->temp_reg;
1685			alu.dst.chan = i;
1686			if (i == 3)
1687				alu.last = 1;
1688			alu.dst.write = 1;
1689			r = r600_bc_add_alu(ctx->bc, &alu);
1690			if (r)
1691				return r;
1692		}
1693		src_loaded = TRUE;
1694		src_gpr = ctx->temp_reg;
1695	}
1696
1697	opcode = ctx->inst_info->r600_opcode;
1698	if (opcode == SQ_TEX_INST_SAMPLE &&
1699	    (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
1700		opcode = SQ_TEX_INST_SAMPLE_C;
1701
1702	memset(&tex, 0, sizeof(struct r600_bc_tex));
1703	tex.inst = opcode;
1704	tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1705	tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
1706	tex.src_gpr = src_gpr;
1707	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1708	tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
1709	tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
1710	tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
1711	tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
1712	if (src_loaded) {
1713		tex.src_sel_x = 0;
1714		tex.src_sel_y = 1;
1715		tex.src_sel_z = 2;
1716		tex.src_sel_w = 3;
1717	} else {
1718		tex.src_sel_x = ctx->src[0].swizzle[0];
1719		tex.src_sel_y = ctx->src[0].swizzle[1];
1720		tex.src_sel_z = ctx->src[0].swizzle[2];
1721		tex.src_sel_w = ctx->src[0].swizzle[3];
1722		tex.src_rel = ctx->src[0].rel;
1723	}
1724
1725	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1726		tex.src_sel_x = 1;
1727		tex.src_sel_y = 0;
1728		tex.src_sel_z = 3;
1729		tex.src_sel_w = 1;
1730	}
1731
1732	if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1733		tex.coord_type_x = 1;
1734		tex.coord_type_y = 1;
1735		tex.coord_type_z = 1;
1736		tex.coord_type_w = 1;
1737	}
1738
1739	if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) {
1740		tex.coord_type_z = 0;
1741		tex.src_sel_z = tex.src_sel_y;
1742	} else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY)
1743		tex.coord_type_z = 0;
1744
1745	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
1746		tex.src_sel_w = tex.src_sel_z;
1747
1748	r = r600_bc_add_tex(ctx->bc, &tex);
1749	if (r)
1750		return r;
1751
1752	/* add shadow ambient support  - gallium doesn't do it yet */
1753	return 0;
1754}
1755
1756static int tgsi_lrp(struct r600_shader_ctx *ctx)
1757{
1758	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1759	struct r600_bc_alu alu;
1760	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1761	unsigned i;
1762	int r;
1763
1764	/* optimize if it's just an equal balance */
1765	if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
1766		for (i = 0; i < lasti + 1; i++) {
1767			if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1768				continue;
1769
1770			memset(&alu, 0, sizeof(struct r600_bc_alu));
1771			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1772			r600_bc_src(&alu.src[0], &ctx->src[1], i);
1773			r600_bc_src(&alu.src[1], &ctx->src[2], i);
1774			alu.omod = 3;
1775			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1776			alu.dst.chan = i;
1777			if (i == lasti) {
1778				alu.last = 1;
1779			}
1780			r = r600_bc_add_alu(ctx->bc, &alu);
1781			if (r)
1782				return r;
1783		}
1784		return 0;
1785	}
1786
1787	/* 1 - src0 */
1788	for (i = 0; i < lasti + 1; i++) {
1789		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1790			continue;
1791
1792		memset(&alu, 0, sizeof(struct r600_bc_alu));
1793		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1794		alu.src[0].sel = V_SQ_ALU_SRC_1;
1795		alu.src[0].chan = 0;
1796		r600_bc_src(&alu.src[1], &ctx->src[0], i);
1797		alu.src[1].neg = 1;
1798		alu.dst.sel = ctx->temp_reg;
1799		alu.dst.chan = i;
1800		if (i == lasti) {
1801			alu.last = 1;
1802		}
1803		alu.dst.write = 1;
1804		r = r600_bc_add_alu(ctx->bc, &alu);
1805		if (r)
1806			return r;
1807	}
1808
1809	/* (1 - src0) * src2 */
1810	for (i = 0; i < lasti + 1; i++) {
1811		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1812			continue;
1813
1814		memset(&alu, 0, sizeof(struct r600_bc_alu));
1815		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1816		alu.src[0].sel = ctx->temp_reg;
1817		alu.src[0].chan = i;
1818		r600_bc_src(&alu.src[1], &ctx->src[2], i);
1819		alu.dst.sel = ctx->temp_reg;
1820		alu.dst.chan = i;
1821		if (i == lasti) {
1822			alu.last = 1;
1823		}
1824		alu.dst.write = 1;
1825		r = r600_bc_add_alu(ctx->bc, &alu);
1826		if (r)
1827			return r;
1828	}
1829
1830	/* src0 * src1 + (1 - src0) * src2 */
1831	for (i = 0; i < lasti + 1; i++) {
1832		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1833			continue;
1834
1835		memset(&alu, 0, sizeof(struct r600_bc_alu));
1836		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1837		alu.is_op3 = 1;
1838		r600_bc_src(&alu.src[0], &ctx->src[0], i);
1839		r600_bc_src(&alu.src[1], &ctx->src[1], i);
1840		alu.src[2].sel = ctx->temp_reg;
1841		alu.src[2].chan = i;
1842
1843		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1844		alu.dst.chan = i;
1845		if (i == lasti) {
1846			alu.last = 1;
1847		}
1848		r = r600_bc_add_alu(ctx->bc, &alu);
1849		if (r)
1850			return r;
1851	}
1852	return 0;
1853}
1854
1855static int tgsi_cmp(struct r600_shader_ctx *ctx)
1856{
1857	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1858	struct r600_bc_alu alu;
1859	int i, r;
1860	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1861
1862	for (i = 0; i < lasti + 1; i++) {
1863		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1864			continue;
1865
1866		memset(&alu, 0, sizeof(struct r600_bc_alu));
1867		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
1868		r600_bc_src(&alu.src[0], &ctx->src[0], i);
1869		r600_bc_src(&alu.src[1], &ctx->src[2], i);
1870		r600_bc_src(&alu.src[2], &ctx->src[1], i);
1871		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1872		alu.dst.chan = i;
1873		alu.dst.write = 1;
1874		alu.is_op3 = 1;
1875		if (i == lasti)
1876			alu.last = 1;
1877		r = r600_bc_add_alu(ctx->bc, &alu);
1878		if (r)
1879			return r;
1880	}
1881	return 0;
1882}
1883
1884static int tgsi_xpd(struct r600_shader_ctx *ctx)
1885{
1886	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1887	static const unsigned int src0_swizzle[] = {2, 0, 1};
1888	static const unsigned int src1_swizzle[] = {1, 2, 0};
1889	struct r600_bc_alu alu;
1890	uint32_t use_temp = 0;
1891	int i, r;
1892
1893	if (inst->Dst[0].Register.WriteMask != 0xf)
1894		use_temp = 1;
1895
1896	for (i = 0; i < 4; i++) {
1897		memset(&alu, 0, sizeof(struct r600_bc_alu));
1898		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1899		if (i < 3) {
1900			r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
1901			r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
1902		} else {
1903			alu.src[0].sel = V_SQ_ALU_SRC_0;
1904			alu.src[0].chan = i;
1905			alu.src[1].sel = V_SQ_ALU_SRC_0;
1906			alu.src[1].chan = i;
1907		}
1908
1909		alu.dst.sel = ctx->temp_reg;
1910		alu.dst.chan = i;
1911		alu.dst.write = 1;
1912
1913		if (i == 3)
1914			alu.last = 1;
1915		r = r600_bc_add_alu(ctx->bc, &alu);
1916		if (r)
1917			return r;
1918	}
1919
1920	for (i = 0; i < 4; i++) {
1921		memset(&alu, 0, sizeof(struct r600_bc_alu));
1922		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1923
1924		if (i < 3) {
1925			r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
1926			r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
1927		} else {
1928			alu.src[0].sel = V_SQ_ALU_SRC_0;
1929			alu.src[0].chan = i;
1930			alu.src[1].sel = V_SQ_ALU_SRC_0;
1931			alu.src[1].chan = i;
1932		}
1933
1934		alu.src[2].sel = ctx->temp_reg;
1935		alu.src[2].neg = 1;
1936		alu.src[2].chan = i;
1937
1938		if (use_temp)
1939			alu.dst.sel = ctx->temp_reg;
1940		else
1941			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1942		alu.dst.chan = i;
1943		alu.dst.write = 1;
1944		alu.is_op3 = 1;
1945		if (i == 3)
1946			alu.last = 1;
1947		r = r600_bc_add_alu(ctx->bc, &alu);
1948		if (r)
1949			return r;
1950	}
1951	if (use_temp)
1952		return tgsi_helper_copy(ctx, inst);
1953	return 0;
1954}
1955
1956static int tgsi_exp(struct r600_shader_ctx *ctx)
1957{
1958	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1959	struct r600_bc_alu alu;
1960	int r;
1961
1962	/* result.x = 2^floor(src); */
1963	if (inst->Dst[0].Register.WriteMask & 1) {
1964		memset(&alu, 0, sizeof(struct r600_bc_alu));
1965
1966		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
1967		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1968
1969		alu.dst.sel = ctx->temp_reg;
1970		alu.dst.chan = 0;
1971		alu.dst.write = 1;
1972		alu.last = 1;
1973		r = r600_bc_add_alu(ctx->bc, &alu);
1974		if (r)
1975			return r;
1976
1977		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1978		alu.src[0].sel = ctx->temp_reg;
1979		alu.src[0].chan = 0;
1980
1981		alu.dst.sel = ctx->temp_reg;
1982		alu.dst.chan = 0;
1983		alu.dst.write = 1;
1984		alu.last = 1;
1985		r = r600_bc_add_alu(ctx->bc, &alu);
1986		if (r)
1987			return r;
1988	}
1989
1990	/* result.y = tmp - floor(tmp); */
1991	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
1992		memset(&alu, 0, sizeof(struct r600_bc_alu));
1993
1994		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1995		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1996
1997		alu.dst.sel = ctx->temp_reg;
1998#if 0
1999		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2000		if (r)
2001			return r;
2002#endif
2003		alu.dst.write = 1;
2004		alu.dst.chan = 1;
2005
2006		alu.last = 1;
2007
2008		r = r600_bc_add_alu(ctx->bc, &alu);
2009		if (r)
2010			return r;
2011	}
2012
2013	/* result.z = RoughApprox2ToX(tmp);*/
2014	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2015		memset(&alu, 0, sizeof(struct r600_bc_alu));
2016		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2017		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2018
2019		alu.dst.sel = ctx->temp_reg;
2020		alu.dst.write = 1;
2021		alu.dst.chan = 2;
2022
2023		alu.last = 1;
2024
2025		r = r600_bc_add_alu(ctx->bc, &alu);
2026		if (r)
2027			return r;
2028	}
2029
2030	/* result.w = 1.0;*/
2031	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2032		memset(&alu, 0, sizeof(struct r600_bc_alu));
2033
2034		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2035		alu.src[0].sel = V_SQ_ALU_SRC_1;
2036		alu.src[0].chan = 0;
2037
2038		alu.dst.sel = ctx->temp_reg;
2039		alu.dst.chan = 3;
2040		alu.dst.write = 1;
2041		alu.last = 1;
2042		r = r600_bc_add_alu(ctx->bc, &alu);
2043		if (r)
2044			return r;
2045	}
2046	return tgsi_helper_copy(ctx, inst);
2047}
2048
2049static int tgsi_log(struct r600_shader_ctx *ctx)
2050{
2051	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2052	struct r600_bc_alu alu;
2053	int r;
2054
2055	/* result.x = floor(log2(src)); */
2056	if (inst->Dst[0].Register.WriteMask & 1) {
2057		memset(&alu, 0, sizeof(struct r600_bc_alu));
2058
2059		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2060		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2061
2062		alu.dst.sel = ctx->temp_reg;
2063		alu.dst.chan = 0;
2064		alu.dst.write = 1;
2065		alu.last = 1;
2066		r = r600_bc_add_alu(ctx->bc, &alu);
2067		if (r)
2068			return r;
2069
2070		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2071		alu.src[0].sel = ctx->temp_reg;
2072		alu.src[0].chan = 0;
2073
2074		alu.dst.sel = ctx->temp_reg;
2075		alu.dst.chan = 0;
2076		alu.dst.write = 1;
2077		alu.last = 1;
2078
2079		r = r600_bc_add_alu(ctx->bc, &alu);
2080		if (r)
2081			return r;
2082	}
2083
2084	/* result.y = src.x / (2 ^ floor(log2(src.x))); */
2085	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2086		memset(&alu, 0, sizeof(struct r600_bc_alu));
2087
2088		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2089		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2090
2091		alu.dst.sel = ctx->temp_reg;
2092		alu.dst.chan = 1;
2093		alu.dst.write = 1;
2094		alu.last = 1;
2095
2096		r = r600_bc_add_alu(ctx->bc, &alu);
2097		if (r)
2098			return r;
2099
2100		memset(&alu, 0, sizeof(struct r600_bc_alu));
2101
2102		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2103		alu.src[0].sel = ctx->temp_reg;
2104		alu.src[0].chan = 1;
2105
2106		alu.dst.sel = ctx->temp_reg;
2107		alu.dst.chan = 1;
2108		alu.dst.write = 1;
2109		alu.last = 1;
2110
2111		r = r600_bc_add_alu(ctx->bc, &alu);
2112		if (r)
2113			return r;
2114
2115		memset(&alu, 0, sizeof(struct r600_bc_alu));
2116
2117		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2118		alu.src[0].sel = ctx->temp_reg;
2119		alu.src[0].chan = 1;
2120
2121		alu.dst.sel = ctx->temp_reg;
2122		alu.dst.chan = 1;
2123		alu.dst.write = 1;
2124		alu.last = 1;
2125
2126		r = r600_bc_add_alu(ctx->bc, &alu);
2127		if (r)
2128			return r;
2129
2130		memset(&alu, 0, sizeof(struct r600_bc_alu));
2131
2132		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2133		alu.src[0].sel = ctx->temp_reg;
2134		alu.src[0].chan = 1;
2135
2136		alu.dst.sel = ctx->temp_reg;
2137		alu.dst.chan = 1;
2138		alu.dst.write = 1;
2139		alu.last = 1;
2140
2141		r = r600_bc_add_alu(ctx->bc, &alu);
2142		if (r)
2143			return r;
2144
2145		memset(&alu, 0, sizeof(struct r600_bc_alu));
2146
2147		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2148
2149		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2150
2151		alu.src[1].sel = ctx->temp_reg;
2152		alu.src[1].chan = 1;
2153
2154		alu.dst.sel = ctx->temp_reg;
2155		alu.dst.chan = 1;
2156		alu.dst.write = 1;
2157		alu.last = 1;
2158
2159		r = r600_bc_add_alu(ctx->bc, &alu);
2160		if (r)
2161			return r;
2162	}
2163
2164	/* result.z = log2(src);*/
2165	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2166		memset(&alu, 0, sizeof(struct r600_bc_alu));
2167
2168		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2169		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2170
2171		alu.dst.sel = ctx->temp_reg;
2172		alu.dst.write = 1;
2173		alu.dst.chan = 2;
2174		alu.last = 1;
2175
2176		r = r600_bc_add_alu(ctx->bc, &alu);
2177		if (r)
2178			return r;
2179	}
2180
2181	/* result.w = 1.0; */
2182	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2183		memset(&alu, 0, sizeof(struct r600_bc_alu));
2184
2185		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2186		alu.src[0].sel = V_SQ_ALU_SRC_1;
2187		alu.src[0].chan = 0;
2188
2189		alu.dst.sel = ctx->temp_reg;
2190		alu.dst.chan = 3;
2191		alu.dst.write = 1;
2192		alu.last = 1;
2193
2194		r = r600_bc_add_alu(ctx->bc, &alu);
2195		if (r)
2196			return r;
2197	}
2198
2199	return tgsi_helper_copy(ctx, inst);
2200}
2201
2202static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2203{
2204	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2205	struct r600_bc_alu alu;
2206	int r;
2207
2208	memset(&alu, 0, sizeof(struct r600_bc_alu));
2209
2210	switch (inst->Instruction.Opcode) {
2211	case TGSI_OPCODE_ARL:
2212		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2213		break;
2214	case TGSI_OPCODE_ARR:
2215		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2216		break;
2217	default:
2218		assert(0);
2219		return -1;
2220	}
2221
2222	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2223	alu.last = 1;
2224	alu.dst.sel = ctx->ar_reg;
2225	alu.dst.write = 1;
2226	r = r600_bc_add_alu(ctx->bc, &alu);
2227	if (r)
2228		return r;
2229
2230	/* TODO: Note that the MOVA can be avoided if we never use AR for
2231	 * indexing non-CB registers in the current ALU clause. Similarly, we
2232	 * need to load AR from ar_reg again if we started a new clause
2233	 * between ARL and AR usage. The easy way to do that is to remove
2234	 * the MOVA here, and load it for the first AR access after ar_reg
2235	 * has been modified in each clause. */
2236	memset(&alu, 0, sizeof(struct r600_bc_alu));
2237	alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2238	alu.src[0].sel = ctx->ar_reg;
2239	alu.src[0].chan = 0;
2240	alu.last = 1;
2241	r = r600_bc_add_alu(ctx->bc, &alu);
2242	if (r)
2243		return r;
2244	return 0;
2245}
2246static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2247{
2248	/* TODO from r600c, ar values don't persist between clauses */
2249	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2250	struct r600_bc_alu alu;
2251	int r;
2252
2253	switch (inst->Instruction.Opcode) {
2254	case TGSI_OPCODE_ARL:
2255		memset(&alu, 0, sizeof(alu));
2256		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
2257		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2258		alu.dst.sel = ctx->ar_reg;
2259		alu.dst.write = 1;
2260		alu.last = 1;
2261
2262		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2263			return r;
2264
2265		memset(&alu, 0, sizeof(alu));
2266		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2267		alu.src[0].sel = ctx->ar_reg;
2268		alu.dst.sel = ctx->ar_reg;
2269		alu.dst.write = 1;
2270		alu.last = 1;
2271
2272		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2273			return r;
2274		break;
2275	case TGSI_OPCODE_ARR:
2276		memset(&alu, 0, sizeof(alu));
2277		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2278		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2279		alu.dst.sel = ctx->ar_reg;
2280		alu.dst.write = 1;
2281		alu.last = 1;
2282
2283		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2284			return r;
2285		break;
2286	default:
2287		assert(0);
2288		return -1;
2289	}
2290
2291	memset(&alu, 0, sizeof(alu));
2292	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2293	alu.src[0].sel = ctx->ar_reg;
2294	alu.last = 1;
2295
2296	r = r600_bc_add_alu(ctx->bc, &alu);
2297	if (r)
2298		return r;
2299	ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2300	return 0;
2301}
2302
2303static int tgsi_opdst(struct r600_shader_ctx *ctx)
2304{
2305	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2306	struct r600_bc_alu alu;
2307	int i, r = 0;
2308
2309	for (i = 0; i < 4; i++) {
2310		memset(&alu, 0, sizeof(struct r600_bc_alu));
2311
2312		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2313		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2314
2315		if (i == 0 || i == 3) {
2316			alu.src[0].sel = V_SQ_ALU_SRC_1;
2317		} else {
2318			r600_bc_src(&alu.src[0], &ctx->src[0], i);
2319		}
2320
2321		if (i == 0 || i == 2) {
2322			alu.src[1].sel = V_SQ_ALU_SRC_1;
2323		} else {
2324			r600_bc_src(&alu.src[1], &ctx->src[1], i);
2325		}
2326		if (i == 3)
2327			alu.last = 1;
2328		r = r600_bc_add_alu(ctx->bc, &alu);
2329		if (r)
2330			return r;
2331	}
2332	return 0;
2333}
2334
2335static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2336{
2337	struct r600_bc_alu alu;
2338	int r;
2339
2340	memset(&alu, 0, sizeof(struct r600_bc_alu));
2341	alu.inst = opcode;
2342	alu.predicate = 1;
2343
2344	alu.dst.sel = ctx->temp_reg;
2345	alu.dst.write = 1;
2346	alu.dst.chan = 0;
2347
2348	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2349	alu.src[1].sel = V_SQ_ALU_SRC_0;
2350	alu.src[1].chan = 0;
2351
2352	alu.last = 1;
2353
2354	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2355	if (r)
2356		return r;
2357	return 0;
2358}
2359
2360static int pops(struct r600_shader_ctx *ctx, int pops)
2361{
2362	int alu_pop = 3;
2363	if (ctx->bc->cf_last) {
2364		if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
2365			alu_pop = 0;
2366		else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
2367			alu_pop = 1;
2368	}
2369	alu_pop += pops;
2370	if (alu_pop == 1) {
2371		ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
2372		ctx->bc->force_add_cf = 1;
2373	} else if (alu_pop == 2) {
2374		ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
2375		ctx->bc->force_add_cf = 1;
2376	} else {
2377		r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2378		ctx->bc->cf_last->pop_count = pops;
2379		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
2380	}
2381	return 0;
2382}
2383
2384static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2385{
2386	switch(reason) {
2387	case FC_PUSH_VPM:
2388		ctx->bc->callstack[ctx->bc->call_sp].current--;
2389		break;
2390	case FC_PUSH_WQM:
2391	case FC_LOOP:
2392		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2393		break;
2394	case FC_REP:
2395		/* TOODO : for 16 vp asic should -= 2; */
2396		ctx->bc->callstack[ctx->bc->call_sp].current --;
2397		break;
2398	}
2399}
2400
2401static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2402{
2403	if (check_max_only) {
2404		int diff;
2405		switch (reason) {
2406		case FC_PUSH_VPM:
2407			diff = 1;
2408			break;
2409		case FC_PUSH_WQM:
2410			diff = 4;
2411			break;
2412		default:
2413			assert(0);
2414			diff = 0;
2415		}
2416		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2417		    ctx->bc->callstack[ctx->bc->call_sp].max) {
2418			ctx->bc->callstack[ctx->bc->call_sp].max =
2419				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2420		}
2421		return;
2422	}
2423	switch (reason) {
2424	case FC_PUSH_VPM:
2425		ctx->bc->callstack[ctx->bc->call_sp].current++;
2426		break;
2427	case FC_PUSH_WQM:
2428	case FC_LOOP:
2429		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2430		break;
2431	case FC_REP:
2432		ctx->bc->callstack[ctx->bc->call_sp].current++;
2433		break;
2434	}
2435
2436	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2437	    ctx->bc->callstack[ctx->bc->call_sp].max) {
2438		ctx->bc->callstack[ctx->bc->call_sp].max =
2439			ctx->bc->callstack[ctx->bc->call_sp].current;
2440	}
2441}
2442
2443static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2444{
2445	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2446
2447	sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2448						sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2449	sp->mid[sp->num_mid] = ctx->bc->cf_last;
2450	sp->num_mid++;
2451}
2452
2453static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2454{
2455	ctx->bc->fc_sp++;
2456	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2457	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2458}
2459
2460static void fc_poplevel(struct r600_shader_ctx *ctx)
2461{
2462	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2463	if (sp->mid) {
2464		free(sp->mid);
2465		sp->mid = NULL;
2466	}
2467	sp->num_mid = 0;
2468	sp->start = NULL;
2469	sp->type = 0;
2470	ctx->bc->fc_sp--;
2471}
2472
/*
 * Everything between this #if 0 and the matching #endif is compiled out.
 * It holds unfinished helpers for RETURN and flag-driven loop exits; several
 * of them are empty stubs.
 */
#if 0
/* Adds a CF RETURN instruction; always reports success. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/* Adds a CF JUMP with the given pop count; `offset` is not used yet. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: emits nothing. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

/* Chains the helpers above: test flag, jump, set flag, pop ifidx + 1 levels, return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Tests the flag, adds the CF instruction named by ctx->inst_info->r600_opcode
 * with pop_count 1, records it in the mid list of level fc_sp and pops once.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2520
2521static int tgsi_if(struct r600_shader_ctx *ctx)
2522{
2523	emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2524
2525	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2526
2527	fc_pushlevel(ctx, FC_IF);
2528
2529	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2530	return 0;
2531}
2532
2533static int tgsi_else(struct r600_shader_ctx *ctx)
2534{
2535	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2536	ctx->bc->cf_last->pop_count = 1;
2537
2538	fc_set_mid(ctx, ctx->bc->fc_sp);
2539	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2540	return 0;
2541}
2542
2543static int tgsi_endif(struct r600_shader_ctx *ctx)
2544{
2545	pops(ctx, 1);
2546	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2547		R600_ERR("if/endif unbalanced in shader\n");
2548		return -1;
2549	}
2550
2551	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2552		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2553		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2554	} else {
2555		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2556	}
2557	fc_poplevel(ctx);
2558
2559	callstack_decrease_current(ctx, FC_PUSH_VPM);
2560	return 0;
2561}
2562
2563static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2564{
2565	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2566
2567	fc_pushlevel(ctx, FC_LOOP);
2568
2569	/* check stack depth */
2570	callstack_check_depth(ctx, FC_LOOP, 0);
2571	return 0;
2572}
2573
2574static int tgsi_endloop(struct r600_shader_ctx *ctx)
2575{
2576	int i;
2577
2578	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
2579
2580	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2581		R600_ERR("loop/endloop in shader code are not paired.\n");
2582		return -EINVAL;
2583	}
2584
2585	/* fixup loop pointers - from r600isa
2586	   LOOP END points to CF after LOOP START,
2587	   LOOP START point to CF after LOOP END
2588	   BRK/CONT point to LOOP END CF
2589	*/
2590	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2591
2592	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2593
2594	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2595		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2596	}
2597	/* TODO add LOOPRET support */
2598	fc_poplevel(ctx);
2599	callstack_decrease_current(ctx, FC_LOOP);
2600	return 0;
2601}
2602
2603static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2604{
2605	unsigned int fscp;
2606
2607	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2608	{
2609		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2610			break;
2611	}
2612
2613	if (fscp == 0) {
2614		R600_ERR("Break not inside loop/endloop pair\n");
2615		return -EINVAL;
2616	}
2617
2618	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2619	ctx->bc->cf_last->pop_count = 1;
2620
2621	fc_set_mid(ctx, fscp);
2622
2623	pops(ctx, 1);
2624	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2625	return 0;
2626}
2627
/*
 * TGSI translation table built from the non-EG_ (V_SQ_* / SQ_TEX_*) opcode
 * encodings, with tgsi_r600_arl handling ARL/ARR.
 *
 * Each entry lists, in order: the TGSI opcode (bare numbers fill the rows
 * marked "gap", presumably opcode values with no TGSI name), a flag that is
 * 1 only for the OP3 MULADD entry, the hardware opcode associated with the
 * entry (an ALU, TEX or CF encoding depending on the handler), and the
 * handler function.  Opcodes without a translation point at
 * tgsi_unsupported with a NOP placeholder.
 *
 * NOTE(review): rows appear in ascending TGSI opcode order with explicit
 * gap fillers, which suggests the table is indexed directly by opcode --
 * confirm against the lookup code before inserting or removing rows.
 */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},

	/* FIXME:
	 * For state trackers other than OpenGL, we'll want to use
	 * _RECIP_IEEE instead.
	 */
	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},

	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
	/* gap */
	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_TXD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
	/* gap */
	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
	/* gap */
	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
	/* gap */
	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
};
2791
2792static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
2793	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
2794	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2795	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2796	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
2797	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
2798	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2799	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
2800	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2801	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2802	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2803	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2804	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2805	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2806	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2807	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2808	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2809	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2810	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2811	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2812	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2813	/* gap */
2814	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2815	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2816	/* gap */
2817	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2818	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2819	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2820	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2821	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2822	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2823	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2824	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2825	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2826	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2827	/* gap */
2828	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2829	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2830	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2831	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2832	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2833	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2834	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2835	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
2836	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2837	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2838	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2839	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2840	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2841	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2842	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2843	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2844	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2845	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2846	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2847	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2848	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2849	{TGSI_OPCODE_TXD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2850	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2851	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2852	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2853	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2854	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2855	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2856	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2857	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
2858	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2859	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2860	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2861	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2862	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2863	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
2864	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2865	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2866	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2867	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2868	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2869	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2870	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2871	/* gap */
2872	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2873	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2874	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2875	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2876	/* gap */
2877	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2878	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2879	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2880	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2881	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2882	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2883	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2884	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
2885	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2886	/* gap */
2887	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2888	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2889	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2890	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2891	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2892	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2893	{TGSI_OPCODE_TXF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2894	{TGSI_OPCODE_TXQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2895	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2896	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2897	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2898	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2899	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2900	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2901	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2902	/* gap */
2903	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2904	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2905	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2906	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2907	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2908	/* gap */
2909	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2910	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2911	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2912	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2913	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2914	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2915	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2916	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2917	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
2918	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
2919	/* gap */
2920	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2921	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2922	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2923	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2924	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2925	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2926	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2927	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2928	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2929	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2930	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2931	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2932	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2933	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2934	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2935	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2936	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2937	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2938	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2939	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2940	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2941	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2942	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2943	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2944	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2945	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2946	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2947	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2948};
2949