r600_shader.c revision 843dfe3206c4f397c7911b748373dde5540392a4
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "pipe/p_shader_tokens.h"
24#include "tgsi/tgsi_parse.h"
25#include "tgsi/tgsi_scan.h"
26#include "tgsi/tgsi_dump.h"
27#include "util/u_format.h"
28#include "r600_pipe.h"
29#include "r600_asm.h"
30#include "r600_sq.h"
31#include "r600_formats.h"
32#include "r600_opcodes.h"
33#include "r600d.h"
34#include <stdio.h>
35#include <errno.h>
36#include <byteswap.h>
37
/*
 * CPU_TO_LE32(x): when PIPE_ARCH_BIG_ENDIAN is defined the 32-bit value is
 * byte-swapped with bswap_32(); otherwise it expands to the value unchanged.
 * It is applied to every bytecode dword copied into the shader buffer
 * object in r600_pipe_shader().
 */
#ifdef PIPE_ARCH_BIG_ENDIAN
#define CPU_TO_LE32(x)	bswap_32(x)
#else
#define CPU_TO_LE32(x)	(x)
#endif
43
44int r600_find_vs_semantic_index(struct r600_shader *vs,
45				struct r600_shader *ps, int id)
46{
47	struct r600_shader_io *input = &ps->input[id];
48
49	for (int i = 0; i < vs->noutput; i++) {
50		if (input->name == vs->output[i].name &&
51			input->sid == vs->output[i].sid) {
52			return i - 1;
53		}
54	}
55	return 0;
56}
57
58static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
59{
60	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
61	struct r600_shader *rshader = &shader->shader;
62	uint32_t *ptr;
63	int	i;
64
65	/* copy new shader */
66	if (shader->bo == NULL) {
67		shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
68		if (shader->bo == NULL) {
69			return -ENOMEM;
70		}
71		ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
72		for(i = 0; i < rshader->bc.ndw; i++) {
73			*(ptr + i) = CPU_TO_LE32(*(rshader->bc.bytecode + i));
74		}
75		r600_bo_unmap(rctx->radeon, shader->bo);
76	}
77	/* build state */
78	switch (rshader->processor_type) {
79	case TGSI_PROCESSOR_VERTEX:
80		if (rshader->family >= CHIP_CEDAR) {
81			evergreen_pipe_shader_vs(ctx, shader);
82		} else {
83			r600_pipe_shader_vs(ctx, shader);
84		}
85		break;
86	case TGSI_PROCESSOR_FRAGMENT:
87		if (rshader->family >= CHIP_CEDAR) {
88			evergreen_pipe_shader_ps(ctx, shader);
89		} else {
90			r600_pipe_shader_ps(ctx, shader);
91		}
92		break;
93	default:
94		return -EINVAL;
95	}
96	return 0;
97}
98
99static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
100
101int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
102{
103	static int dump_shaders = -1;
104	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
105	int r;
106
107	/* Would like some magic "get_bool_option_once" routine.
108	*/
109	if (dump_shaders == -1)
110		dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
111
112	if (dump_shaders) {
113		fprintf(stderr, "--------------------------------------------------------------\n");
114		tgsi_dump(tokens, 0);
115	}
116	shader->shader.family = r600_get_family(rctx->radeon);
117	r = r600_shader_from_tgsi(tokens, &shader->shader);
118	if (r) {
119		R600_ERR("translation from TGSI failed !\n");
120		return r;
121	}
122	r = r600_bc_build(&shader->shader.bc);
123	if (r) {
124		R600_ERR("building bytecode failed !\n");
125		return r;
126	}
127	if (dump_shaders) {
128		r600_bc_dump(&shader->shader.bc);
129		fprintf(stderr, "______________________________________________________________\n");
130	}
131	return r600_pipe_shader(ctx, shader);
132}
133
134void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
135{
136	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
137
138	r600_bo_reference(rctx->radeon, &shader->bo, NULL);
139	r600_bc_clear(&shader->shader.bc);
140}
141
142/*
143 * tgsi -> r600 shader
144 */
145struct r600_shader_tgsi_instruction;
146
/*
 * One instruction source operand after translation from TGSI
 * (filled in by tgsi_src(), consumed by r600_bc_src()).
 */
struct r600_shader_src {
	/* register / constant selector (TGSI index plus the file offset,
	 * or a special V_SQ_ALU_SRC_* value) */
	unsigned				sel;
	/* per-channel swizzle copied from the TGSI SwizzleX..W fields */
	unsigned				swizzle[4];
	/* negate flag */
	unsigned				neg;
	/* absolute-value flag */
	unsigned				abs;
	/* V_SQ_REL_RELATIVE for indirectly addressed registers, else 0 */
	unsigned				rel;
	/* the four literal dwords, only filled for immediate sources */
	uint32_t				value[4];
};
155
/*
 * Working state of one TGSI -> r600 translation
 * (set up and owned by r600_shader_from_tgsi()).
 */
struct r600_shader_ctx {
	/* result of tgsi_scan_shader() on the token stream */
	struct tgsi_shader_info			info;
	/* TGSI parser state; holds the token currently being processed */
	struct tgsi_parse_context		parse;
	/* the token stream being translated */
	const struct tgsi_token			*tokens;
	/* processor type taken from the TGSI header */
	unsigned				type;
	/* value added to a TGSI register index, per register file */
	unsigned				file_offset[TGSI_FILE_COUNT];
	/* first driver temporary register (ar_reg + 1) */
	unsigned				temp_reg;
	/* register placed right after the TGSI temporaries; used as the
	 * index source when fetching relatively addressed constants */
	unsigned				ar_reg;
	/* opcode table entry of the instruction being translated */
	struct r600_shader_tgsi_instruction	*inst_info;
	/* bytecode being generated (points into the shader) */
	struct r600_bc				*bc;
	/* shader being filled in */
	struct r600_shader			*shader;
	/* translated sources of the current instruction */
	struct r600_shader_src			src[3];
	/* immediates seen so far, four dwords each */
	u32					*literals;
	/* number of immediates stored in literals */
	u32					nliterals;
	/* driver temporaries handed out by r600_get_temp() for the
	 * current instruction; reset for every instruction */
	u32					max_driver_temp_used;
	/* needed for evergreen interpolation */
	boolean                                 input_centroid;
	boolean                                 input_linear;
	boolean                                 input_perspective;
	/* GPR count computed by evergreen_gpr_count() */
	int					num_interp_gpr;
};
177
/*
 * Entry of the opcode translation tables (indexed by TGSI opcode in
 * r600_shader_from_tgsi()).
 */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry describes */
	unsigned	tgsi_opcode;
	/* NOTE(review): presumably non-zero when r600_opcode is an OP3
	 * encoding -- not read in the visible part of the file */
	unsigned	is_op3;
	/* hardware opcode emitted by the generic handlers */
	unsigned	r600_opcode;
	/* handler emitting the bytecode; returns 0 or a negative errno */
	int (*process)(struct r600_shader_ctx *ctx);
};
184
185static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[];
186static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
187
188static int tgsi_is_supported(struct r600_shader_ctx *ctx)
189{
190	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
191	int j;
192
193	if (i->Instruction.NumDstRegs > 1) {
194		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
195		return -EINVAL;
196	}
197	if (i->Instruction.Predicate) {
198		R600_ERR("predicate unsupported\n");
199		return -EINVAL;
200	}
201#if 0
202	if (i->Instruction.Label) {
203		R600_ERR("label unsupported\n");
204		return -EINVAL;
205	}
206#endif
207	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
208		if (i->Src[j].Register.Dimension) {
209			R600_ERR("unsupported src %d (dimension %d)\n", j,
210				 i->Src[j].Register.Dimension);
211			return -EINVAL;
212		}
213	}
214	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
215		if (i->Dst[j].Register.Dimension) {
216			R600_ERR("unsupported dst (dimension)\n");
217			return -EINVAL;
218		}
219	}
220	return 0;
221}
222
223static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
224{
225	int i, r;
226	struct r600_bc_alu alu;
227	int gpr = 0, base_chan = 0;
228	int ij_index = 0;
229
230	if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
231		ij_index = 0;
232		if (ctx->shader->input[input].centroid)
233			ij_index++;
234	} else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
235		ij_index = 0;
236		/* if we have perspective add one */
237		if (ctx->input_perspective)  {
238			ij_index++;
239			/* if we have perspective centroid */
240			if (ctx->input_centroid)
241				ij_index++;
242		}
243		if (ctx->shader->input[input].centroid)
244			ij_index++;
245	}
246
247	/* work out gpr and base_chan from index */
248	gpr = ij_index / 2;
249	base_chan = (2 * (ij_index % 2)) + 1;
250
251	for (i = 0; i < 8; i++) {
252		memset(&alu, 0, sizeof(struct r600_bc_alu));
253
254		if (i < 4)
255			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
256		else
257			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
258
259		if ((i > 1) && (i < 6)) {
260			alu.dst.sel = ctx->shader->input[input].gpr;
261			alu.dst.write = 1;
262		}
263
264		alu.dst.chan = i % 4;
265
266		alu.src[0].sel = gpr;
267		alu.src[0].chan = (base_chan - (i % 2));
268
269		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
270
271		alu.bank_swizzle_force = SQ_ALU_VEC_210;
272		if ((i % 4) == 3)
273			alu.last = 1;
274		r = r600_bc_add_alu(ctx->bc, &alu);
275		if (r)
276			return r;
277	}
278	return 0;
279}
280
281
282static int tgsi_declaration(struct r600_shader_ctx *ctx)
283{
284	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
285	unsigned i;
286	int r;
287
288	switch (d->Declaration.File) {
289	case TGSI_FILE_INPUT:
290		i = ctx->shader->ninput++;
291		ctx->shader->input[i].name = d->Semantic.Name;
292		ctx->shader->input[i].sid = d->Semantic.Index;
293		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
294		ctx->shader->input[i].centroid = d->Declaration.Centroid;
295		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
296		if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
297			/* turn input into interpolate on EG */
298			if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
299				if (ctx->shader->input[i].interpolate > 0) {
300					ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
301					evergreen_interp_alu(ctx, i);
302				}
303			}
304		}
305		break;
306	case TGSI_FILE_OUTPUT:
307		i = ctx->shader->noutput++;
308		ctx->shader->output[i].name = d->Semantic.Name;
309		ctx->shader->output[i].sid = d->Semantic.Index;
310		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
311		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
312		break;
313	case TGSI_FILE_CONSTANT:
314	case TGSI_FILE_TEMPORARY:
315	case TGSI_FILE_SAMPLER:
316	case TGSI_FILE_ADDRESS:
317		break;
318
319	case TGSI_FILE_SYSTEM_VALUE:
320		if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
321			struct r600_bc_alu alu;
322			memset(&alu, 0, sizeof(struct r600_bc_alu));
323
324			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
325			alu.src[0].sel = 0;
326			alu.src[0].chan = 3;
327
328			alu.dst.sel = 0;
329			alu.dst.chan = 3;
330			alu.dst.write = 1;
331			alu.last = 1;
332
333			if ((r = r600_bc_add_alu(ctx->bc, &alu)))
334				return r;
335			break;
336		}
337
338	default:
339		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
340		return -EINVAL;
341	}
342	return 0;
343}
344
345static int r600_get_temp(struct r600_shader_ctx *ctx)
346{
347	return ctx->temp_reg + ctx->max_driver_temp_used++;
348}
349
350/*
351 * for evergreen we need to scan the shader to find the number of GPRs we need to
352 * reserve for interpolation.
353 *
354 * we need to know if we are going to emit
355 * any centroid inputs
356 * if perspective and linear are required
357*/
358static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
359{
360	int i;
361	int num_baryc;
362
363	ctx->input_linear = FALSE;
364	ctx->input_perspective = FALSE;
365	ctx->input_centroid = FALSE;
366	ctx->num_interp_gpr = 1;
367
368	/* any centroid inputs */
369	for (i = 0; i < ctx->info.num_inputs; i++) {
370		/* skip position/face */
371		if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
372		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
373			continue;
374		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
375			ctx->input_linear = TRUE;
376		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
377			ctx->input_perspective = TRUE;
378		if (ctx->info.input_centroid[i])
379			ctx->input_centroid = TRUE;
380	}
381
382	num_baryc = 0;
383	/* ignoring sample for now */
384	if (ctx->input_perspective)
385		num_baryc++;
386	if (ctx->input_linear)
387		num_baryc++;
388	if (ctx->input_centroid)
389		num_baryc *= 2;
390
391	ctx->num_interp_gpr += (num_baryc + 1) >> 1;
392
393	/* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
394	return ctx->num_interp_gpr;
395}
396
397static void tgsi_src(struct r600_shader_ctx *ctx,
398		     const struct tgsi_full_src_register *tgsi_src,
399		     struct r600_shader_src *r600_src)
400{
401	memset(r600_src, 0, sizeof(*r600_src));
402	r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
403	r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
404	r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
405	r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
406	r600_src->neg = tgsi_src->Register.Negate;
407	r600_src->abs = tgsi_src->Register.Absolute;
408
409	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
410		int index;
411		if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
412			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
413			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
414
415			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
416			r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
417			if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
418				return;
419		}
420		index = tgsi_src->Register.Index;
421		r600_src->sel = V_SQ_ALU_SRC_LITERAL;
422		memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
423	} else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
424		/* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
425		r600_src->swizzle[0] = 3;
426		r600_src->swizzle[1] = 3;
427		r600_src->swizzle[2] = 3;
428		r600_src->swizzle[3] = 3;
429		r600_src->sel = 0;
430	} else {
431		if (tgsi_src->Register.Indirect)
432			r600_src->rel = V_SQ_REL_RELATIVE;
433		r600_src->sel = tgsi_src->Register.Index;
434		r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
435	}
436}
437
438static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
439{
440	struct r600_bc_vtx vtx;
441	unsigned int ar_reg;
442	int r;
443
444	if (offset) {
445		struct r600_bc_alu alu;
446
447		memset(&alu, 0, sizeof(alu));
448
449		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
450		alu.src[0].sel = ctx->ar_reg;
451
452		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
453		alu.src[1].value = offset;
454
455		alu.dst.sel = dst_reg;
456		alu.dst.write = 1;
457		alu.last = 1;
458
459		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
460			return r;
461
462		ar_reg = dst_reg;
463	} else {
464		ar_reg = ctx->ar_reg;
465	}
466
467	memset(&vtx, 0, sizeof(vtx));
468	vtx.fetch_type = 2;		/* VTX_FETCH_NO_INDEX_OFFSET */
469	vtx.src_gpr = ar_reg;
470	vtx.mega_fetch_count = 16;
471	vtx.dst_gpr = dst_reg;
472	vtx.dst_sel_x = 0;		/* SEL_X */
473	vtx.dst_sel_y = 1;		/* SEL_Y */
474	vtx.dst_sel_z = 2;		/* SEL_Z */
475	vtx.dst_sel_w = 3;		/* SEL_W */
476	vtx.data_format = FMT_32_32_32_32_FLOAT;
477	vtx.num_format_all = 2;		/* NUM_FORMAT_SCALED */
478	vtx.format_comp_all = 1;	/* FORMAT_COMP_SIGNED */
479	vtx.srf_mode_all = 1;		/* SRF_MODE_NO_ZERO */
480#ifdef PIPE_ARCH_BIG_ENDIAN
481	vtx.endian = ENDIAN_8IN32;
482#else
483	vtx.endian = ENDIAN_NONE;
484#endif
485
486	if ((r = r600_bc_add_vtx(ctx->bc, &vtx)))
487		return r;
488
489	return 0;
490}
491
492static int tgsi_split_constant(struct r600_shader_ctx *ctx)
493{
494	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
495	struct r600_bc_alu alu;
496	int i, j, k, nconst, r;
497
498	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
499		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
500			nconst++;
501		}
502		tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
503	}
504	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
505		if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
506			continue;
507		}
508
509		if (ctx->src[i].rel) {
510			int treg = r600_get_temp(ctx);
511			if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
512				return r;
513
514			ctx->src[i].sel = treg;
515			ctx->src[i].rel = 0;
516			j--;
517		} else if (j > 0) {
518			int treg = r600_get_temp(ctx);
519			for (k = 0; k < 4; k++) {
520				memset(&alu, 0, sizeof(struct r600_bc_alu));
521				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
522				alu.src[0].sel = ctx->src[i].sel;
523				alu.src[0].chan = k;
524				alu.src[0].rel = ctx->src[i].rel;
525				alu.dst.sel = treg;
526				alu.dst.chan = k;
527				alu.dst.write = 1;
528				if (k == 3)
529					alu.last = 1;
530				r = r600_bc_add_alu(ctx->bc, &alu);
531				if (r)
532					return r;
533			}
534			ctx->src[i].sel = treg;
535			ctx->src[i].rel =0;
536			j--;
537		}
538	}
539	return 0;
540}
541
542/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
543static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
544{
545	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
546	struct r600_bc_alu alu;
547	int i, j, k, nliteral, r;
548
549	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
550		if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
551			nliteral++;
552		}
553	}
554	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
555		if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
556			int treg = r600_get_temp(ctx);
557			for (k = 0; k < 4; k++) {
558				memset(&alu, 0, sizeof(struct r600_bc_alu));
559				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
560				alu.src[0].sel = ctx->src[i].sel;
561				alu.src[0].chan = k;
562				alu.src[0].value = ctx->src[i].value[k];
563				alu.dst.sel = treg;
564				alu.dst.chan = k;
565				alu.dst.write = 1;
566				if (k == 3)
567					alu.last = 1;
568				r = r600_bc_add_alu(ctx->bc, &alu);
569				if (r)
570					return r;
571			}
572			ctx->src[i].sel = treg;
573			j--;
574		}
575	}
576	return 0;
577}
578
579static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
580{
581	struct tgsi_full_immediate *immediate;
582	struct tgsi_full_property *property;
583	struct r600_shader_ctx ctx;
584	struct r600_bc_output output[32];
585	unsigned output_done, noutput;
586	unsigned opcode;
587	int i, r = 0, pos0;
588
589	ctx.bc = &shader->bc;
590	ctx.shader = shader;
591	r = r600_bc_init(ctx.bc, shader->family);
592	if (r)
593		return r;
594	ctx.tokens = tokens;
595	tgsi_scan_shader(tokens, &ctx.info);
596	tgsi_parse_init(&ctx.parse, tokens);
597	ctx.type = ctx.parse.FullHeader.Processor.Processor;
598	shader->processor_type = ctx.type;
599	ctx.bc->type = shader->processor_type;
600
601	/* register allocations */
602	/* Values [0,127] correspond to GPR[0..127].
603	 * Values [128,159] correspond to constant buffer bank 0
604	 * Values [160,191] correspond to constant buffer bank 1
605	 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
606	 * Values [256,287] correspond to constant buffer bank 2 (EG)
607	 * Values [288,319] correspond to constant buffer bank 3 (EG)
608	 * Other special values are shown in the list below.
609	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
610	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
611	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
612	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
613	 * 248	SQ_ALU_SRC_0: special constant 0.0.
614	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
615	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
616	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
617	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
618	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
619	 * 254	SQ_ALU_SRC_PV: previous vector result.
620	 * 255	SQ_ALU_SRC_PS: previous scalar result.
621	 */
622	for (i = 0; i < TGSI_FILE_COUNT; i++) {
623		ctx.file_offset[i] = 0;
624	}
625	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
626		ctx.file_offset[TGSI_FILE_INPUT] = 1;
627		if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
628			r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
629		} else {
630			r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
631		}
632	}
633	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) {
634		ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
635	}
636	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
637						ctx.info.file_count[TGSI_FILE_INPUT];
638	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
639						ctx.info.file_count[TGSI_FILE_OUTPUT];
640
641	/* Outside the GPR range. This will be translated to one of the
642	 * kcache banks later. */
643	ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
644
645	ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
646	ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
647			ctx.info.file_count[TGSI_FILE_TEMPORARY];
648	ctx.temp_reg = ctx.ar_reg + 1;
649
650	ctx.nliterals = 0;
651	ctx.literals = NULL;
652	shader->fs_write_all = FALSE;
653	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
654		tgsi_parse_token(&ctx.parse);
655		switch (ctx.parse.FullToken.Token.Type) {
656		case TGSI_TOKEN_TYPE_IMMEDIATE:
657			immediate = &ctx.parse.FullToken.FullImmediate;
658			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
659			if(ctx.literals == NULL) {
660				r = -ENOMEM;
661				goto out_err;
662			}
663			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
664			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
665			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
666			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
667			ctx.nliterals++;
668			break;
669		case TGSI_TOKEN_TYPE_DECLARATION:
670			r = tgsi_declaration(&ctx);
671			if (r)
672				goto out_err;
673			break;
674		case TGSI_TOKEN_TYPE_INSTRUCTION:
675			r = tgsi_is_supported(&ctx);
676			if (r)
677				goto out_err;
678			ctx.max_driver_temp_used = 0;
679			/* reserve first tmp for everyone */
680			r600_get_temp(&ctx);
681
682			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
683			if ((r = tgsi_split_constant(&ctx)))
684				goto out_err;
685			if ((r = tgsi_split_literal_constant(&ctx)))
686				goto out_err;
687			if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
688				ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
689			else
690				ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
691			r = ctx.inst_info->process(&ctx);
692			if (r)
693				goto out_err;
694			break;
695		case TGSI_TOKEN_TYPE_PROPERTY:
696			property = &ctx.parse.FullToken.FullProperty;
697			if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
698				if (property->u[0].Data == 1)
699					shader->fs_write_all = TRUE;
700			}
701			break;
702		default:
703			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
704			r = -EINVAL;
705			goto out_err;
706		}
707	}
708	/* export output */
709	noutput = shader->noutput;
710	for (i = 0, pos0 = 0; i < noutput; i++) {
711		memset(&output[i], 0, sizeof(struct r600_bc_output));
712		output[i].gpr = shader->output[i].gpr;
713		output[i].elem_size = 3;
714		output[i].swizzle_x = 0;
715		output[i].swizzle_y = 1;
716		output[i].swizzle_z = 2;
717		output[i].swizzle_w = 3;
718		output[i].burst_count = 1;
719		output[i].barrier = 1;
720		output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
721		output[i].array_base = i - pos0;
722		output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
723		switch (ctx.type) {
724		case TGSI_PROCESSOR_VERTEX:
725			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
726				output[i].array_base = 60;
727				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
728				/* position doesn't count in array_base */
729				pos0++;
730			}
731			if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
732				output[i].array_base = 61;
733				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
734				/* position doesn't count in array_base */
735				pos0++;
736			}
737			break;
738		case TGSI_PROCESSOR_FRAGMENT:
739			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
740				output[i].array_base = shader->output[i].sid;
741				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
742			} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
743				output[i].array_base = 61;
744				output[i].swizzle_x = 2;
745				output[i].swizzle_y = 7;
746				output[i].swizzle_z = output[i].swizzle_w = 7;
747				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
748			} else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
749				output[i].array_base = 61;
750				output[i].swizzle_x = 7;
751				output[i].swizzle_y = 1;
752				output[i].swizzle_z = output[i].swizzle_w = 7;
753				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
754			} else {
755				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
756				r = -EINVAL;
757				goto out_err;
758			}
759			break;
760		default:
761			R600_ERR("unsupported processor type %d\n", ctx.type);
762			r = -EINVAL;
763			goto out_err;
764		}
765	}
766	/* add fake param output for vertex shader if no param is exported */
767	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
768		for (i = 0, pos0 = 0; i < noutput; i++) {
769			if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
770				pos0 = 1;
771				break;
772			}
773		}
774		if (!pos0) {
775			memset(&output[i], 0, sizeof(struct r600_bc_output));
776			output[i].gpr = 0;
777			output[i].elem_size = 3;
778			output[i].swizzle_x = 0;
779			output[i].swizzle_y = 1;
780			output[i].swizzle_z = 2;
781			output[i].swizzle_w = 3;
782			output[i].burst_count = 1;
783			output[i].barrier = 1;
784			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
785			output[i].array_base = 0;
786			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
787			noutput++;
788		}
789	}
790	/* add fake pixel export */
791	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
792		memset(&output[0], 0, sizeof(struct r600_bc_output));
793		output[0].gpr = 0;
794		output[0].elem_size = 3;
795		output[0].swizzle_x = 7;
796		output[0].swizzle_y = 7;
797		output[0].swizzle_z = 7;
798		output[0].swizzle_w = 7;
799		output[0].burst_count = 1;
800		output[0].barrier = 1;
801		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
802		output[0].array_base = 0;
803		output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
804		noutput++;
805	}
806	/* set export done on last export of each type */
807	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
808		if (i == (noutput - 1)) {
809			output[i].end_of_program = 1;
810		}
811		if (!(output_done & (1 << output[i].type))) {
812			output_done |= (1 << output[i].type);
813			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
814		}
815	}
816	/* add output to bytecode */
817	for (i = 0; i < noutput; i++) {
818		r = r600_bc_add_output(ctx.bc, &output[i]);
819		if (r)
820			goto out_err;
821	}
822	free(ctx.literals);
823	tgsi_parse_free(&ctx.parse);
824	return 0;
825out_err:
826	free(ctx.literals);
827	tgsi_parse_free(&ctx.parse);
828	return r;
829}
830
831static int tgsi_unsupported(struct r600_shader_ctx *ctx)
832{
833	R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
834	return -EINVAL;
835}
836
/*
 * Handler that emits nothing and always succeeds.
 */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* unused */

	return 0;
}
841
842static void r600_bc_src(struct r600_bc_alu_src *bc_src,
843			const struct r600_shader_src *shader_src,
844			unsigned chan)
845{
846	bc_src->sel = shader_src->sel;
847	bc_src->chan = shader_src->swizzle[chan];
848	bc_src->neg = shader_src->neg;
849	bc_src->abs = shader_src->abs;
850	bc_src->rel = shader_src->rel;
851	bc_src->value = shader_src->value[bc_src->chan];
852}
853
854static void tgsi_dst(struct r600_shader_ctx *ctx,
855		     const struct tgsi_full_dst_register *tgsi_dst,
856		     unsigned swizzle,
857		     struct r600_bc_alu_dst *r600_dst)
858{
859	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
860
861	r600_dst->sel = tgsi_dst->Register.Index;
862	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
863	r600_dst->chan = swizzle;
864	r600_dst->write = 1;
865	if (tgsi_dst->Register.Indirect)
866		r600_dst->rel = V_SQ_REL_RELATIVE;
867	if (inst->Instruction.Saturate) {
868		r600_dst->clamp = 1;
869	}
870}
871
/*
 * Return the index (0..3) of the highest channel enabled in a write mask.
 * Only the low four bits are looked at; 0 is returned when none of them
 * is set.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* search downwards: the first hit is the highest enabled channel */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1 << chan))
			return chan;
	}
	return 0;
}
883
884static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
885{
886	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
887	struct r600_bc_alu alu;
888	int i, j, r;
889	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
890
891	for (i = 0; i < lasti + 1; i++) {
892		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
893			continue;
894
895		memset(&alu, 0, sizeof(struct r600_bc_alu));
896		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
897
898		alu.inst = ctx->inst_info->r600_opcode;
899		if (!swap) {
900			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
901				r600_bc_src(&alu.src[j], &ctx->src[j], i);
902			}
903		} else {
904			r600_bc_src(&alu.src[0], &ctx->src[1], i);
905			r600_bc_src(&alu.src[1], &ctx->src[0], i);
906		}
907		/* handle some special cases */
908		switch (ctx->inst_info->tgsi_opcode) {
909		case TGSI_OPCODE_SUB:
910			alu.src[1].neg = 1;
911			break;
912		case TGSI_OPCODE_ABS:
913			alu.src[0].abs = 1;
914			break;
915		default:
916			break;
917		}
918		if (i == lasti) {
919			alu.last = 1;
920		}
921		r = r600_bc_add_alu(ctx->bc, &alu);
922		if (r)
923			return r;
924	}
925	return 0;
926}
927
/*
 * Per-channel translation with the sources in TGSI order
 * (tgsi_op2_s() without swapping).
 */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
932
/*
 * Per-channel translation with the first two sources exchanged
 * (tgsi_op2_s() with swapping).
 */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
937
938/*
939 * r600 - trunc to -PI..PI range
940 * r700 - normalize by dividing by 2PI
941 * see fdo bug 27901
942 */
943static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
944{
945	static float half_inv_pi = 1.0 /(3.1415926535 * 2);
946	static float double_pi = 3.1415926535 * 2;
947	static float neg_pi = -3.1415926535;
948
949	int r;
950	struct r600_bc_alu alu;
951
952	memset(&alu, 0, sizeof(struct r600_bc_alu));
953	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
954	alu.is_op3 = 1;
955
956	alu.dst.chan = 0;
957	alu.dst.sel = ctx->temp_reg;
958	alu.dst.write = 1;
959
960	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
961
962	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
963	alu.src[1].chan = 0;
964	alu.src[1].value = *(uint32_t *)&half_inv_pi;
965	alu.src[2].sel = V_SQ_ALU_SRC_0_5;
966	alu.src[2].chan = 0;
967	alu.last = 1;
968	r = r600_bc_add_alu(ctx->bc, &alu);
969	if (r)
970		return r;
971
972	memset(&alu, 0, sizeof(struct r600_bc_alu));
973	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
974
975	alu.dst.chan = 0;
976	alu.dst.sel = ctx->temp_reg;
977	alu.dst.write = 1;
978
979	alu.src[0].sel = ctx->temp_reg;
980	alu.src[0].chan = 0;
981	alu.last = 1;
982	r = r600_bc_add_alu(ctx->bc, &alu);
983	if (r)
984		return r;
985
986	memset(&alu, 0, sizeof(struct r600_bc_alu));
987	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
988	alu.is_op3 = 1;
989
990	alu.dst.chan = 0;
991	alu.dst.sel = ctx->temp_reg;
992	alu.dst.write = 1;
993
994	alu.src[0].sel = ctx->temp_reg;
995	alu.src[0].chan = 0;
996
997	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
998	alu.src[1].chan = 0;
999	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1000	alu.src[2].chan = 0;
1001
1002	if (ctx->bc->chiprev == CHIPREV_R600) {
1003		alu.src[1].value = *(uint32_t *)&double_pi;
1004		alu.src[2].value = *(uint32_t *)&neg_pi;
1005	} else {
1006		alu.src[1].sel = V_SQ_ALU_SRC_1;
1007		alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1008		alu.src[2].neg = 1;
1009	}
1010
1011	alu.last = 1;
1012	r = r600_bc_add_alu(ctx->bc, &alu);
1013	if (r)
1014		return r;
1015	return 0;
1016}
1017
1018static int tgsi_trig(struct r600_shader_ctx *ctx)
1019{
1020	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1021	struct r600_bc_alu alu;
1022	int i, r;
1023	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1024
1025	r = tgsi_setup_trig(ctx);
1026	if (r)
1027		return r;
1028
1029	memset(&alu, 0, sizeof(struct r600_bc_alu));
1030	alu.inst = ctx->inst_info->r600_opcode;
1031	alu.dst.chan = 0;
1032	alu.dst.sel = ctx->temp_reg;
1033	alu.dst.write = 1;
1034
1035	alu.src[0].sel = ctx->temp_reg;
1036	alu.src[0].chan = 0;
1037	alu.last = 1;
1038	r = r600_bc_add_alu(ctx->bc, &alu);
1039	if (r)
1040		return r;
1041
1042	/* replicate result */
1043	for (i = 0; i < lasti + 1; i++) {
1044		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1045			continue;
1046
1047		memset(&alu, 0, sizeof(struct r600_bc_alu));
1048		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1049
1050		alu.src[0].sel = ctx->temp_reg;
1051		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1052		if (i == lasti)
1053			alu.last = 1;
1054		r = r600_bc_add_alu(ctx->bc, &alu);
1055		if (r)
1056			return r;
1057	}
1058	return 0;
1059}
1060
1061static int tgsi_scs(struct r600_shader_ctx *ctx)
1062{
1063	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1064	struct r600_bc_alu alu;
1065	int r;
1066
1067	/* We'll only need the trig stuff if we are going to write to the
1068	 * X or Y components of the destination vector.
1069	 */
1070	if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1071		r = tgsi_setup_trig(ctx);
1072		if (r)
1073			return r;
1074	}
1075
1076	/* dst.x = COS */
1077	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1078		memset(&alu, 0, sizeof(struct r600_bc_alu));
1079		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1080		tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1081
1082		alu.src[0].sel = ctx->temp_reg;
1083		alu.src[0].chan = 0;
1084		alu.last = 1;
1085		r = r600_bc_add_alu(ctx->bc, &alu);
1086		if (r)
1087			return r;
1088	}
1089
1090	/* dst.y = SIN */
1091	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1092		memset(&alu, 0, sizeof(struct r600_bc_alu));
1093		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1094		tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1095
1096		alu.src[0].sel = ctx->temp_reg;
1097		alu.src[0].chan = 0;
1098		alu.last = 1;
1099		r = r600_bc_add_alu(ctx->bc, &alu);
1100		if (r)
1101			return r;
1102	}
1103
1104	/* dst.z = 0.0; */
1105	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1106		memset(&alu, 0, sizeof(struct r600_bc_alu));
1107
1108		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1109
1110		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1111
1112		alu.src[0].sel = V_SQ_ALU_SRC_0;
1113		alu.src[0].chan = 0;
1114
1115		alu.last = 1;
1116
1117		r = r600_bc_add_alu(ctx->bc, &alu);
1118		if (r)
1119			return r;
1120	}
1121
1122	/* dst.w = 1.0; */
1123	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1124		memset(&alu, 0, sizeof(struct r600_bc_alu));
1125
1126		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1127
1128		tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1129
1130		alu.src[0].sel = V_SQ_ALU_SRC_1;
1131		alu.src[0].chan = 0;
1132
1133		alu.last = 1;
1134
1135		r = r600_bc_add_alu(ctx->bc, &alu);
1136		if (r)
1137			return r;
1138	}
1139
1140	return 0;
1141}
1142
1143static int tgsi_kill(struct r600_shader_ctx *ctx)
1144{
1145	struct r600_bc_alu alu;
1146	int i, r;
1147
1148	for (i = 0; i < 4; i++) {
1149		memset(&alu, 0, sizeof(struct r600_bc_alu));
1150		alu.inst = ctx->inst_info->r600_opcode;
1151
1152		alu.dst.chan = i;
1153
1154		alu.src[0].sel = V_SQ_ALU_SRC_0;
1155
1156		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1157			alu.src[1].sel = V_SQ_ALU_SRC_1;
1158			alu.src[1].neg = 1;
1159		} else {
1160			r600_bc_src(&alu.src[1], &ctx->src[0], i);
1161		}
1162		if (i == 3) {
1163			alu.last = 1;
1164		}
1165		r = r600_bc_add_alu(ctx->bc, &alu);
1166		if (r)
1167			return r;
1168	}
1169
1170	/* kill must be last in ALU */
1171	ctx->bc->force_add_cf = 1;
1172	ctx->shader->uses_kill = TRUE;
1173	return 0;
1174}
1175
1176static int tgsi_lit(struct r600_shader_ctx *ctx)
1177{
1178	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1179	struct r600_bc_alu alu;
1180	int r;
1181
1182	/* dst.x, <- 1.0  */
1183	memset(&alu, 0, sizeof(struct r600_bc_alu));
1184	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1185	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
1186	alu.src[0].chan = 0;
1187	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1188	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1189	r = r600_bc_add_alu(ctx->bc, &alu);
1190	if (r)
1191		return r;
1192
1193	/* dst.y = max(src.x, 0.0) */
1194	memset(&alu, 0, sizeof(struct r600_bc_alu));
1195	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1196	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1197	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1198	alu.src[1].chan = 0;
1199	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1200	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1201	r = r600_bc_add_alu(ctx->bc, &alu);
1202	if (r)
1203		return r;
1204
1205	/* dst.w, <- 1.0  */
1206	memset(&alu, 0, sizeof(struct r600_bc_alu));
1207	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1208	alu.src[0].sel  = V_SQ_ALU_SRC_1;
1209	alu.src[0].chan = 0;
1210	tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1211	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1212	alu.last = 1;
1213	r = r600_bc_add_alu(ctx->bc, &alu);
1214	if (r)
1215		return r;
1216
1217	if (inst->Dst[0].Register.WriteMask & (1 << 2))
1218	{
1219		int chan;
1220		int sel;
1221
1222		/* dst.z = log(src.y) */
1223		memset(&alu, 0, sizeof(struct r600_bc_alu));
1224		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1225		r600_bc_src(&alu.src[0], &ctx->src[0], 1);
1226		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1227		alu.last = 1;
1228		r = r600_bc_add_alu(ctx->bc, &alu);
1229		if (r)
1230			return r;
1231
1232		chan = alu.dst.chan;
1233		sel = alu.dst.sel;
1234
1235		/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1236		memset(&alu, 0, sizeof(struct r600_bc_alu));
1237		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1238		r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1239		alu.src[1].sel  = sel;
1240		alu.src[1].chan = chan;
1241
1242		r600_bc_src(&alu.src[2], &ctx->src[0], 0);
1243		alu.dst.sel = ctx->temp_reg;
1244		alu.dst.chan = 0;
1245		alu.dst.write = 1;
1246		alu.is_op3 = 1;
1247		alu.last = 1;
1248		r = r600_bc_add_alu(ctx->bc, &alu);
1249		if (r)
1250			return r;
1251
1252		/* dst.z = exp(tmp.x) */
1253		memset(&alu, 0, sizeof(struct r600_bc_alu));
1254		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1255		alu.src[0].sel = ctx->temp_reg;
1256		alu.src[0].chan = 0;
1257		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1258		alu.last = 1;
1259		r = r600_bc_add_alu(ctx->bc, &alu);
1260		if (r)
1261			return r;
1262	}
1263	return 0;
1264}
1265
1266static int tgsi_rsq(struct r600_shader_ctx *ctx)
1267{
1268	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1269	struct r600_bc_alu alu;
1270	int i, r;
1271
1272	memset(&alu, 0, sizeof(struct r600_bc_alu));
1273
1274	/* FIXME:
1275	 * For state trackers other than OpenGL, we'll want to use
1276	 * _RECIPSQRT_IEEE instead.
1277	 */
1278	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1279
1280	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1281		r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1282		alu.src[i].abs = 1;
1283	}
1284	alu.dst.sel = ctx->temp_reg;
1285	alu.dst.write = 1;
1286	alu.last = 1;
1287	r = r600_bc_add_alu(ctx->bc, &alu);
1288	if (r)
1289		return r;
1290	/* replicate result */
1291	return tgsi_helper_tempx_replicate(ctx);
1292}
1293
1294static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1295{
1296	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1297	struct r600_bc_alu alu;
1298	int i, r;
1299
1300	for (i = 0; i < 4; i++) {
1301		memset(&alu, 0, sizeof(struct r600_bc_alu));
1302		alu.src[0].sel = ctx->temp_reg;
1303		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1304		alu.dst.chan = i;
1305		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1306		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1307		if (i == 3)
1308			alu.last = 1;
1309		r = r600_bc_add_alu(ctx->bc, &alu);
1310		if (r)
1311			return r;
1312	}
1313	return 0;
1314}
1315
1316static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1317{
1318	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1319	struct r600_bc_alu alu;
1320	int i, r;
1321
1322	memset(&alu, 0, sizeof(struct r600_bc_alu));
1323	alu.inst = ctx->inst_info->r600_opcode;
1324	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1325		r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1326	}
1327	alu.dst.sel = ctx->temp_reg;
1328	alu.dst.write = 1;
1329	alu.last = 1;
1330	r = r600_bc_add_alu(ctx->bc, &alu);
1331	if (r)
1332		return r;
1333	/* replicate result */
1334	return tgsi_helper_tempx_replicate(ctx);
1335}
1336
1337static int tgsi_pow(struct r600_shader_ctx *ctx)
1338{
1339	struct r600_bc_alu alu;
1340	int r;
1341
1342	/* LOG2(a) */
1343	memset(&alu, 0, sizeof(struct r600_bc_alu));
1344	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1345	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1346	alu.dst.sel = ctx->temp_reg;
1347	alu.dst.write = 1;
1348	alu.last = 1;
1349	r = r600_bc_add_alu(ctx->bc, &alu);
1350	if (r)
1351		return r;
1352	/* b * LOG2(a) */
1353	memset(&alu, 0, sizeof(struct r600_bc_alu));
1354	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1355	r600_bc_src(&alu.src[0], &ctx->src[1], 0);
1356	alu.src[1].sel = ctx->temp_reg;
1357	alu.dst.sel = ctx->temp_reg;
1358	alu.dst.write = 1;
1359	alu.last = 1;
1360	r = r600_bc_add_alu(ctx->bc, &alu);
1361	if (r)
1362		return r;
1363	/* POW(a,b) = EXP2(b * LOG2(a))*/
1364	memset(&alu, 0, sizeof(struct r600_bc_alu));
1365	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1366	alu.src[0].sel = ctx->temp_reg;
1367	alu.dst.sel = ctx->temp_reg;
1368	alu.dst.write = 1;
1369	alu.last = 1;
1370	r = r600_bc_add_alu(ctx->bc, &alu);
1371	if (r)
1372		return r;
1373	return tgsi_helper_tempx_replicate(ctx);
1374}
1375
1376static int tgsi_ssg(struct r600_shader_ctx *ctx)
1377{
1378	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1379	struct r600_bc_alu alu;
1380	int i, r;
1381
1382	/* tmp = (src > 0 ? 1 : src) */
1383	for (i = 0; i < 4; i++) {
1384		memset(&alu, 0, sizeof(struct r600_bc_alu));
1385		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1386		alu.is_op3 = 1;
1387
1388		alu.dst.sel = ctx->temp_reg;
1389		alu.dst.chan = i;
1390
1391		r600_bc_src(&alu.src[0], &ctx->src[0], i);
1392		alu.src[1].sel = V_SQ_ALU_SRC_1;
1393		r600_bc_src(&alu.src[2], &ctx->src[0], i);
1394
1395		if (i == 3)
1396			alu.last = 1;
1397		r = r600_bc_add_alu(ctx->bc, &alu);
1398		if (r)
1399			return r;
1400	}
1401
1402	/* dst = (-tmp > 0 ? -1 : tmp) */
1403	for (i = 0; i < 4; i++) {
1404		memset(&alu, 0, sizeof(struct r600_bc_alu));
1405		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1406		alu.is_op3 = 1;
1407		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1408
1409		alu.src[0].sel = ctx->temp_reg;
1410		alu.src[0].chan = i;
1411		alu.src[0].neg = 1;
1412
1413		alu.src[1].sel = V_SQ_ALU_SRC_1;
1414		alu.src[1].neg = 1;
1415
1416		alu.src[2].sel = ctx->temp_reg;
1417		alu.src[2].chan = i;
1418
1419		if (i == 3)
1420			alu.last = 1;
1421		r = r600_bc_add_alu(ctx->bc, &alu);
1422		if (r)
1423			return r;
1424	}
1425	return 0;
1426}
1427
1428static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1429{
1430	struct r600_bc_alu alu;
1431	int i, r;
1432
1433	for (i = 0; i < 4; i++) {
1434		memset(&alu, 0, sizeof(struct r600_bc_alu));
1435		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1436			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1437			alu.dst.chan = i;
1438		} else {
1439			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1440			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1441			alu.src[0].sel = ctx->temp_reg;
1442			alu.src[0].chan = i;
1443		}
1444		if (i == 3) {
1445			alu.last = 1;
1446		}
1447		r = r600_bc_add_alu(ctx->bc, &alu);
1448		if (r)
1449			return r;
1450	}
1451	return 0;
1452}
1453
1454static int tgsi_op3(struct r600_shader_ctx *ctx)
1455{
1456	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1457	struct r600_bc_alu alu;
1458	int i, j, r;
1459	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1460
1461	for (i = 0; i < lasti + 1; i++) {
1462		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1463			continue;
1464
1465		memset(&alu, 0, sizeof(struct r600_bc_alu));
1466		alu.inst = ctx->inst_info->r600_opcode;
1467		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1468			r600_bc_src(&alu.src[j], &ctx->src[j], i);
1469		}
1470
1471		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1472		alu.dst.chan = i;
1473		alu.dst.write = 1;
1474		alu.is_op3 = 1;
1475		if (i == lasti) {
1476			alu.last = 1;
1477		}
1478		r = r600_bc_add_alu(ctx->bc, &alu);
1479		if (r)
1480			return r;
1481	}
1482	return 0;
1483}
1484
1485static int tgsi_dp(struct r600_shader_ctx *ctx)
1486{
1487	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1488	struct r600_bc_alu alu;
1489	int i, j, r;
1490
1491	for (i = 0; i < 4; i++) {
1492		memset(&alu, 0, sizeof(struct r600_bc_alu));
1493		alu.inst = ctx->inst_info->r600_opcode;
1494		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1495			r600_bc_src(&alu.src[j], &ctx->src[j], i);
1496		}
1497
1498		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1499		alu.dst.chan = i;
1500		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1501		/* handle some special cases */
1502		switch (ctx->inst_info->tgsi_opcode) {
1503		case TGSI_OPCODE_DP2:
1504			if (i > 1) {
1505				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1506				alu.src[0].chan = alu.src[1].chan = 0;
1507			}
1508			break;
1509		case TGSI_OPCODE_DP3:
1510			if (i > 2) {
1511				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1512				alu.src[0].chan = alu.src[1].chan = 0;
1513			}
1514			break;
1515		case TGSI_OPCODE_DPH:
1516			if (i == 3) {
1517				alu.src[0].sel = V_SQ_ALU_SRC_1;
1518				alu.src[0].chan = 0;
1519				alu.src[0].neg = 0;
1520			}
1521			break;
1522		default:
1523			break;
1524		}
1525		if (i == 3) {
1526			alu.last = 1;
1527		}
1528		r = r600_bc_add_alu(ctx->bc, &alu);
1529		if (r)
1530			return r;
1531	}
1532	return 0;
1533}
1534
1535static int tgsi_tex(struct r600_shader_ctx *ctx)
1536{
1537	static float one_point_five = 1.5f;
1538	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1539	struct r600_bc_tex tex;
1540	struct r600_bc_alu alu;
1541	unsigned src_gpr;
1542	int r, i;
1543	int opcode;
1544	/* Texture fetch instructions can only use gprs as source.
1545	 * Also they cannot negate the source or take the absolute value */
1546	const boolean src_requires_loading =
1547		(inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&
1548		inst->Src[0].Register.File != TGSI_FILE_INPUT) ||
1549		ctx->src[0].neg || ctx->src[0].abs;
1550	boolean src_loaded = FALSE;
1551
1552	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1553
1554	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1555		/* Add perspective divide */
1556		memset(&alu, 0, sizeof(struct r600_bc_alu));
1557		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1558		r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1559
1560		alu.dst.sel = ctx->temp_reg;
1561		alu.dst.chan = 3;
1562		alu.last = 1;
1563		alu.dst.write = 1;
1564		r = r600_bc_add_alu(ctx->bc, &alu);
1565		if (r)
1566			return r;
1567
1568		for (i = 0; i < 3; i++) {
1569			memset(&alu, 0, sizeof(struct r600_bc_alu));
1570			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1571			alu.src[0].sel = ctx->temp_reg;
1572			alu.src[0].chan = 3;
1573			r600_bc_src(&alu.src[1], &ctx->src[0], i);
1574			alu.dst.sel = ctx->temp_reg;
1575			alu.dst.chan = i;
1576			alu.dst.write = 1;
1577			r = r600_bc_add_alu(ctx->bc, &alu);
1578			if (r)
1579				return r;
1580		}
1581		memset(&alu, 0, sizeof(struct r600_bc_alu));
1582		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1583		alu.src[0].sel = V_SQ_ALU_SRC_1;
1584		alu.src[0].chan = 0;
1585		alu.dst.sel = ctx->temp_reg;
1586		alu.dst.chan = 3;
1587		alu.last = 1;
1588		alu.dst.write = 1;
1589		r = r600_bc_add_alu(ctx->bc, &alu);
1590		if (r)
1591			return r;
1592		src_loaded = TRUE;
1593		src_gpr = ctx->temp_reg;
1594	}
1595
1596	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1597		static const unsigned src0_swizzle[] = {2, 2, 0, 1};
1598		static const unsigned src1_swizzle[] = {1, 0, 2, 2};
1599
1600		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1601		for (i = 0; i < 4; i++) {
1602			memset(&alu, 0, sizeof(struct r600_bc_alu));
1603			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1604			r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
1605			r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
1606			alu.dst.sel = ctx->temp_reg;
1607			alu.dst.chan = i;
1608			if (i == 3)
1609				alu.last = 1;
1610			alu.dst.write = 1;
1611			r = r600_bc_add_alu(ctx->bc, &alu);
1612			if (r)
1613				return r;
1614		}
1615
1616		/* tmp1.z = RCP_e(|tmp1.z|) */
1617		memset(&alu, 0, sizeof(struct r600_bc_alu));
1618		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1619		alu.src[0].sel = ctx->temp_reg;
1620		alu.src[0].chan = 2;
1621		alu.src[0].abs = 1;
1622		alu.dst.sel = ctx->temp_reg;
1623		alu.dst.chan = 2;
1624		alu.dst.write = 1;
1625		alu.last = 1;
1626		r = r600_bc_add_alu(ctx->bc, &alu);
1627		if (r)
1628			return r;
1629
1630		/* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
1631		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
1632		 * muladd has no writemask, have to use another temp
1633		 */
1634		memset(&alu, 0, sizeof(struct r600_bc_alu));
1635		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1636		alu.is_op3 = 1;
1637
1638		alu.src[0].sel = ctx->temp_reg;
1639		alu.src[0].chan = 0;
1640		alu.src[1].sel = ctx->temp_reg;
1641		alu.src[1].chan = 2;
1642
1643		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1644		alu.src[2].chan = 0;
1645		alu.src[2].value = *(uint32_t *)&one_point_five;
1646
1647		alu.dst.sel = ctx->temp_reg;
1648		alu.dst.chan = 0;
1649		alu.dst.write = 1;
1650
1651		r = r600_bc_add_alu(ctx->bc, &alu);
1652		if (r)
1653			return r;
1654
1655		memset(&alu, 0, sizeof(struct r600_bc_alu));
1656		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1657		alu.is_op3 = 1;
1658
1659		alu.src[0].sel = ctx->temp_reg;
1660		alu.src[0].chan = 1;
1661		alu.src[1].sel = ctx->temp_reg;
1662		alu.src[1].chan = 2;
1663
1664		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1665		alu.src[2].chan = 0;
1666		alu.src[2].value = *(uint32_t *)&one_point_five;
1667
1668		alu.dst.sel = ctx->temp_reg;
1669		alu.dst.chan = 1;
1670		alu.dst.write = 1;
1671
1672		alu.last = 1;
1673		r = r600_bc_add_alu(ctx->bc, &alu);
1674		if (r)
1675			return r;
1676
1677		src_loaded = TRUE;
1678		src_gpr = ctx->temp_reg;
1679	}
1680
1681	if (src_requires_loading && !src_loaded) {
1682		for (i = 0; i < 4; i++) {
1683			memset(&alu, 0, sizeof(struct r600_bc_alu));
1684			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1685			r600_bc_src(&alu.src[0], &ctx->src[0], i);
1686			alu.dst.sel = ctx->temp_reg;
1687			alu.dst.chan = i;
1688			if (i == 3)
1689				alu.last = 1;
1690			alu.dst.write = 1;
1691			r = r600_bc_add_alu(ctx->bc, &alu);
1692			if (r)
1693				return r;
1694		}
1695		src_loaded = TRUE;
1696		src_gpr = ctx->temp_reg;
1697	}
1698
1699	opcode = ctx->inst_info->r600_opcode;
1700	if (opcode == SQ_TEX_INST_SAMPLE &&
1701	    (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
1702		opcode = SQ_TEX_INST_SAMPLE_C;
1703
1704	memset(&tex, 0, sizeof(struct r600_bc_tex));
1705	tex.inst = opcode;
1706	tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1707	tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
1708	tex.src_gpr = src_gpr;
1709	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1710	tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
1711	tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
1712	tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
1713	tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
1714	if (src_loaded) {
1715		tex.src_sel_x = 0;
1716		tex.src_sel_y = 1;
1717		tex.src_sel_z = 2;
1718		tex.src_sel_w = 3;
1719	} else {
1720		tex.src_sel_x = ctx->src[0].swizzle[0];
1721		tex.src_sel_y = ctx->src[0].swizzle[1];
1722		tex.src_sel_z = ctx->src[0].swizzle[2];
1723		tex.src_sel_w = ctx->src[0].swizzle[3];
1724		tex.src_rel = ctx->src[0].rel;
1725	}
1726
1727	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1728		tex.src_sel_x = 1;
1729		tex.src_sel_y = 0;
1730		tex.src_sel_z = 3;
1731		tex.src_sel_w = 1;
1732	}
1733
1734	if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1735		tex.coord_type_x = 1;
1736		tex.coord_type_y = 1;
1737		tex.coord_type_z = 1;
1738		tex.coord_type_w = 1;
1739	}
1740
1741	if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) {
1742		tex.coord_type_z = 0;
1743		tex.src_sel_z = tex.src_sel_y;
1744	} else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY)
1745		tex.coord_type_z = 0;
1746
1747	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
1748		tex.src_sel_w = tex.src_sel_z;
1749
1750	r = r600_bc_add_tex(ctx->bc, &tex);
1751	if (r)
1752		return r;
1753
1754	/* add shadow ambient support  - gallium doesn't do it yet */
1755	return 0;
1756}
1757
1758static int tgsi_lrp(struct r600_shader_ctx *ctx)
1759{
1760	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1761	struct r600_bc_alu alu;
1762	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1763	unsigned i;
1764	int r;
1765
1766	/* optimize if it's just an equal balance */
1767	if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
1768		for (i = 0; i < lasti + 1; i++) {
1769			if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1770				continue;
1771
1772			memset(&alu, 0, sizeof(struct r600_bc_alu));
1773			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1774			r600_bc_src(&alu.src[0], &ctx->src[1], i);
1775			r600_bc_src(&alu.src[1], &ctx->src[2], i);
1776			alu.omod = 3;
1777			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1778			alu.dst.chan = i;
1779			if (i == lasti) {
1780				alu.last = 1;
1781			}
1782			r = r600_bc_add_alu(ctx->bc, &alu);
1783			if (r)
1784				return r;
1785		}
1786		return 0;
1787	}
1788
1789	/* 1 - src0 */
1790	for (i = 0; i < lasti + 1; i++) {
1791		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1792			continue;
1793
1794		memset(&alu, 0, sizeof(struct r600_bc_alu));
1795		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1796		alu.src[0].sel = V_SQ_ALU_SRC_1;
1797		alu.src[0].chan = 0;
1798		r600_bc_src(&alu.src[1], &ctx->src[0], i);
1799		alu.src[1].neg = 1;
1800		alu.dst.sel = ctx->temp_reg;
1801		alu.dst.chan = i;
1802		if (i == lasti) {
1803			alu.last = 1;
1804		}
1805		alu.dst.write = 1;
1806		r = r600_bc_add_alu(ctx->bc, &alu);
1807		if (r)
1808			return r;
1809	}
1810
1811	/* (1 - src0) * src2 */
1812	for (i = 0; i < lasti + 1; i++) {
1813		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1814			continue;
1815
1816		memset(&alu, 0, sizeof(struct r600_bc_alu));
1817		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1818		alu.src[0].sel = ctx->temp_reg;
1819		alu.src[0].chan = i;
1820		r600_bc_src(&alu.src[1], &ctx->src[2], i);
1821		alu.dst.sel = ctx->temp_reg;
1822		alu.dst.chan = i;
1823		if (i == lasti) {
1824			alu.last = 1;
1825		}
1826		alu.dst.write = 1;
1827		r = r600_bc_add_alu(ctx->bc, &alu);
1828		if (r)
1829			return r;
1830	}
1831
1832	/* src0 * src1 + (1 - src0) * src2 */
1833	for (i = 0; i < lasti + 1; i++) {
1834		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1835			continue;
1836
1837		memset(&alu, 0, sizeof(struct r600_bc_alu));
1838		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1839		alu.is_op3 = 1;
1840		r600_bc_src(&alu.src[0], &ctx->src[0], i);
1841		r600_bc_src(&alu.src[1], &ctx->src[1], i);
1842		alu.src[2].sel = ctx->temp_reg;
1843		alu.src[2].chan = i;
1844
1845		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1846		alu.dst.chan = i;
1847		if (i == lasti) {
1848			alu.last = 1;
1849		}
1850		r = r600_bc_add_alu(ctx->bc, &alu);
1851		if (r)
1852			return r;
1853	}
1854	return 0;
1855}
1856
1857static int tgsi_cmp(struct r600_shader_ctx *ctx)
1858{
1859	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1860	struct r600_bc_alu alu;
1861	int i, r;
1862	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1863
1864	for (i = 0; i < lasti + 1; i++) {
1865		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1866			continue;
1867
1868		memset(&alu, 0, sizeof(struct r600_bc_alu));
1869		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
1870		r600_bc_src(&alu.src[0], &ctx->src[0], i);
1871		r600_bc_src(&alu.src[1], &ctx->src[2], i);
1872		r600_bc_src(&alu.src[2], &ctx->src[1], i);
1873		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1874		alu.dst.chan = i;
1875		alu.dst.write = 1;
1876		alu.is_op3 = 1;
1877		if (i == lasti)
1878			alu.last = 1;
1879		r = r600_bc_add_alu(ctx->bc, &alu);
1880		if (r)
1881			return r;
1882	}
1883	return 0;
1884}
1885
1886static int tgsi_xpd(struct r600_shader_ctx *ctx)
1887{
1888	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1889	static const unsigned int src0_swizzle[] = {2, 0, 1};
1890	static const unsigned int src1_swizzle[] = {1, 2, 0};
1891	struct r600_bc_alu alu;
1892	uint32_t use_temp = 0;
1893	int i, r;
1894
1895	if (inst->Dst[0].Register.WriteMask != 0xf)
1896		use_temp = 1;
1897
1898	for (i = 0; i < 4; i++) {
1899		memset(&alu, 0, sizeof(struct r600_bc_alu));
1900		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1901		if (i < 3) {
1902			r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
1903			r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
1904		} else {
1905			alu.src[0].sel = V_SQ_ALU_SRC_0;
1906			alu.src[0].chan = i;
1907			alu.src[1].sel = V_SQ_ALU_SRC_0;
1908			alu.src[1].chan = i;
1909		}
1910
1911		alu.dst.sel = ctx->temp_reg;
1912		alu.dst.chan = i;
1913		alu.dst.write = 1;
1914
1915		if (i == 3)
1916			alu.last = 1;
1917		r = r600_bc_add_alu(ctx->bc, &alu);
1918		if (r)
1919			return r;
1920	}
1921
1922	for (i = 0; i < 4; i++) {
1923		memset(&alu, 0, sizeof(struct r600_bc_alu));
1924		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1925
1926		if (i < 3) {
1927			r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
1928			r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
1929		} else {
1930			alu.src[0].sel = V_SQ_ALU_SRC_0;
1931			alu.src[0].chan = i;
1932			alu.src[1].sel = V_SQ_ALU_SRC_0;
1933			alu.src[1].chan = i;
1934		}
1935
1936		alu.src[2].sel = ctx->temp_reg;
1937		alu.src[2].neg = 1;
1938		alu.src[2].chan = i;
1939
1940		if (use_temp)
1941			alu.dst.sel = ctx->temp_reg;
1942		else
1943			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1944		alu.dst.chan = i;
1945		alu.dst.write = 1;
1946		alu.is_op3 = 1;
1947		if (i == 3)
1948			alu.last = 1;
1949		r = r600_bc_add_alu(ctx->bc, &alu);
1950		if (r)
1951			return r;
1952	}
1953	if (use_temp)
1954		return tgsi_helper_copy(ctx, inst);
1955	return 0;
1956}
1957
1958static int tgsi_exp(struct r600_shader_ctx *ctx)
1959{
1960	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1961	struct r600_bc_alu alu;
1962	int r;
1963
1964	/* result.x = 2^floor(src); */
1965	if (inst->Dst[0].Register.WriteMask & 1) {
1966		memset(&alu, 0, sizeof(struct r600_bc_alu));
1967
1968		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
1969		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1970
1971		alu.dst.sel = ctx->temp_reg;
1972		alu.dst.chan = 0;
1973		alu.dst.write = 1;
1974		alu.last = 1;
1975		r = r600_bc_add_alu(ctx->bc, &alu);
1976		if (r)
1977			return r;
1978
1979		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1980		alu.src[0].sel = ctx->temp_reg;
1981		alu.src[0].chan = 0;
1982
1983		alu.dst.sel = ctx->temp_reg;
1984		alu.dst.chan = 0;
1985		alu.dst.write = 1;
1986		alu.last = 1;
1987		r = r600_bc_add_alu(ctx->bc, &alu);
1988		if (r)
1989			return r;
1990	}
1991
1992	/* result.y = tmp - floor(tmp); */
1993	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
1994		memset(&alu, 0, sizeof(struct r600_bc_alu));
1995
1996		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1997		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1998
1999		alu.dst.sel = ctx->temp_reg;
2000//		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2001//		if (r)
2002//			return r;
2003		alu.dst.write = 1;
2004		alu.dst.chan = 1;
2005
2006		alu.last = 1;
2007
2008		r = r600_bc_add_alu(ctx->bc, &alu);
2009		if (r)
2010			return r;
2011	}
2012
2013	/* result.z = RoughApprox2ToX(tmp);*/
2014	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2015		memset(&alu, 0, sizeof(struct r600_bc_alu));
2016		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2017		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2018
2019		alu.dst.sel = ctx->temp_reg;
2020		alu.dst.write = 1;
2021		alu.dst.chan = 2;
2022
2023		alu.last = 1;
2024
2025		r = r600_bc_add_alu(ctx->bc, &alu);
2026		if (r)
2027			return r;
2028	}
2029
2030	/* result.w = 1.0;*/
2031	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2032		memset(&alu, 0, sizeof(struct r600_bc_alu));
2033
2034		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2035		alu.src[0].sel = V_SQ_ALU_SRC_1;
2036		alu.src[0].chan = 0;
2037
2038		alu.dst.sel = ctx->temp_reg;
2039		alu.dst.chan = 3;
2040		alu.dst.write = 1;
2041		alu.last = 1;
2042		r = r600_bc_add_alu(ctx->bc, &alu);
2043		if (r)
2044			return r;
2045	}
2046	return tgsi_helper_copy(ctx, inst);
2047}
2048
2049static int tgsi_log(struct r600_shader_ctx *ctx)
2050{
2051	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2052	struct r600_bc_alu alu;
2053	int r;
2054
2055	/* result.x = floor(log2(src)); */
2056	if (inst->Dst[0].Register.WriteMask & 1) {
2057		memset(&alu, 0, sizeof(struct r600_bc_alu));
2058
2059		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2060		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2061
2062		alu.dst.sel = ctx->temp_reg;
2063		alu.dst.chan = 0;
2064		alu.dst.write = 1;
2065		alu.last = 1;
2066		r = r600_bc_add_alu(ctx->bc, &alu);
2067		if (r)
2068			return r;
2069
2070		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2071		alu.src[0].sel = ctx->temp_reg;
2072		alu.src[0].chan = 0;
2073
2074		alu.dst.sel = ctx->temp_reg;
2075		alu.dst.chan = 0;
2076		alu.dst.write = 1;
2077		alu.last = 1;
2078
2079		r = r600_bc_add_alu(ctx->bc, &alu);
2080		if (r)
2081			return r;
2082	}
2083
2084	/* result.y = src.x / (2 ^ floor(log2(src.x))); */
2085	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2086		memset(&alu, 0, sizeof(struct r600_bc_alu));
2087
2088		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2089		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2090
2091		alu.dst.sel = ctx->temp_reg;
2092		alu.dst.chan = 1;
2093		alu.dst.write = 1;
2094		alu.last = 1;
2095
2096		r = r600_bc_add_alu(ctx->bc, &alu);
2097		if (r)
2098			return r;
2099
2100		memset(&alu, 0, sizeof(struct r600_bc_alu));
2101
2102		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2103		alu.src[0].sel = ctx->temp_reg;
2104		alu.src[0].chan = 1;
2105
2106		alu.dst.sel = ctx->temp_reg;
2107		alu.dst.chan = 1;
2108		alu.dst.write = 1;
2109		alu.last = 1;
2110
2111		r = r600_bc_add_alu(ctx->bc, &alu);
2112		if (r)
2113			return r;
2114
2115		memset(&alu, 0, sizeof(struct r600_bc_alu));
2116
2117		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2118		alu.src[0].sel = ctx->temp_reg;
2119		alu.src[0].chan = 1;
2120
2121		alu.dst.sel = ctx->temp_reg;
2122		alu.dst.chan = 1;
2123		alu.dst.write = 1;
2124		alu.last = 1;
2125
2126		r = r600_bc_add_alu(ctx->bc, &alu);
2127		if (r)
2128			return r;
2129
2130		memset(&alu, 0, sizeof(struct r600_bc_alu));
2131
2132		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2133		alu.src[0].sel = ctx->temp_reg;
2134		alu.src[0].chan = 1;
2135
2136		alu.dst.sel = ctx->temp_reg;
2137		alu.dst.chan = 1;
2138		alu.dst.write = 1;
2139		alu.last = 1;
2140
2141		r = r600_bc_add_alu(ctx->bc, &alu);
2142		if (r)
2143			return r;
2144
2145		memset(&alu, 0, sizeof(struct r600_bc_alu));
2146
2147		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2148
2149		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2150
2151		alu.src[1].sel = ctx->temp_reg;
2152		alu.src[1].chan = 1;
2153
2154		alu.dst.sel = ctx->temp_reg;
2155		alu.dst.chan = 1;
2156		alu.dst.write = 1;
2157		alu.last = 1;
2158
2159		r = r600_bc_add_alu(ctx->bc, &alu);
2160		if (r)
2161			return r;
2162	}
2163
2164	/* result.z = log2(src);*/
2165	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2166		memset(&alu, 0, sizeof(struct r600_bc_alu));
2167
2168		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2169		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2170
2171		alu.dst.sel = ctx->temp_reg;
2172		alu.dst.write = 1;
2173		alu.dst.chan = 2;
2174		alu.last = 1;
2175
2176		r = r600_bc_add_alu(ctx->bc, &alu);
2177		if (r)
2178			return r;
2179	}
2180
2181	/* result.w = 1.0; */
2182	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2183		memset(&alu, 0, sizeof(struct r600_bc_alu));
2184
2185		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2186		alu.src[0].sel = V_SQ_ALU_SRC_1;
2187		alu.src[0].chan = 0;
2188
2189		alu.dst.sel = ctx->temp_reg;
2190		alu.dst.chan = 3;
2191		alu.dst.write = 1;
2192		alu.last = 1;
2193
2194		r = r600_bc_add_alu(ctx->bc, &alu);
2195		if (r)
2196			return r;
2197	}
2198
2199	return tgsi_helper_copy(ctx, inst);
2200}
2201
2202static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2203{
2204	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2205	struct r600_bc_alu alu;
2206	int r;
2207
2208	memset(&alu, 0, sizeof(struct r600_bc_alu));
2209
2210	switch (inst->Instruction.Opcode) {
2211	case TGSI_OPCODE_ARL:
2212		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2213		break;
2214	case TGSI_OPCODE_ARR:
2215		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2216		break;
2217	default:
2218		assert(0);
2219		return -1;
2220	}
2221
2222	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2223	alu.last = 1;
2224	alu.dst.sel = ctx->ar_reg;
2225	alu.dst.write = 1;
2226	r = r600_bc_add_alu(ctx->bc, &alu);
2227	if (r)
2228		return r;
2229
2230	/* TODO: Note that the MOVA can be avoided if we never use AR for
2231	 * indexing non-CB registers in the current ALU clause. Similarly, we
2232	 * need to load AR from ar_reg again if we started a new clause
2233	 * between ARL and AR usage. The easy way to do that is to remove
2234	 * the MOVA here, and load it for the first AR access after ar_reg
2235	 * has been modified in each clause. */
2236	memset(&alu, 0, sizeof(struct r600_bc_alu));
2237	alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2238	alu.src[0].sel = ctx->ar_reg;
2239	alu.src[0].chan = 0;
2240	alu.last = 1;
2241	r = r600_bc_add_alu(ctx->bc, &alu);
2242	if (r)
2243		return r;
2244	return 0;
2245}
2246static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2247{
2248	/* TODO from r600c, ar values don't persist between clauses */
2249	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2250	struct r600_bc_alu alu;
2251	int r;
2252
2253	switch (inst->Instruction.Opcode) {
2254	case TGSI_OPCODE_ARL:
2255		memset(&alu, 0, sizeof(alu));
2256		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
2257		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2258		alu.dst.sel = ctx->ar_reg;
2259		alu.dst.write = 1;
2260		alu.last = 1;
2261
2262		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2263			return r;
2264
2265		memset(&alu, 0, sizeof(alu));
2266		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2267		alu.src[0].sel = ctx->ar_reg;
2268		alu.dst.sel = ctx->ar_reg;
2269		alu.dst.write = 1;
2270		alu.last = 1;
2271
2272		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2273			return r;
2274		break;
2275	case TGSI_OPCODE_ARR:
2276		memset(&alu, 0, sizeof(alu));
2277		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2278		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2279		alu.dst.sel = ctx->ar_reg;
2280		alu.dst.write = 1;
2281		alu.last = 1;
2282
2283		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2284			return r;
2285		break;
2286	default:
2287		assert(0);
2288		return -1;
2289	}
2290
2291	memset(&alu, 0, sizeof(alu));
2292	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2293	alu.src[0].sel = ctx->ar_reg;
2294	alu.last = 1;
2295
2296	r = r600_bc_add_alu(ctx->bc, &alu);
2297	if (r)
2298		return r;
2299	ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2300	return 0;
2301}
2302
2303static int tgsi_opdst(struct r600_shader_ctx *ctx)
2304{
2305	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2306	struct r600_bc_alu alu;
2307	int i, r = 0;
2308
2309	for (i = 0; i < 4; i++) {
2310		memset(&alu, 0, sizeof(struct r600_bc_alu));
2311
2312		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2313		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2314
2315		if (i == 0 || i == 3) {
2316			alu.src[0].sel = V_SQ_ALU_SRC_1;
2317		} else {
2318			r600_bc_src(&alu.src[0], &ctx->src[0], i);
2319		}
2320
2321		if (i == 0 || i == 2) {
2322			alu.src[1].sel = V_SQ_ALU_SRC_1;
2323		} else {
2324			r600_bc_src(&alu.src[1], &ctx->src[1], i);
2325		}
2326		if (i == 3)
2327			alu.last = 1;
2328		r = r600_bc_add_alu(ctx->bc, &alu);
2329		if (r)
2330			return r;
2331	}
2332	return 0;
2333}
2334
2335static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2336{
2337	struct r600_bc_alu alu;
2338	int r;
2339
2340	memset(&alu, 0, sizeof(struct r600_bc_alu));
2341	alu.inst = opcode;
2342	alu.predicate = 1;
2343
2344	alu.dst.sel = ctx->temp_reg;
2345	alu.dst.write = 1;
2346	alu.dst.chan = 0;
2347
2348	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2349	alu.src[1].sel = V_SQ_ALU_SRC_0;
2350	alu.src[1].chan = 0;
2351
2352	alu.last = 1;
2353
2354	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2355	if (r)
2356		return r;
2357	return 0;
2358}
2359
2360static int pops(struct r600_shader_ctx *ctx, int pops)
2361{
2362	int alu_pop = 3;
2363	if (ctx->bc->cf_last) {
2364		if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
2365			alu_pop = 0;
2366		else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
2367			alu_pop = 1;
2368	}
2369	alu_pop += pops;
2370	if (alu_pop == 1) {
2371		ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
2372		ctx->bc->force_add_cf = 1;
2373	} else if (alu_pop == 2) {
2374		ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
2375		ctx->bc->force_add_cf = 1;
2376	} else {
2377		r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2378		ctx->bc->cf_last->pop_count = pops;
2379		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
2380	}
2381	return 0;
2382}
2383
2384static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2385{
2386	switch(reason) {
2387	case FC_PUSH_VPM:
2388		ctx->bc->callstack[ctx->bc->call_sp].current--;
2389		break;
2390	case FC_PUSH_WQM:
2391	case FC_LOOP:
2392		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2393		break;
2394	case FC_REP:
2395		/* TOODO : for 16 vp asic should -= 2; */
2396		ctx->bc->callstack[ctx->bc->call_sp].current --;
2397		break;
2398	}
2399}
2400
2401static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2402{
2403	if (check_max_only) {
2404		int diff;
2405		switch (reason) {
2406		case FC_PUSH_VPM:
2407			diff = 1;
2408			break;
2409		case FC_PUSH_WQM:
2410			diff = 4;
2411			break;
2412		default:
2413			assert(0);
2414			diff = 0;
2415		}
2416		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2417		    ctx->bc->callstack[ctx->bc->call_sp].max) {
2418			ctx->bc->callstack[ctx->bc->call_sp].max =
2419				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2420		}
2421		return;
2422	}
2423	switch (reason) {
2424	case FC_PUSH_VPM:
2425		ctx->bc->callstack[ctx->bc->call_sp].current++;
2426		break;
2427	case FC_PUSH_WQM:
2428	case FC_LOOP:
2429		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2430		break;
2431	case FC_REP:
2432		ctx->bc->callstack[ctx->bc->call_sp].current++;
2433		break;
2434	}
2435
2436	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2437	    ctx->bc->callstack[ctx->bc->call_sp].max) {
2438		ctx->bc->callstack[ctx->bc->call_sp].max =
2439			ctx->bc->callstack[ctx->bc->call_sp].current;
2440	}
2441}
2442
2443static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2444{
2445	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2446
2447	sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2448						sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2449	sp->mid[sp->num_mid] = ctx->bc->cf_last;
2450	sp->num_mid++;
2451}
2452
2453static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2454{
2455	ctx->bc->fc_sp++;
2456	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2457	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2458}
2459
2460static void fc_poplevel(struct r600_shader_ctx *ctx)
2461{
2462	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2463	if (sp->mid) {
2464		free(sp->mid);
2465		sp->mid = NULL;
2466	}
2467	sp->num_mid = 0;
2468	sp->start = NULL;
2469	sp->type = 0;
2470	ctx->bc->fc_sp--;
2471}
2472
/*
 * Everything between this #if 0 and its #endif is compiled out. It holds
 * unfinished helpers for RETURN handling and flag-based loop exit; several
 * of them are empty stubs.
 */
#if 0
/* Adds a CF RETURN instruction; always returns 0. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/* Adds a CF JUMP with the given pop count. `offset` is not used yet. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: emits nothing and returns 0. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

/* Calls the helpers above in sequence, pops ifidx + 1 levels and emits a
 * RETURN. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/* Emits the current instruction's CF opcode with pop_count 1, records it
 * as a mid instruction of level fc_sp, then pops one level. */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2520
2521static int tgsi_if(struct r600_shader_ctx *ctx)
2522{
2523	emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2524
2525	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2526
2527	fc_pushlevel(ctx, FC_IF);
2528
2529	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2530	return 0;
2531}
2532
2533static int tgsi_else(struct r600_shader_ctx *ctx)
2534{
2535	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2536	ctx->bc->cf_last->pop_count = 1;
2537
2538	fc_set_mid(ctx, ctx->bc->fc_sp);
2539	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2540	return 0;
2541}
2542
2543static int tgsi_endif(struct r600_shader_ctx *ctx)
2544{
2545	pops(ctx, 1);
2546	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2547		R600_ERR("if/endif unbalanced in shader\n");
2548		return -1;
2549	}
2550
2551	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2552		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2553		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2554	} else {
2555		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2556	}
2557	fc_poplevel(ctx);
2558
2559	callstack_decrease_current(ctx, FC_PUSH_VPM);
2560	return 0;
2561}
2562
2563static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2564{
2565	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2566
2567	fc_pushlevel(ctx, FC_LOOP);
2568
2569	/* check stack depth */
2570	callstack_check_depth(ctx, FC_LOOP, 0);
2571	return 0;
2572}
2573
2574static int tgsi_endloop(struct r600_shader_ctx *ctx)
2575{
2576	int i;
2577
2578	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
2579
2580	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2581		R600_ERR("loop/endloop in shader code are not paired.\n");
2582		return -EINVAL;
2583	}
2584
2585	/* fixup loop pointers - from r600isa
2586	   LOOP END points to CF after LOOP START,
2587	   LOOP START point to CF after LOOP END
2588	   BRK/CONT point to LOOP END CF
2589	*/
2590	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2591
2592	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2593
2594	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2595		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2596	}
2597	/* TODO add LOOPRET support */
2598	fc_poplevel(ctx);
2599	callstack_decrease_current(ctx, FC_LOOP);
2600	return 0;
2601}
2602
2603static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2604{
2605	unsigned int fscp;
2606
2607	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2608	{
2609		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2610			break;
2611	}
2612
2613	if (fscp == 0) {
2614		R600_ERR("Break not inside loop/endloop pair\n");
2615		return -EINVAL;
2616	}
2617
2618	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2619	ctx->bc->cf_last->pop_count = 1;
2620
2621	fc_set_mid(ctx, fscp);
2622
2623	pops(ctx, 1);
2624	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2625	return 0;
2626}
2627
/*
 * TGSI opcode lowering table built from the non-EG V_SQ_* / SQ_TEX_*
 * opcode values.
 *
 * Each row is: TGSI opcode, a flag (non-zero only on the MULADD/OP3 row),
 * the hardware opcode associated with the TGSI opcode, and the handler
 * function that emits code for it. Opcodes without an implementation
 * point at tgsi_unsupported.
 *
 * Rows are listed in ascending TGSI opcode order, with numeric
 * placeholder rows filling the gaps in the opcode numbering.
 * NOTE(review): presumably this lets the table be indexed directly by
 * TGSI opcode; confirm against the lookup site before reordering rows.
 */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},

	/* FIXME:
	 * For state trackers other than OpenGL, we'll want to use
	 * _RECIP_IEEE instead.
	 */
	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},

	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
	/* gap */
	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_TXD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
	/* gap */
	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
	/* gap */
	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
	/* gap */
	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
};
2791
2792static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
2793	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
2794	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2795	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2796	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
2797	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
2798	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2799	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
2800	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2801	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2802	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2803	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2804	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2805	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2806	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2807	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2808	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2809	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2810	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2811	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2812	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2813	/* gap */
2814	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2815	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2816	/* gap */
2817	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2818	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2819	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2820	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2821	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2822	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2823	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2824	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2825	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2826	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2827	/* gap */
2828	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2829	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2830	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2831	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2832	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2833	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2834	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2835	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
2836	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2837	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2838	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2839	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2840	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2841	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2842	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2843	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2844	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2845	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2846	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2847	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2848	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2849	{TGSI_OPCODE_TXD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2850	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2851	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2852	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2853	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2854	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2855	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2856	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2857	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
2858	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2859	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2860	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2861	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2862	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2863	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
2864	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2865	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2866	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2867	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2868	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2869	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2870	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2871	/* gap */
2872	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2873	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2874	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2875	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2876	/* gap */
2877	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2878	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2879	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2880	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2881	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2882	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2883	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2884	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
2885	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2886	/* gap */
2887	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2888	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2889	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2890	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2891	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2892	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2893	{TGSI_OPCODE_TXF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2894	{TGSI_OPCODE_TXQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2895	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2896	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2897	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2898	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2899	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2900	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2901	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2902	/* gap */
2903	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2904	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2905	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2906	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2907	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2908	/* gap */
2909	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2910	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2911	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2912	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2913	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2914	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2915	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2916	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2917	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
2918	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
2919	/* gap */
2920	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2921	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2922	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2923	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2924	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2925	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2926	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2927	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2928	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2929	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2930	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2931	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2932	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2933	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2934	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2935	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2936	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2937	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2938	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2939	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2940	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2941	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2942	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2943	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2944	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2945	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2946	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2947	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2948};
2949