r600_shader.c revision 8ab1c5328b12e8b075f62599a84672024aaf2982
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "pipe/p_shader_tokens.h"
24#include "tgsi/tgsi_info.h"
25#include "tgsi/tgsi_parse.h"
26#include "tgsi/tgsi_scan.h"
27#include "tgsi/tgsi_dump.h"
28#include "util/u_format.h"
29#include "r600_pipe.h"
30#include "r600_asm.h"
31#include "r600_sq.h"
32#include "r600_formats.h"
33#include "r600_opcodes.h"
34#include "r600d.h"
35#include <stdio.h>
36#include <errno.h>
37#include <byteswap.h>
38
39/* CAYMAN notes
Why CAYMAN needs loops for so many instructions is explained here.
41
42-These 8xx t-slot only ops are implemented in all vector slots.
43MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
44These 8xx t-slot only opcodes become vector ops, with all four
45slots expecting the arguments on sources a and b. Result is
46broadcast to all channels.
47MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
48These 8xx t-slot only opcodes become vector ops in the z, y, and
49x slots.
50EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
51RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
52SQRT_IEEE/_64
53SIN/COS
54The w slot may have an independent co-issued operation, or if the
55result is required to be in the w slot, the opcode above may be
56issued in the w slot as well.
57The compiler must issue the source argument to slots z, y, and x
58*/
59
60
61int r600_find_vs_semantic_index(struct r600_shader *vs,
62				struct r600_shader *ps, int id)
63{
64	struct r600_shader_io *input = &ps->input[id];
65
66	for (int i = 0; i < vs->noutput; i++) {
67		if (input->name == vs->output[i].name &&
68			input->sid == vs->output[i].sid) {
69			return i - 1;
70		}
71	}
72	return 0;
73}
74
75static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
76{
77	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
78	struct r600_shader *rshader = &shader->shader;
79	uint32_t *ptr;
80	int	i;
81
82	/* copy new shader */
83	if (shader->bo == NULL) {
84		/* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
85		shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE);
86		if (shader->bo == NULL) {
87			return -ENOMEM;
88		}
89		ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
90		if (R600_BIG_ENDIAN) {
91			for (i = 0; i < rshader->bc.ndw; ++i) {
92				ptr[i] = bswap_32(rshader->bc.bytecode[i]);
93			}
94		} else {
95			memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr));
96		}
97		r600_bo_unmap(rctx->radeon, shader->bo);
98	}
99	/* build state */
100	switch (rshader->processor_type) {
101	case TGSI_PROCESSOR_VERTEX:
102		if (rshader->family >= CHIP_CEDAR) {
103			evergreen_pipe_shader_vs(ctx, shader);
104		} else {
105			r600_pipe_shader_vs(ctx, shader);
106		}
107		break;
108	case TGSI_PROCESSOR_FRAGMENT:
109		if (rshader->family >= CHIP_CEDAR) {
110			evergreen_pipe_shader_ps(ctx, shader);
111		} else {
112			r600_pipe_shader_ps(ctx, shader);
113		}
114		break;
115	default:
116		return -EINVAL;
117	}
118	return 0;
119}
120
121static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
122
123int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
124{
125	static int dump_shaders = -1;
126	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
127	int r;
128
129	/* Would like some magic "get_bool_option_once" routine.
130	*/
131	if (dump_shaders == -1)
132		dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
133
134	if (dump_shaders) {
135		fprintf(stderr, "--------------------------------------------------------------\n");
136		tgsi_dump(tokens, 0);
137	}
138	shader->shader.family = r600_get_family(rctx->radeon);
139	r = r600_shader_from_tgsi(tokens, &shader->shader);
140	if (r) {
141		R600_ERR("translation from TGSI failed !\n");
142		return r;
143	}
144	r = r600_bc_build(&shader->shader.bc);
145	if (r) {
146		R600_ERR("building bytecode failed !\n");
147		return r;
148	}
149	if (dump_shaders) {
150		r600_bc_dump(&shader->shader.bc);
151		fprintf(stderr, "______________________________________________________________\n");
152	}
153	return r600_pipe_shader(ctx, shader);
154}
155
156void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
157{
158	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
159
160	r600_bo_reference(rctx->radeon, &shader->bo, NULL);
161	r600_bc_clear(&shader->shader.bc);
162}
163
164/*
165 * tgsi -> r600 shader
166 */
167struct r600_shader_tgsi_instruction;
168
/* One TGSI source operand translated into r600 terms (filled by tgsi_src()). */
struct r600_shader_src {
	/* register / constant / special-value selector */
	unsigned sel;
	/* source channel chosen for each of the x, y, z, w components */
	unsigned swizzle[4];
	/* negate modifier */
	unsigned neg;
	/* absolute-value modifier */
	unsigned abs;
	/* relative (indirect) addressing mode */
	unsigned rel;
	/* the four literal dwords when sel is V_SQ_ALU_SRC_LITERAL */
	uint32_t value[4];
};
177
178struct r600_shader_ctx {
179	struct tgsi_shader_info			info;
180	struct tgsi_parse_context		parse;
181	const struct tgsi_token			*tokens;
182	unsigned				type;
183	unsigned				file_offset[TGSI_FILE_COUNT];
184	unsigned				temp_reg;
185	unsigned				ar_reg;
186	struct r600_shader_tgsi_instruction	*inst_info;
187	struct r600_bc				*bc;
188	struct r600_shader			*shader;
189	struct r600_shader_src			src[4];
190	u32					*literals;
191	u32					nliterals;
192	u32					max_driver_temp_used;
193	/* needed for evergreen interpolation */
194	boolean                                 input_centroid;
195	boolean                                 input_linear;
196	boolean                                 input_perspective;
197	int					num_interp_gpr;
198};
199
/* One entry of the per-chip TGSI opcode translation tables. */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry describes */
	unsigned tgsi_opcode;
	/* NOTE(review): presumably non-zero when r600_opcode is a three-operand op - confirm */
	unsigned is_op3;
	/* hardware opcode used by the handler */
	unsigned r600_opcode;
	/* handler emitting the bytecode; returns 0 or a negative errno value */
	int (*process)(struct r600_shader_ctx *ctx);
};
206
207static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
208static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
209
210static int tgsi_is_supported(struct r600_shader_ctx *ctx)
211{
212	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
213	int j;
214
215	if (i->Instruction.NumDstRegs > 1) {
216		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
217		return -EINVAL;
218	}
219	if (i->Instruction.Predicate) {
220		R600_ERR("predicate unsupported\n");
221		return -EINVAL;
222	}
223#if 0
224	if (i->Instruction.Label) {
225		R600_ERR("label unsupported\n");
226		return -EINVAL;
227	}
228#endif
229	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
230		if (i->Src[j].Register.Dimension) {
231			R600_ERR("unsupported src %d (dimension %d)\n", j,
232				 i->Src[j].Register.Dimension);
233			return -EINVAL;
234		}
235	}
236	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
237		if (i->Dst[j].Register.Dimension) {
238			R600_ERR("unsupported dst (dimension)\n");
239			return -EINVAL;
240		}
241	}
242	return 0;
243}
244
245static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
246{
247	int i, r;
248	struct r600_bc_alu alu;
249	int gpr = 0, base_chan = 0;
250	int ij_index = 0;
251
252	if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
253		ij_index = 0;
254		if (ctx->shader->input[input].centroid)
255			ij_index++;
256	} else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
257		ij_index = 0;
258		/* if we have perspective add one */
259		if (ctx->input_perspective)  {
260			ij_index++;
261			/* if we have perspective centroid */
262			if (ctx->input_centroid)
263				ij_index++;
264		}
265		if (ctx->shader->input[input].centroid)
266			ij_index++;
267	}
268
269	/* work out gpr and base_chan from index */
270	gpr = ij_index / 2;
271	base_chan = (2 * (ij_index % 2)) + 1;
272
273	for (i = 0; i < 8; i++) {
274		memset(&alu, 0, sizeof(struct r600_bc_alu));
275
276		if (i < 4)
277			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
278		else
279			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
280
281		if ((i > 1) && (i < 6)) {
282			alu.dst.sel = ctx->shader->input[input].gpr;
283			alu.dst.write = 1;
284		}
285
286		alu.dst.chan = i % 4;
287
288		alu.src[0].sel = gpr;
289		alu.src[0].chan = (base_chan - (i % 2));
290
291		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
292
293		alu.bank_swizzle_force = SQ_ALU_VEC_210;
294		if ((i % 4) == 3)
295			alu.last = 1;
296		r = r600_bc_add_alu(ctx->bc, &alu);
297		if (r)
298			return r;
299	}
300	return 0;
301}
302
303
304static int tgsi_declaration(struct r600_shader_ctx *ctx)
305{
306	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
307	unsigned i;
308	int r;
309
310	switch (d->Declaration.File) {
311	case TGSI_FILE_INPUT:
312		i = ctx->shader->ninput++;
313		ctx->shader->input[i].name = d->Semantic.Name;
314		ctx->shader->input[i].sid = d->Semantic.Index;
315		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
316		ctx->shader->input[i].centroid = d->Declaration.Centroid;
317		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
318		if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev >= CHIPREV_EVERGREEN) {
319			/* turn input into interpolate on EG */
320			if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
321				if (ctx->shader->input[i].interpolate > 0) {
322					ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
323					evergreen_interp_alu(ctx, i);
324				}
325			}
326		}
327		break;
328	case TGSI_FILE_OUTPUT:
329		i = ctx->shader->noutput++;
330		ctx->shader->output[i].name = d->Semantic.Name;
331		ctx->shader->output[i].sid = d->Semantic.Index;
332		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
333		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
334		break;
335	case TGSI_FILE_CONSTANT:
336	case TGSI_FILE_TEMPORARY:
337	case TGSI_FILE_SAMPLER:
338	case TGSI_FILE_ADDRESS:
339		break;
340
341	case TGSI_FILE_SYSTEM_VALUE:
342		if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
343			struct r600_bc_alu alu;
344			memset(&alu, 0, sizeof(struct r600_bc_alu));
345
346			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
347			alu.src[0].sel = 0;
348			alu.src[0].chan = 3;
349
350			alu.dst.sel = 0;
351			alu.dst.chan = 3;
352			alu.dst.write = 1;
353			alu.last = 1;
354
355			if ((r = r600_bc_add_alu(ctx->bc, &alu)))
356				return r;
357			break;
358		}
359
360	default:
361		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
362		return -EINVAL;
363	}
364	return 0;
365}
366
367static int r600_get_temp(struct r600_shader_ctx *ctx)
368{
369	return ctx->temp_reg + ctx->max_driver_temp_used++;
370}
371
372/*
373 * for evergreen we need to scan the shader to find the number of GPRs we need to
374 * reserve for interpolation.
375 *
376 * we need to know if we are going to emit
377 * any centroid inputs
378 * if perspective and linear are required
379*/
380static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
381{
382	int i;
383	int num_baryc;
384
385	ctx->input_linear = FALSE;
386	ctx->input_perspective = FALSE;
387	ctx->input_centroid = FALSE;
388	ctx->num_interp_gpr = 1;
389
390	/* any centroid inputs */
391	for (i = 0; i < ctx->info.num_inputs; i++) {
392		/* skip position/face */
393		if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
394		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
395			continue;
396		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
397			ctx->input_linear = TRUE;
398		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
399			ctx->input_perspective = TRUE;
400		if (ctx->info.input_centroid[i])
401			ctx->input_centroid = TRUE;
402	}
403
404	num_baryc = 0;
405	/* ignoring sample for now */
406	if (ctx->input_perspective)
407		num_baryc++;
408	if (ctx->input_linear)
409		num_baryc++;
410	if (ctx->input_centroid)
411		num_baryc *= 2;
412
413	ctx->num_interp_gpr += (num_baryc + 1) >> 1;
414
415	/* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
416	return ctx->num_interp_gpr;
417}
418
419static void tgsi_src(struct r600_shader_ctx *ctx,
420		     const struct tgsi_full_src_register *tgsi_src,
421		     struct r600_shader_src *r600_src)
422{
423	memset(r600_src, 0, sizeof(*r600_src));
424	r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
425	r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
426	r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
427	r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
428	r600_src->neg = tgsi_src->Register.Negate;
429	r600_src->abs = tgsi_src->Register.Absolute;
430
431	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
432		int index;
433		if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
434			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
435			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
436
437			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
438			r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
439			if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
440				return;
441		}
442		index = tgsi_src->Register.Index;
443		r600_src->sel = V_SQ_ALU_SRC_LITERAL;
444		memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
445	} else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
446		/* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
447		r600_src->swizzle[0] = 3;
448		r600_src->swizzle[1] = 3;
449		r600_src->swizzle[2] = 3;
450		r600_src->swizzle[3] = 3;
451		r600_src->sel = 0;
452	} else {
453		if (tgsi_src->Register.Indirect)
454			r600_src->rel = V_SQ_REL_RELATIVE;
455		r600_src->sel = tgsi_src->Register.Index;
456		r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
457	}
458}
459
460static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
461{
462	struct r600_bc_vtx vtx;
463	unsigned int ar_reg;
464	int r;
465
466	if (offset) {
467		struct r600_bc_alu alu;
468
469		memset(&alu, 0, sizeof(alu));
470
471		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
472		alu.src[0].sel = ctx->ar_reg;
473
474		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
475		alu.src[1].value = offset;
476
477		alu.dst.sel = dst_reg;
478		alu.dst.write = 1;
479		alu.last = 1;
480
481		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
482			return r;
483
484		ar_reg = dst_reg;
485	} else {
486		ar_reg = ctx->ar_reg;
487	}
488
489	memset(&vtx, 0, sizeof(vtx));
490	vtx.fetch_type = 2;		/* VTX_FETCH_NO_INDEX_OFFSET */
491	vtx.src_gpr = ar_reg;
492	vtx.mega_fetch_count = 16;
493	vtx.dst_gpr = dst_reg;
494	vtx.dst_sel_x = 0;		/* SEL_X */
495	vtx.dst_sel_y = 1;		/* SEL_Y */
496	vtx.dst_sel_z = 2;		/* SEL_Z */
497	vtx.dst_sel_w = 3;		/* SEL_W */
498	vtx.data_format = FMT_32_32_32_32_FLOAT;
499	vtx.num_format_all = 2;		/* NUM_FORMAT_SCALED */
500	vtx.format_comp_all = 1;	/* FORMAT_COMP_SIGNED */
501	vtx.srf_mode_all = 1;		/* SRF_MODE_NO_ZERO */
502	vtx.endian = r600_endian_swap(32);
503
504	if ((r = r600_bc_add_vtx(ctx->bc, &vtx)))
505		return r;
506
507	return 0;
508}
509
510static int tgsi_split_constant(struct r600_shader_ctx *ctx)
511{
512	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
513	struct r600_bc_alu alu;
514	int i, j, k, nconst, r;
515
516	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
517		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
518			nconst++;
519		}
520		tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
521	}
522	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
523		if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
524			continue;
525		}
526
527		if (ctx->src[i].rel) {
528			int treg = r600_get_temp(ctx);
529			if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
530				return r;
531
532			ctx->src[i].sel = treg;
533			ctx->src[i].rel = 0;
534			j--;
535		} else if (j > 0) {
536			int treg = r600_get_temp(ctx);
537			for (k = 0; k < 4; k++) {
538				memset(&alu, 0, sizeof(struct r600_bc_alu));
539				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
540				alu.src[0].sel = ctx->src[i].sel;
541				alu.src[0].chan = k;
542				alu.src[0].rel = ctx->src[i].rel;
543				alu.dst.sel = treg;
544				alu.dst.chan = k;
545				alu.dst.write = 1;
546				if (k == 3)
547					alu.last = 1;
548				r = r600_bc_add_alu(ctx->bc, &alu);
549				if (r)
550					return r;
551			}
552			ctx->src[i].sel = treg;
553			ctx->src[i].rel =0;
554			j--;
555		}
556	}
557	return 0;
558}
559
560/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
561static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
562{
563	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
564	struct r600_bc_alu alu;
565	int i, j, k, nliteral, r;
566
567	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
568		if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
569			nliteral++;
570		}
571	}
572	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
573		if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
574			int treg = r600_get_temp(ctx);
575			for (k = 0; k < 4; k++) {
576				memset(&alu, 0, sizeof(struct r600_bc_alu));
577				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
578				alu.src[0].sel = ctx->src[i].sel;
579				alu.src[0].chan = k;
580				alu.src[0].value = ctx->src[i].value[k];
581				alu.dst.sel = treg;
582				alu.dst.chan = k;
583				alu.dst.write = 1;
584				if (k == 3)
585					alu.last = 1;
586				r = r600_bc_add_alu(ctx->bc, &alu);
587				if (r)
588					return r;
589			}
590			ctx->src[i].sel = treg;
591			j--;
592		}
593	}
594	return 0;
595}
596
597static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
598{
599	struct tgsi_full_immediate *immediate;
600	struct tgsi_full_property *property;
601	struct r600_shader_ctx ctx;
602	struct r600_bc_output output[32];
603	unsigned output_done, noutput;
604	unsigned opcode;
605	int i, r = 0, pos0;
606
607	ctx.bc = &shader->bc;
608	ctx.shader = shader;
609	r = r600_bc_init(ctx.bc, shader->family);
610	if (r)
611		return r;
612	ctx.tokens = tokens;
613	tgsi_scan_shader(tokens, &ctx.info);
614	tgsi_parse_init(&ctx.parse, tokens);
615	ctx.type = ctx.parse.FullHeader.Processor.Processor;
616	shader->processor_type = ctx.type;
617	ctx.bc->type = shader->processor_type;
618
619	/* register allocations */
620	/* Values [0,127] correspond to GPR[0..127].
621	 * Values [128,159] correspond to constant buffer bank 0
622	 * Values [160,191] correspond to constant buffer bank 1
623	 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
624	 * Values [256,287] correspond to constant buffer bank 2 (EG)
625	 * Values [288,319] correspond to constant buffer bank 3 (EG)
626	 * Other special values are shown in the list below.
627	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
628	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
629	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
630	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
631	 * 248	SQ_ALU_SRC_0: special constant 0.0.
632	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
633	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
634	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
635	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
636	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
637	 * 254	SQ_ALU_SRC_PV: previous vector result.
638	 * 255	SQ_ALU_SRC_PS: previous scalar result.
639	 */
640	for (i = 0; i < TGSI_FILE_COUNT; i++) {
641		ctx.file_offset[i] = 0;
642	}
643	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
644		ctx.file_offset[TGSI_FILE_INPUT] = 1;
645		if (ctx.bc->chiprev >= CHIPREV_EVERGREEN) {
646			r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
647		} else {
648			r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
649		}
650	}
651	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev >= CHIPREV_EVERGREEN) {
652		ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
653	}
654	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
655						ctx.info.file_count[TGSI_FILE_INPUT];
656	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
657						ctx.info.file_count[TGSI_FILE_OUTPUT];
658
659	/* Outside the GPR range. This will be translated to one of the
660	 * kcache banks later. */
661	ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
662
663	ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
664	ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
665			ctx.info.file_count[TGSI_FILE_TEMPORARY];
666	ctx.temp_reg = ctx.ar_reg + 1;
667
668	ctx.nliterals = 0;
669	ctx.literals = NULL;
670	shader->fs_write_all = FALSE;
671	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
672		tgsi_parse_token(&ctx.parse);
673		switch (ctx.parse.FullToken.Token.Type) {
674		case TGSI_TOKEN_TYPE_IMMEDIATE:
675			immediate = &ctx.parse.FullToken.FullImmediate;
676			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
677			if(ctx.literals == NULL) {
678				r = -ENOMEM;
679				goto out_err;
680			}
681			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
682			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
683			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
684			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
685			ctx.nliterals++;
686			break;
687		case TGSI_TOKEN_TYPE_DECLARATION:
688			r = tgsi_declaration(&ctx);
689			if (r)
690				goto out_err;
691			break;
692		case TGSI_TOKEN_TYPE_INSTRUCTION:
693			r = tgsi_is_supported(&ctx);
694			if (r)
695				goto out_err;
696			ctx.max_driver_temp_used = 0;
697			/* reserve first tmp for everyone */
698			r600_get_temp(&ctx);
699
700			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
701			if ((r = tgsi_split_constant(&ctx)))
702				goto out_err;
703			if ((r = tgsi_split_literal_constant(&ctx)))
704				goto out_err;
705			if (ctx.bc->chiprev == CHIPREV_CAYMAN)
706				ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
707			else if (ctx.bc->chiprev >= CHIPREV_EVERGREEN)
708				ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
709			else
710				ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
711			r = ctx.inst_info->process(&ctx);
712			if (r)
713				goto out_err;
714			break;
715		case TGSI_TOKEN_TYPE_PROPERTY:
716			property = &ctx.parse.FullToken.FullProperty;
717			if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
718				if (property->u[0].Data == 1)
719					shader->fs_write_all = TRUE;
720			}
721			break;
722		default:
723			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
724			r = -EINVAL;
725			goto out_err;
726		}
727	}
728	/* export output */
729	noutput = shader->noutput;
730	for (i = 0, pos0 = 0; i < noutput; i++) {
731		memset(&output[i], 0, sizeof(struct r600_bc_output));
732		output[i].gpr = shader->output[i].gpr;
733		output[i].elem_size = 3;
734		output[i].swizzle_x = 0;
735		output[i].swizzle_y = 1;
736		output[i].swizzle_z = 2;
737		output[i].swizzle_w = 3;
738		output[i].burst_count = 1;
739		output[i].barrier = 1;
740		output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
741		output[i].array_base = i - pos0;
742		output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
743		switch (ctx.type) {
744		case TGSI_PROCESSOR_VERTEX:
745			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
746				output[i].array_base = 60;
747				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
748				/* position doesn't count in array_base */
749				pos0++;
750			}
751			if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
752				output[i].array_base = 61;
753				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
754				/* position doesn't count in array_base */
755				pos0++;
756			}
757			break;
758		case TGSI_PROCESSOR_FRAGMENT:
759			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
760				output[i].array_base = shader->output[i].sid;
761				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
762			} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
763				output[i].array_base = 61;
764				output[i].swizzle_x = 2;
765				output[i].swizzle_y = 7;
766				output[i].swizzle_z = output[i].swizzle_w = 7;
767				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
768			} else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
769				output[i].array_base = 61;
770				output[i].swizzle_x = 7;
771				output[i].swizzle_y = 1;
772				output[i].swizzle_z = output[i].swizzle_w = 7;
773				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
774			} else {
775				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
776				r = -EINVAL;
777				goto out_err;
778			}
779			break;
780		default:
781			R600_ERR("unsupported processor type %d\n", ctx.type);
782			r = -EINVAL;
783			goto out_err;
784		}
785	}
786	/* add fake param output for vertex shader if no param is exported */
787	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
788		for (i = 0, pos0 = 0; i < noutput; i++) {
789			if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
790				pos0 = 1;
791				break;
792			}
793		}
794		if (!pos0) {
795			memset(&output[i], 0, sizeof(struct r600_bc_output));
796			output[i].gpr = 0;
797			output[i].elem_size = 3;
798			output[i].swizzle_x = 0;
799			output[i].swizzle_y = 1;
800			output[i].swizzle_z = 2;
801			output[i].swizzle_w = 3;
802			output[i].burst_count = 1;
803			output[i].barrier = 1;
804			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
805			output[i].array_base = 0;
806			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
807			noutput++;
808		}
809	}
810	/* add fake pixel export */
811	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
812		memset(&output[0], 0, sizeof(struct r600_bc_output));
813		output[0].gpr = 0;
814		output[0].elem_size = 3;
815		output[0].swizzle_x = 7;
816		output[0].swizzle_y = 7;
817		output[0].swizzle_z = 7;
818		output[0].swizzle_w = 7;
819		output[0].burst_count = 1;
820		output[0].barrier = 1;
821		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
822		output[0].array_base = 0;
823		output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
824		noutput++;
825	}
826	/* set export done on last export of each type */
827	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
828		if (ctx.bc->chiprev < CHIPREV_CAYMAN) {
829			if (i == (noutput - 1)) {
830				output[i].end_of_program = 1;
831			}
832		}
833		if (!(output_done & (1 << output[i].type))) {
834			output_done |= (1 << output[i].type);
835			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
836		}
837	}
838	/* add output to bytecode */
839	for (i = 0; i < noutput; i++) {
840		r = r600_bc_add_output(ctx.bc, &output[i]);
841		if (r)
842			goto out_err;
843	}
844	/* add program end */
845	if (ctx.bc->chiprev == CHIPREV_CAYMAN)
846		cm_bc_add_cf_end(ctx.bc);
847
848	free(ctx.literals);
849	tgsi_parse_free(&ctx.parse);
850	return 0;
851out_err:
852	free(ctx.literals);
853	tgsi_parse_free(&ctx.parse);
854	return r;
855}
856
857static int tgsi_unsupported(struct r600_shader_ctx *ctx)
858{
859	R600_ERR("%s tgsi opcode unsupported\n",
860		 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
861	return -EINVAL;
862}
863
/* Handler that emits nothing and always succeeds; the context is not used. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;
	return 0;
}
868
869static void r600_bc_src(struct r600_bc_alu_src *bc_src,
870			const struct r600_shader_src *shader_src,
871			unsigned chan)
872{
873	bc_src->sel = shader_src->sel;
874	bc_src->chan = shader_src->swizzle[chan];
875	bc_src->neg = shader_src->neg;
876	bc_src->abs = shader_src->abs;
877	bc_src->rel = shader_src->rel;
878	bc_src->value = shader_src->value[bc_src->chan];
879}
880
881static void tgsi_dst(struct r600_shader_ctx *ctx,
882		     const struct tgsi_full_dst_register *tgsi_dst,
883		     unsigned swizzle,
884		     struct r600_bc_alu_dst *r600_dst)
885{
886	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
887
888	r600_dst->sel = tgsi_dst->Register.Index;
889	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
890	r600_dst->chan = swizzle;
891	r600_dst->write = 1;
892	if (tgsi_dst->Register.Indirect)
893		r600_dst->rel = V_SQ_REL_RELATIVE;
894	if (inst->Instruction.Saturate) {
895		r600_dst->clamp = 1;
896	}
897}
898
/*
 * Return the index (0..3) of the highest channel enabled in the low four
 * bits of `writemask`, or 0 when none of them is set.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan from w down to y; x and "nothing set" both yield 0 */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1u << chan))
			return chan;
	}
	return 0;
}
910
911static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
912{
913	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
914	struct r600_bc_alu alu;
915	int i, j, r;
916	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
917
918	for (i = 0; i < lasti + 1; i++) {
919		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
920			continue;
921
922		memset(&alu, 0, sizeof(struct r600_bc_alu));
923		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
924
925		alu.inst = ctx->inst_info->r600_opcode;
926		if (!swap) {
927			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
928				r600_bc_src(&alu.src[j], &ctx->src[j], i);
929			}
930		} else {
931			r600_bc_src(&alu.src[0], &ctx->src[1], i);
932			r600_bc_src(&alu.src[1], &ctx->src[0], i);
933		}
934		/* handle some special cases */
935		switch (ctx->inst_info->tgsi_opcode) {
936		case TGSI_OPCODE_SUB:
937			alu.src[1].neg = 1;
938			break;
939		case TGSI_OPCODE_ABS:
940			alu.src[0].abs = 1;
941			if (alu.src[0].neg)
942			  alu.src[0].neg = 0;
943			break;
944		default:
945			break;
946		}
947		if (i == lasti) {
948			alu.last = 1;
949		}
950		r = r600_bc_add_alu(ctx->bc, &alu);
951		if (r)
952			return r;
953	}
954	return 0;
955}
956
/* Per-channel ALU op with the sources in their TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
961
/* Per-channel ALU op with the first two sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
966
967static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
968{
969	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
970	int i, j, r;
971	struct r600_bc_alu alu;
972	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
973
974	for (i = 0 ; i < last_slot; i++) {
975		memset(&alu, 0, sizeof(struct r600_bc_alu));
976		alu.inst = ctx->inst_info->r600_opcode;
977		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
978			r600_bc_src(&alu.src[j], &ctx->src[j], 0);
979		}
980		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
981		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
982
983		if (i == last_slot - 1)
984			alu.last = 1;
985		r = r600_bc_add_alu(ctx->bc, &alu);
986		if (r)
987			return r;
988	}
989	return 0;
990}
991
992/*
993 * r600 - trunc to -PI..PI range
994 * r700 - normalize by dividing by 2PI
995 * see fdo bug 27901
996 */
997static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
998{
999	static float half_inv_pi = 1.0 /(3.1415926535 * 2);
1000	static float double_pi = 3.1415926535 * 2;
1001	static float neg_pi = -3.1415926535;
1002
1003	int r;
1004	struct r600_bc_alu alu;
1005
1006	memset(&alu, 0, sizeof(struct r600_bc_alu));
1007	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1008	alu.is_op3 = 1;
1009
1010	alu.dst.chan = 0;
1011	alu.dst.sel = ctx->temp_reg;
1012	alu.dst.write = 1;
1013
1014	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1015
1016	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1017	alu.src[1].chan = 0;
1018	alu.src[1].value = *(uint32_t *)&half_inv_pi;
1019	alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1020	alu.src[2].chan = 0;
1021	alu.last = 1;
1022	r = r600_bc_add_alu(ctx->bc, &alu);
1023	if (r)
1024		return r;
1025
1026	memset(&alu, 0, sizeof(struct r600_bc_alu));
1027	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1028
1029	alu.dst.chan = 0;
1030	alu.dst.sel = ctx->temp_reg;
1031	alu.dst.write = 1;
1032
1033	alu.src[0].sel = ctx->temp_reg;
1034	alu.src[0].chan = 0;
1035	alu.last = 1;
1036	r = r600_bc_add_alu(ctx->bc, &alu);
1037	if (r)
1038		return r;
1039
1040	memset(&alu, 0, sizeof(struct r600_bc_alu));
1041	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1042	alu.is_op3 = 1;
1043
1044	alu.dst.chan = 0;
1045	alu.dst.sel = ctx->temp_reg;
1046	alu.dst.write = 1;
1047
1048	alu.src[0].sel = ctx->temp_reg;
1049	alu.src[0].chan = 0;
1050
1051	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1052	alu.src[1].chan = 0;
1053	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1054	alu.src[2].chan = 0;
1055
1056	if (ctx->bc->chiprev == CHIPREV_R600) {
1057		alu.src[1].value = *(uint32_t *)&double_pi;
1058		alu.src[2].value = *(uint32_t *)&neg_pi;
1059	} else {
1060		alu.src[1].sel = V_SQ_ALU_SRC_1;
1061		alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1062		alu.src[2].neg = 1;
1063	}
1064
1065	alu.last = 1;
1066	r = r600_bc_add_alu(ctx->bc, &alu);
1067	if (r)
1068		return r;
1069	return 0;
1070}
1071
1072static int cayman_trig(struct r600_shader_ctx *ctx)
1073{
1074	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1075	struct r600_bc_alu alu;
1076	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1077	int i, r;
1078
1079	r = tgsi_setup_trig(ctx);
1080	if (r)
1081		return r;
1082
1083
1084	for (i = 0; i < last_slot; i++) {
1085		memset(&alu, 0, sizeof(struct r600_bc_alu));
1086		alu.inst = ctx->inst_info->r600_opcode;
1087		alu.dst.chan = i;
1088
1089		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1090		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1091
1092		alu.src[0].sel = ctx->temp_reg;
1093		alu.src[0].chan = 0;
1094		if (i == last_slot - 1)
1095			alu.last = 1;
1096		r = r600_bc_add_alu(ctx->bc, &alu);
1097		if (r)
1098			return r;
1099	}
1100	return 0;
1101}
1102
1103static int tgsi_trig(struct r600_shader_ctx *ctx)
1104{
1105	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1106	struct r600_bc_alu alu;
1107	int i, r;
1108	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1109
1110	r = tgsi_setup_trig(ctx);
1111	if (r)
1112		return r;
1113
1114	memset(&alu, 0, sizeof(struct r600_bc_alu));
1115	alu.inst = ctx->inst_info->r600_opcode;
1116	alu.dst.chan = 0;
1117	alu.dst.sel = ctx->temp_reg;
1118	alu.dst.write = 1;
1119
1120	alu.src[0].sel = ctx->temp_reg;
1121	alu.src[0].chan = 0;
1122	alu.last = 1;
1123	r = r600_bc_add_alu(ctx->bc, &alu);
1124	if (r)
1125		return r;
1126
1127	/* replicate result */
1128	for (i = 0; i < lasti + 1; i++) {
1129		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1130			continue;
1131
1132		memset(&alu, 0, sizeof(struct r600_bc_alu));
1133		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1134
1135		alu.src[0].sel = ctx->temp_reg;
1136		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1137		if (i == lasti)
1138			alu.last = 1;
1139		r = r600_bc_add_alu(ctx->bc, &alu);
1140		if (r)
1141			return r;
1142	}
1143	return 0;
1144}
1145
1146static int tgsi_scs(struct r600_shader_ctx *ctx)
1147{
1148	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1149	struct r600_bc_alu alu;
1150	int i, r;
1151
1152	/* We'll only need the trig stuff if we are going to write to the
1153	 * X or Y components of the destination vector.
1154	 */
1155	if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1156		r = tgsi_setup_trig(ctx);
1157		if (r)
1158			return r;
1159	}
1160
1161	/* dst.x = COS */
1162	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1163		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
1164			for (i = 0 ; i < 3; i++) {
1165				memset(&alu, 0, sizeof(struct r600_bc_alu));
1166				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1167				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1168
1169				if (i == 0)
1170					alu.dst.write = 1;
1171				else
1172					alu.dst.write = 0;
1173				alu.src[0].sel = ctx->temp_reg;
1174				alu.src[0].chan = 0;
1175				if (i == 2)
1176					alu.last = 1;
1177				r = r600_bc_add_alu(ctx->bc, &alu);
1178				if (r)
1179					return r;
1180			}
1181		} else {
1182			memset(&alu, 0, sizeof(struct r600_bc_alu));
1183			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1184			tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1185
1186			alu.src[0].sel = ctx->temp_reg;
1187			alu.src[0].chan = 0;
1188			alu.last = 1;
1189			r = r600_bc_add_alu(ctx->bc, &alu);
1190			if (r)
1191				return r;
1192		}
1193	}
1194
1195	/* dst.y = SIN */
1196	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1197		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
1198			for (i = 0 ; i < 3; i++) {
1199				memset(&alu, 0, sizeof(struct r600_bc_alu));
1200				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1201				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1202				if (i == 1)
1203					alu.dst.write = 1;
1204				else
1205					alu.dst.write = 0;
1206				alu.src[0].sel = ctx->temp_reg;
1207				alu.src[0].chan = 0;
1208				if (i == 2)
1209					alu.last = 1;
1210				r = r600_bc_add_alu(ctx->bc, &alu);
1211				if (r)
1212					return r;
1213			}
1214		} else {
1215			memset(&alu, 0, sizeof(struct r600_bc_alu));
1216			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1217			tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1218
1219			alu.src[0].sel = ctx->temp_reg;
1220			alu.src[0].chan = 0;
1221			alu.last = 1;
1222			r = r600_bc_add_alu(ctx->bc, &alu);
1223			if (r)
1224				return r;
1225		}
1226	}
1227
1228	/* dst.z = 0.0; */
1229	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1230		memset(&alu, 0, sizeof(struct r600_bc_alu));
1231
1232		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1233
1234		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1235
1236		alu.src[0].sel = V_SQ_ALU_SRC_0;
1237		alu.src[0].chan = 0;
1238
1239		alu.last = 1;
1240
1241		r = r600_bc_add_alu(ctx->bc, &alu);
1242		if (r)
1243			return r;
1244	}
1245
1246	/* dst.w = 1.0; */
1247	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1248		memset(&alu, 0, sizeof(struct r600_bc_alu));
1249
1250		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1251
1252		tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1253
1254		alu.src[0].sel = V_SQ_ALU_SRC_1;
1255		alu.src[0].chan = 0;
1256
1257		alu.last = 1;
1258
1259		r = r600_bc_add_alu(ctx->bc, &alu);
1260		if (r)
1261			return r;
1262	}
1263
1264	return 0;
1265}
1266
1267static int tgsi_kill(struct r600_shader_ctx *ctx)
1268{
1269	struct r600_bc_alu alu;
1270	int i, r;
1271
1272	for (i = 0; i < 4; i++) {
1273		memset(&alu, 0, sizeof(struct r600_bc_alu));
1274		alu.inst = ctx->inst_info->r600_opcode;
1275
1276		alu.dst.chan = i;
1277
1278		alu.src[0].sel = V_SQ_ALU_SRC_0;
1279
1280		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1281			alu.src[1].sel = V_SQ_ALU_SRC_1;
1282			alu.src[1].neg = 1;
1283		} else {
1284			r600_bc_src(&alu.src[1], &ctx->src[0], i);
1285		}
1286		if (i == 3) {
1287			alu.last = 1;
1288		}
1289		r = r600_bc_add_alu(ctx->bc, &alu);
1290		if (r)
1291			return r;
1292	}
1293
1294	/* kill must be last in ALU */
1295	ctx->bc->force_add_cf = 1;
1296	ctx->shader->uses_kill = TRUE;
1297	return 0;
1298}
1299
1300static int tgsi_lit(struct r600_shader_ctx *ctx)
1301{
1302	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1303	struct r600_bc_alu alu;
1304	int r;
1305
1306	/* dst.x, <- 1.0  */
1307	memset(&alu, 0, sizeof(struct r600_bc_alu));
1308	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1309	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
1310	alu.src[0].chan = 0;
1311	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1312	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1313	r = r600_bc_add_alu(ctx->bc, &alu);
1314	if (r)
1315		return r;
1316
1317	/* dst.y = max(src.x, 0.0) */
1318	memset(&alu, 0, sizeof(struct r600_bc_alu));
1319	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1320	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1321	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1322	alu.src[1].chan = 0;
1323	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1324	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1325	r = r600_bc_add_alu(ctx->bc, &alu);
1326	if (r)
1327		return r;
1328
1329	/* dst.w, <- 1.0  */
1330	memset(&alu, 0, sizeof(struct r600_bc_alu));
1331	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1332	alu.src[0].sel  = V_SQ_ALU_SRC_1;
1333	alu.src[0].chan = 0;
1334	tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1335	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1336	alu.last = 1;
1337	r = r600_bc_add_alu(ctx->bc, &alu);
1338	if (r)
1339		return r;
1340
1341	if (inst->Dst[0].Register.WriteMask & (1 << 2))
1342	{
1343		int chan;
1344		int sel;
1345		int i;
1346
1347		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
1348			for (i = 0; i < 3; i++) {
1349				/* dst.z = log(src.y) */
1350				memset(&alu, 0, sizeof(struct r600_bc_alu));
1351				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1352				r600_bc_src(&alu.src[0], &ctx->src[0], 1);
1353				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1354				if (i == 2) {
1355					alu.dst.write = 1;
1356					alu.last = 1;
1357				} else
1358					alu.dst.write = 0;
1359
1360				r = r600_bc_add_alu(ctx->bc, &alu);
1361				if (r)
1362					return r;
1363			}
1364		} else {
1365			/* dst.z = log(src.y) */
1366			memset(&alu, 0, sizeof(struct r600_bc_alu));
1367			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1368			r600_bc_src(&alu.src[0], &ctx->src[0], 1);
1369			tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1370			alu.last = 1;
1371			r = r600_bc_add_alu(ctx->bc, &alu);
1372			if (r)
1373				return r;
1374		}
1375
1376		chan = alu.dst.chan;
1377		sel = alu.dst.sel;
1378
1379		/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1380		memset(&alu, 0, sizeof(struct r600_bc_alu));
1381		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1382		r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1383		alu.src[1].sel  = sel;
1384		alu.src[1].chan = chan;
1385
1386		r600_bc_src(&alu.src[2], &ctx->src[0], 0);
1387		alu.dst.sel = ctx->temp_reg;
1388		alu.dst.chan = 0;
1389		alu.dst.write = 1;
1390		alu.is_op3 = 1;
1391		alu.last = 1;
1392		r = r600_bc_add_alu(ctx->bc, &alu);
1393		if (r)
1394			return r;
1395
1396		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
1397			for (i = 0; i < 3; i++) {
1398				/* dst.z = exp(tmp.x) */
1399				memset(&alu, 0, sizeof(struct r600_bc_alu));
1400				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1401				alu.src[0].sel = ctx->temp_reg;
1402				alu.src[0].chan = 0;
1403				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1404				if (i == 2) {
1405					alu.dst.write = 1;
1406					alu.last = 1;
1407				} else
1408					alu.dst.write = 0;
1409				r = r600_bc_add_alu(ctx->bc, &alu);
1410				if (r)
1411					return r;
1412			}
1413		} else {
1414			/* dst.z = exp(tmp.x) */
1415			memset(&alu, 0, sizeof(struct r600_bc_alu));
1416			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1417			alu.src[0].sel = ctx->temp_reg;
1418			alu.src[0].chan = 0;
1419			tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1420			alu.last = 1;
1421			r = r600_bc_add_alu(ctx->bc, &alu);
1422			if (r)
1423				return r;
1424		}
1425	}
1426	return 0;
1427}
1428
1429static int tgsi_rsq(struct r600_shader_ctx *ctx)
1430{
1431	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1432	struct r600_bc_alu alu;
1433	int i, r;
1434
1435	memset(&alu, 0, sizeof(struct r600_bc_alu));
1436
1437	/* FIXME:
1438	 * For state trackers other than OpenGL, we'll want to use
1439	 * _RECIPSQRT_IEEE instead.
1440	 */
1441	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1442
1443	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1444		r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1445		alu.src[i].abs = 1;
1446	}
1447	alu.dst.sel = ctx->temp_reg;
1448	alu.dst.write = 1;
1449	alu.last = 1;
1450	r = r600_bc_add_alu(ctx->bc, &alu);
1451	if (r)
1452		return r;
1453	/* replicate result */
1454	return tgsi_helper_tempx_replicate(ctx);
1455}
1456
1457static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1458{
1459	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1460	struct r600_bc_alu alu;
1461	int i, r;
1462
1463	for (i = 0; i < 4; i++) {
1464		memset(&alu, 0, sizeof(struct r600_bc_alu));
1465		alu.src[0].sel = ctx->temp_reg;
1466		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1467		alu.dst.chan = i;
1468		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1469		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1470		if (i == 3)
1471			alu.last = 1;
1472		r = r600_bc_add_alu(ctx->bc, &alu);
1473		if (r)
1474			return r;
1475	}
1476	return 0;
1477}
1478
1479static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1480{
1481	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1482	struct r600_bc_alu alu;
1483	int i, r;
1484
1485	memset(&alu, 0, sizeof(struct r600_bc_alu));
1486	alu.inst = ctx->inst_info->r600_opcode;
1487	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1488		r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1489	}
1490	alu.dst.sel = ctx->temp_reg;
1491	alu.dst.write = 1;
1492	alu.last = 1;
1493	r = r600_bc_add_alu(ctx->bc, &alu);
1494	if (r)
1495		return r;
1496	/* replicate result */
1497	return tgsi_helper_tempx_replicate(ctx);
1498}
1499
1500static int cayman_pow(struct r600_shader_ctx *ctx)
1501{
1502	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1503	int i, r;
1504	struct r600_bc_alu alu;
1505	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1506
1507	for (i = 0; i < 3; i++) {
1508		memset(&alu, 0, sizeof(struct r600_bc_alu));
1509		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1510		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1511		alu.dst.sel = ctx->temp_reg;
1512		alu.dst.chan = i;
1513		alu.dst.write = 1;
1514		if (i == 2)
1515			alu.last = 1;
1516		r = r600_bc_add_alu(ctx->bc, &alu);
1517		if (r)
1518			return r;
1519	}
1520
1521	/* b * LOG2(a) */
1522	memset(&alu, 0, sizeof(struct r600_bc_alu));
1523	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1524	r600_bc_src(&alu.src[0], &ctx->src[1], 0);
1525	alu.src[1].sel = ctx->temp_reg;
1526	alu.dst.sel = ctx->temp_reg;
1527	alu.dst.write = 1;
1528	alu.last = 1;
1529	r = r600_bc_add_alu(ctx->bc, &alu);
1530	if (r)
1531		return r;
1532
1533	for (i = 0; i < last_slot; i++) {
1534		/* POW(a,b) = EXP2(b * LOG2(a))*/
1535		memset(&alu, 0, sizeof(struct r600_bc_alu));
1536		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1537		alu.src[0].sel = ctx->temp_reg;
1538
1539		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1540		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1541		if (i == last_slot - 1)
1542			alu.last = 1;
1543		r = r600_bc_add_alu(ctx->bc, &alu);
1544		if (r)
1545			return r;
1546	}
1547	return 0;
1548}
1549
1550static int tgsi_pow(struct r600_shader_ctx *ctx)
1551{
1552	struct r600_bc_alu alu;
1553	int r;
1554
1555	/* LOG2(a) */
1556	memset(&alu, 0, sizeof(struct r600_bc_alu));
1557	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1558	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1559	alu.dst.sel = ctx->temp_reg;
1560	alu.dst.write = 1;
1561	alu.last = 1;
1562	r = r600_bc_add_alu(ctx->bc, &alu);
1563	if (r)
1564		return r;
1565	/* b * LOG2(a) */
1566	memset(&alu, 0, sizeof(struct r600_bc_alu));
1567	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1568	r600_bc_src(&alu.src[0], &ctx->src[1], 0);
1569	alu.src[1].sel = ctx->temp_reg;
1570	alu.dst.sel = ctx->temp_reg;
1571	alu.dst.write = 1;
1572	alu.last = 1;
1573	r = r600_bc_add_alu(ctx->bc, &alu);
1574	if (r)
1575		return r;
1576	/* POW(a,b) = EXP2(b * LOG2(a))*/
1577	memset(&alu, 0, sizeof(struct r600_bc_alu));
1578	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1579	alu.src[0].sel = ctx->temp_reg;
1580	alu.dst.sel = ctx->temp_reg;
1581	alu.dst.write = 1;
1582	alu.last = 1;
1583	r = r600_bc_add_alu(ctx->bc, &alu);
1584	if (r)
1585		return r;
1586	return tgsi_helper_tempx_replicate(ctx);
1587}
1588
1589static int tgsi_ssg(struct r600_shader_ctx *ctx)
1590{
1591	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1592	struct r600_bc_alu alu;
1593	int i, r;
1594
1595	/* tmp = (src > 0 ? 1 : src) */
1596	for (i = 0; i < 4; i++) {
1597		memset(&alu, 0, sizeof(struct r600_bc_alu));
1598		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1599		alu.is_op3 = 1;
1600
1601		alu.dst.sel = ctx->temp_reg;
1602		alu.dst.chan = i;
1603
1604		r600_bc_src(&alu.src[0], &ctx->src[0], i);
1605		alu.src[1].sel = V_SQ_ALU_SRC_1;
1606		r600_bc_src(&alu.src[2], &ctx->src[0], i);
1607
1608		if (i == 3)
1609			alu.last = 1;
1610		r = r600_bc_add_alu(ctx->bc, &alu);
1611		if (r)
1612			return r;
1613	}
1614
1615	/* dst = (-tmp > 0 ? -1 : tmp) */
1616	for (i = 0; i < 4; i++) {
1617		memset(&alu, 0, sizeof(struct r600_bc_alu));
1618		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1619		alu.is_op3 = 1;
1620		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1621
1622		alu.src[0].sel = ctx->temp_reg;
1623		alu.src[0].chan = i;
1624		alu.src[0].neg = 1;
1625
1626		alu.src[1].sel = V_SQ_ALU_SRC_1;
1627		alu.src[1].neg = 1;
1628
1629		alu.src[2].sel = ctx->temp_reg;
1630		alu.src[2].chan = i;
1631
1632		if (i == 3)
1633			alu.last = 1;
1634		r = r600_bc_add_alu(ctx->bc, &alu);
1635		if (r)
1636			return r;
1637	}
1638	return 0;
1639}
1640
1641static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1642{
1643	struct r600_bc_alu alu;
1644	int i, r;
1645
1646	for (i = 0; i < 4; i++) {
1647		memset(&alu, 0, sizeof(struct r600_bc_alu));
1648		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1649			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1650			alu.dst.chan = i;
1651		} else {
1652			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1653			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1654			alu.src[0].sel = ctx->temp_reg;
1655			alu.src[0].chan = i;
1656		}
1657		if (i == 3) {
1658			alu.last = 1;
1659		}
1660		r = r600_bc_add_alu(ctx->bc, &alu);
1661		if (r)
1662			return r;
1663	}
1664	return 0;
1665}
1666
1667static int tgsi_op3(struct r600_shader_ctx *ctx)
1668{
1669	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1670	struct r600_bc_alu alu;
1671	int i, j, r;
1672	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1673
1674	for (i = 0; i < lasti + 1; i++) {
1675		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1676			continue;
1677
1678		memset(&alu, 0, sizeof(struct r600_bc_alu));
1679		alu.inst = ctx->inst_info->r600_opcode;
1680		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1681			r600_bc_src(&alu.src[j], &ctx->src[j], i);
1682		}
1683
1684		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1685		alu.dst.chan = i;
1686		alu.dst.write = 1;
1687		alu.is_op3 = 1;
1688		if (i == lasti) {
1689			alu.last = 1;
1690		}
1691		r = r600_bc_add_alu(ctx->bc, &alu);
1692		if (r)
1693			return r;
1694	}
1695	return 0;
1696}
1697
1698static int tgsi_dp(struct r600_shader_ctx *ctx)
1699{
1700	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1701	struct r600_bc_alu alu;
1702	int i, j, r;
1703
1704	for (i = 0; i < 4; i++) {
1705		memset(&alu, 0, sizeof(struct r600_bc_alu));
1706		alu.inst = ctx->inst_info->r600_opcode;
1707		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1708			r600_bc_src(&alu.src[j], &ctx->src[j], i);
1709		}
1710
1711		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1712		alu.dst.chan = i;
1713		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1714		/* handle some special cases */
1715		switch (ctx->inst_info->tgsi_opcode) {
1716		case TGSI_OPCODE_DP2:
1717			if (i > 1) {
1718				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1719				alu.src[0].chan = alu.src[1].chan = 0;
1720			}
1721			break;
1722		case TGSI_OPCODE_DP3:
1723			if (i > 2) {
1724				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1725				alu.src[0].chan = alu.src[1].chan = 0;
1726			}
1727			break;
1728		case TGSI_OPCODE_DPH:
1729			if (i == 3) {
1730				alu.src[0].sel = V_SQ_ALU_SRC_1;
1731				alu.src[0].chan = 0;
1732				alu.src[0].neg = 0;
1733			}
1734			break;
1735		default:
1736			break;
1737		}
1738		if (i == 3) {
1739			alu.last = 1;
1740		}
1741		r = r600_bc_add_alu(ctx->bc, &alu);
1742		if (r)
1743			return r;
1744	}
1745	return 0;
1746}
1747
1748static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
1749						    unsigned index)
1750{
1751	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1752	return 	(inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
1753		inst->Src[index].Register.File != TGSI_FILE_INPUT) ||
1754		ctx->src[index].neg || ctx->src[index].abs;
1755}
1756
1757static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
1758					unsigned index)
1759{
1760	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1761	return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
1762}
1763
/*
 * Translate the current TGSI texture instruction into r600 bytecode.
 *
 * Stages, in order:
 *  - TXD: emit SET_GRADIENTS_H / SET_GRADIENTS_V fetches for sources 1
 *    and 2 (the sampler is source 3 for this opcode).
 *  - TXP: perspective divide, temp.xyz = src.xyz * (1 / src.w),
 *    temp.w = 1.0.
 *  - CUBE textures: run the CUBE instruction on the coordinate and
 *    post-process the result in temp_reg.
 *  - If the coordinate still is not usable directly (see
 *    tgsi_tex_src_requires_loading) copy it into temp_reg.
 *  - Shadow 1D/2D targets switch SAMPLE/SAMPLE_L/SAMPLE_G to their _C
 *    variants.
 *  - Emit the actual texture fetch.
 *
 * Returns 0 on success or the first error from r600_bc_add_alu() /
 * r600_bc_add_tex().
 */
static int tgsi_tex(struct r600_shader_ctx *ctx)
{
	static float one_point_five = 1.5f;
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bc_tex tex;
	struct r600_bc_alu alu;
	unsigned src_gpr;
	int r, i, j;
	int opcode;
	/* Texture fetch instructions can only use gprs as source.
	 * Also they cannot negate the source or take the absolute value */
	const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0);
	boolean src_loaded = FALSE;
	/* index of the TGSI source operand holding the sampler */
	unsigned sampler_src_reg = 1;

	src_gpr = tgsi_tex_get_src_gpr(ctx, 0);

	if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
		/* TGSI moves the sampler to src reg 3 for TXD */
		sampler_src_reg = 3;

		/* sources 1 and 2 carry the two gradients */
		for (i = 1; i < 3; i++) {
			/* set gradients h/v */
			memset(&tex, 0, sizeof(struct r600_bc_tex));
			tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H :
				SQ_TEX_INST_SET_GRADIENTS_V;
			tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
			tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;

			if (tgsi_tex_src_requires_loading(ctx, i)) {
				/* copy the gradient into a fresh temp, identity swizzle */
				tex.src_gpr = r600_get_temp(ctx);
				tex.src_sel_x = 0;
				tex.src_sel_y = 1;
				tex.src_sel_z = 2;
				tex.src_sel_w = 3;

				for (j = 0; j < 4; j++) {
					memset(&alu, 0, sizeof(struct r600_bc_alu));
					alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
                                        r600_bc_src(&alu.src[0], &ctx->src[i], j);
                                        alu.dst.sel = tex.src_gpr;
                                        alu.dst.chan = j;
                                        if (j == 3)
                                                alu.last = 1;
                                        alu.dst.write = 1;
                                        r = r600_bc_add_alu(ctx->bc, &alu);
                                        if (r)
                                                return r;
				}

			} else {
				/* fetch straight from the source register with its swizzle */
				tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i);
				tex.src_sel_x = ctx->src[i].swizzle[0];
				tex.src_sel_y = ctx->src[i].swizzle[1];
				tex.src_sel_z = ctx->src[i].swizzle[2];
				tex.src_sel_w = ctx->src[i].swizzle[3];
				tex.src_rel = ctx->src[i].rel;
			}
			tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */
			tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
			if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
				tex.coord_type_x = 1;
				tex.coord_type_y = 1;
				tex.coord_type_z = 1;
				tex.coord_type_w = 1;
			}
			r = r600_bc_add_tex(ctx->bc, &tex);
			if (r)
				return r;
		}
	} else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
		/* temp channel that receives 1 / src.w */
		int out_chan;
		/* Add perspective divide */
		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
			/* issued in three vector slots, only slot out_chan writes */
			out_chan = 2;
			for (i = 0; i < 3; i++) {
				memset(&alu, 0, sizeof(struct r600_bc_alu));
				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
				r600_bc_src(&alu.src[0], &ctx->src[0], 3);

				alu.dst.sel = ctx->temp_reg;
				alu.dst.chan = i;
				if (i == 2)
					alu.last = 1;
				if (out_chan == i)
					alu.dst.write = 1;
				r = r600_bc_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}

		} else {
			out_chan = 3;
			memset(&alu, 0, sizeof(struct r600_bc_alu));
			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
			r600_bc_src(&alu.src[0], &ctx->src[0], 3);

			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = out_chan;
			alu.last = 1;
			alu.dst.write = 1;
			r = r600_bc_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}

		/* temp.xyz = temp[out_chan] * src.xyz; the group is closed by
		 * the MOV to temp.w below */
		for (i = 0; i < 3; i++) {
			memset(&alu, 0, sizeof(struct r600_bc_alu));
			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = out_chan;
			r600_bc_src(&alu.src[1], &ctx->src[0], i);
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			alu.dst.write = 1;
			r = r600_bc_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
		/* temp.w = 1.0 */
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
		alu.src[0].sel = V_SQ_ALU_SRC_1;
		alu.src[0].chan = 0;
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 3;
		alu.last = 1;
		alu.dst.write = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;
		src_loaded = TRUE;
		src_gpr = ctx->temp_reg;
	}

	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
		/* source channels fed to the CUBE instruction, per output channel */
		static const unsigned src0_swizzle[] = {2, 2, 0, 1};
		static const unsigned src1_swizzle[] = {1, 0, 2, 2};

		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
		for (i = 0; i < 4; i++) {
			memset(&alu, 0, sizeof(struct r600_bc_alu));
			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
			r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
			r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			if (i == 3)
				alu.last = 1;
			alu.dst.write = 1;
			r = r600_bc_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}

		/* tmp1.z = RCP_e(|tmp1.z|) */
		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
			/* three vector slots, only slot 2 (z) writes */
			for (i = 0; i < 3; i++) {
				memset(&alu, 0, sizeof(struct r600_bc_alu));
				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
				alu.src[0].sel = ctx->temp_reg;
				alu.src[0].chan = 2;
				alu.src[0].abs = 1;
				alu.dst.sel = ctx->temp_reg;
				alu.dst.chan = i;
				if (i == 2)
					alu.dst.write = 1;
				if (i == 2)
					alu.last = 1;
				r = r600_bc_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bc_alu));
			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = 2;
			alu.src[0].abs = 1;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = 2;
			alu.dst.write = 1;
			alu.last = 1;
			r = r600_bc_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}

		/* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
		 * muladd has no writemask, have to use another temp
		 */
		/* temp.x = temp.x * temp.z + 1.5 (group continues with temp.y) */
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
		alu.is_op3 = 1;

		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = 0;
		alu.src[1].sel = ctx->temp_reg;
		alu.src[1].chan = 2;

		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
		alu.src[2].chan = 0;
		alu.src[2].value = *(uint32_t *)&one_point_five;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 0;
		alu.dst.write = 1;

		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		/* temp.y = temp.y * temp.z + 1.5 */
		memset(&alu, 0, sizeof(struct r600_bc_alu));
		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
		alu.is_op3 = 1;

		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = 1;
		alu.src[1].sel = ctx->temp_reg;
		alu.src[1].chan = 2;

		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
		alu.src[2].chan = 0;
		alu.src[2].value = *(uint32_t *)&one_point_five;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 1;
		alu.dst.write = 1;

		alu.last = 1;
		r = r600_bc_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		src_loaded = TRUE;
		src_gpr = ctx->temp_reg;
	}

	/* coordinate not usable as-is and not already placed in temp_reg by
	 * one of the paths above: copy all four channels into temp_reg */
	if (src_requires_loading && !src_loaded) {
		for (i = 0; i < 4; i++) {
			memset(&alu, 0, sizeof(struct r600_bc_alu));
			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
			r600_bc_src(&alu.src[0], &ctx->src[0], i);
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			if (i == 3)
				alu.last = 1;
			alu.dst.write = 1;
			r = r600_bc_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
		src_loaded = TRUE;
		src_gpr = ctx->temp_reg;
	}

	opcode = ctx->inst_info->r600_opcode;
	/* shadow targets use the _C variants of the sample opcodes */
	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) {
		switch (opcode) {
		case SQ_TEX_INST_SAMPLE:
			opcode = SQ_TEX_INST_SAMPLE_C;
			break;
		case SQ_TEX_INST_SAMPLE_L:
			opcode = SQ_TEX_INST_SAMPLE_C_L;
			break;
		case SQ_TEX_INST_SAMPLE_G:
			opcode = SQ_TEX_INST_SAMPLE_C_G;
			break;
		}
	}

	/* the fetch itself */
	memset(&tex, 0, sizeof(struct r600_bc_tex));
	tex.inst = opcode;

	tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
	tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
	tex.src_gpr = src_gpr;
	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
	/* components outside the write mask get destination select 7 */
	tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
	tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
	tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
	tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
	if (src_loaded) {
		/* coordinate was copied to temp_reg: identity swizzle */
		tex.src_sel_x = 0;
		tex.src_sel_y = 1;
		tex.src_sel_z = 2;
		tex.src_sel_w = 3;
	} else {
		tex.src_sel_x = ctx->src[0].swizzle[0];
		tex.src_sel_y = ctx->src[0].swizzle[1];
		tex.src_sel_z = ctx->src[0].swizzle[2];
		tex.src_sel_w = ctx->src[0].swizzle[3];
		tex.src_rel = ctx->src[0].rel;
	}

	/* cube fetch reads temp_reg channels in y, x, w, y order */
	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
		tex.src_sel_x = 1;
		tex.src_sel_y = 0;
		tex.src_sel_z = 3;
		tex.src_sel_w = 1;
	}

	if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
		tex.coord_type_x = 1;
		tex.coord_type_y = 1;
		tex.coord_type_z = 1;
		tex.coord_type_w = 1;
	}

	/* array targets: z gets coord type 0; for 1D arrays z also reuses
	 * the y source select */
	if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) {
		tex.coord_type_z = 0;
		tex.src_sel_z = tex.src_sel_y;
	} else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY)
		tex.coord_type_z = 0;

	/* shadow targets: w takes the same source select as z */
	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
		tex.src_sel_w = tex.src_sel_z;

	r = r600_bc_add_tex(ctx->bc, &tex);
	if (r)
		return r;

	/* add shadow ambient support  - gallium doesn't do it yet */
	return 0;
}
2089
2090static int tgsi_lrp(struct r600_shader_ctx *ctx)
2091{
2092	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2093	struct r600_bc_alu alu;
2094	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2095	unsigned i;
2096	int r;
2097
2098	/* optimize if it's just an equal balance */
2099	if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
2100		for (i = 0; i < lasti + 1; i++) {
2101			if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2102				continue;
2103
2104			memset(&alu, 0, sizeof(struct r600_bc_alu));
2105			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2106			r600_bc_src(&alu.src[0], &ctx->src[1], i);
2107			r600_bc_src(&alu.src[1], &ctx->src[2], i);
2108			alu.omod = 3;
2109			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2110			alu.dst.chan = i;
2111			if (i == lasti) {
2112				alu.last = 1;
2113			}
2114			r = r600_bc_add_alu(ctx->bc, &alu);
2115			if (r)
2116				return r;
2117		}
2118		return 0;
2119	}
2120
2121	/* 1 - src0 */
2122	for (i = 0; i < lasti + 1; i++) {
2123		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2124			continue;
2125
2126		memset(&alu, 0, sizeof(struct r600_bc_alu));
2127		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2128		alu.src[0].sel = V_SQ_ALU_SRC_1;
2129		alu.src[0].chan = 0;
2130		r600_bc_src(&alu.src[1], &ctx->src[0], i);
2131		alu.src[1].neg = 1;
2132		alu.dst.sel = ctx->temp_reg;
2133		alu.dst.chan = i;
2134		if (i == lasti) {
2135			alu.last = 1;
2136		}
2137		alu.dst.write = 1;
2138		r = r600_bc_add_alu(ctx->bc, &alu);
2139		if (r)
2140			return r;
2141	}
2142
2143	/* (1 - src0) * src2 */
2144	for (i = 0; i < lasti + 1; i++) {
2145		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2146			continue;
2147
2148		memset(&alu, 0, sizeof(struct r600_bc_alu));
2149		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2150		alu.src[0].sel = ctx->temp_reg;
2151		alu.src[0].chan = i;
2152		r600_bc_src(&alu.src[1], &ctx->src[2], i);
2153		alu.dst.sel = ctx->temp_reg;
2154		alu.dst.chan = i;
2155		if (i == lasti) {
2156			alu.last = 1;
2157		}
2158		alu.dst.write = 1;
2159		r = r600_bc_add_alu(ctx->bc, &alu);
2160		if (r)
2161			return r;
2162	}
2163
2164	/* src0 * src1 + (1 - src0) * src2 */
2165	for (i = 0; i < lasti + 1; i++) {
2166		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2167			continue;
2168
2169		memset(&alu, 0, sizeof(struct r600_bc_alu));
2170		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2171		alu.is_op3 = 1;
2172		r600_bc_src(&alu.src[0], &ctx->src[0], i);
2173		r600_bc_src(&alu.src[1], &ctx->src[1], i);
2174		alu.src[2].sel = ctx->temp_reg;
2175		alu.src[2].chan = i;
2176
2177		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2178		alu.dst.chan = i;
2179		if (i == lasti) {
2180			alu.last = 1;
2181		}
2182		r = r600_bc_add_alu(ctx->bc, &alu);
2183		if (r)
2184			return r;
2185	}
2186	return 0;
2187}
2188
2189static int tgsi_cmp(struct r600_shader_ctx *ctx)
2190{
2191	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2192	struct r600_bc_alu alu;
2193	int i, r;
2194	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2195
2196	for (i = 0; i < lasti + 1; i++) {
2197		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2198			continue;
2199
2200		memset(&alu, 0, sizeof(struct r600_bc_alu));
2201		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
2202		r600_bc_src(&alu.src[0], &ctx->src[0], i);
2203		r600_bc_src(&alu.src[1], &ctx->src[2], i);
2204		r600_bc_src(&alu.src[2], &ctx->src[1], i);
2205		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2206		alu.dst.chan = i;
2207		alu.dst.write = 1;
2208		alu.is_op3 = 1;
2209		if (i == lasti)
2210			alu.last = 1;
2211		r = r600_bc_add_alu(ctx->bc, &alu);
2212		if (r)
2213			return r;
2214	}
2215	return 0;
2216}
2217
2218static int tgsi_xpd(struct r600_shader_ctx *ctx)
2219{
2220	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2221	static const unsigned int src0_swizzle[] = {2, 0, 1};
2222	static const unsigned int src1_swizzle[] = {1, 2, 0};
2223	struct r600_bc_alu alu;
2224	uint32_t use_temp = 0;
2225	int i, r;
2226
2227	if (inst->Dst[0].Register.WriteMask != 0xf)
2228		use_temp = 1;
2229
2230	for (i = 0; i < 4; i++) {
2231		memset(&alu, 0, sizeof(struct r600_bc_alu));
2232		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2233		if (i < 3) {
2234			r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
2235			r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
2236		} else {
2237			alu.src[0].sel = V_SQ_ALU_SRC_0;
2238			alu.src[0].chan = i;
2239			alu.src[1].sel = V_SQ_ALU_SRC_0;
2240			alu.src[1].chan = i;
2241		}
2242
2243		alu.dst.sel = ctx->temp_reg;
2244		alu.dst.chan = i;
2245		alu.dst.write = 1;
2246
2247		if (i == 3)
2248			alu.last = 1;
2249		r = r600_bc_add_alu(ctx->bc, &alu);
2250		if (r)
2251			return r;
2252	}
2253
2254	for (i = 0; i < 4; i++) {
2255		memset(&alu, 0, sizeof(struct r600_bc_alu));
2256		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2257
2258		if (i < 3) {
2259			r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
2260			r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
2261		} else {
2262			alu.src[0].sel = V_SQ_ALU_SRC_0;
2263			alu.src[0].chan = i;
2264			alu.src[1].sel = V_SQ_ALU_SRC_0;
2265			alu.src[1].chan = i;
2266		}
2267
2268		alu.src[2].sel = ctx->temp_reg;
2269		alu.src[2].neg = 1;
2270		alu.src[2].chan = i;
2271
2272		if (use_temp)
2273			alu.dst.sel = ctx->temp_reg;
2274		else
2275			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2276		alu.dst.chan = i;
2277		alu.dst.write = 1;
2278		alu.is_op3 = 1;
2279		if (i == 3)
2280			alu.last = 1;
2281		r = r600_bc_add_alu(ctx->bc, &alu);
2282		if (r)
2283			return r;
2284	}
2285	if (use_temp)
2286		return tgsi_helper_copy(ctx, inst);
2287	return 0;
2288}
2289
2290static int tgsi_exp(struct r600_shader_ctx *ctx)
2291{
2292	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2293	struct r600_bc_alu alu;
2294	int r;
2295	int i;
2296
2297	/* result.x = 2^floor(src); */
2298	if (inst->Dst[0].Register.WriteMask & 1) {
2299		memset(&alu, 0, sizeof(struct r600_bc_alu));
2300
2301		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2302		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2303
2304		alu.dst.sel = ctx->temp_reg;
2305		alu.dst.chan = 0;
2306		alu.dst.write = 1;
2307		alu.last = 1;
2308		r = r600_bc_add_alu(ctx->bc, &alu);
2309		if (r)
2310			return r;
2311
2312		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
2313			for (i = 0; i < 3; i++) {
2314				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2315				alu.src[0].sel = ctx->temp_reg;
2316				alu.src[0].chan = 0;
2317
2318				alu.dst.sel = ctx->temp_reg;
2319				alu.dst.chan = i;
2320				if (i == 0)
2321					alu.dst.write = 1;
2322				if (i == 2)
2323					alu.last = 1;
2324				r = r600_bc_add_alu(ctx->bc, &alu);
2325				if (r)
2326					return r;
2327			}
2328		} else {
2329			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2330			alu.src[0].sel = ctx->temp_reg;
2331			alu.src[0].chan = 0;
2332
2333			alu.dst.sel = ctx->temp_reg;
2334			alu.dst.chan = 0;
2335			alu.dst.write = 1;
2336			alu.last = 1;
2337			r = r600_bc_add_alu(ctx->bc, &alu);
2338			if (r)
2339				return r;
2340		}
2341	}
2342
2343	/* result.y = tmp - floor(tmp); */
2344	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2345		memset(&alu, 0, sizeof(struct r600_bc_alu));
2346
2347		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2348		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2349
2350		alu.dst.sel = ctx->temp_reg;
2351#if 0
2352		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2353		if (r)
2354			return r;
2355#endif
2356		alu.dst.write = 1;
2357		alu.dst.chan = 1;
2358
2359		alu.last = 1;
2360
2361		r = r600_bc_add_alu(ctx->bc, &alu);
2362		if (r)
2363			return r;
2364	}
2365
2366	/* result.z = RoughApprox2ToX(tmp);*/
2367	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2368		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
2369			for (i = 0; i < 3; i++) {
2370				memset(&alu, 0, sizeof(struct r600_bc_alu));
2371				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2372				r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2373
2374				alu.dst.sel = ctx->temp_reg;
2375				alu.dst.chan = i;
2376				if (i == 2) {
2377					alu.dst.write = 1;
2378					alu.last = 1;
2379				}
2380
2381				r = r600_bc_add_alu(ctx->bc, &alu);
2382				if (r)
2383					return r;
2384			}
2385		} else {
2386			memset(&alu, 0, sizeof(struct r600_bc_alu));
2387			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2388			r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2389
2390			alu.dst.sel = ctx->temp_reg;
2391			alu.dst.write = 1;
2392			alu.dst.chan = 2;
2393
2394			alu.last = 1;
2395
2396			r = r600_bc_add_alu(ctx->bc, &alu);
2397			if (r)
2398				return r;
2399		}
2400	}
2401
2402	/* result.w = 1.0;*/
2403	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2404		memset(&alu, 0, sizeof(struct r600_bc_alu));
2405
2406		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2407		alu.src[0].sel = V_SQ_ALU_SRC_1;
2408		alu.src[0].chan = 0;
2409
2410		alu.dst.sel = ctx->temp_reg;
2411		alu.dst.chan = 3;
2412		alu.dst.write = 1;
2413		alu.last = 1;
2414		r = r600_bc_add_alu(ctx->bc, &alu);
2415		if (r)
2416			return r;
2417	}
2418	return tgsi_helper_copy(ctx, inst);
2419}
2420
2421static int tgsi_log(struct r600_shader_ctx *ctx)
2422{
2423	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2424	struct r600_bc_alu alu;
2425	int r;
2426	int i;
2427
2428	/* result.x = floor(log2(src)); */
2429	if (inst->Dst[0].Register.WriteMask & 1) {
2430		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
2431			for (i = 0; i < 3; i++) {
2432				memset(&alu, 0, sizeof(struct r600_bc_alu));
2433
2434				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2435				r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2436
2437				alu.dst.sel = ctx->temp_reg;
2438				alu.dst.chan = i;
2439				if (i == 0)
2440					alu.dst.write = 1;
2441				if (i == 2)
2442					alu.last = 1;
2443				r = r600_bc_add_alu(ctx->bc, &alu);
2444				if (r)
2445					return r;
2446			}
2447
2448		} else {
2449			memset(&alu, 0, sizeof(struct r600_bc_alu));
2450
2451			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2452			r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2453
2454			alu.dst.sel = ctx->temp_reg;
2455			alu.dst.chan = 0;
2456			alu.dst.write = 1;
2457			alu.last = 1;
2458			r = r600_bc_add_alu(ctx->bc, &alu);
2459			if (r)
2460				return r;
2461		}
2462
2463		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2464		alu.src[0].sel = ctx->temp_reg;
2465		alu.src[0].chan = 0;
2466
2467		alu.dst.sel = ctx->temp_reg;
2468		alu.dst.chan = 0;
2469		alu.dst.write = 1;
2470		alu.last = 1;
2471
2472		r = r600_bc_add_alu(ctx->bc, &alu);
2473		if (r)
2474			return r;
2475	}
2476
2477	/* result.y = src.x / (2 ^ floor(log2(src.x))); */
2478	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2479
2480		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
2481			for (i = 0; i < 3; i++) {
2482				memset(&alu, 0, sizeof(struct r600_bc_alu));
2483
2484				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2485				r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2486
2487				alu.dst.sel = ctx->temp_reg;
2488				alu.dst.chan = i;
2489				if (i == 1)
2490					alu.dst.write = 1;
2491				if (i == 2)
2492					alu.last = 1;
2493
2494				r = r600_bc_add_alu(ctx->bc, &alu);
2495				if (r)
2496					return r;
2497			}
2498		} else {
2499			memset(&alu, 0, sizeof(struct r600_bc_alu));
2500
2501			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2502			r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2503
2504			alu.dst.sel = ctx->temp_reg;
2505			alu.dst.chan = 1;
2506			alu.dst.write = 1;
2507			alu.last = 1;
2508
2509			r = r600_bc_add_alu(ctx->bc, &alu);
2510			if (r)
2511				return r;
2512		}
2513
2514		memset(&alu, 0, sizeof(struct r600_bc_alu));
2515
2516		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2517		alu.src[0].sel = ctx->temp_reg;
2518		alu.src[0].chan = 1;
2519
2520		alu.dst.sel = ctx->temp_reg;
2521		alu.dst.chan = 1;
2522		alu.dst.write = 1;
2523		alu.last = 1;
2524
2525		r = r600_bc_add_alu(ctx->bc, &alu);
2526		if (r)
2527			return r;
2528
2529		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
2530			for (i = 0; i < 3; i++) {
2531				memset(&alu, 0, sizeof(struct r600_bc_alu));
2532				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2533				alu.src[0].sel = ctx->temp_reg;
2534				alu.src[0].chan = 1;
2535
2536				alu.dst.sel = ctx->temp_reg;
2537				alu.dst.chan = i;
2538				if (i == 1)
2539					alu.dst.write = 1;
2540				if (i == 2)
2541					alu.last = 1;
2542
2543				r = r600_bc_add_alu(ctx->bc, &alu);
2544				if (r)
2545					return r;
2546			}
2547		} else {
2548			memset(&alu, 0, sizeof(struct r600_bc_alu));
2549			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2550			alu.src[0].sel = ctx->temp_reg;
2551			alu.src[0].chan = 1;
2552
2553			alu.dst.sel = ctx->temp_reg;
2554			alu.dst.chan = 1;
2555			alu.dst.write = 1;
2556			alu.last = 1;
2557
2558			r = r600_bc_add_alu(ctx->bc, &alu);
2559			if (r)
2560				return r;
2561		}
2562
2563		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
2564			for (i = 0; i < 3; i++) {
2565				memset(&alu, 0, sizeof(struct r600_bc_alu));
2566				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2567				alu.src[0].sel = ctx->temp_reg;
2568				alu.src[0].chan = 1;
2569
2570				alu.dst.sel = ctx->temp_reg;
2571				alu.dst.chan = i;
2572				if (i == 1)
2573					alu.dst.write = 1;
2574				if (i == 2)
2575					alu.last = 1;
2576
2577				r = r600_bc_add_alu(ctx->bc, &alu);
2578				if (r)
2579					return r;
2580			}
2581		} else {
2582			memset(&alu, 0, sizeof(struct r600_bc_alu));
2583			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2584			alu.src[0].sel = ctx->temp_reg;
2585			alu.src[0].chan = 1;
2586
2587			alu.dst.sel = ctx->temp_reg;
2588			alu.dst.chan = 1;
2589			alu.dst.write = 1;
2590			alu.last = 1;
2591
2592			r = r600_bc_add_alu(ctx->bc, &alu);
2593			if (r)
2594				return r;
2595		}
2596
2597		memset(&alu, 0, sizeof(struct r600_bc_alu));
2598
2599		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2600
2601		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2602
2603		alu.src[1].sel = ctx->temp_reg;
2604		alu.src[1].chan = 1;
2605
2606		alu.dst.sel = ctx->temp_reg;
2607		alu.dst.chan = 1;
2608		alu.dst.write = 1;
2609		alu.last = 1;
2610
2611		r = r600_bc_add_alu(ctx->bc, &alu);
2612		if (r)
2613			return r;
2614	}
2615
2616	/* result.z = log2(src);*/
2617	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2618		if (ctx->bc->chiprev == CHIPREV_CAYMAN) {
2619			for (i = 0; i < 3; i++) {
2620				memset(&alu, 0, sizeof(struct r600_bc_alu));
2621
2622				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2623				r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2624
2625				alu.dst.sel = ctx->temp_reg;
2626				if (i == 2)
2627					alu.dst.write = 1;
2628				alu.dst.chan = i;
2629				if (i == 2)
2630					alu.last = 1;
2631
2632				r = r600_bc_add_alu(ctx->bc, &alu);
2633				if (r)
2634					return r;
2635			}
2636		} else {
2637			memset(&alu, 0, sizeof(struct r600_bc_alu));
2638
2639			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2640			r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2641
2642			alu.dst.sel = ctx->temp_reg;
2643			alu.dst.write = 1;
2644			alu.dst.chan = 2;
2645			alu.last = 1;
2646
2647			r = r600_bc_add_alu(ctx->bc, &alu);
2648			if (r)
2649				return r;
2650		}
2651	}
2652
2653	/* result.w = 1.0; */
2654	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2655		memset(&alu, 0, sizeof(struct r600_bc_alu));
2656
2657		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2658		alu.src[0].sel = V_SQ_ALU_SRC_1;
2659		alu.src[0].chan = 0;
2660
2661		alu.dst.sel = ctx->temp_reg;
2662		alu.dst.chan = 3;
2663		alu.dst.write = 1;
2664		alu.last = 1;
2665
2666		r = r600_bc_add_alu(ctx->bc, &alu);
2667		if (r)
2668			return r;
2669	}
2670
2671	return tgsi_helper_copy(ctx, inst);
2672}
2673
2674static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2675{
2676	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2677	struct r600_bc_alu alu;
2678	int r;
2679
2680	memset(&alu, 0, sizeof(struct r600_bc_alu));
2681
2682	switch (inst->Instruction.Opcode) {
2683	case TGSI_OPCODE_ARL:
2684		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2685		break;
2686	case TGSI_OPCODE_ARR:
2687		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2688		break;
2689	default:
2690		assert(0);
2691		return -1;
2692	}
2693
2694	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2695	alu.last = 1;
2696	alu.dst.sel = ctx->ar_reg;
2697	alu.dst.write = 1;
2698	r = r600_bc_add_alu(ctx->bc, &alu);
2699	if (r)
2700		return r;
2701
2702	/* TODO: Note that the MOVA can be avoided if we never use AR for
2703	 * indexing non-CB registers in the current ALU clause. Similarly, we
2704	 * need to load AR from ar_reg again if we started a new clause
2705	 * between ARL and AR usage. The easy way to do that is to remove
2706	 * the MOVA here, and load it for the first AR access after ar_reg
2707	 * has been modified in each clause. */
2708	memset(&alu, 0, sizeof(struct r600_bc_alu));
2709	alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2710	alu.src[0].sel = ctx->ar_reg;
2711	alu.src[0].chan = 0;
2712	alu.last = 1;
2713	r = r600_bc_add_alu(ctx->bc, &alu);
2714	if (r)
2715		return r;
2716	return 0;
2717}
2718static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2719{
2720	/* TODO from r600c, ar values don't persist between clauses */
2721	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2722	struct r600_bc_alu alu;
2723	int r;
2724
2725	switch (inst->Instruction.Opcode) {
2726	case TGSI_OPCODE_ARL:
2727		memset(&alu, 0, sizeof(alu));
2728		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
2729		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2730		alu.dst.sel = ctx->ar_reg;
2731		alu.dst.write = 1;
2732		alu.last = 1;
2733
2734		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2735			return r;
2736
2737		memset(&alu, 0, sizeof(alu));
2738		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2739		alu.src[0].sel = ctx->ar_reg;
2740		alu.dst.sel = ctx->ar_reg;
2741		alu.dst.write = 1;
2742		alu.last = 1;
2743
2744		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2745			return r;
2746		break;
2747	case TGSI_OPCODE_ARR:
2748		memset(&alu, 0, sizeof(alu));
2749		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2750		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2751		alu.dst.sel = ctx->ar_reg;
2752		alu.dst.write = 1;
2753		alu.last = 1;
2754
2755		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2756			return r;
2757		break;
2758	default:
2759		assert(0);
2760		return -1;
2761	}
2762
2763	memset(&alu, 0, sizeof(alu));
2764	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2765	alu.src[0].sel = ctx->ar_reg;
2766	alu.last = 1;
2767
2768	r = r600_bc_add_alu(ctx->bc, &alu);
2769	if (r)
2770		return r;
2771	ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2772	return 0;
2773}
2774
2775static int tgsi_opdst(struct r600_shader_ctx *ctx)
2776{
2777	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2778	struct r600_bc_alu alu;
2779	int i, r = 0;
2780
2781	for (i = 0; i < 4; i++) {
2782		memset(&alu, 0, sizeof(struct r600_bc_alu));
2783
2784		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2785		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2786
2787		if (i == 0 || i == 3) {
2788			alu.src[0].sel = V_SQ_ALU_SRC_1;
2789		} else {
2790			r600_bc_src(&alu.src[0], &ctx->src[0], i);
2791		}
2792
2793		if (i == 0 || i == 2) {
2794			alu.src[1].sel = V_SQ_ALU_SRC_1;
2795		} else {
2796			r600_bc_src(&alu.src[1], &ctx->src[1], i);
2797		}
2798		if (i == 3)
2799			alu.last = 1;
2800		r = r600_bc_add_alu(ctx->bc, &alu);
2801		if (r)
2802			return r;
2803	}
2804	return 0;
2805}
2806
2807static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2808{
2809	struct r600_bc_alu alu;
2810	int r;
2811
2812	memset(&alu, 0, sizeof(struct r600_bc_alu));
2813	alu.inst = opcode;
2814	alu.predicate = 1;
2815
2816	alu.dst.sel = ctx->temp_reg;
2817	alu.dst.write = 1;
2818	alu.dst.chan = 0;
2819
2820	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2821	alu.src[1].sel = V_SQ_ALU_SRC_0;
2822	alu.src[1].chan = 0;
2823
2824	alu.last = 1;
2825
2826	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2827	if (r)
2828		return r;
2829	return 0;
2830}
2831
2832static int pops(struct r600_shader_ctx *ctx, int pops)
2833{
2834	int alu_pop = 3;
2835	if (ctx->bc->cf_last) {
2836		if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
2837			alu_pop = 0;
2838		else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
2839			alu_pop = 1;
2840	}
2841	alu_pop += pops;
2842	if (alu_pop == 1) {
2843		ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
2844		ctx->bc->force_add_cf = 1;
2845	} else if (alu_pop == 2) {
2846		ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
2847		ctx->bc->force_add_cf = 1;
2848	} else {
2849		r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2850		ctx->bc->cf_last->pop_count = pops;
2851		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
2852	}
2853	return 0;
2854}
2855
2856static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2857{
2858	switch(reason) {
2859	case FC_PUSH_VPM:
2860		ctx->bc->callstack[ctx->bc->call_sp].current--;
2861		break;
2862	case FC_PUSH_WQM:
2863	case FC_LOOP:
2864		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2865		break;
2866	case FC_REP:
2867		/* TOODO : for 16 vp asic should -= 2; */
2868		ctx->bc->callstack[ctx->bc->call_sp].current --;
2869		break;
2870	}
2871}
2872
2873static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2874{
2875	if (check_max_only) {
2876		int diff;
2877		switch (reason) {
2878		case FC_PUSH_VPM:
2879			diff = 1;
2880			break;
2881		case FC_PUSH_WQM:
2882			diff = 4;
2883			break;
2884		default:
2885			assert(0);
2886			diff = 0;
2887		}
2888		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2889		    ctx->bc->callstack[ctx->bc->call_sp].max) {
2890			ctx->bc->callstack[ctx->bc->call_sp].max =
2891				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2892		}
2893		return;
2894	}
2895	switch (reason) {
2896	case FC_PUSH_VPM:
2897		ctx->bc->callstack[ctx->bc->call_sp].current++;
2898		break;
2899	case FC_PUSH_WQM:
2900	case FC_LOOP:
2901		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2902		break;
2903	case FC_REP:
2904		ctx->bc->callstack[ctx->bc->call_sp].current++;
2905		break;
2906	}
2907
2908	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2909	    ctx->bc->callstack[ctx->bc->call_sp].max) {
2910		ctx->bc->callstack[ctx->bc->call_sp].max =
2911			ctx->bc->callstack[ctx->bc->call_sp].current;
2912	}
2913}
2914
2915static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2916{
2917	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2918
2919	sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2920						sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2921	sp->mid[sp->num_mid] = ctx->bc->cf_last;
2922	sp->num_mid++;
2923}
2924
2925static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2926{
2927	ctx->bc->fc_sp++;
2928	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2929	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2930}
2931
2932static void fc_poplevel(struct r600_shader_ctx *ctx)
2933{
2934	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2935	if (sp->mid) {
2936		free(sp->mid);
2937		sp->mid = NULL;
2938	}
2939	sp->num_mid = 0;
2940	sp->start = NULL;
2941	sp->type = 0;
2942	ctx->bc->fc_sp--;
2943}
2944
#if 0
/*
 * Everything up to the matching #endif is compiled out: unfinished helpers
 * for RETURN / flag-based loop exit. Several of them are empty stubs.
 */

/* Append a RETURN CF instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/* Append a JUMP with the given pop count; the target is not filled in. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: emits nothing. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/* Test the flag, jump, set the flag to the zero constant, pop and return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Test the flag, append the current instruction's CF opcode with
 * pop_count 1, record it as a "mid" of level fc_sp and pop once.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2992
2993static int tgsi_if(struct r600_shader_ctx *ctx)
2994{
2995	emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2996
2997	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2998
2999	fc_pushlevel(ctx, FC_IF);
3000
3001	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
3002	return 0;
3003}
3004
3005static int tgsi_else(struct r600_shader_ctx *ctx)
3006{
3007	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
3008	ctx->bc->cf_last->pop_count = 1;
3009
3010	fc_set_mid(ctx, ctx->bc->fc_sp);
3011	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
3012	return 0;
3013}
3014
3015static int tgsi_endif(struct r600_shader_ctx *ctx)
3016{
3017	pops(ctx, 1);
3018	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
3019		R600_ERR("if/endif unbalanced in shader\n");
3020		return -1;
3021	}
3022
3023	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
3024		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3025		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
3026	} else {
3027		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
3028	}
3029	fc_poplevel(ctx);
3030
3031	callstack_decrease_current(ctx, FC_PUSH_VPM);
3032	return 0;
3033}
3034
3035static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
3036{
3037	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
3038
3039	fc_pushlevel(ctx, FC_LOOP);
3040
3041	/* check stack depth */
3042	callstack_check_depth(ctx, FC_LOOP, 0);
3043	return 0;
3044}
3045
3046static int tgsi_endloop(struct r600_shader_ctx *ctx)
3047{
3048	int i;
3049
3050	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
3051
3052	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
3053		R600_ERR("loop/endloop in shader code are not paired.\n");
3054		return -EINVAL;
3055	}
3056
3057	/* fixup loop pointers - from r600isa
3058	   LOOP END points to CF after LOOP START,
3059	   LOOP START point to CF after LOOP END
3060	   BRK/CONT point to LOOP END CF
3061	*/
3062	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
3063
3064	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3065
3066	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
3067		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
3068	}
3069	/* TODO add LOOPRET support */
3070	fc_poplevel(ctx);
3071	callstack_decrease_current(ctx, FC_LOOP);
3072	return 0;
3073}
3074
3075static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
3076{
3077	unsigned int fscp;
3078
3079	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
3080	{
3081		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
3082			break;
3083	}
3084
3085	if (fscp == 0) {
3086		R600_ERR("Break not inside loop/endloop pair\n");
3087		return -EINVAL;
3088	}
3089
3090	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
3091	ctx->bc->cf_last->pop_count = 1;
3092
3093	fc_set_mid(ctx, fscp);
3094
3095	pops(ctx, 1);
3096	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
3097	return 0;
3098}
3099
3100static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
3101	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3102	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3103	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3104
3105	/* FIXME:
3106	 * For state trackers other than OpenGL, we'll want to use
3107	 * _RECIP_IEEE instead.
3108	 */
3109	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
3110
3111	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
3112	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3113	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3114	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3115	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3116	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3117	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3118	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3119	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3120	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3121	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3122	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3123	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3124	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3125	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3126	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3127	/* gap */
3128	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3129	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3130	/* gap */
3131	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3132	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3133	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3134	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3135	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3136	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3137	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3138	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3139	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3140	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3141	/* gap */
3142	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3143	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3144	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3145	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3146	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3147	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3148	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3149	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3150	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3151	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3152	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3153	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3154	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3155	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3156	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3157	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3158	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3159	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3160	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3161	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3162	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3163	{TGSI_OPCODE_TXD,	0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3164	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3165	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3166	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3167	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3168	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3169	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3170	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3171	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3172	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3173	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3174	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3175	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3176	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3177	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3178	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3179	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3180	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3181	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3182	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3183	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3184	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3185	/* gap */
3186	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3187	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3188	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3189	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3190	/* gap */
3191	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3192	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3193	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3194	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3195	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3196	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3197	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3198	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3199	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3200	/* gap */
3201	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3202	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3203	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3204	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3205	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3206	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3207	{TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3208	{TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3209	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3210	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3211	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3212	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3213	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3214	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3215	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3216	/* gap */
3217	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3218	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3219	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3220	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3221	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3222	/* gap */
3223	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3224	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3225	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3226	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3227	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3228	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3229	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3230	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3231	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3232	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3233	/* gap */
3234	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3235	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3236	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3237	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3238	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3239	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3240	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3241	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3242	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3243	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3244	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3245	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3246	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3247	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3248	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3249	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3250	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3251	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3252	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3253	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3254	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3255	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3256	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3257	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3258	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3259	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3260	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3261	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3262};
3263
/*
 * TGSI opcode translation table built from the EG_-prefixed hardware opcode
 * constants (presumably the Evergreen generation -- TODO confirm against the
 * code that selects this table).
 *
 * Every row has four initializers, in this order:
 *   1. the TGSI opcode (or a bare number for an unassigned opcode value),
 *   2. a 0/1 flag; it is 1 only on the MAD row, which is also the only row
 *      whose hardware opcode is an OP3 constant,
 *   3. the hardware opcode constant (ALU OP2/OP3, CF, or SQ_TEX_INST_*),
 *   4. the handler function associated with the opcode.
 *
 * The numeric placeholder rows (20, 22, 23, 32, 75, ...) are marked "gap" and
 * keep the row sequence contiguous; presumably the table is indexed directly
 * by TGSI opcode, so rows must not be reordered, inserted or removed without
 * checking the lookup code -- TODO confirm.
 *
 * Rows that use tgsi_unsupported (this includes TGSI_OPCODE_NOP) carry a NOP
 * hardware opcode; presumably such opcodes are rejected at translation time.
 * Several handlers are paired with a NOP hardware opcode as well (e.g. LIT,
 * EXP, LOG, POW, the flow-control rows); presumably those handlers choose
 * their own hardware instructions -- verify in the handler bodies.
 *
 * NOTE(review): SUB reuses the ADD opcode, ABS reuses MOV, and SLT/SLE reuse
 * SETGT/SETGE through tgsi_op2_swap; the operand negation/abs/swap these
 * mappings rely on is not visible in the table and must come from the
 * handlers -- confirm.
 */
3264static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
3265	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3266	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3267	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3268	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
3269	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq},
3270	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3271	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3272	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3273	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3274	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3275	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3276	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3277	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3278	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3279	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3280	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3281	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3282	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3283	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3284	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3285	/* gap */
3286	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3287	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3288	/* gap */
3289	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3290	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3291	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3292	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3293	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3294	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3295	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3296	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3297	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3298	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3299	/* gap */
3300	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3301	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3302	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3303	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3304	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3305	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3306	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3307	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3308	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3309	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3310	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3311	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3312	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3313	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3314	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3315	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3316	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3317	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3318	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3319	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3320	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3321	{TGSI_OPCODE_TXD,	0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3322	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3323	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3324	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3325	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3326	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3327	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3328	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3329	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3330	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3331	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3332	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3333	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3334	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3335	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
	/* NOTE(review): TXB uses the same SAMPLE_L opcode as TXL below -- confirm this is intended for a biased lookup. */
3336	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3337	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3338	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3339	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3340	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3341	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3342	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3343	/* gap */
3344	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3345	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3346	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3347	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3348	/* gap */
3349	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3350	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3351	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3352	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3353	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3354	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3355	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3356	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3357	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3358	/* gap */
3359	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3360	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3361	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3362	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3363	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3364	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3365	{TGSI_OPCODE_TXF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3366	{TGSI_OPCODE_TXQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3367	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3368	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3369	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3370	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3371	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3372	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3373	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3374	/* gap */
3375	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3376	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3377	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3378	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3379	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3380	/* gap */
3381	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3382	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3383	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3384	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3385	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3386	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3387	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3388	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3389	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3390	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3391	/* gap */
3392	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3393	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3394	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3395	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3396	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3397	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3398	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3399	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3400	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3401	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3402	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3403	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3404	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3405	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3406	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3407	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3408	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3409	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3410	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3411	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3412	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3413	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3414	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3415	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3416	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3417	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3418	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3419	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3420};
3421
/*
 * TGSI opcode translation table whose chip-specific rows use cayman_*
 * handlers (presumably the table selected for CAYMAN parts -- TODO confirm
 * against the code that picks the table).  It reuses the EG_-prefixed
 * hardware opcode constants.
 *
 * Every row has four initializers, in this order:
 *   1. the TGSI opcode (or a bare number for an unassigned opcode value),
 *   2. a 0/1 flag; it is 1 only on the MAD row, which is also the only row
 *      whose hardware opcode is an OP3 constant,
 *   3. the hardware opcode constant (ALU OP2/OP3, CF, or SQ_TEX_INST_*),
 *   4. the handler function associated with the opcode.
 *
 * Compared with eg_shader_tgsi_instruction, only the handler column differs,
 * and only on these rows:
 *   RCP, RSQ, EX2, LG2 -> cayman_emit_float_instr
 *   POW                -> cayman_pow
 *   COS, SIN           -> cayman_trig
 * The CAYMAN notes at the top of this file give the background for the
 * separate handlers.
 *
 * The numeric placeholder rows (20, 22, 23, 32, 75, ...) are marked "gap" and
 * keep the row sequence contiguous; presumably the table is indexed directly
 * by TGSI opcode, so rows must not be reordered, inserted or removed without
 * checking the lookup code -- TODO confirm.
 *
 * Rows that use tgsi_unsupported (this includes TGSI_OPCODE_NOP) carry a NOP
 * hardware opcode; presumably such opcodes are rejected at translation time.
 */
3422static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
3423	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3424	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3425	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3426	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr},
3427	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr},
3428	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3429	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3430	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3431	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3432	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3433	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3434	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3435	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3436	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3437	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3438	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3439	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3440	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3441	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3442	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3443	/* gap */
3444	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3445	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3446	/* gap */
3447	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3448	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3449	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3450	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3451	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3452	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3453	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr},
3454	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr},
3455	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow},
3456	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3457	/* gap */
3458	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3459	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3460	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3461	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3462	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig},
3463	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3464	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3465	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3466	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3467	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3468	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3469	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3470	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3471	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3472	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3473	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3474	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig},
3475	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3476	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3477	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3478	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3479	{TGSI_OPCODE_TXD,	0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3480	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3481	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3482	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3483	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3484	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3485	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3486	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3487	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3488	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3489	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3490	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3491	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3492	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3493	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
	/* NOTE(review): TXB uses the same SAMPLE_L opcode as TXL below -- confirm this is intended for a biased lookup. */
3494	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3495	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3496	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3497	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3498	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3499	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3500	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3501	/* gap */
3502	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3503	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3504	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3505	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3506	/* gap */
3507	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3508	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3509	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3510	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3511	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3512	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3513	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3514	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3515	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3516	/* gap */
3517	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3518	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3519	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3520	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3521	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3522	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3523	{TGSI_OPCODE_TXF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3524	{TGSI_OPCODE_TXQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3525	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3526	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3527	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3528	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3529	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3530	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3531	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3532	/* gap */
3533	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3534	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3535	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3536	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3537	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3538	/* gap */
3539	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3540	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3541	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3542	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3543	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3544	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3545	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3546	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3547	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3548	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3549	/* gap */
3550	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3551	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3552	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3553	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3554	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3555	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3556	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3557	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3558	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3559	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3560	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3561	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3562	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3563	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3564	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3565	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3566	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3567	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3568	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3569	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3570	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3571	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3572	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3573	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3574	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3575	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3576	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3577	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3578};
3579