r600_shader.c revision 21c5607e64ca4ef68730d8e846d8e7744ecdd024
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "pipe/p_shader_tokens.h"
24#include "tgsi/tgsi_info.h"
25#include "tgsi/tgsi_parse.h"
26#include "tgsi/tgsi_scan.h"
27#include "tgsi/tgsi_dump.h"
28#include "util/u_format.h"
29#include "r600_pipe.h"
30#include "r600_asm.h"
31#include "r600_sq.h"
32#include "r600_formats.h"
33#include "r600_opcodes.h"
34#include "r600d.h"
35#include <stdio.h>
36#include <errno.h>
37#include <byteswap.h>
38
/* CAYMAN notes
 * This explains why so many instructions are emitted in loops on CAYMAN.
 *
 * - These 8xx t-slot only ops are implemented in all vector slots:
 *     MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
 *
 * - These 8xx t-slot only opcodes become vector ops, with all four
 *   slots expecting the arguments on sources a and b. Result is
 *   broadcast to all channels:
 *     MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
 *
 * - These 8xx t-slot only opcodes become vector ops in the z, y, and
 *   x slots:
 *     EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
 *     RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
 *     SQRT_IEEE/_64
 *     SIN/COS
 *   The w slot may have an independent co-issued operation, or if the
 *   result is required to be in the w slot, the opcode above may be
 *   issued in the w slot as well.
 *   The compiler must issue the source argument to slots z, y, and x.
 */
59
60
61int r600_find_vs_semantic_index(struct r600_shader *vs,
62				struct r600_shader *ps, int id)
63{
64	struct r600_shader_io *input = &ps->input[id];
65
66	for (int i = 0; i < vs->noutput; i++) {
67		if (input->name == vs->output[i].name &&
68			input->sid == vs->output[i].sid) {
69			return i - 1;
70		}
71	}
72	return 0;
73}
74
75static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
76{
77	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
78	struct r600_shader *rshader = &shader->shader;
79	uint32_t *ptr;
80	int	i;
81
82	/* copy new shader */
83	if (shader->bo == NULL) {
84		/* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
85		shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE);
86		if (shader->bo == NULL) {
87			return -ENOMEM;
88		}
89		ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, rctx->ctx.cs, PIPE_TRANSFER_WRITE);
90		if (R600_BIG_ENDIAN) {
91			for (i = 0; i < rshader->bc.ndw; ++i) {
92				ptr[i] = bswap_32(rshader->bc.bytecode[i]);
93			}
94		} else {
95			memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr));
96		}
97		r600_bo_unmap(rctx->radeon, shader->bo);
98	}
99	/* build state */
100	switch (rshader->processor_type) {
101	case TGSI_PROCESSOR_VERTEX:
102		if (rctx->chip_class >= EVERGREEN) {
103			evergreen_pipe_shader_vs(ctx, shader);
104		} else {
105			r600_pipe_shader_vs(ctx, shader);
106		}
107		break;
108	case TGSI_PROCESSOR_FRAGMENT:
109		if (rctx->chip_class >= EVERGREEN) {
110			evergreen_pipe_shader_ps(ctx, shader);
111		} else {
112			r600_pipe_shader_ps(ctx, shader);
113		}
114		break;
115	default:
116		return -EINVAL;
117	}
118	return 0;
119}
120
121static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader);
122
123int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader)
124{
125	static int dump_shaders = -1;
126	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
127	int r;
128
129	/* Would like some magic "get_bool_option_once" routine.
130	*/
131	if (dump_shaders == -1)
132		dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
133
134	if (dump_shaders) {
135		fprintf(stderr, "--------------------------------------------------------------\n");
136		tgsi_dump(shader->tokens, 0);
137	}
138	r = r600_shader_from_tgsi(rctx, shader);
139	if (r) {
140		R600_ERR("translation from TGSI failed !\n");
141		return r;
142	}
143	r = r600_bytecode_build(&shader->shader.bc);
144	if (r) {
145		R600_ERR("building bytecode failed !\n");
146		return r;
147	}
148	if (dump_shaders) {
149		r600_bytecode_dump(&shader->shader.bc);
150		fprintf(stderr, "______________________________________________________________\n");
151	}
152	return r600_pipe_shader(ctx, shader);
153}
154
155void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
156{
157	r600_bo_reference(&shader->bo, NULL);
158	r600_bytecode_clear(&shader->shader.bc);
159
160	memset(&shader->shader,0,sizeof(struct r600_shader));
161}
162
163/*
164 * tgsi -> r600 shader
165 */
struct r600_shader_tgsi_instruction;

/*
 * One source operand of the current instruction, as filled in by tgsi_src().
 */
struct r600_shader_src {
	/* register / constant / special-value selector */
	unsigned sel;
	/* per-channel swizzle taken from the TGSI source register */
	unsigned swizzle[4];
	/* negate modifier */
	unsigned neg;
	/* absolute-value modifier */
	unsigned abs;
	/* V_SQ_REL_RELATIVE for indirectly addressed registers, 0 otherwise */
	unsigned rel;
	/* the four literal dwords when sel is V_SQ_ALU_SRC_LITERAL */
	uint32_t value[4];
};
176
177struct r600_shader_ctx {
178	struct tgsi_shader_info			info;
179	struct tgsi_parse_context		parse;
180	const struct tgsi_token			*tokens;
181	unsigned				type;
182	unsigned				file_offset[TGSI_FILE_COUNT];
183	unsigned				temp_reg;
184	unsigned				ar_reg;
185	struct r600_shader_tgsi_instruction	*inst_info;
186	struct r600_bytecode				*bc;
187	struct r600_shader			*shader;
188	struct r600_shader_src			src[4];
189	u32					*literals;
190	u32					nliterals;
191	u32					max_driver_temp_used;
192	/* needed for evergreen interpolation */
193	boolean                                 input_centroid;
194	boolean                                 input_linear;
195	boolean                                 input_perspective;
196	int					num_interp_gpr;
197};
198
/*
 * One entry of a per-chip opcode table.  The tables are indexed by TGSI
 * opcode and `process` is called to translate the instruction.
 */
struct r600_shader_tgsi_instruction {
	unsigned tgsi_opcode;
	unsigned is_op3;
	unsigned r600_opcode;
	int (*process)(struct r600_shader_ctx *ctx);
};

/* Opcode tables, one per chip family; selected in r600_shader_from_tgsi(). */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[],
					   eg_shader_tgsi_instruction[],
					   cm_shader_tgsi_instruction[];

static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
208
209static int tgsi_is_supported(struct r600_shader_ctx *ctx)
210{
211	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
212	int j;
213
214	if (i->Instruction.NumDstRegs > 1) {
215		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
216		return -EINVAL;
217	}
218	if (i->Instruction.Predicate) {
219		R600_ERR("predicate unsupported\n");
220		return -EINVAL;
221	}
222#if 0
223	if (i->Instruction.Label) {
224		R600_ERR("label unsupported\n");
225		return -EINVAL;
226	}
227#endif
228	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
229		if (i->Src[j].Register.Dimension) {
230			R600_ERR("unsupported src %d (dimension %d)\n", j,
231				 i->Src[j].Register.Dimension);
232			return -EINVAL;
233		}
234	}
235	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
236		if (i->Dst[j].Register.Dimension) {
237			R600_ERR("unsupported dst (dimension)\n");
238			return -EINVAL;
239		}
240	}
241	return 0;
242}
243
244static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
245{
246	int i, r;
247	struct r600_bytecode_alu alu;
248	int gpr = 0, base_chan = 0;
249	int ij_index = 0;
250
251	if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
252		ij_index = 0;
253		if (ctx->shader->input[input].centroid)
254			ij_index++;
255	} else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
256		ij_index = 0;
257		/* if we have perspective add one */
258		if (ctx->input_perspective)  {
259			ij_index++;
260			/* if we have perspective centroid */
261			if (ctx->input_centroid)
262				ij_index++;
263		}
264		if (ctx->shader->input[input].centroid)
265			ij_index++;
266	}
267
268	/* work out gpr and base_chan from index */
269	gpr = ij_index / 2;
270	base_chan = (2 * (ij_index % 2)) + 1;
271
272	for (i = 0; i < 8; i++) {
273		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
274
275		if (i < 4)
276			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
277		else
278			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
279
280		if ((i > 1) && (i < 6)) {
281			alu.dst.sel = ctx->shader->input[input].gpr;
282			alu.dst.write = 1;
283		}
284
285		alu.dst.chan = i % 4;
286
287		alu.src[0].sel = gpr;
288		alu.src[0].chan = (base_chan - (i % 2));
289
290		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
291
292		alu.bank_swizzle_force = SQ_ALU_VEC_210;
293		if ((i % 4) == 3)
294			alu.last = 1;
295		r = r600_bytecode_add_alu(ctx->bc, &alu);
296		if (r)
297			return r;
298	}
299	return 0;
300}
301
302static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input)
303{
304	int i, r;
305	struct r600_bytecode_alu alu;
306
307	for (i = 0; i < 4; i++) {
308		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
309
310		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_LOAD_P0;
311
312		alu.dst.sel = ctx->shader->input[input].gpr;
313		alu.dst.write = 1;
314
315		alu.dst.chan = i;
316
317		alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
318		alu.src[0].chan = i;
319
320		if (i == 3)
321			alu.last = 1;
322		r = r600_bytecode_add_alu(ctx->bc, &alu);
323		if (r)
324			return r;
325	}
326	return 0;
327}
328
329static int tgsi_declaration(struct r600_shader_ctx *ctx)
330{
331	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
332	unsigned i;
333	int r;
334
335	switch (d->Declaration.File) {
336	case TGSI_FILE_INPUT:
337		i = ctx->shader->ninput++;
338		ctx->shader->input[i].name = d->Semantic.Name;
339		ctx->shader->input[i].sid = d->Semantic.Index;
340		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
341		ctx->shader->input[i].centroid = d->Declaration.Centroid;
342		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
343		if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chip_class >= EVERGREEN) {
344			/* turn input into interpolate on EG */
345			if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION &&
346			    ctx->shader->input[i].name != TGSI_SEMANTIC_FACE) {
347				ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
348				if (ctx->shader->input[i].interpolate > 0) {
349					evergreen_interp_alu(ctx, i);
350				} else {
351					evergreen_interp_flat(ctx, i);
352				}
353			}
354		}
355		break;
356	case TGSI_FILE_OUTPUT:
357		i = ctx->shader->noutput++;
358		ctx->shader->output[i].name = d->Semantic.Name;
359		ctx->shader->output[i].sid = d->Semantic.Index;
360		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
361		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
362		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
363			/* these don't count as vertex param exports */
364			if ((ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION) ||
365			    (ctx->shader->output[i].name == TGSI_SEMANTIC_PSIZE))
366				ctx->shader->npos++;
367		}
368		break;
369	case TGSI_FILE_CONSTANT:
370	case TGSI_FILE_TEMPORARY:
371	case TGSI_FILE_SAMPLER:
372	case TGSI_FILE_ADDRESS:
373		break;
374
375	case TGSI_FILE_SYSTEM_VALUE:
376		if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
377			struct r600_bytecode_alu alu;
378			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
379
380			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
381			alu.src[0].sel = 0;
382			alu.src[0].chan = 3;
383
384			alu.dst.sel = 0;
385			alu.dst.chan = 3;
386			alu.dst.write = 1;
387			alu.last = 1;
388
389			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
390				return r;
391			break;
392		}
393
394	default:
395		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
396		return -EINVAL;
397	}
398	return 0;
399}
400
401static int r600_get_temp(struct r600_shader_ctx *ctx)
402{
403	return ctx->temp_reg + ctx->max_driver_temp_used++;
404}
405
406/*
407 * for evergreen we need to scan the shader to find the number of GPRs we need to
408 * reserve for interpolation.
409 *
410 * we need to know if we are going to emit
411 * any centroid inputs
412 * if perspective and linear are required
413*/
414static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
415{
416	int i;
417	int num_baryc;
418
419	ctx->input_linear = FALSE;
420	ctx->input_perspective = FALSE;
421	ctx->input_centroid = FALSE;
422	ctx->num_interp_gpr = 1;
423
424	/* any centroid inputs */
425	for (i = 0; i < ctx->info.num_inputs; i++) {
426		/* skip position/face */
427		if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
428		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
429			continue;
430		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
431			ctx->input_linear = TRUE;
432		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
433			ctx->input_perspective = TRUE;
434		if (ctx->info.input_centroid[i])
435			ctx->input_centroid = TRUE;
436	}
437
438	num_baryc = 0;
439	/* ignoring sample for now */
440	if (ctx->input_perspective)
441		num_baryc++;
442	if (ctx->input_linear)
443		num_baryc++;
444	if (ctx->input_centroid)
445		num_baryc *= 2;
446
447	ctx->num_interp_gpr += (num_baryc + 1) >> 1;
448
449	/* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
450	return ctx->num_interp_gpr;
451}
452
453static void tgsi_src(struct r600_shader_ctx *ctx,
454		     const struct tgsi_full_src_register *tgsi_src,
455		     struct r600_shader_src *r600_src)
456{
457	memset(r600_src, 0, sizeof(*r600_src));
458	r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
459	r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
460	r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
461	r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
462	r600_src->neg = tgsi_src->Register.Negate;
463	r600_src->abs = tgsi_src->Register.Absolute;
464
465	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
466		int index;
467		if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
468			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
469			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
470
471			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
472			r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
473			if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
474				return;
475		}
476		index = tgsi_src->Register.Index;
477		r600_src->sel = V_SQ_ALU_SRC_LITERAL;
478		memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
479	} else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
480		/* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
481		r600_src->swizzle[0] = 3;
482		r600_src->swizzle[1] = 3;
483		r600_src->swizzle[2] = 3;
484		r600_src->swizzle[3] = 3;
485		r600_src->sel = 0;
486	} else {
487		if (tgsi_src->Register.Indirect)
488			r600_src->rel = V_SQ_REL_RELATIVE;
489		r600_src->sel = tgsi_src->Register.Index;
490		r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
491	}
492}
493
494static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
495{
496	struct r600_bytecode_vtx vtx;
497	unsigned int ar_reg;
498	int r;
499
500	if (offset) {
501		struct r600_bytecode_alu alu;
502
503		memset(&alu, 0, sizeof(alu));
504
505		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
506		alu.src[0].sel = ctx->ar_reg;
507
508		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
509		alu.src[1].value = offset;
510
511		alu.dst.sel = dst_reg;
512		alu.dst.write = 1;
513		alu.last = 1;
514
515		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
516			return r;
517
518		ar_reg = dst_reg;
519	} else {
520		ar_reg = ctx->ar_reg;
521	}
522
523	memset(&vtx, 0, sizeof(vtx));
524	vtx.fetch_type = 2;		/* VTX_FETCH_NO_INDEX_OFFSET */
525	vtx.src_gpr = ar_reg;
526	vtx.mega_fetch_count = 16;
527	vtx.dst_gpr = dst_reg;
528	vtx.dst_sel_x = 0;		/* SEL_X */
529	vtx.dst_sel_y = 1;		/* SEL_Y */
530	vtx.dst_sel_z = 2;		/* SEL_Z */
531	vtx.dst_sel_w = 3;		/* SEL_W */
532	vtx.data_format = FMT_32_32_32_32_FLOAT;
533	vtx.num_format_all = 2;		/* NUM_FORMAT_SCALED */
534	vtx.format_comp_all = 1;	/* FORMAT_COMP_SIGNED */
535	vtx.srf_mode_all = 1;		/* SRF_MODE_NO_ZERO */
536	vtx.endian = r600_endian_swap(32);
537
538	if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
539		return r;
540
541	return 0;
542}
543
544static int tgsi_split_constant(struct r600_shader_ctx *ctx)
545{
546	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
547	struct r600_bytecode_alu alu;
548	int i, j, k, nconst, r;
549
550	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
551		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
552			nconst++;
553		}
554		tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
555	}
556	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
557		if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
558			continue;
559		}
560
561		if (ctx->src[i].rel) {
562			int treg = r600_get_temp(ctx);
563			if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
564				return r;
565
566			ctx->src[i].sel = treg;
567			ctx->src[i].rel = 0;
568			j--;
569		} else if (j > 0) {
570			int treg = r600_get_temp(ctx);
571			for (k = 0; k < 4; k++) {
572				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
573				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
574				alu.src[0].sel = ctx->src[i].sel;
575				alu.src[0].chan = k;
576				alu.src[0].rel = ctx->src[i].rel;
577				alu.dst.sel = treg;
578				alu.dst.chan = k;
579				alu.dst.write = 1;
580				if (k == 3)
581					alu.last = 1;
582				r = r600_bytecode_add_alu(ctx->bc, &alu);
583				if (r)
584					return r;
585			}
586			ctx->src[i].sel = treg;
587			ctx->src[i].rel =0;
588			j--;
589		}
590	}
591	return 0;
592}
593
594/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
595static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
596{
597	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
598	struct r600_bytecode_alu alu;
599	int i, j, k, nliteral, r;
600
601	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
602		if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
603			nliteral++;
604		}
605	}
606	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
607		if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
608			int treg = r600_get_temp(ctx);
609			for (k = 0; k < 4; k++) {
610				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
611				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
612				alu.src[0].sel = ctx->src[i].sel;
613				alu.src[0].chan = k;
614				alu.src[0].value = ctx->src[i].value[k];
615				alu.dst.sel = treg;
616				alu.dst.chan = k;
617				alu.dst.write = 1;
618				if (k == 3)
619					alu.last = 1;
620				r = r600_bytecode_add_alu(ctx->bc, &alu);
621				if (r)
622					return r;
623			}
624			ctx->src[i].sel = treg;
625			j--;
626		}
627	}
628	return 0;
629}
630
631static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader)
632{
633	struct r600_shader *shader = &pipeshader->shader;
634	struct tgsi_token *tokens = pipeshader->tokens;
635	struct tgsi_full_immediate *immediate;
636	struct tgsi_full_property *property;
637	struct r600_shader_ctx ctx;
638	struct r600_bytecode_output output[32];
639	unsigned output_done, noutput;
640	unsigned opcode;
641	int i, j, r = 0, pos0;
642
643	ctx.bc = &shader->bc;
644	ctx.shader = shader;
645	r600_bytecode_init(ctx.bc, rctx->chip_class);
646	ctx.tokens = tokens;
647	tgsi_scan_shader(tokens, &ctx.info);
648	tgsi_parse_init(&ctx.parse, tokens);
649	ctx.type = ctx.parse.FullHeader.Processor.Processor;
650	shader->processor_type = ctx.type;
651	ctx.bc->type = shader->processor_type;
652
653	shader->clamp_color = (((ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->clamp_fragment_color) ||
654		((ctx.type == TGSI_PROCESSOR_VERTEX) && rctx->clamp_vertex_color));
655
656	shader->nr_cbufs = rctx->nr_cbufs;
657
658	/* register allocations */
659	/* Values [0,127] correspond to GPR[0..127].
660	 * Values [128,159] correspond to constant buffer bank 0
661	 * Values [160,191] correspond to constant buffer bank 1
662	 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
663	 * Values [256,287] correspond to constant buffer bank 2 (EG)
664	 * Values [288,319] correspond to constant buffer bank 3 (EG)
665	 * Other special values are shown in the list below.
666	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
667	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
668	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
669	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
670	 * 248	SQ_ALU_SRC_0: special constant 0.0.
671	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
672	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
673	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
674	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
675	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
676	 * 254	SQ_ALU_SRC_PV: previous vector result.
677	 * 255	SQ_ALU_SRC_PS: previous scalar result.
678	 */
679	for (i = 0; i < TGSI_FILE_COUNT; i++) {
680		ctx.file_offset[i] = 0;
681	}
682	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
683		ctx.file_offset[TGSI_FILE_INPUT] = 1;
684		if (ctx.bc->chip_class >= EVERGREEN) {
685			r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
686		} else {
687			r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
688		}
689	}
690	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
691		ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
692	}
693	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
694						ctx.info.file_max[TGSI_FILE_INPUT] + 1;
695	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
696						ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
697
698	/* Outside the GPR range. This will be translated to one of the
699	 * kcache banks later. */
700	ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
701
702	ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
703	ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
704			ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
705	ctx.temp_reg = ctx.ar_reg + 1;
706
707	ctx.nliterals = 0;
708	ctx.literals = NULL;
709	shader->fs_write_all = FALSE;
710	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
711		tgsi_parse_token(&ctx.parse);
712		switch (ctx.parse.FullToken.Token.Type) {
713		case TGSI_TOKEN_TYPE_IMMEDIATE:
714			immediate = &ctx.parse.FullToken.FullImmediate;
715			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
716			if(ctx.literals == NULL) {
717				r = -ENOMEM;
718				goto out_err;
719			}
720			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
721			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
722			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
723			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
724			ctx.nliterals++;
725			break;
726		case TGSI_TOKEN_TYPE_DECLARATION:
727			r = tgsi_declaration(&ctx);
728			if (r)
729				goto out_err;
730			break;
731		case TGSI_TOKEN_TYPE_INSTRUCTION:
732			r = tgsi_is_supported(&ctx);
733			if (r)
734				goto out_err;
735			ctx.max_driver_temp_used = 0;
736			/* reserve first tmp for everyone */
737			r600_get_temp(&ctx);
738
739			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
740			if ((r = tgsi_split_constant(&ctx)))
741				goto out_err;
742			if ((r = tgsi_split_literal_constant(&ctx)))
743				goto out_err;
744			if (ctx.bc->chip_class == CAYMAN)
745				ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
746			else if (ctx.bc->chip_class >= EVERGREEN)
747				ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
748			else
749				ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
750			r = ctx.inst_info->process(&ctx);
751			if (r)
752				goto out_err;
753			break;
754		case TGSI_TOKEN_TYPE_PROPERTY:
755			property = &ctx.parse.FullToken.FullProperty;
756			if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
757				if (property->u[0].Data == 1)
758					shader->fs_write_all = TRUE;
759			}
760			break;
761		default:
762			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
763			r = -EINVAL;
764			goto out_err;
765		}
766	}
767
768	noutput = shader->noutput;
769
770	/* clamp color outputs */
771	if (shader->clamp_color) {
772		for (i = 0; i < noutput; i++) {
773			if (shader->output[i].name == TGSI_SEMANTIC_COLOR ||
774				shader->output[i].name == TGSI_SEMANTIC_BCOLOR) {
775
776				int j;
777				for (j = 0; j < 4; j++) {
778					struct r600_bytecode_alu alu;
779					memset(&alu, 0, sizeof(struct r600_bytecode_alu));
780
781					/* MOV_SAT R, R */
782					alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
783					alu.dst.sel = shader->output[i].gpr;
784					alu.dst.chan = j;
785					alu.dst.write = 1;
786					alu.dst.clamp = 1;
787					alu.src[0].sel = alu.dst.sel;
788					alu.src[0].chan = j;
789
790					if (j == 3) {
791						alu.last = 1;
792					}
793					r = r600_bytecode_add_alu(ctx.bc, &alu);
794					if (r)
795						return r;
796				}
797			}
798		}
799	}
800
801	/* export output */
802	j = 0;
803	for (i = 0, pos0 = 0; i < noutput; i++) {
804		memset(&output[i], 0, sizeof(struct r600_bytecode_output));
805		output[i + j].gpr = shader->output[i].gpr;
806		output[i + j].elem_size = 3;
807		output[i + j].swizzle_x = 0;
808		output[i + j].swizzle_y = 1;
809		output[i + j].swizzle_z = 2;
810		output[i + j].swizzle_w = 3;
811		output[i + j].burst_count = 1;
812		output[i + j].barrier = 1;
813		output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
814		output[i + j].array_base = i - pos0;
815		output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
816		switch (ctx.type) {
817		case TGSI_PROCESSOR_VERTEX:
818			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
819				output[i + j].array_base = 60;
820				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
821				/* position doesn't count in array_base */
822				pos0++;
823			}
824			if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
825				output[i + j].array_base = 61;
826				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
827				/* position doesn't count in array_base */
828				pos0++;
829			}
830			break;
831		case TGSI_PROCESSOR_FRAGMENT:
832			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
833				output[i + j].array_base = shader->output[i].sid;
834				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
835				if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) {
836					for (j = 1; j < shader->nr_cbufs; j++) {
837						memset(&output[i + j], 0, sizeof(struct r600_bytecode_output));
838						output[i + j].gpr = shader->output[i].gpr;
839						output[i + j].elem_size = 3;
840						output[i + j].swizzle_x = 0;
841						output[i + j].swizzle_y = 1;
842						output[i + j].swizzle_z = 2;
843						output[i + j].swizzle_w = 3;
844						output[i + j].burst_count = 1;
845						output[i + j].barrier = 1;
846						output[i + j].array_base = shader->output[i].sid + j;
847						output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
848						output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
849					}
850					j--;
851				}
852			} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
853				output[i + j].array_base = 61;
854				output[i + j].swizzle_x = 2;
855				output[i + j].swizzle_y = 7;
856				output[i + j].swizzle_z = output[i + j].swizzle_w = 7;
857				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
858			} else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
859				output[i + j].array_base = 61;
860				output[i + j].swizzle_x = 7;
861				output[i + j].swizzle_y = 1;
862				output[i + j].swizzle_z = output[i + j].swizzle_w = 7;
863				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
864			} else {
865				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
866				r = -EINVAL;
867				goto out_err;
868			}
869			break;
870		default:
871			R600_ERR("unsupported processor type %d\n", ctx.type);
872			r = -EINVAL;
873			goto out_err;
874		}
875	}
876	noutput += j;
877	/* add fake param output for vertex shader if no param is exported */
878	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
879		for (i = 0, pos0 = 0; i < noutput; i++) {
880			if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
881				pos0 = 1;
882				break;
883			}
884		}
885		if (!pos0) {
886			memset(&output[i], 0, sizeof(struct r600_bytecode_output));
887			output[i].gpr = 0;
888			output[i].elem_size = 3;
889			output[i].swizzle_x = 0;
890			output[i].swizzle_y = 1;
891			output[i].swizzle_z = 2;
892			output[i].swizzle_w = 3;
893			output[i].burst_count = 1;
894			output[i].barrier = 1;
895			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
896			output[i].array_base = 0;
897			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
898			noutput++;
899		}
900	}
901	/* add fake pixel export */
902	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
903		memset(&output[0], 0, sizeof(struct r600_bytecode_output));
904		output[0].gpr = 0;
905		output[0].elem_size = 3;
906		output[0].swizzle_x = 7;
907		output[0].swizzle_y = 7;
908		output[0].swizzle_z = 7;
909		output[0].swizzle_w = 7;
910		output[0].burst_count = 1;
911		output[0].barrier = 1;
912		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
913		output[0].array_base = 0;
914		output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
915		noutput++;
916	}
917	/* set export done on last export of each type */
918	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
919		if (ctx.bc->chip_class < CAYMAN) {
920			if (i == (noutput - 1)) {
921				output[i].end_of_program = 1;
922			}
923		}
924		if (!(output_done & (1 << output[i].type))) {
925			output_done |= (1 << output[i].type);
926			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
927		}
928	}
929	/* add output to bytecode */
930	for (i = 0; i < noutput; i++) {
931		r = r600_bytecode_add_output(ctx.bc, &output[i]);
932		if (r)
933			goto out_err;
934	}
935	/* add program end */
936	if (ctx.bc->chip_class == CAYMAN)
937		cm_bytecode_add_cf_end(ctx.bc);
938
939	free(ctx.literals);
940	tgsi_parse_free(&ctx.parse);
941	return 0;
942out_err:
943	free(ctx.literals);
944	tgsi_parse_free(&ctx.parse);
945	return r;
946}
947
948static int tgsi_unsupported(struct r600_shader_ctx *ctx)
949{
950	R600_ERR("%s tgsi opcode unsupported\n",
951		 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
952	return -EINVAL;
953}
954
/* Opcode-table handler that emits nothing and always succeeds. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* intentionally unused */
	return 0;
}
959
960static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
961			const struct r600_shader_src *shader_src,
962			unsigned chan)
963{
964	bc_src->sel = shader_src->sel;
965	bc_src->chan = shader_src->swizzle[chan];
966	bc_src->neg = shader_src->neg;
967	bc_src->abs = shader_src->abs;
968	bc_src->rel = shader_src->rel;
969	bc_src->value = shader_src->value[bc_src->chan];
970}
971
972static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
973{
974	bc_src->abs = 1;
975	bc_src->neg = 0;
976}
977
978static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
979{
980	bc_src->neg = !bc_src->neg;
981}
982
983static void tgsi_dst(struct r600_shader_ctx *ctx,
984		     const struct tgsi_full_dst_register *tgsi_dst,
985		     unsigned swizzle,
986		     struct r600_bytecode_alu_dst *r600_dst)
987{
988	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
989
990	r600_dst->sel = tgsi_dst->Register.Index;
991	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
992	r600_dst->chan = swizzle;
993	r600_dst->write = 1;
994	if (tgsi_dst->Register.Indirect)
995		r600_dst->rel = V_SQ_REL_RELATIVE;
996	if (inst->Instruction.Saturate) {
997		r600_dst->clamp = 1;
998	}
999}
1000
/*
 * Return the index (0..3) of the highest channel enabled in the low four
 * bits of writemask. Returns 0 when none of those bits is set; bits above
 * bit 3 are ignored.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan downwards so the first hit is the highest enabled channel */
	for (chan = 3; chan > 0; chan--) {
		if ((writemask >> chan) & 1)
			return chan;
	}
	return 0;
}
1012
1013static int tgsi_int_to_flt(struct r600_shader_ctx *ctx)
1014{
1015	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1016	struct r600_bytecode_alu alu;
1017	int i, j, r;
1018	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1019
1020	for (i = 0; i < lasti + 1; i++) {
1021		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1022			continue;
1023
1024		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1025		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1026
1027		alu.inst = ctx->inst_info->r600_opcode;
1028		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1029			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1030		}
1031		alu.last = 1;
1032		r = r600_bytecode_add_alu(ctx->bc, &alu);
1033		if (r)
1034			return r;
1035	}
1036	return 0;
1037}
1038
1039static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
1040{
1041	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1042	struct r600_bytecode_alu alu;
1043	int i, j, r;
1044	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1045
1046	for (i = 0; i < lasti + 1; i++) {
1047		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1048			continue;
1049
1050		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1051		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1052
1053		alu.inst = ctx->inst_info->r600_opcode;
1054		if (!swap) {
1055			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1056				r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1057			}
1058		} else {
1059			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
1060			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1061		}
1062		/* handle some special cases */
1063		switch (ctx->inst_info->tgsi_opcode) {
1064		case TGSI_OPCODE_SUB:
1065			r600_bytecode_src_toggle_neg(&alu.src[1]);
1066			break;
1067		case TGSI_OPCODE_ABS:
1068			r600_bytecode_src_set_abs(&alu.src[0]);
1069			break;
1070		default:
1071			break;
1072		}
1073		if (i == lasti) {
1074			alu.last = 1;
1075		}
1076		r = r600_bytecode_add_alu(ctx->bc, &alu);
1077		if (r)
1078			return r;
1079	}
1080	return 0;
1081}
1082
/* Per-channel ALU opcode handler with the sources in their TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
1087
/* Per-channel ALU opcode handler with TGSI sources 0 and 1 exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
1092
1093static int tgsi_ineg(struct r600_shader_ctx *ctx)
1094{
1095	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1096	struct r600_bytecode_alu alu;
1097	int i, r;
1098	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1099
1100	for (i = 0; i < lasti + 1; i++) {
1101
1102		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1103			continue;
1104		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1105		alu.inst = ctx->inst_info->r600_opcode;
1106
1107		alu.src[0].sel = V_SQ_ALU_SRC_0;
1108
1109		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1110
1111		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1112
1113		if (i == lasti) {
1114			alu.last = 1;
1115		}
1116		r = r600_bytecode_add_alu(ctx->bc, &alu);
1117		if (r)
1118			return r;
1119	}
1120	return 0;
1121
1122}
1123
1124static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
1125{
1126	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1127	int i, j, r;
1128	struct r600_bytecode_alu alu;
1129	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1130
1131	for (i = 0 ; i < last_slot; i++) {
1132		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1133		alu.inst = ctx->inst_info->r600_opcode;
1134		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1135			r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);
1136		}
1137		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1138		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1139
1140		if (i == last_slot - 1)
1141			alu.last = 1;
1142		r = r600_bytecode_add_alu(ctx->bc, &alu);
1143		if (r)
1144			return r;
1145	}
1146	return 0;
1147}
1148
1149/*
1150 * r600 - trunc to -PI..PI range
1151 * r700 - normalize by dividing by 2PI
1152 * see fdo bug 27901
1153 */
1154static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
1155{
1156	static float half_inv_pi = 1.0 /(3.1415926535 * 2);
1157	static float double_pi = 3.1415926535 * 2;
1158	static float neg_pi = -3.1415926535;
1159
1160	int r;
1161	struct r600_bytecode_alu alu;
1162
1163	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1164	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1165	alu.is_op3 = 1;
1166
1167	alu.dst.chan = 0;
1168	alu.dst.sel = ctx->temp_reg;
1169	alu.dst.write = 1;
1170
1171	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1172
1173	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1174	alu.src[1].chan = 0;
1175	alu.src[1].value = *(uint32_t *)&half_inv_pi;
1176	alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1177	alu.src[2].chan = 0;
1178	alu.last = 1;
1179	r = r600_bytecode_add_alu(ctx->bc, &alu);
1180	if (r)
1181		return r;
1182
1183	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1184	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1185
1186	alu.dst.chan = 0;
1187	alu.dst.sel = ctx->temp_reg;
1188	alu.dst.write = 1;
1189
1190	alu.src[0].sel = ctx->temp_reg;
1191	alu.src[0].chan = 0;
1192	alu.last = 1;
1193	r = r600_bytecode_add_alu(ctx->bc, &alu);
1194	if (r)
1195		return r;
1196
1197	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1198	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1199	alu.is_op3 = 1;
1200
1201	alu.dst.chan = 0;
1202	alu.dst.sel = ctx->temp_reg;
1203	alu.dst.write = 1;
1204
1205	alu.src[0].sel = ctx->temp_reg;
1206	alu.src[0].chan = 0;
1207
1208	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1209	alu.src[1].chan = 0;
1210	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1211	alu.src[2].chan = 0;
1212
1213	if (ctx->bc->chip_class == R600) {
1214		alu.src[1].value = *(uint32_t *)&double_pi;
1215		alu.src[2].value = *(uint32_t *)&neg_pi;
1216	} else {
1217		alu.src[1].sel = V_SQ_ALU_SRC_1;
1218		alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1219		alu.src[2].neg = 1;
1220	}
1221
1222	alu.last = 1;
1223	r = r600_bytecode_add_alu(ctx->bc, &alu);
1224	if (r)
1225		return r;
1226	return 0;
1227}
1228
1229static int cayman_trig(struct r600_shader_ctx *ctx)
1230{
1231	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1232	struct r600_bytecode_alu alu;
1233	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1234	int i, r;
1235
1236	r = tgsi_setup_trig(ctx);
1237	if (r)
1238		return r;
1239
1240
1241	for (i = 0; i < last_slot; i++) {
1242		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1243		alu.inst = ctx->inst_info->r600_opcode;
1244		alu.dst.chan = i;
1245
1246		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1247		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1248
1249		alu.src[0].sel = ctx->temp_reg;
1250		alu.src[0].chan = 0;
1251		if (i == last_slot - 1)
1252			alu.last = 1;
1253		r = r600_bytecode_add_alu(ctx->bc, &alu);
1254		if (r)
1255			return r;
1256	}
1257	return 0;
1258}
1259
1260static int tgsi_trig(struct r600_shader_ctx *ctx)
1261{
1262	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1263	struct r600_bytecode_alu alu;
1264	int i, r;
1265	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1266
1267	r = tgsi_setup_trig(ctx);
1268	if (r)
1269		return r;
1270
1271	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1272	alu.inst = ctx->inst_info->r600_opcode;
1273	alu.dst.chan = 0;
1274	alu.dst.sel = ctx->temp_reg;
1275	alu.dst.write = 1;
1276
1277	alu.src[0].sel = ctx->temp_reg;
1278	alu.src[0].chan = 0;
1279	alu.last = 1;
1280	r = r600_bytecode_add_alu(ctx->bc, &alu);
1281	if (r)
1282		return r;
1283
1284	/* replicate result */
1285	for (i = 0; i < lasti + 1; i++) {
1286		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1287			continue;
1288
1289		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1290		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1291
1292		alu.src[0].sel = ctx->temp_reg;
1293		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1294		if (i == lasti)
1295			alu.last = 1;
1296		r = r600_bytecode_add_alu(ctx->bc, &alu);
1297		if (r)
1298			return r;
1299	}
1300	return 0;
1301}
1302
1303static int tgsi_scs(struct r600_shader_ctx *ctx)
1304{
1305	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1306	struct r600_bytecode_alu alu;
1307	int i, r;
1308
1309	/* We'll only need the trig stuff if we are going to write to the
1310	 * X or Y components of the destination vector.
1311	 */
1312	if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1313		r = tgsi_setup_trig(ctx);
1314		if (r)
1315			return r;
1316	}
1317
1318	/* dst.x = COS */
1319	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1320		if (ctx->bc->chip_class == CAYMAN) {
1321			for (i = 0 ; i < 3; i++) {
1322				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1323				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1324				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1325
1326				if (i == 0)
1327					alu.dst.write = 1;
1328				else
1329					alu.dst.write = 0;
1330				alu.src[0].sel = ctx->temp_reg;
1331				alu.src[0].chan = 0;
1332				if (i == 2)
1333					alu.last = 1;
1334				r = r600_bytecode_add_alu(ctx->bc, &alu);
1335				if (r)
1336					return r;
1337			}
1338		} else {
1339			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1340			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1341			tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1342
1343			alu.src[0].sel = ctx->temp_reg;
1344			alu.src[0].chan = 0;
1345			alu.last = 1;
1346			r = r600_bytecode_add_alu(ctx->bc, &alu);
1347			if (r)
1348				return r;
1349		}
1350	}
1351
1352	/* dst.y = SIN */
1353	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1354		if (ctx->bc->chip_class == CAYMAN) {
1355			for (i = 0 ; i < 3; i++) {
1356				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1357				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1358				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1359				if (i == 1)
1360					alu.dst.write = 1;
1361				else
1362					alu.dst.write = 0;
1363				alu.src[0].sel = ctx->temp_reg;
1364				alu.src[0].chan = 0;
1365				if (i == 2)
1366					alu.last = 1;
1367				r = r600_bytecode_add_alu(ctx->bc, &alu);
1368				if (r)
1369					return r;
1370			}
1371		} else {
1372			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1373			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1374			tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1375
1376			alu.src[0].sel = ctx->temp_reg;
1377			alu.src[0].chan = 0;
1378			alu.last = 1;
1379			r = r600_bytecode_add_alu(ctx->bc, &alu);
1380			if (r)
1381				return r;
1382		}
1383	}
1384
1385	/* dst.z = 0.0; */
1386	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1387		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1388
1389		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1390
1391		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1392
1393		alu.src[0].sel = V_SQ_ALU_SRC_0;
1394		alu.src[0].chan = 0;
1395
1396		alu.last = 1;
1397
1398		r = r600_bytecode_add_alu(ctx->bc, &alu);
1399		if (r)
1400			return r;
1401	}
1402
1403	/* dst.w = 1.0; */
1404	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1405		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1406
1407		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1408
1409		tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1410
1411		alu.src[0].sel = V_SQ_ALU_SRC_1;
1412		alu.src[0].chan = 0;
1413
1414		alu.last = 1;
1415
1416		r = r600_bytecode_add_alu(ctx->bc, &alu);
1417		if (r)
1418			return r;
1419	}
1420
1421	return 0;
1422}
1423
1424static int tgsi_kill(struct r600_shader_ctx *ctx)
1425{
1426	struct r600_bytecode_alu alu;
1427	int i, r;
1428
1429	for (i = 0; i < 4; i++) {
1430		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1431		alu.inst = ctx->inst_info->r600_opcode;
1432
1433		alu.dst.chan = i;
1434
1435		alu.src[0].sel = V_SQ_ALU_SRC_0;
1436
1437		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1438			alu.src[1].sel = V_SQ_ALU_SRC_1;
1439			alu.src[1].neg = 1;
1440		} else {
1441			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1442		}
1443		if (i == 3) {
1444			alu.last = 1;
1445		}
1446		r = r600_bytecode_add_alu(ctx->bc, &alu);
1447		if (r)
1448			return r;
1449	}
1450
1451	/* kill must be last in ALU */
1452	ctx->bc->force_add_cf = 1;
1453	ctx->shader->uses_kill = TRUE;
1454	return 0;
1455}
1456
1457static int tgsi_lit(struct r600_shader_ctx *ctx)
1458{
1459	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1460	struct r600_bytecode_alu alu;
1461	int r;
1462
1463	/* tmp.x = max(src.y, 0.0) */
1464	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1465	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1466	r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
1467	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1468	alu.src[1].chan = 1;
1469
1470	alu.dst.sel = ctx->temp_reg;
1471	alu.dst.chan = 0;
1472	alu.dst.write = 1;
1473
1474	alu.last = 1;
1475	r = r600_bytecode_add_alu(ctx->bc, &alu);
1476	if (r)
1477		return r;
1478
1479	if (inst->Dst[0].Register.WriteMask & (1 << 2))
1480	{
1481		int chan;
1482		int sel;
1483		int i;
1484
1485		if (ctx->bc->chip_class == CAYMAN) {
1486			for (i = 0; i < 3; i++) {
1487				/* tmp.z = log(tmp.x) */
1488				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1489				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1490				alu.src[0].sel = ctx->temp_reg;
1491				alu.src[0].chan = 0;
1492				alu.dst.sel = ctx->temp_reg;
1493				alu.dst.chan = i;
1494				if (i == 2) {
1495					alu.dst.write = 1;
1496					alu.last = 1;
1497				} else
1498					alu.dst.write = 0;
1499
1500				r = r600_bytecode_add_alu(ctx->bc, &alu);
1501				if (r)
1502					return r;
1503			}
1504		} else {
1505			/* tmp.z = log(tmp.x) */
1506			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1507			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1508			alu.src[0].sel = ctx->temp_reg;
1509			alu.src[0].chan = 0;
1510			alu.dst.sel = ctx->temp_reg;
1511			alu.dst.chan = 2;
1512			alu.dst.write = 1;
1513			alu.last = 1;
1514			r = r600_bytecode_add_alu(ctx->bc, &alu);
1515			if (r)
1516				return r;
1517		}
1518
1519		chan = alu.dst.chan;
1520		sel = alu.dst.sel;
1521
1522		/* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
1523		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1524		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1525		alu.src[0].sel  = sel;
1526		alu.src[0].chan = chan;
1527		r600_bytecode_src(&alu.src[1], &ctx->src[0], 3);
1528		r600_bytecode_src(&alu.src[2], &ctx->src[0], 0);
1529		alu.dst.sel = ctx->temp_reg;
1530		alu.dst.chan = 0;
1531		alu.dst.write = 1;
1532		alu.is_op3 = 1;
1533		alu.last = 1;
1534		r = r600_bytecode_add_alu(ctx->bc, &alu);
1535		if (r)
1536			return r;
1537
1538		if (ctx->bc->chip_class == CAYMAN) {
1539			for (i = 0; i < 3; i++) {
1540				/* dst.z = exp(tmp.x) */
1541				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1542				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1543				alu.src[0].sel = ctx->temp_reg;
1544				alu.src[0].chan = 0;
1545				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1546				if (i == 2) {
1547					alu.dst.write = 1;
1548					alu.last = 1;
1549				} else
1550					alu.dst.write = 0;
1551				r = r600_bytecode_add_alu(ctx->bc, &alu);
1552				if (r)
1553					return r;
1554			}
1555		} else {
1556			/* dst.z = exp(tmp.x) */
1557			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1558			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1559			alu.src[0].sel = ctx->temp_reg;
1560			alu.src[0].chan = 0;
1561			tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1562			alu.last = 1;
1563			r = r600_bytecode_add_alu(ctx->bc, &alu);
1564			if (r)
1565				return r;
1566		}
1567	}
1568
1569	/* dst.x, <- 1.0  */
1570	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1571	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1572	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
1573	alu.src[0].chan = 0;
1574	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1575	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1576	r = r600_bytecode_add_alu(ctx->bc, &alu);
1577	if (r)
1578		return r;
1579
1580	/* dst.y = max(src.x, 0.0) */
1581	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1582	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1583	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1584	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1585	alu.src[1].chan = 0;
1586	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1587	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1588	r = r600_bytecode_add_alu(ctx->bc, &alu);
1589	if (r)
1590		return r;
1591
1592	/* dst.w, <- 1.0  */
1593	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1594	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1595	alu.src[0].sel  = V_SQ_ALU_SRC_1;
1596	alu.src[0].chan = 0;
1597	tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1598	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1599	alu.last = 1;
1600	r = r600_bytecode_add_alu(ctx->bc, &alu);
1601	if (r)
1602		return r;
1603
1604	return 0;
1605}
1606
1607static int tgsi_rsq(struct r600_shader_ctx *ctx)
1608{
1609	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1610	struct r600_bytecode_alu alu;
1611	int i, r;
1612
1613	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1614
1615	/* FIXME:
1616	 * For state trackers other than OpenGL, we'll want to use
1617	 * _RECIPSQRT_IEEE instead.
1618	 */
1619	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1620
1621	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1622		r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
1623		r600_bytecode_src_set_abs(&alu.src[i]);
1624	}
1625	alu.dst.sel = ctx->temp_reg;
1626	alu.dst.write = 1;
1627	alu.last = 1;
1628	r = r600_bytecode_add_alu(ctx->bc, &alu);
1629	if (r)
1630		return r;
1631	/* replicate result */
1632	return tgsi_helper_tempx_replicate(ctx);
1633}
1634
1635static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1636{
1637	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1638	struct r600_bytecode_alu alu;
1639	int i, r;
1640
1641	for (i = 0; i < 4; i++) {
1642		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1643		alu.src[0].sel = ctx->temp_reg;
1644		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1645		alu.dst.chan = i;
1646		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1647		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1648		if (i == 3)
1649			alu.last = 1;
1650		r = r600_bytecode_add_alu(ctx->bc, &alu);
1651		if (r)
1652			return r;
1653	}
1654	return 0;
1655}
1656
1657static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1658{
1659	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1660	struct r600_bytecode_alu alu;
1661	int i, r;
1662
1663	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1664	alu.inst = ctx->inst_info->r600_opcode;
1665	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1666		r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
1667	}
1668	alu.dst.sel = ctx->temp_reg;
1669	alu.dst.write = 1;
1670	alu.last = 1;
1671	r = r600_bytecode_add_alu(ctx->bc, &alu);
1672	if (r)
1673		return r;
1674	/* replicate result */
1675	return tgsi_helper_tempx_replicate(ctx);
1676}
1677
1678static int cayman_pow(struct r600_shader_ctx *ctx)
1679{
1680	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1681	int i, r;
1682	struct r600_bytecode_alu alu;
1683	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1684
1685	for (i = 0; i < 3; i++) {
1686		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1687		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1688		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1689		alu.dst.sel = ctx->temp_reg;
1690		alu.dst.chan = i;
1691		alu.dst.write = 1;
1692		if (i == 2)
1693			alu.last = 1;
1694		r = r600_bytecode_add_alu(ctx->bc, &alu);
1695		if (r)
1696			return r;
1697	}
1698
1699	/* b * LOG2(a) */
1700	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1701	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1702	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
1703	alu.src[1].sel = ctx->temp_reg;
1704	alu.dst.sel = ctx->temp_reg;
1705	alu.dst.write = 1;
1706	alu.last = 1;
1707	r = r600_bytecode_add_alu(ctx->bc, &alu);
1708	if (r)
1709		return r;
1710
1711	for (i = 0; i < last_slot; i++) {
1712		/* POW(a,b) = EXP2(b * LOG2(a))*/
1713		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1714		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1715		alu.src[0].sel = ctx->temp_reg;
1716
1717		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1718		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1719		if (i == last_slot - 1)
1720			alu.last = 1;
1721		r = r600_bytecode_add_alu(ctx->bc, &alu);
1722		if (r)
1723			return r;
1724	}
1725	return 0;
1726}
1727
1728static int tgsi_pow(struct r600_shader_ctx *ctx)
1729{
1730	struct r600_bytecode_alu alu;
1731	int r;
1732
1733	/* LOG2(a) */
1734	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1735	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1736	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1737	alu.dst.sel = ctx->temp_reg;
1738	alu.dst.write = 1;
1739	alu.last = 1;
1740	r = r600_bytecode_add_alu(ctx->bc, &alu);
1741	if (r)
1742		return r;
1743	/* b * LOG2(a) */
1744	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1745	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1746	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
1747	alu.src[1].sel = ctx->temp_reg;
1748	alu.dst.sel = ctx->temp_reg;
1749	alu.dst.write = 1;
1750	alu.last = 1;
1751	r = r600_bytecode_add_alu(ctx->bc, &alu);
1752	if (r)
1753		return r;
1754	/* POW(a,b) = EXP2(b * LOG2(a))*/
1755	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1756	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1757	alu.src[0].sel = ctx->temp_reg;
1758	alu.dst.sel = ctx->temp_reg;
1759	alu.dst.write = 1;
1760	alu.last = 1;
1761	r = r600_bytecode_add_alu(ctx->bc, &alu);
1762	if (r)
1763		return r;
1764	return tgsi_helper_tempx_replicate(ctx);
1765}
1766
1767static int tgsi_ssg(struct r600_shader_ctx *ctx)
1768{
1769	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1770	struct r600_bytecode_alu alu;
1771	int i, r;
1772
1773	/* tmp = (src > 0 ? 1 : src) */
1774	for (i = 0; i < 4; i++) {
1775		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1776		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1777		alu.is_op3 = 1;
1778
1779		alu.dst.sel = ctx->temp_reg;
1780		alu.dst.chan = i;
1781
1782		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
1783		alu.src[1].sel = V_SQ_ALU_SRC_1;
1784		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
1785
1786		if (i == 3)
1787			alu.last = 1;
1788		r = r600_bytecode_add_alu(ctx->bc, &alu);
1789		if (r)
1790			return r;
1791	}
1792
1793	/* dst = (-tmp > 0 ? -1 : tmp) */
1794	for (i = 0; i < 4; i++) {
1795		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1796		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1797		alu.is_op3 = 1;
1798		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1799
1800		alu.src[0].sel = ctx->temp_reg;
1801		alu.src[0].chan = i;
1802		alu.src[0].neg = 1;
1803
1804		alu.src[1].sel = V_SQ_ALU_SRC_1;
1805		alu.src[1].neg = 1;
1806
1807		alu.src[2].sel = ctx->temp_reg;
1808		alu.src[2].chan = i;
1809
1810		if (i == 3)
1811			alu.last = 1;
1812		r = r600_bytecode_add_alu(ctx->bc, &alu);
1813		if (r)
1814			return r;
1815	}
1816	return 0;
1817}
1818
1819static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1820{
1821	struct r600_bytecode_alu alu;
1822	int i, r;
1823
1824	for (i = 0; i < 4; i++) {
1825		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1826		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1827			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1828			alu.dst.chan = i;
1829		} else {
1830			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1831			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1832			alu.src[0].sel = ctx->temp_reg;
1833			alu.src[0].chan = i;
1834		}
1835		if (i == 3) {
1836			alu.last = 1;
1837		}
1838		r = r600_bytecode_add_alu(ctx->bc, &alu);
1839		if (r)
1840			return r;
1841	}
1842	return 0;
1843}
1844
1845static int tgsi_op3(struct r600_shader_ctx *ctx)
1846{
1847	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1848	struct r600_bytecode_alu alu;
1849	int i, j, r;
1850	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1851
1852	for (i = 0; i < lasti + 1; i++) {
1853		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1854			continue;
1855
1856		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1857		alu.inst = ctx->inst_info->r600_opcode;
1858		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1859			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1860		}
1861
1862		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1863		alu.dst.chan = i;
1864		alu.dst.write = 1;
1865		alu.is_op3 = 1;
1866		if (i == lasti) {
1867			alu.last = 1;
1868		}
1869		r = r600_bytecode_add_alu(ctx->bc, &alu);
1870		if (r)
1871			return r;
1872	}
1873	return 0;
1874}
1875
1876static int tgsi_dp(struct r600_shader_ctx *ctx)
1877{
1878	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1879	struct r600_bytecode_alu alu;
1880	int i, j, r;
1881
1882	for (i = 0; i < 4; i++) {
1883		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1884		alu.inst = ctx->inst_info->r600_opcode;
1885		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1886			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1887		}
1888
1889		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1890		alu.dst.chan = i;
1891		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1892		/* handle some special cases */
1893		switch (ctx->inst_info->tgsi_opcode) {
1894		case TGSI_OPCODE_DP2:
1895			if (i > 1) {
1896				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1897				alu.src[0].chan = alu.src[1].chan = 0;
1898			}
1899			break;
1900		case TGSI_OPCODE_DP3:
1901			if (i > 2) {
1902				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1903				alu.src[0].chan = alu.src[1].chan = 0;
1904			}
1905			break;
1906		case TGSI_OPCODE_DPH:
1907			if (i == 3) {
1908				alu.src[0].sel = V_SQ_ALU_SRC_1;
1909				alu.src[0].chan = 0;
1910				alu.src[0].neg = 0;
1911			}
1912			break;
1913		default:
1914			break;
1915		}
1916		if (i == 3) {
1917			alu.last = 1;
1918		}
1919		r = r600_bytecode_add_alu(ctx->bc, &alu);
1920		if (r)
1921			return r;
1922	}
1923	return 0;
1924}
1925
1926static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
1927						    unsigned index)
1928{
1929	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1930	return 	(inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
1931		inst->Src[index].Register.File != TGSI_FILE_INPUT) ||
1932		ctx->src[index].neg || ctx->src[index].abs;
1933}
1934
1935static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
1936					unsigned index)
1937{
1938	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1939	return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
1940}
1941
1942static int tgsi_tex(struct r600_shader_ctx *ctx)
1943{
1944	static float one_point_five = 1.5f;
1945	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1946	struct r600_bytecode_tex tex;
1947	struct r600_bytecode_alu alu;
1948	unsigned src_gpr;
1949	int r, i, j;
1950	int opcode;
1951	/* Texture fetch instructions can only use gprs as source.
1952	 * Also they cannot negate the source or take the absolute value */
1953	const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0);
1954	boolean src_loaded = FALSE;
1955	unsigned sampler_src_reg = 1;
1956	u8 offset_x = 0, offset_y = 0, offset_z = 0;
1957
1958	src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
1959
1960	if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
1961		/* get offset values */
1962		if (inst->Texture.NumOffsets) {
1963			assert(inst->Texture.NumOffsets == 1);
1964
1965			offset_x = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
1966			offset_y = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
1967			offset_z = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
1968		}
1969	} else if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
1970		/* TGSI moves the sampler to src reg 3 for TXD */
1971		sampler_src_reg = 3;
1972
1973		for (i = 1; i < 3; i++) {
1974			/* set gradients h/v */
1975			memset(&tex, 0, sizeof(struct r600_bytecode_tex));
1976			tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H :
1977				SQ_TEX_INST_SET_GRADIENTS_V;
1978			tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
1979			tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
1980
1981			if (tgsi_tex_src_requires_loading(ctx, i)) {
1982				tex.src_gpr = r600_get_temp(ctx);
1983				tex.src_sel_x = 0;
1984				tex.src_sel_y = 1;
1985				tex.src_sel_z = 2;
1986				tex.src_sel_w = 3;
1987
1988				for (j = 0; j < 4; j++) {
1989					memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1990					alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1991                                        r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
1992                                        alu.dst.sel = tex.src_gpr;
1993                                        alu.dst.chan = j;
1994                                        if (j == 3)
1995                                                alu.last = 1;
1996                                        alu.dst.write = 1;
1997                                        r = r600_bytecode_add_alu(ctx->bc, &alu);
1998                                        if (r)
1999                                                return r;
2000				}
2001
2002			} else {
2003				tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i);
2004				tex.src_sel_x = ctx->src[i].swizzle[0];
2005				tex.src_sel_y = ctx->src[i].swizzle[1];
2006				tex.src_sel_z = ctx->src[i].swizzle[2];
2007				tex.src_sel_w = ctx->src[i].swizzle[3];
2008				tex.src_rel = ctx->src[i].rel;
2009			}
2010			tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */
2011			tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
2012			if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
2013				tex.coord_type_x = 1;
2014				tex.coord_type_y = 1;
2015				tex.coord_type_z = 1;
2016				tex.coord_type_w = 1;
2017			}
2018			r = r600_bytecode_add_tex(ctx->bc, &tex);
2019			if (r)
2020				return r;
2021		}
2022	} else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
2023		int out_chan;
2024		/* Add perspective divide */
2025		if (ctx->bc->chip_class == CAYMAN) {
2026			out_chan = 2;
2027			for (i = 0; i < 3; i++) {
2028				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2029				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2030				r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
2031
2032				alu.dst.sel = ctx->temp_reg;
2033				alu.dst.chan = i;
2034				if (i == 2)
2035					alu.last = 1;
2036				if (out_chan == i)
2037					alu.dst.write = 1;
2038				r = r600_bytecode_add_alu(ctx->bc, &alu);
2039				if (r)
2040					return r;
2041			}
2042
2043		} else {
2044			out_chan = 3;
2045			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2046			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2047			r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
2048
2049			alu.dst.sel = ctx->temp_reg;
2050			alu.dst.chan = out_chan;
2051			alu.last = 1;
2052			alu.dst.write = 1;
2053			r = r600_bytecode_add_alu(ctx->bc, &alu);
2054			if (r)
2055				return r;
2056		}
2057
2058		for (i = 0; i < 3; i++) {
2059			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2060			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2061			alu.src[0].sel = ctx->temp_reg;
2062			alu.src[0].chan = out_chan;
2063			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2064			alu.dst.sel = ctx->temp_reg;
2065			alu.dst.chan = i;
2066			alu.dst.write = 1;
2067			r = r600_bytecode_add_alu(ctx->bc, &alu);
2068			if (r)
2069				return r;
2070		}
2071		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2072		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2073		alu.src[0].sel = V_SQ_ALU_SRC_1;
2074		alu.src[0].chan = 0;
2075		alu.dst.sel = ctx->temp_reg;
2076		alu.dst.chan = 3;
2077		alu.last = 1;
2078		alu.dst.write = 1;
2079		r = r600_bytecode_add_alu(ctx->bc, &alu);
2080		if (r)
2081			return r;
2082		src_loaded = TRUE;
2083		src_gpr = ctx->temp_reg;
2084	}
2085
2086	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
2087		static const unsigned src0_swizzle[] = {2, 2, 0, 1};
2088		static const unsigned src1_swizzle[] = {1, 0, 2, 2};
2089
2090		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
2091		for (i = 0; i < 4; i++) {
2092			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2093			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
2094			r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
2095			r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
2096			alu.dst.sel = ctx->temp_reg;
2097			alu.dst.chan = i;
2098			if (i == 3)
2099				alu.last = 1;
2100			alu.dst.write = 1;
2101			r = r600_bytecode_add_alu(ctx->bc, &alu);
2102			if (r)
2103				return r;
2104		}
2105
2106		/* tmp1.z = RCP_e(|tmp1.z|) */
2107		if (ctx->bc->chip_class == CAYMAN) {
2108			for (i = 0; i < 3; i++) {
2109				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2110				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2111				alu.src[0].sel = ctx->temp_reg;
2112				alu.src[0].chan = 2;
2113				alu.src[0].abs = 1;
2114				alu.dst.sel = ctx->temp_reg;
2115				alu.dst.chan = i;
2116				if (i == 2)
2117					alu.dst.write = 1;
2118				if (i == 2)
2119					alu.last = 1;
2120				r = r600_bytecode_add_alu(ctx->bc, &alu);
2121				if (r)
2122					return r;
2123			}
2124		} else {
2125			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2126			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2127			alu.src[0].sel = ctx->temp_reg;
2128			alu.src[0].chan = 2;
2129			alu.src[0].abs = 1;
2130			alu.dst.sel = ctx->temp_reg;
2131			alu.dst.chan = 2;
2132			alu.dst.write = 1;
2133			alu.last = 1;
2134			r = r600_bytecode_add_alu(ctx->bc, &alu);
2135			if (r)
2136				return r;
2137		}
2138
2139		/* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
2140		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
2141		 * muladd has no writemask, have to use another temp
2142		 */
2143		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2144		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2145		alu.is_op3 = 1;
2146
2147		alu.src[0].sel = ctx->temp_reg;
2148		alu.src[0].chan = 0;
2149		alu.src[1].sel = ctx->temp_reg;
2150		alu.src[1].chan = 2;
2151
2152		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
2153		alu.src[2].chan = 0;
2154		alu.src[2].value = *(uint32_t *)&one_point_five;
2155
2156		alu.dst.sel = ctx->temp_reg;
2157		alu.dst.chan = 0;
2158		alu.dst.write = 1;
2159
2160		r = r600_bytecode_add_alu(ctx->bc, &alu);
2161		if (r)
2162			return r;
2163
2164		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2165		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2166		alu.is_op3 = 1;
2167
2168		alu.src[0].sel = ctx->temp_reg;
2169		alu.src[0].chan = 1;
2170		alu.src[1].sel = ctx->temp_reg;
2171		alu.src[1].chan = 2;
2172
2173		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
2174		alu.src[2].chan = 0;
2175		alu.src[2].value = *(uint32_t *)&one_point_five;
2176
2177		alu.dst.sel = ctx->temp_reg;
2178		alu.dst.chan = 1;
2179		alu.dst.write = 1;
2180
2181		alu.last = 1;
2182		r = r600_bytecode_add_alu(ctx->bc, &alu);
2183		if (r)
2184			return r;
2185
2186		src_loaded = TRUE;
2187		src_gpr = ctx->temp_reg;
2188	}
2189
2190	if (src_requires_loading && !src_loaded) {
2191		for (i = 0; i < 4; i++) {
2192			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2193			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2194			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2195			alu.dst.sel = ctx->temp_reg;
2196			alu.dst.chan = i;
2197			if (i == 3)
2198				alu.last = 1;
2199			alu.dst.write = 1;
2200			r = r600_bytecode_add_alu(ctx->bc, &alu);
2201			if (r)
2202				return r;
2203		}
2204		src_loaded = TRUE;
2205		src_gpr = ctx->temp_reg;
2206	}
2207
2208	opcode = ctx->inst_info->r600_opcode;
2209	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
2210	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
2211	    inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
2212	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
2213	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) {
2214		switch (opcode) {
2215		case SQ_TEX_INST_SAMPLE:
2216			opcode = SQ_TEX_INST_SAMPLE_C;
2217			break;
2218		case SQ_TEX_INST_SAMPLE_L:
2219			opcode = SQ_TEX_INST_SAMPLE_C_L;
2220			break;
2221		case SQ_TEX_INST_SAMPLE_LB:
2222			opcode = SQ_TEX_INST_SAMPLE_C_LB;
2223			break;
2224		case SQ_TEX_INST_SAMPLE_G:
2225			opcode = SQ_TEX_INST_SAMPLE_C_G;
2226			break;
2227		}
2228	}
2229
2230	memset(&tex, 0, sizeof(struct r600_bytecode_tex));
2231	tex.inst = opcode;
2232
2233	tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
2234	tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
2235	tex.src_gpr = src_gpr;
2236	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
2237	tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
2238	tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
2239	tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
2240	tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
2241	if (src_loaded) {
2242		tex.src_sel_x = 0;
2243		tex.src_sel_y = 1;
2244		tex.src_sel_z = 2;
2245		tex.src_sel_w = 3;
2246	} else {
2247		tex.src_sel_x = ctx->src[0].swizzle[0];
2248		tex.src_sel_y = ctx->src[0].swizzle[1];
2249		tex.src_sel_z = ctx->src[0].swizzle[2];
2250		tex.src_sel_w = ctx->src[0].swizzle[3];
2251		tex.src_rel = ctx->src[0].rel;
2252	}
2253
2254	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
2255		tex.src_sel_x = 1;
2256		tex.src_sel_y = 0;
2257		tex.src_sel_z = 3;
2258		tex.src_sel_w = 1;
2259	}
2260
2261	if (inst->Texture.Texture != TGSI_TEXTURE_RECT &&
2262	    inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) {
2263		tex.coord_type_x = 1;
2264		tex.coord_type_y = 1;
2265	}
2266	tex.coord_type_z = 1;
2267	tex.coord_type_w = 1;
2268
2269	tex.offset_x = offset_x;
2270	tex.offset_y = offset_y;
2271	tex.offset_z = offset_z;
2272
2273	/* Put the depth for comparison in W.
2274	 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W.
2275	 * Some instructions expect the depth in Z. */
2276	if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
2277	     inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
2278	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
2279	     inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) &&
2280	    opcode != SQ_TEX_INST_SAMPLE_C_L &&
2281	    opcode != SQ_TEX_INST_SAMPLE_C_LB) {
2282		tex.src_sel_w = tex.src_sel_z;
2283	}
2284
2285	if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY ||
2286	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) {
2287		if (opcode == SQ_TEX_INST_SAMPLE_C_L ||
2288		    opcode == SQ_TEX_INST_SAMPLE_C_LB) {
2289			/* the array index is read from Y */
2290			tex.coord_type_y = 0;
2291		} else {
2292			/* the array index is read from Z */
2293			tex.coord_type_z = 0;
2294			tex.src_sel_z = tex.src_sel_y;
2295		}
2296	} else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
2297		   inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)
2298		/* the array index is read from Z */
2299		tex.coord_type_z = 0;
2300
2301	r = r600_bytecode_add_tex(ctx->bc, &tex);
2302	if (r)
2303		return r;
2304
2305	/* add shadow ambient support  - gallium doesn't do it yet */
2306	return 0;
2307}
2308
2309static int tgsi_lrp(struct r600_shader_ctx *ctx)
2310{
2311	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2312	struct r600_bytecode_alu alu;
2313	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2314	unsigned i;
2315	int r;
2316
2317	/* optimize if it's just an equal balance */
2318	if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
2319		for (i = 0; i < lasti + 1; i++) {
2320			if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2321				continue;
2322
2323			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2324			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2325			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2326			r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
2327			alu.omod = 3;
2328			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2329			alu.dst.chan = i;
2330			if (i == lasti) {
2331				alu.last = 1;
2332			}
2333			r = r600_bytecode_add_alu(ctx->bc, &alu);
2334			if (r)
2335				return r;
2336		}
2337		return 0;
2338	}
2339
2340	/* 1 - src0 */
2341	for (i = 0; i < lasti + 1; i++) {
2342		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2343			continue;
2344
2345		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2346		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2347		alu.src[0].sel = V_SQ_ALU_SRC_1;
2348		alu.src[0].chan = 0;
2349		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2350		r600_bytecode_src_toggle_neg(&alu.src[1]);
2351		alu.dst.sel = ctx->temp_reg;
2352		alu.dst.chan = i;
2353		if (i == lasti) {
2354			alu.last = 1;
2355		}
2356		alu.dst.write = 1;
2357		r = r600_bytecode_add_alu(ctx->bc, &alu);
2358		if (r)
2359			return r;
2360	}
2361
2362	/* (1 - src0) * src2 */
2363	for (i = 0; i < lasti + 1; i++) {
2364		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2365			continue;
2366
2367		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2368		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2369		alu.src[0].sel = ctx->temp_reg;
2370		alu.src[0].chan = i;
2371		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
2372		alu.dst.sel = ctx->temp_reg;
2373		alu.dst.chan = i;
2374		if (i == lasti) {
2375			alu.last = 1;
2376		}
2377		alu.dst.write = 1;
2378		r = r600_bytecode_add_alu(ctx->bc, &alu);
2379		if (r)
2380			return r;
2381	}
2382
2383	/* src0 * src1 + (1 - src0) * src2 */
2384	for (i = 0; i < lasti + 1; i++) {
2385		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2386			continue;
2387
2388		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2389		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2390		alu.is_op3 = 1;
2391		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2392		r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2393		alu.src[2].sel = ctx->temp_reg;
2394		alu.src[2].chan = i;
2395
2396		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2397		alu.dst.chan = i;
2398		if (i == lasti) {
2399			alu.last = 1;
2400		}
2401		r = r600_bytecode_add_alu(ctx->bc, &alu);
2402		if (r)
2403			return r;
2404	}
2405	return 0;
2406}
2407
2408static int tgsi_cmp(struct r600_shader_ctx *ctx)
2409{
2410	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2411	struct r600_bytecode_alu alu;
2412	int i, r;
2413	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2414
2415	for (i = 0; i < lasti + 1; i++) {
2416		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2417			continue;
2418
2419		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2420		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
2421		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2422		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
2423		r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
2424		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2425		alu.dst.chan = i;
2426		alu.dst.write = 1;
2427		alu.is_op3 = 1;
2428		if (i == lasti)
2429			alu.last = 1;
2430		r = r600_bytecode_add_alu(ctx->bc, &alu);
2431		if (r)
2432			return r;
2433	}
2434	return 0;
2435}
2436
2437static int tgsi_xpd(struct r600_shader_ctx *ctx)
2438{
2439	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2440	static const unsigned int src0_swizzle[] = {2, 0, 1};
2441	static const unsigned int src1_swizzle[] = {1, 2, 0};
2442	struct r600_bytecode_alu alu;
2443	uint32_t use_temp = 0;
2444	int i, r;
2445
2446	if (inst->Dst[0].Register.WriteMask != 0xf)
2447		use_temp = 1;
2448
2449	for (i = 0; i < 4; i++) {
2450		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2451		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2452		if (i < 3) {
2453			r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
2454			r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
2455		} else {
2456			alu.src[0].sel = V_SQ_ALU_SRC_0;
2457			alu.src[0].chan = i;
2458			alu.src[1].sel = V_SQ_ALU_SRC_0;
2459			alu.src[1].chan = i;
2460		}
2461
2462		alu.dst.sel = ctx->temp_reg;
2463		alu.dst.chan = i;
2464		alu.dst.write = 1;
2465
2466		if (i == 3)
2467			alu.last = 1;
2468		r = r600_bytecode_add_alu(ctx->bc, &alu);
2469		if (r)
2470			return r;
2471	}
2472
2473	for (i = 0; i < 4; i++) {
2474		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2475		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2476
2477		if (i < 3) {
2478			r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
2479			r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
2480		} else {
2481			alu.src[0].sel = V_SQ_ALU_SRC_0;
2482			alu.src[0].chan = i;
2483			alu.src[1].sel = V_SQ_ALU_SRC_0;
2484			alu.src[1].chan = i;
2485		}
2486
2487		alu.src[2].sel = ctx->temp_reg;
2488		alu.src[2].neg = 1;
2489		alu.src[2].chan = i;
2490
2491		if (use_temp)
2492			alu.dst.sel = ctx->temp_reg;
2493		else
2494			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2495		alu.dst.chan = i;
2496		alu.dst.write = 1;
2497		alu.is_op3 = 1;
2498		if (i == 3)
2499			alu.last = 1;
2500		r = r600_bytecode_add_alu(ctx->bc, &alu);
2501		if (r)
2502			return r;
2503	}
2504	if (use_temp)
2505		return tgsi_helper_copy(ctx, inst);
2506	return 0;
2507}
2508
2509static int tgsi_exp(struct r600_shader_ctx *ctx)
2510{
2511	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2512	struct r600_bytecode_alu alu;
2513	int r;
2514	int i;
2515
2516	/* result.x = 2^floor(src); */
2517	if (inst->Dst[0].Register.WriteMask & 1) {
2518		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2519
2520		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2521		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2522
2523		alu.dst.sel = ctx->temp_reg;
2524		alu.dst.chan = 0;
2525		alu.dst.write = 1;
2526		alu.last = 1;
2527		r = r600_bytecode_add_alu(ctx->bc, &alu);
2528		if (r)
2529			return r;
2530
2531		if (ctx->bc->chip_class == CAYMAN) {
2532			for (i = 0; i < 3; i++) {
2533				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2534				alu.src[0].sel = ctx->temp_reg;
2535				alu.src[0].chan = 0;
2536
2537				alu.dst.sel = ctx->temp_reg;
2538				alu.dst.chan = i;
2539				if (i == 0)
2540					alu.dst.write = 1;
2541				if (i == 2)
2542					alu.last = 1;
2543				r = r600_bytecode_add_alu(ctx->bc, &alu);
2544				if (r)
2545					return r;
2546			}
2547		} else {
2548			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2549			alu.src[0].sel = ctx->temp_reg;
2550			alu.src[0].chan = 0;
2551
2552			alu.dst.sel = ctx->temp_reg;
2553			alu.dst.chan = 0;
2554			alu.dst.write = 1;
2555			alu.last = 1;
2556			r = r600_bytecode_add_alu(ctx->bc, &alu);
2557			if (r)
2558				return r;
2559		}
2560	}
2561
2562	/* result.y = tmp - floor(tmp); */
2563	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2564		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2565
2566		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2567		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2568
2569		alu.dst.sel = ctx->temp_reg;
2570#if 0
2571		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2572		if (r)
2573			return r;
2574#endif
2575		alu.dst.write = 1;
2576		alu.dst.chan = 1;
2577
2578		alu.last = 1;
2579
2580		r = r600_bytecode_add_alu(ctx->bc, &alu);
2581		if (r)
2582			return r;
2583	}
2584
2585	/* result.z = RoughApprox2ToX(tmp);*/
2586	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2587		if (ctx->bc->chip_class == CAYMAN) {
2588			for (i = 0; i < 3; i++) {
2589				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2590				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2591				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2592
2593				alu.dst.sel = ctx->temp_reg;
2594				alu.dst.chan = i;
2595				if (i == 2) {
2596					alu.dst.write = 1;
2597					alu.last = 1;
2598				}
2599
2600				r = r600_bytecode_add_alu(ctx->bc, &alu);
2601				if (r)
2602					return r;
2603			}
2604		} else {
2605			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2606			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2607			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2608
2609			alu.dst.sel = ctx->temp_reg;
2610			alu.dst.write = 1;
2611			alu.dst.chan = 2;
2612
2613			alu.last = 1;
2614
2615			r = r600_bytecode_add_alu(ctx->bc, &alu);
2616			if (r)
2617				return r;
2618		}
2619	}
2620
2621	/* result.w = 1.0;*/
2622	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2623		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2624
2625		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2626		alu.src[0].sel = V_SQ_ALU_SRC_1;
2627		alu.src[0].chan = 0;
2628
2629		alu.dst.sel = ctx->temp_reg;
2630		alu.dst.chan = 3;
2631		alu.dst.write = 1;
2632		alu.last = 1;
2633		r = r600_bytecode_add_alu(ctx->bc, &alu);
2634		if (r)
2635			return r;
2636	}
2637	return tgsi_helper_copy(ctx, inst);
2638}
2639
2640static int tgsi_log(struct r600_shader_ctx *ctx)
2641{
2642	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2643	struct r600_bytecode_alu alu;
2644	int r;
2645	int i;
2646
2647	/* result.x = floor(log2(|src|)); */
2648	if (inst->Dst[0].Register.WriteMask & 1) {
2649		if (ctx->bc->chip_class == CAYMAN) {
2650			for (i = 0; i < 3; i++) {
2651				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2652
2653				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2654				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2655				r600_bytecode_src_set_abs(&alu.src[0]);
2656
2657				alu.dst.sel = ctx->temp_reg;
2658				alu.dst.chan = i;
2659				if (i == 0)
2660					alu.dst.write = 1;
2661				if (i == 2)
2662					alu.last = 1;
2663				r = r600_bytecode_add_alu(ctx->bc, &alu);
2664				if (r)
2665					return r;
2666			}
2667
2668		} else {
2669			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2670
2671			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2672			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2673			r600_bytecode_src_set_abs(&alu.src[0]);
2674
2675			alu.dst.sel = ctx->temp_reg;
2676			alu.dst.chan = 0;
2677			alu.dst.write = 1;
2678			alu.last = 1;
2679			r = r600_bytecode_add_alu(ctx->bc, &alu);
2680			if (r)
2681				return r;
2682		}
2683
2684		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2685		alu.src[0].sel = ctx->temp_reg;
2686		alu.src[0].chan = 0;
2687
2688		alu.dst.sel = ctx->temp_reg;
2689		alu.dst.chan = 0;
2690		alu.dst.write = 1;
2691		alu.last = 1;
2692
2693		r = r600_bytecode_add_alu(ctx->bc, &alu);
2694		if (r)
2695			return r;
2696	}
2697
2698	/* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
2699	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2700
2701		if (ctx->bc->chip_class == CAYMAN) {
2702			for (i = 0; i < 3; i++) {
2703				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2704
2705				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2706				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2707				r600_bytecode_src_set_abs(&alu.src[0]);
2708
2709				alu.dst.sel = ctx->temp_reg;
2710				alu.dst.chan = i;
2711				if (i == 1)
2712					alu.dst.write = 1;
2713				if (i == 2)
2714					alu.last = 1;
2715
2716				r = r600_bytecode_add_alu(ctx->bc, &alu);
2717				if (r)
2718					return r;
2719			}
2720		} else {
2721			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2722
2723			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2724			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2725			r600_bytecode_src_set_abs(&alu.src[0]);
2726
2727			alu.dst.sel = ctx->temp_reg;
2728			alu.dst.chan = 1;
2729			alu.dst.write = 1;
2730			alu.last = 1;
2731
2732			r = r600_bytecode_add_alu(ctx->bc, &alu);
2733			if (r)
2734				return r;
2735		}
2736
2737		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2738
2739		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2740		alu.src[0].sel = ctx->temp_reg;
2741		alu.src[0].chan = 1;
2742
2743		alu.dst.sel = ctx->temp_reg;
2744		alu.dst.chan = 1;
2745		alu.dst.write = 1;
2746		alu.last = 1;
2747
2748		r = r600_bytecode_add_alu(ctx->bc, &alu);
2749		if (r)
2750			return r;
2751
2752		if (ctx->bc->chip_class == CAYMAN) {
2753			for (i = 0; i < 3; i++) {
2754				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2755				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2756				alu.src[0].sel = ctx->temp_reg;
2757				alu.src[0].chan = 1;
2758
2759				alu.dst.sel = ctx->temp_reg;
2760				alu.dst.chan = i;
2761				if (i == 1)
2762					alu.dst.write = 1;
2763				if (i == 2)
2764					alu.last = 1;
2765
2766				r = r600_bytecode_add_alu(ctx->bc, &alu);
2767				if (r)
2768					return r;
2769			}
2770		} else {
2771			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2772			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2773			alu.src[0].sel = ctx->temp_reg;
2774			alu.src[0].chan = 1;
2775
2776			alu.dst.sel = ctx->temp_reg;
2777			alu.dst.chan = 1;
2778			alu.dst.write = 1;
2779			alu.last = 1;
2780
2781			r = r600_bytecode_add_alu(ctx->bc, &alu);
2782			if (r)
2783				return r;
2784		}
2785
2786		if (ctx->bc->chip_class == CAYMAN) {
2787			for (i = 0; i < 3; i++) {
2788				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2789				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2790				alu.src[0].sel = ctx->temp_reg;
2791				alu.src[0].chan = 1;
2792
2793				alu.dst.sel = ctx->temp_reg;
2794				alu.dst.chan = i;
2795				if (i == 1)
2796					alu.dst.write = 1;
2797				if (i == 2)
2798					alu.last = 1;
2799
2800				r = r600_bytecode_add_alu(ctx->bc, &alu);
2801				if (r)
2802					return r;
2803			}
2804		} else {
2805			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2806			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2807			alu.src[0].sel = ctx->temp_reg;
2808			alu.src[0].chan = 1;
2809
2810			alu.dst.sel = ctx->temp_reg;
2811			alu.dst.chan = 1;
2812			alu.dst.write = 1;
2813			alu.last = 1;
2814
2815			r = r600_bytecode_add_alu(ctx->bc, &alu);
2816			if (r)
2817				return r;
2818		}
2819
2820		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2821
2822		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2823
2824		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2825		r600_bytecode_src_set_abs(&alu.src[0]);
2826
2827		alu.src[1].sel = ctx->temp_reg;
2828		alu.src[1].chan = 1;
2829
2830		alu.dst.sel = ctx->temp_reg;
2831		alu.dst.chan = 1;
2832		alu.dst.write = 1;
2833		alu.last = 1;
2834
2835		r = r600_bytecode_add_alu(ctx->bc, &alu);
2836		if (r)
2837			return r;
2838	}
2839
2840	/* result.z = log2(|src|);*/
2841	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2842		if (ctx->bc->chip_class == CAYMAN) {
2843			for (i = 0; i < 3; i++) {
2844				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2845
2846				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2847				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2848				r600_bytecode_src_set_abs(&alu.src[0]);
2849
2850				alu.dst.sel = ctx->temp_reg;
2851				if (i == 2)
2852					alu.dst.write = 1;
2853				alu.dst.chan = i;
2854				if (i == 2)
2855					alu.last = 1;
2856
2857				r = r600_bytecode_add_alu(ctx->bc, &alu);
2858				if (r)
2859					return r;
2860			}
2861		} else {
2862			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2863
2864			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2865			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2866			r600_bytecode_src_set_abs(&alu.src[0]);
2867
2868			alu.dst.sel = ctx->temp_reg;
2869			alu.dst.write = 1;
2870			alu.dst.chan = 2;
2871			alu.last = 1;
2872
2873			r = r600_bytecode_add_alu(ctx->bc, &alu);
2874			if (r)
2875				return r;
2876		}
2877	}
2878
2879	/* result.w = 1.0; */
2880	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2881		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2882
2883		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2884		alu.src[0].sel = V_SQ_ALU_SRC_1;
2885		alu.src[0].chan = 0;
2886
2887		alu.dst.sel = ctx->temp_reg;
2888		alu.dst.chan = 3;
2889		alu.dst.write = 1;
2890		alu.last = 1;
2891
2892		r = r600_bytecode_add_alu(ctx->bc, &alu);
2893		if (r)
2894			return r;
2895	}
2896
2897	return tgsi_helper_copy(ctx, inst);
2898}
2899
2900static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2901{
2902	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2903	struct r600_bytecode_alu alu;
2904	int r;
2905
2906	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2907
2908	switch (inst->Instruction.Opcode) {
2909	case TGSI_OPCODE_ARL:
2910		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2911		break;
2912	case TGSI_OPCODE_ARR:
2913		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2914		break;
2915	default:
2916		assert(0);
2917		return -1;
2918	}
2919
2920	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2921	alu.last = 1;
2922	alu.dst.sel = ctx->ar_reg;
2923	alu.dst.write = 1;
2924	r = r600_bytecode_add_alu(ctx->bc, &alu);
2925	if (r)
2926		return r;
2927
2928	/* TODO: Note that the MOVA can be avoided if we never use AR for
2929	 * indexing non-CB registers in the current ALU clause. Similarly, we
2930	 * need to load AR from ar_reg again if we started a new clause
2931	 * between ARL and AR usage. The easy way to do that is to remove
2932	 * the MOVA here, and load it for the first AR access after ar_reg
2933	 * has been modified in each clause. */
2934	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2935	alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2936	alu.src[0].sel = ctx->ar_reg;
2937	alu.src[0].chan = 0;
2938	alu.last = 1;
2939	r = r600_bytecode_add_alu(ctx->bc, &alu);
2940	if (r)
2941		return r;
2942	return 0;
2943}
2944static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2945{
2946	/* TODO from r600c, ar values don't persist between clauses */
2947	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2948	struct r600_bytecode_alu alu;
2949	int r;
2950
2951	switch (inst->Instruction.Opcode) {
2952	case TGSI_OPCODE_ARL:
2953		memset(&alu, 0, sizeof(alu));
2954		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
2955		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2956		alu.dst.sel = ctx->ar_reg;
2957		alu.dst.write = 1;
2958		alu.last = 1;
2959
2960		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2961			return r;
2962
2963		memset(&alu, 0, sizeof(alu));
2964		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2965		alu.src[0].sel = ctx->ar_reg;
2966		alu.dst.sel = ctx->ar_reg;
2967		alu.dst.write = 1;
2968		alu.last = 1;
2969
2970		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2971			return r;
2972		break;
2973	case TGSI_OPCODE_ARR:
2974		memset(&alu, 0, sizeof(alu));
2975		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2976		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2977		alu.dst.sel = ctx->ar_reg;
2978		alu.dst.write = 1;
2979		alu.last = 1;
2980
2981		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2982			return r;
2983		break;
2984	default:
2985		assert(0);
2986		return -1;
2987	}
2988
2989	memset(&alu, 0, sizeof(alu));
2990	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2991	alu.src[0].sel = ctx->ar_reg;
2992	alu.last = 1;
2993
2994	r = r600_bytecode_add_alu(ctx->bc, &alu);
2995	if (r)
2996		return r;
2997	ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2998	return 0;
2999}
3000
3001static int tgsi_opdst(struct r600_shader_ctx *ctx)
3002{
3003	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3004	struct r600_bytecode_alu alu;
3005	int i, r = 0;
3006
3007	for (i = 0; i < 4; i++) {
3008		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3009
3010		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
3011		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3012
3013		if (i == 0 || i == 3) {
3014			alu.src[0].sel = V_SQ_ALU_SRC_1;
3015		} else {
3016			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3017		}
3018
3019		if (i == 0 || i == 2) {
3020			alu.src[1].sel = V_SQ_ALU_SRC_1;
3021		} else {
3022			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3023		}
3024		if (i == 3)
3025			alu.last = 1;
3026		r = r600_bytecode_add_alu(ctx->bc, &alu);
3027		if (r)
3028			return r;
3029	}
3030	return 0;
3031}
3032
3033static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
3034{
3035	struct r600_bytecode_alu alu;
3036	int r;
3037
3038	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3039	alu.inst = opcode;
3040	alu.predicate = 1;
3041
3042	alu.dst.sel = ctx->temp_reg;
3043	alu.dst.write = 1;
3044	alu.dst.chan = 0;
3045
3046	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3047	alu.src[1].sel = V_SQ_ALU_SRC_0;
3048	alu.src[1].chan = 0;
3049
3050	alu.last = 1;
3051
3052	r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
3053	if (r)
3054		return r;
3055	return 0;
3056}
3057
3058static int pops(struct r600_shader_ctx *ctx, int pops)
3059{
3060	unsigned force_pop = ctx->bc->force_add_cf;
3061
3062	if (!force_pop) {
3063		int alu_pop = 3;
3064		if (ctx->bc->cf_last) {
3065			if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
3066				alu_pop = 0;
3067			else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
3068				alu_pop = 1;
3069		}
3070		alu_pop += pops;
3071		if (alu_pop == 1) {
3072			ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
3073			ctx->bc->force_add_cf = 1;
3074		} else if (alu_pop == 2) {
3075			ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
3076			ctx->bc->force_add_cf = 1;
3077		} else {
3078			force_pop = 1;
3079		}
3080	}
3081
3082	if (force_pop) {
3083		r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
3084		ctx->bc->cf_last->pop_count = pops;
3085		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
3086	}
3087
3088	return 0;
3089}
3090
3091static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
3092{
3093	switch(reason) {
3094	case FC_PUSH_VPM:
3095		ctx->bc->callstack[ctx->bc->call_sp].current--;
3096		break;
3097	case FC_PUSH_WQM:
3098	case FC_LOOP:
3099		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
3100		break;
3101	case FC_REP:
3102		/* TOODO : for 16 vp asic should -= 2; */
3103		ctx->bc->callstack[ctx->bc->call_sp].current --;
3104		break;
3105	}
3106}
3107
3108static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
3109{
3110	if (check_max_only) {
3111		int diff;
3112		switch (reason) {
3113		case FC_PUSH_VPM:
3114			diff = 1;
3115			break;
3116		case FC_PUSH_WQM:
3117			diff = 4;
3118			break;
3119		default:
3120			assert(0);
3121			diff = 0;
3122		}
3123		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
3124		    ctx->bc->callstack[ctx->bc->call_sp].max) {
3125			ctx->bc->callstack[ctx->bc->call_sp].max =
3126				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
3127		}
3128		return;
3129	}
3130	switch (reason) {
3131	case FC_PUSH_VPM:
3132		ctx->bc->callstack[ctx->bc->call_sp].current++;
3133		break;
3134	case FC_PUSH_WQM:
3135	case FC_LOOP:
3136		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
3137		break;
3138	case FC_REP:
3139		ctx->bc->callstack[ctx->bc->call_sp].current++;
3140		break;
3141	}
3142
3143	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
3144	    ctx->bc->callstack[ctx->bc->call_sp].max) {
3145		ctx->bc->callstack[ctx->bc->call_sp].max =
3146			ctx->bc->callstack[ctx->bc->call_sp].current;
3147	}
3148}
3149
3150static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
3151{
3152	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
3153
3154	sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid,
3155						sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1));
3156	sp->mid[sp->num_mid] = ctx->bc->cf_last;
3157	sp->num_mid++;
3158}
3159
3160static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
3161{
3162	ctx->bc->fc_sp++;
3163	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
3164	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
3165}
3166
3167static void fc_poplevel(struct r600_shader_ctx *ctx)
3168{
3169	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
3170	if (sp->mid) {
3171		free(sp->mid);
3172		sp->mid = NULL;
3173	}
3174	sp->num_mid = 0;
3175	sp->start = NULL;
3176	sp->type = 0;
3177	ctx->bc->fc_sp--;
3178}
3179
/*
 * Everything up to the matching #endif is compiled out.
 *
 * It holds unfinished helpers for RETURN and flag-based loop exit:
 * emit_setret_in_loop_flag() and emit_testflag() are empty stubs, and
 * emit_jump_to_offset() does not compute its jump target yet (see the TODO
 * in its body). Unlike the live code in this file, these helpers pass the
 * CF opcodes to r600_bytecode_add_cfinst() without wrapping them in
 * CTX_INST().
 */
3180#if 0
3181static int emit_return(struct r600_shader_ctx *ctx)
3182{
3183	r600_bytecode_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
3184	return 0;
3185}
3186
3187static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
3188{
3189
3190	r600_bytecode_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
3191	ctx->bc->cf_last->pop_count = pops;
3192	/* TODO work out offset */
3193	return 0;
3194}
3195
3196static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
3197{
3198	return 0;
3199}
3200
3201static void emit_testflag(struct r600_shader_ctx *ctx)
3202{
3203
3204}
3205
3206static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
3207{
3208	emit_testflag(ctx);
3209	emit_jump_to_offset(ctx, 1, 4);
3210	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
3211	pops(ctx, ifidx + 1);
3212	emit_return(ctx);
3213}
3214
3215static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
3216{
3217	emit_testflag(ctx);
3218
3219	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
3220	ctx->bc->cf_last->pop_count = 1;
3221
3222	fc_set_mid(ctx, fc_sp);
3223
3224	pops(ctx, 1);
3225}
3226#endif
3227
3228static int tgsi_if(struct r600_shader_ctx *ctx)
3229{
3230	emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
3231
3232	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
3233
3234	fc_pushlevel(ctx, FC_IF);
3235
3236	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
3237	return 0;
3238}
3239
3240static int tgsi_else(struct r600_shader_ctx *ctx)
3241{
3242	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
3243	ctx->bc->cf_last->pop_count = 1;
3244
3245	fc_set_mid(ctx, ctx->bc->fc_sp);
3246	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
3247	return 0;
3248}
3249
3250static int tgsi_endif(struct r600_shader_ctx *ctx)
3251{
3252	pops(ctx, 1);
3253	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
3254		R600_ERR("if/endif unbalanced in shader\n");
3255		return -1;
3256	}
3257
3258	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
3259		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3260		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
3261	} else {
3262		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
3263	}
3264	fc_poplevel(ctx);
3265
3266	callstack_decrease_current(ctx, FC_PUSH_VPM);
3267	return 0;
3268}
3269
3270static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
3271{
3272	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
3273
3274	fc_pushlevel(ctx, FC_LOOP);
3275
3276	/* check stack depth */
3277	callstack_check_depth(ctx, FC_LOOP, 0);
3278	return 0;
3279}
3280
3281static int tgsi_endloop(struct r600_shader_ctx *ctx)
3282{
3283	int i;
3284
3285	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
3286
3287	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
3288		R600_ERR("loop/endloop in shader code are not paired.\n");
3289		return -EINVAL;
3290	}
3291
3292	/* fixup loop pointers - from r600isa
3293	   LOOP END points to CF after LOOP START,
3294	   LOOP START point to CF after LOOP END
3295	   BRK/CONT point to LOOP END CF
3296	*/
3297	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
3298
3299	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3300
3301	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
3302		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
3303	}
3304	/* TODO add LOOPRET support */
3305	fc_poplevel(ctx);
3306	callstack_decrease_current(ctx, FC_LOOP);
3307	return 0;
3308}
3309
3310static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
3311{
3312	unsigned int fscp;
3313
3314	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
3315	{
3316		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
3317			break;
3318	}
3319
3320	if (fscp == 0) {
3321		R600_ERR("Break not inside loop/endloop pair\n");
3322		return -EINVAL;
3323	}
3324
3325	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
3326	ctx->bc->cf_last->pop_count = 1;
3327
3328	fc_set_mid(ctx, fscp);
3329
3330	pops(ctx, 1);
3331	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
3332	return 0;
3333}
3334
3335static int tgsi_umad(struct r600_shader_ctx *ctx)
3336{
3337	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3338	struct r600_bytecode_alu alu;
3339	int i, j, r;
3340	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
3341
3342	/* src0 * src1 */
3343	for (i = 0; i < lasti + 1; i++) {
3344		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3345			continue;
3346
3347		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3348
3349		alu.dst.chan = i;
3350		alu.dst.sel = ctx->temp_reg;
3351		alu.dst.write = 1;
3352
3353		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT;
3354		for (j = 0; j < 2; j++) {
3355		        r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3356		}
3357
3358		if (i == lasti) {
3359			alu.last = 1;
3360		}
3361		r = r600_bytecode_add_alu(ctx->bc, &alu);
3362		if (r)
3363			return r;
3364	}
3365
3366
3367	for (i = 0; i < lasti + 1; i++) {
3368		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3369			continue;
3370
3371		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3372		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3373
3374		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT;
3375
3376		alu.src[0].sel = ctx->temp_reg;
3377		alu.src[0].chan = i;
3378
3379		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
3380		if (i == lasti) {
3381			alu.last = 1;
3382		}
3383		r = r600_bytecode_add_alu(ctx->bc, &alu);
3384		if (r)
3385			return r;
3386	}
3387	return 0;
3388}
3389
/*
 * TGSI opcode translation table built from the V_SQ_* opcode defines.
 * NOTE(review): presumably the table used for R600/R700-class chips; the
 * code that selects a table is outside this part of the file - confirm.
 *
 * Entries are listed in ascending TGSI opcode order; unassigned opcode
 * numbers are kept as explicit "gap" entries with a bare number so that the
 * sequence stays contiguous.
 *
 * Columns of each entry:
 *   1. the TGSI opcode (or the bare number of a gap);
 *   2. a flag that is 1 only for MAD, the one entry using an OP3 hardware
 *      opcode (presumably an "is three-operand" marker - confirm against
 *      the struct declaration);
 *   3. the hardware opcode handed to the handler (an ALU, CF or texture
 *      opcode depending on the handler; NOP/0 where the handler picks the
 *      opcodes itself or the opcode is unsupported);
 *   4. the handler that emits the bytecode; tgsi_unsupported marks opcodes
 *      that are not translated by this table.
 */
3390static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
3391	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3392	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3393	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3394
3395	/* FIXME:
3396	 * For state trackers other than OpenGL, we'll want to use
3397	 * _RECIP_IEEE instead.
3398	 */
3399	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
3400
3401	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
3402	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3403	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3404	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3405	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3406	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3407	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3408	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3409	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3410	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3411	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3412	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3413	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3414	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3415	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3416	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3417	/* gap */
3418	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3419	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3420	/* gap */
3421	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3422	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3423	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3424	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3425	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3426	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
3427	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3428	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3429	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3430	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3431	/* gap */
3432	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3433	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3434	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3435	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3436	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3437	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3438	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3439	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3440	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3441	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3442	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3443	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3444	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3445	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3446	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3447	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3448	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3449	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3450	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3451	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3452	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3453	{TGSI_OPCODE_TXD,	0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3454	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3455	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3456	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3457	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3458	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3459	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3460	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3461	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3462	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3463	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3464	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3465	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3466	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3467	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3468	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
3469	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3470	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3471	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3472	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3473	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3474	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3475	/* gap */
3476	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3477	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3478	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3479	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3480	/* gap */
3481	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3482	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3483	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3484	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3485	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3486	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3487	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
3488	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3489	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3490	/* gap */
3491	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3492	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3493	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3494	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3495	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
3496	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3497	{TGSI_OPCODE_TXF,	0, SQ_TEX_INST_LD, tgsi_tex},
3498	{TGSI_OPCODE_TXQ,	0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
3499	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3500	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3501	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3502	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3503	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3504	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3505	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3506	/* gap */
3507	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3508	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3509	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3510	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3511	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3512	/* gap */
3513	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3514	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3515	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3516	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3517	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3518	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3519	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3520	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3521	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3522	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3523	/* gap */
3524	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3525	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3526	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3527	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3528	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3529	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3530	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3531	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3532	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3533	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3534	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3535	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3536	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3537	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3538	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3539	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3540	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3541	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3542	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3543	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3544	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3545	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3546	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3547	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3548	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3549	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3550	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3551	{TGSI_OPCODE_LOAD,      0, 0, tgsi_unsupported},
3552	{TGSI_OPCODE_LOAD_MS,   0, 0, tgsi_unsupported},
3553	{TGSI_OPCODE_SAMPLE,    0, 0, tgsi_unsupported},
3554	{TGSI_OPCODE_SAMPLE_B,  0, 0, tgsi_unsupported},
3555	{TGSI_OPCODE_SAMPLE_C,  0, 0, tgsi_unsupported},
3556	{TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
3557	{TGSI_OPCODE_SAMPLE_D,  0, 0, tgsi_unsupported},
3558	{TGSI_OPCODE_SAMPLE_L,  0, 0, tgsi_unsupported},
3559	{TGSI_OPCODE_GATHER4,   0, 0, tgsi_unsupported},
3560	{TGSI_OPCODE_RESINFO,	0, 0, tgsi_unsupported},
3561	{TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
3562	{TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
3563	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3564};
3565
3566static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
3567	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3568	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3569	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3570	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
3571	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq},
3572	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3573	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3574	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3575	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3576	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3577	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3578	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3579	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3580	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3581	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3582	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3583	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3584	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3585	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3586	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3587	/* gap */
3588	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3589	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3590	/* gap */
3591	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3592	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3593	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3594	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3595	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3596	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
3597	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3598	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3599	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3600	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3601	/* gap */
3602	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3603	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3604	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3605	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3606	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3607	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3608	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3609	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3610	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3611	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3612	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3613	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3614	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3615	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3616	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3617	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3618	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3619	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3620	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3621	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3622	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3623	{TGSI_OPCODE_TXD,	0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3624	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3625	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3626	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3627	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3628	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3629	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3630	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3631	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3632	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3633	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3634	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3635	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3636	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3637	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3638	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
3639	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3640	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3641	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3642	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3643	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3644	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3645	/* gap */
3646	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3647	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3648	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3649	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3650	/* gap */
3651	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3652	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3653	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3654	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3655	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3656	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_int_to_flt},
3657	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
3658	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3659	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3660	/* gap */
3661	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3662	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
3663	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
3664	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3665	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
3666	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3667	{TGSI_OPCODE_TXF,	0, SQ_TEX_INST_LD, tgsi_tex},
3668	{TGSI_OPCODE_TXQ,	0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
3669	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3670	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3671	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3672	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3673	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3674	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3675	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3676	/* gap */
3677	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3678	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3679	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3680	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3681	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3682	/* gap */
3683	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3684	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3685	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3686	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3687	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3688	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3689	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3690	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3691	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3692	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3693	/* gap */
3694	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3695	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2},
3696	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3697	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
3698	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
3699	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
3700	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
3701	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3702	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
3703	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3704	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2},
3705	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
3706	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3707	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
3708	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
3709	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
3710	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3711	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2},
3712	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
3713	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
3714	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3715	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
3716	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
3717	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3718	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3719	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3720	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3721	{TGSI_OPCODE_LOAD,      0, 0, tgsi_unsupported},
3722	{TGSI_OPCODE_LOAD_MS,   0, 0, tgsi_unsupported},
3723	{TGSI_OPCODE_SAMPLE,    0, 0, tgsi_unsupported},
3724	{TGSI_OPCODE_SAMPLE_B,  0, 0, tgsi_unsupported},
3725	{TGSI_OPCODE_SAMPLE_C,  0, 0, tgsi_unsupported},
3726	{TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
3727	{TGSI_OPCODE_SAMPLE_D,  0, 0, tgsi_unsupported},
3728	{TGSI_OPCODE_SAMPLE_L,  0, 0, tgsi_unsupported},
3729	{TGSI_OPCODE_GATHER4,   0, 0, tgsi_unsupported},
3730	{TGSI_OPCODE_RESINFO,	0, 0, tgsi_unsupported},
3731	{TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
3732	{TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
3733	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3734};
3735
/*
 * TGSI -> hardware instruction translation table, "cm" variant.
 * NOTE(review): "cm" presumably stands for Cayman, given the cayman_*
 * handlers referenced below and the CAYMAN notes at the top of this file --
 * confirm at the place where this table is selected.
 *
 * Each initializer has four positional fields (the struct's field names are
 * not visible from here):
 *   1. the TGSI opcode id;
 *   2. a flag that is 1 only for TGSI_OPCODE_MAD, the single entry whose
 *      hardware opcode is an OP3 (three-source) constant -- presumably an
 *      "is op3" marker, TODO confirm against the struct definition;
 *   3. the hardware opcode constant: an EG_V_SQ_ALU_WORD1_OP2/OP3 ALU opcode,
 *      an EG_V_SQ_CF_WORD1 control-flow opcode (BRK, CONT), an SQ_TEX_INST_*
 *      texture opcode (DDX, DDY, TEX, TXD, TXP, TXB, TXL, TXF, TXQ), or a
 *      literal 0 for the LOAD/SAMPLE/GATHER4/RESINFO family;
 *   4. the handler function used to translate the opcode.
 *
 * Entries appear to be listed in TGSI opcode-number order; the rows with a
 * bare number in the first field (20, 22, 23, 32, 75, ...) fill holes in the
 * opcode numbering and are marked with a "gap" comment.
 * NOTE(review): this layout suggests the table is indexed directly by opcode
 * number, in which case rows must never be reordered, inserted or removed
 * without keeping the positions in sync -- verify against the lookup site.
 *
 * Opcodes without a translation are routed to tgsi_unsupported and carry a
 * NOP (or 0) hardware opcode.  In this table that includes TGSI_OPCODE_NOP
 * itself and most integer opcodes (F2I, INEG, ISGE, ISLT, U2F, UADD, UMUL,
 * USEQ, ...), while IMAX, IMIN, NOT and XOR are wired to tgsi_op2.
 *
 * RCP, RSQ, EX2 and LG2 go through cayman_emit_float_instr, POW through
 * cayman_pow, and SIN/COS through cayman_trig.
 */
3736static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
3737	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3738	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3739	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3740	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr},
3741	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr},
3742	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3743	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3744	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3745	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3746	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3747	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3748	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3749	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3750	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3751	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3752	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3753	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3754	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3755	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3756	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3757	/* gap */
3758	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3759	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3760	/* gap */
3761	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3762	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3763	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3764	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3765	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3766	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
3767	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr},
3768	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr},
3769	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow},
3770	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3771	/* gap */
3772	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3773	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3774	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3775	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3776	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig},
3777	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3778	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3779	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3780	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3781	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3782	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3783	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3784	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3785	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3786	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3787	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3788	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig},
3789	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3790	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3791	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3792	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3793	{TGSI_OPCODE_TXD,	0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3794	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3795	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3796	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3797	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3798	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3799	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3800	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3801	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3802	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3803	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3804	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3805	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3806	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3807	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3808	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
3809	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3810	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3811	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3812	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3813	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3814	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3815	/* gap */
3816	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3817	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3818	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3819	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3820	/* gap */
3821	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3822	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3823	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3824	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3825	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3826	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3827	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
3828	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3829	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3830	/* gap */
3831	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3832	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3833	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3834	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3835	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
3836	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3837	{TGSI_OPCODE_TXF,	0, SQ_TEX_INST_LD, tgsi_tex},
3838	{TGSI_OPCODE_TXQ,	0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
3839	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3840	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3841	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3842	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3843	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3844	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3845	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3846	/* gap */
3847	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3848	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3849	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3850	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3851	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3852	/* gap */
3853	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3854	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3855	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3856	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3857	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3858	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3859	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3860	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3861	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3862	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3863	/* gap */
3864	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3865	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3866	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3867	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
3868	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
3869	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3870	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3871	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3872	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3873	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3874	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3875	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3876	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3877	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3878	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3879	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3880	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3881	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3882	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3883	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3884	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3885	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3886	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3887	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3888	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3889	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3890	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3891	{TGSI_OPCODE_LOAD,      0, 0, tgsi_unsupported},
3892	{TGSI_OPCODE_LOAD_MS,   0, 0, tgsi_unsupported},
3893	{TGSI_OPCODE_SAMPLE,    0, 0, tgsi_unsupported},
3894	{TGSI_OPCODE_SAMPLE_B,  0, 0, tgsi_unsupported},
3895	{TGSI_OPCODE_SAMPLE_C,  0, 0, tgsi_unsupported},
3896	{TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
3897	{TGSI_OPCODE_SAMPLE_D,  0, 0, tgsi_unsupported},
3898	{TGSI_OPCODE_SAMPLE_L,  0, 0, tgsi_unsupported},
3899	{TGSI_OPCODE_GATHER4,   0, 0, tgsi_unsupported},
3900	{TGSI_OPCODE_RESINFO,	0, 0, tgsi_unsupported},
3901	{TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
3902	{TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
3903	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3904};
3905