r600_shader.c revision cc9a8915f093c57d2748370d18ed47f66c933013
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "pipe/p_shader_tokens.h"
24#include "tgsi/tgsi_info.h"
25#include "tgsi/tgsi_parse.h"
26#include "tgsi/tgsi_scan.h"
27#include "tgsi/tgsi_dump.h"
28#include "util/u_format.h"
29#include "r600_pipe.h"
30#include "r600_asm.h"
31#include "r600_sq.h"
32#include "r600_formats.h"
33#include "r600_opcodes.h"
34#include "r600d.h"
35#include <stdio.h>
36#include <errno.h>
37#include <byteswap.h>
38
39/* CAYMAN notes
40Why CAYMAN got loops for lots of instructions is explained here.
41
42-These 8xx t-slot only ops are implemented in all vector slots.
43MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
44These 8xx t-slot only opcodes become vector ops, with all four
45slots expecting the arguments on sources a and b. Result is
46broadcast to all channels.
47MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
48These 8xx t-slot only opcodes become vector ops in the z, y, and
49x slots.
50EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
51RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
52SQRT_IEEE/_64
53SIN/COS
54The w slot may have an independent co-issued operation, or if the
55result is required to be in the w slot, the opcode above may be
56issued in the w slot as well.
57The compiler must issue the source argument to slots z, y, and x
58*/
59
60
61int r600_find_vs_semantic_index(struct r600_shader *vs,
62				struct r600_shader *ps, int id)
63{
64	struct r600_shader_io *input = &ps->input[id];
65
66	for (int i = 0; i < vs->noutput; i++) {
67		if (input->name == vs->output[i].name &&
68			input->sid == vs->output[i].sid) {
69			return i - 1;
70		}
71	}
72	return 0;
73}
74
75static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
76{
77	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
78	struct r600_shader *rshader = &shader->shader;
79	uint32_t *ptr;
80	int	i;
81
82	/* copy new shader */
83	if (shader->bo == NULL) {
84		/* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
85		shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE);
86		if (shader->bo == NULL) {
87			return -ENOMEM;
88		}
89		ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, rctx->ctx.cs, PIPE_TRANSFER_WRITE);
90		if (R600_BIG_ENDIAN) {
91			for (i = 0; i < rshader->bc.ndw; ++i) {
92				ptr[i] = bswap_32(rshader->bc.bytecode[i]);
93			}
94		} else {
95			memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr));
96		}
97		r600_bo_unmap(rctx->radeon, shader->bo);
98	}
99	/* build state */
100	switch (rshader->processor_type) {
101	case TGSI_PROCESSOR_VERTEX:
102		if (rctx->chip_class >= EVERGREEN) {
103			evergreen_pipe_shader_vs(ctx, shader);
104		} else {
105			r600_pipe_shader_vs(ctx, shader);
106		}
107		break;
108	case TGSI_PROCESSOR_FRAGMENT:
109		if (rctx->chip_class >= EVERGREEN) {
110			evergreen_pipe_shader_ps(ctx, shader);
111		} else {
112			r600_pipe_shader_ps(ctx, shader);
113		}
114		break;
115	default:
116		return -EINVAL;
117	}
118	return 0;
119}
120
121static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader);
122
123int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader)
124{
125	static int dump_shaders = -1;
126	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
127	int r;
128
129	/* Would like some magic "get_bool_option_once" routine.
130	*/
131	if (dump_shaders == -1)
132		dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
133
134	if (dump_shaders) {
135		fprintf(stderr, "--------------------------------------------------------------\n");
136		tgsi_dump(shader->tokens, 0);
137	}
138	r = r600_shader_from_tgsi(rctx, shader);
139	if (r) {
140		R600_ERR("translation from TGSI failed !\n");
141		return r;
142	}
143	r = r600_bytecode_build(&shader->shader.bc);
144	if (r) {
145		R600_ERR("building bytecode failed !\n");
146		return r;
147	}
148	if (dump_shaders) {
149		r600_bytecode_dump(&shader->shader.bc);
150		fprintf(stderr, "______________________________________________________________\n");
151	}
152	return r600_pipe_shader(ctx, shader);
153}
154
155void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
156{
157	r600_bo_reference(&shader->bo, NULL);
158	r600_bytecode_clear(&shader->shader.bc);
159
160	memset(&shader->shader,0,sizeof(struct r600_shader));
161}
162
163/*
164 * tgsi -> r600 shader
165 */
166struct r600_shader_tgsi_instruction;
167
/*
 * One source operand of the TGSI instruction being translated, in the form
 * produced by tgsi_src() and consumed by r600_bytecode_src().
 */
struct r600_shader_src {
	/* source selector: register index plus file offset, or a special
	 * value such as V_SQ_ALU_SRC_LITERAL */
	unsigned				sel;
	/* source channel to read for each of the four destination channels */
	unsigned				swizzle[4];
	/* negate modifier (copied from the TGSI Negate flag) */
	unsigned				neg;
	/* absolute-value modifier (copied from the TGSI Absolute flag) */
	unsigned				abs;
	/* V_SQ_REL_RELATIVE when the TGSI register is indirect, else 0 */
	unsigned				rel;
	/* the four immediate dwords when sel is V_SQ_ALU_SRC_LITERAL */
	uint32_t				value[4];
};
176
/*
 * Working state for one TGSI -> r600 translation; set up and torn down by
 * r600_shader_from_tgsi().
 */
struct r600_shader_ctx {
	/* filled by tgsi_scan_shader() */
	struct tgsi_shader_info			info;
	/* token iterator; FullToken holds the token currently processed */
	struct tgsi_parse_context		parse;
	/* the TGSI token stream being translated */
	const struct tgsi_token			*tokens;
	/* processor type taken from the TGSI header */
	unsigned				type;
	/* per-file base added to a TGSI register index to form a selector */
	unsigned				file_offset[TGSI_FILE_COUNT];
	/* first driver temporary; see r600_get_temp() */
	unsigned				temp_reg;
	/* register read as the index for relative constant fetches */
	unsigned				ar_reg;
	/* opcode table entry of the instruction being translated */
	struct r600_shader_tgsi_instruction	*inst_info;
	/* bytecode under construction */
	struct r600_bytecode				*bc;
	/* shader description being filled in */
	struct r600_shader			*shader;
	/* translated source operands of the current instruction */
	struct r600_shader_src			src[4];
	/* immediates seen so far, four dwords per TGSI immediate */
	u32					*literals;
	/* number of immediates stored in literals */
	u32					nliterals;
	/* driver temporaries handed out for the current instruction */
	u32					max_driver_temp_used;
	/* needed for evergreen interpolation */
	boolean                                 input_centroid;
	boolean                                 input_linear;
	boolean                                 input_perspective;
	/* value computed and returned by evergreen_gpr_count() */
	int					num_interp_gpr;
};
198
/*
 * One entry of a per-chip opcode table; r600_shader_from_tgsi() indexes the
 * table by TGSI opcode and calls the entry's process() handler.
 */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry describes */
	unsigned	tgsi_opcode;
	/* NOTE(review): presumably marks three-source ALU encodings -- confirm */
	unsigned	is_op3;
	/* hardware opcode; generic handlers copy it into the emitted ALU inst */
	unsigned	r600_opcode;
	/* translation handler; returns 0 on success or a negative errno */
	int (*process)(struct r600_shader_ctx *ctx);
};
205
206static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
207static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
208
209static int tgsi_is_supported(struct r600_shader_ctx *ctx)
210{
211	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
212	int j;
213
214	if (i->Instruction.NumDstRegs > 1) {
215		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
216		return -EINVAL;
217	}
218	if (i->Instruction.Predicate) {
219		R600_ERR("predicate unsupported\n");
220		return -EINVAL;
221	}
222#if 0
223	if (i->Instruction.Label) {
224		R600_ERR("label unsupported\n");
225		return -EINVAL;
226	}
227#endif
228	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
229		if (i->Src[j].Register.Dimension) {
230			R600_ERR("unsupported src %d (dimension %d)\n", j,
231				 i->Src[j].Register.Dimension);
232			return -EINVAL;
233		}
234	}
235	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
236		if (i->Dst[j].Register.Dimension) {
237			R600_ERR("unsupported dst (dimension)\n");
238			return -EINVAL;
239		}
240	}
241	return 0;
242}
243
244static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
245{
246	int i, r;
247	struct r600_bytecode_alu alu;
248	int gpr = 0, base_chan = 0;
249	int ij_index = 0;
250
251	if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
252		ij_index = 0;
253		if (ctx->shader->input[input].centroid)
254			ij_index++;
255	} else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
256		ij_index = 0;
257		/* if we have perspective add one */
258		if (ctx->input_perspective)  {
259			ij_index++;
260			/* if we have perspective centroid */
261			if (ctx->input_centroid)
262				ij_index++;
263		}
264		if (ctx->shader->input[input].centroid)
265			ij_index++;
266	}
267
268	/* work out gpr and base_chan from index */
269	gpr = ij_index / 2;
270	base_chan = (2 * (ij_index % 2)) + 1;
271
272	for (i = 0; i < 8; i++) {
273		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
274
275		if (i < 4)
276			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
277		else
278			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
279
280		if ((i > 1) && (i < 6)) {
281			alu.dst.sel = ctx->shader->input[input].gpr;
282			alu.dst.write = 1;
283		}
284
285		alu.dst.chan = i % 4;
286
287		alu.src[0].sel = gpr;
288		alu.src[0].chan = (base_chan - (i % 2));
289
290		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
291
292		alu.bank_swizzle_force = SQ_ALU_VEC_210;
293		if ((i % 4) == 3)
294			alu.last = 1;
295		r = r600_bytecode_add_alu(ctx->bc, &alu);
296		if (r)
297			return r;
298	}
299	return 0;
300}
301
302
303static int tgsi_declaration(struct r600_shader_ctx *ctx)
304{
305	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
306	unsigned i;
307	int r;
308
309	switch (d->Declaration.File) {
310	case TGSI_FILE_INPUT:
311		i = ctx->shader->ninput++;
312		ctx->shader->input[i].name = d->Semantic.Name;
313		ctx->shader->input[i].sid = d->Semantic.Index;
314		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
315		ctx->shader->input[i].centroid = d->Declaration.Centroid;
316		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
317		if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chip_class >= EVERGREEN) {
318			/* turn input into interpolate on EG */
319			if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
320				if (ctx->shader->input[i].interpolate > 0) {
321					ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
322					evergreen_interp_alu(ctx, i);
323				}
324			}
325		}
326		break;
327	case TGSI_FILE_OUTPUT:
328		i = ctx->shader->noutput++;
329		ctx->shader->output[i].name = d->Semantic.Name;
330		ctx->shader->output[i].sid = d->Semantic.Index;
331		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
332		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
333		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
334			/* these don't count as vertex param exports */
335			if ((ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION) ||
336			    (ctx->shader->output[i].name == TGSI_SEMANTIC_PSIZE))
337				ctx->shader->npos++;
338		}
339		break;
340	case TGSI_FILE_CONSTANT:
341	case TGSI_FILE_TEMPORARY:
342	case TGSI_FILE_SAMPLER:
343	case TGSI_FILE_ADDRESS:
344		break;
345
346	case TGSI_FILE_SYSTEM_VALUE:
347		if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
348			struct r600_bytecode_alu alu;
349			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
350
351			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
352			alu.src[0].sel = 0;
353			alu.src[0].chan = 3;
354
355			alu.dst.sel = 0;
356			alu.dst.chan = 3;
357			alu.dst.write = 1;
358			alu.last = 1;
359
360			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
361				return r;
362			break;
363		}
364
365	default:
366		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
367		return -EINVAL;
368	}
369	return 0;
370}
371
372static int r600_get_temp(struct r600_shader_ctx *ctx)
373{
374	return ctx->temp_reg + ctx->max_driver_temp_used++;
375}
376
377/*
378 * for evergreen we need to scan the shader to find the number of GPRs we need to
379 * reserve for interpolation.
380 *
381 * we need to know if we are going to emit
382 * any centroid inputs
383 * if perspective and linear are required
384*/
385static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
386{
387	int i;
388	int num_baryc;
389
390	ctx->input_linear = FALSE;
391	ctx->input_perspective = FALSE;
392	ctx->input_centroid = FALSE;
393	ctx->num_interp_gpr = 1;
394
395	/* any centroid inputs */
396	for (i = 0; i < ctx->info.num_inputs; i++) {
397		/* skip position/face */
398		if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
399		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
400			continue;
401		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
402			ctx->input_linear = TRUE;
403		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
404			ctx->input_perspective = TRUE;
405		if (ctx->info.input_centroid[i])
406			ctx->input_centroid = TRUE;
407	}
408
409	num_baryc = 0;
410	/* ignoring sample for now */
411	if (ctx->input_perspective)
412		num_baryc++;
413	if (ctx->input_linear)
414		num_baryc++;
415	if (ctx->input_centroid)
416		num_baryc *= 2;
417
418	ctx->num_interp_gpr += (num_baryc + 1) >> 1;
419
420	/* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
421	return ctx->num_interp_gpr;
422}
423
424static void tgsi_src(struct r600_shader_ctx *ctx,
425		     const struct tgsi_full_src_register *tgsi_src,
426		     struct r600_shader_src *r600_src)
427{
428	memset(r600_src, 0, sizeof(*r600_src));
429	r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
430	r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
431	r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
432	r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
433	r600_src->neg = tgsi_src->Register.Negate;
434	r600_src->abs = tgsi_src->Register.Absolute;
435
436	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
437		int index;
438		if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
439			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
440			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
441
442			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
443			r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
444			if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
445				return;
446		}
447		index = tgsi_src->Register.Index;
448		r600_src->sel = V_SQ_ALU_SRC_LITERAL;
449		memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
450	} else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
451		/* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
452		r600_src->swizzle[0] = 3;
453		r600_src->swizzle[1] = 3;
454		r600_src->swizzle[2] = 3;
455		r600_src->swizzle[3] = 3;
456		r600_src->sel = 0;
457	} else {
458		if (tgsi_src->Register.Indirect)
459			r600_src->rel = V_SQ_REL_RELATIVE;
460		r600_src->sel = tgsi_src->Register.Index;
461		r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
462	}
463}
464
465static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
466{
467	struct r600_bytecode_vtx vtx;
468	unsigned int ar_reg;
469	int r;
470
471	if (offset) {
472		struct r600_bytecode_alu alu;
473
474		memset(&alu, 0, sizeof(alu));
475
476		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
477		alu.src[0].sel = ctx->ar_reg;
478
479		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
480		alu.src[1].value = offset;
481
482		alu.dst.sel = dst_reg;
483		alu.dst.write = 1;
484		alu.last = 1;
485
486		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
487			return r;
488
489		ar_reg = dst_reg;
490	} else {
491		ar_reg = ctx->ar_reg;
492	}
493
494	memset(&vtx, 0, sizeof(vtx));
495	vtx.fetch_type = 2;		/* VTX_FETCH_NO_INDEX_OFFSET */
496	vtx.src_gpr = ar_reg;
497	vtx.mega_fetch_count = 16;
498	vtx.dst_gpr = dst_reg;
499	vtx.dst_sel_x = 0;		/* SEL_X */
500	vtx.dst_sel_y = 1;		/* SEL_Y */
501	vtx.dst_sel_z = 2;		/* SEL_Z */
502	vtx.dst_sel_w = 3;		/* SEL_W */
503	vtx.data_format = FMT_32_32_32_32_FLOAT;
504	vtx.num_format_all = 2;		/* NUM_FORMAT_SCALED */
505	vtx.format_comp_all = 1;	/* FORMAT_COMP_SIGNED */
506	vtx.srf_mode_all = 1;		/* SRF_MODE_NO_ZERO */
507	vtx.endian = r600_endian_swap(32);
508
509	if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
510		return r;
511
512	return 0;
513}
514
515static int tgsi_split_constant(struct r600_shader_ctx *ctx)
516{
517	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
518	struct r600_bytecode_alu alu;
519	int i, j, k, nconst, r;
520
521	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
522		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
523			nconst++;
524		}
525		tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
526	}
527	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
528		if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
529			continue;
530		}
531
532		if (ctx->src[i].rel) {
533			int treg = r600_get_temp(ctx);
534			if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
535				return r;
536
537			ctx->src[i].sel = treg;
538			ctx->src[i].rel = 0;
539			j--;
540		} else if (j > 0) {
541			int treg = r600_get_temp(ctx);
542			for (k = 0; k < 4; k++) {
543				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
544				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
545				alu.src[0].sel = ctx->src[i].sel;
546				alu.src[0].chan = k;
547				alu.src[0].rel = ctx->src[i].rel;
548				alu.dst.sel = treg;
549				alu.dst.chan = k;
550				alu.dst.write = 1;
551				if (k == 3)
552					alu.last = 1;
553				r = r600_bytecode_add_alu(ctx->bc, &alu);
554				if (r)
555					return r;
556			}
557			ctx->src[i].sel = treg;
558			ctx->src[i].rel =0;
559			j--;
560		}
561	}
562	return 0;
563}
564
565/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
566static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
567{
568	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
569	struct r600_bytecode_alu alu;
570	int i, j, k, nliteral, r;
571
572	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
573		if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
574			nliteral++;
575		}
576	}
577	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
578		if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
579			int treg = r600_get_temp(ctx);
580			for (k = 0; k < 4; k++) {
581				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
582				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
583				alu.src[0].sel = ctx->src[i].sel;
584				alu.src[0].chan = k;
585				alu.src[0].value = ctx->src[i].value[k];
586				alu.dst.sel = treg;
587				alu.dst.chan = k;
588				alu.dst.write = 1;
589				if (k == 3)
590					alu.last = 1;
591				r = r600_bytecode_add_alu(ctx->bc, &alu);
592				if (r)
593					return r;
594			}
595			ctx->src[i].sel = treg;
596			j--;
597		}
598	}
599	return 0;
600}
601
602static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader)
603{
604	struct r600_shader *shader = &pipeshader->shader;
605	struct tgsi_token *tokens = pipeshader->tokens;
606	struct tgsi_full_immediate *immediate;
607	struct tgsi_full_property *property;
608	struct r600_shader_ctx ctx;
609	struct r600_bytecode_output output[32];
610	unsigned output_done, noutput;
611	unsigned opcode;
612	int i, j, r = 0, pos0;
613
614	ctx.bc = &shader->bc;
615	ctx.shader = shader;
616	r600_bytecode_init(ctx.bc, rctx->chip_class);
617	ctx.tokens = tokens;
618	tgsi_scan_shader(tokens, &ctx.info);
619	tgsi_parse_init(&ctx.parse, tokens);
620	ctx.type = ctx.parse.FullHeader.Processor.Processor;
621	shader->processor_type = ctx.type;
622	ctx.bc->type = shader->processor_type;
623
624	shader->clamp_color = (((ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->clamp_fragment_color) ||
625		((ctx.type == TGSI_PROCESSOR_VERTEX) && rctx->clamp_vertex_color));
626
627	shader->nr_cbufs = rctx->nr_cbufs;
628
629	/* register allocations */
630	/* Values [0,127] correspond to GPR[0..127].
631	 * Values [128,159] correspond to constant buffer bank 0
632	 * Values [160,191] correspond to constant buffer bank 1
633	 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
634	 * Values [256,287] correspond to constant buffer bank 2 (EG)
635	 * Values [288,319] correspond to constant buffer bank 3 (EG)
636	 * Other special values are shown in the list below.
637	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
638	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
639	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
640	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
641	 * 248	SQ_ALU_SRC_0: special constant 0.0.
642	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
643	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
644	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
645	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
646	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
647	 * 254	SQ_ALU_SRC_PV: previous vector result.
648	 * 255	SQ_ALU_SRC_PS: previous scalar result.
649	 */
650	for (i = 0; i < TGSI_FILE_COUNT; i++) {
651		ctx.file_offset[i] = 0;
652	}
653	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
654		ctx.file_offset[TGSI_FILE_INPUT] = 1;
655		if (ctx.bc->chip_class >= EVERGREEN) {
656			r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
657		} else {
658			r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
659		}
660	}
661	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
662		ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
663	}
664	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
665						ctx.info.file_max[TGSI_FILE_INPUT] + 1;
666	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
667						ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
668
669	/* Outside the GPR range. This will be translated to one of the
670	 * kcache banks later. */
671	ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
672
673	ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
674	ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
675			ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
676	ctx.temp_reg = ctx.ar_reg + 1;
677
678	ctx.nliterals = 0;
679	ctx.literals = NULL;
680	shader->fs_write_all = FALSE;
681	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
682		tgsi_parse_token(&ctx.parse);
683		switch (ctx.parse.FullToken.Token.Type) {
684		case TGSI_TOKEN_TYPE_IMMEDIATE:
685			immediate = &ctx.parse.FullToken.FullImmediate;
686			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
687			if(ctx.literals == NULL) {
688				r = -ENOMEM;
689				goto out_err;
690			}
691			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
692			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
693			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
694			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
695			ctx.nliterals++;
696			break;
697		case TGSI_TOKEN_TYPE_DECLARATION:
698			r = tgsi_declaration(&ctx);
699			if (r)
700				goto out_err;
701			break;
702		case TGSI_TOKEN_TYPE_INSTRUCTION:
703			r = tgsi_is_supported(&ctx);
704			if (r)
705				goto out_err;
706			ctx.max_driver_temp_used = 0;
707			/* reserve first tmp for everyone */
708			r600_get_temp(&ctx);
709
710			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
711			if ((r = tgsi_split_constant(&ctx)))
712				goto out_err;
713			if ((r = tgsi_split_literal_constant(&ctx)))
714				goto out_err;
715			if (ctx.bc->chip_class == CAYMAN)
716				ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
717			else if (ctx.bc->chip_class >= EVERGREEN)
718				ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
719			else
720				ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
721			r = ctx.inst_info->process(&ctx);
722			if (r)
723				goto out_err;
724			break;
725		case TGSI_TOKEN_TYPE_PROPERTY:
726			property = &ctx.parse.FullToken.FullProperty;
727			if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
728				if (property->u[0].Data == 1)
729					shader->fs_write_all = TRUE;
730			}
731			break;
732		default:
733			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
734			r = -EINVAL;
735			goto out_err;
736		}
737	}
738
739	noutput = shader->noutput;
740
741	/* clamp color outputs */
742	if (shader->clamp_color) {
743		for (i = 0; i < noutput; i++) {
744			if (shader->output[i].name == TGSI_SEMANTIC_COLOR ||
745				shader->output[i].name == TGSI_SEMANTIC_BCOLOR) {
746
747				int j;
748				for (j = 0; j < 4; j++) {
749					struct r600_bytecode_alu alu;
750					memset(&alu, 0, sizeof(struct r600_bytecode_alu));
751
752					/* MOV_SAT R, R */
753					alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
754					alu.dst.sel = shader->output[i].gpr;
755					alu.dst.chan = j;
756					alu.dst.write = 1;
757					alu.dst.clamp = 1;
758					alu.src[0].sel = alu.dst.sel;
759					alu.src[0].chan = j;
760
761					if (j == 3) {
762						alu.last = 1;
763					}
764					r = r600_bytecode_add_alu(ctx.bc, &alu);
765					if (r)
766						return r;
767				}
768			}
769		}
770	}
771
772	/* export output */
773	j = 0;
774	for (i = 0, pos0 = 0; i < noutput; i++) {
775		memset(&output[i], 0, sizeof(struct r600_bytecode_output));
776		output[i + j].gpr = shader->output[i].gpr;
777		output[i + j].elem_size = 3;
778		output[i + j].swizzle_x = 0;
779		output[i + j].swizzle_y = 1;
780		output[i + j].swizzle_z = 2;
781		output[i + j].swizzle_w = 3;
782		output[i + j].burst_count = 1;
783		output[i + j].barrier = 1;
784		output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
785		output[i + j].array_base = i - pos0;
786		output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
787		switch (ctx.type) {
788		case TGSI_PROCESSOR_VERTEX:
789			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
790				output[i + j].array_base = 60;
791				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
792				/* position doesn't count in array_base */
793				pos0++;
794			}
795			if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
796				output[i + j].array_base = 61;
797				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
798				/* position doesn't count in array_base */
799				pos0++;
800			}
801			break;
802		case TGSI_PROCESSOR_FRAGMENT:
803			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
804				output[i + j].array_base = shader->output[i].sid;
805				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
806				if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) {
807					for (j = 1; j < shader->nr_cbufs; j++) {
808						memset(&output[i + j], 0, sizeof(struct r600_bytecode_output));
809						output[i + j].gpr = shader->output[i].gpr;
810						output[i + j].elem_size = 3;
811						output[i + j].swizzle_x = 0;
812						output[i + j].swizzle_y = 1;
813						output[i + j].swizzle_z = 2;
814						output[i + j].swizzle_w = 3;
815						output[i + j].burst_count = 1;
816						output[i + j].barrier = 1;
817						output[i + j].array_base = shader->output[i].sid + j;
818						output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
819						output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
820					}
821					j--;
822				}
823			} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
824				output[i + j].array_base = 61;
825				output[i + j].swizzle_x = 2;
826				output[i + j].swizzle_y = 7;
827				output[i + j].swizzle_z = output[i + j].swizzle_w = 7;
828				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
829			} else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
830				output[i + j].array_base = 61;
831				output[i + j].swizzle_x = 7;
832				output[i + j].swizzle_y = 1;
833				output[i + j].swizzle_z = output[i + j].swizzle_w = 7;
834				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
835			} else {
836				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
837				r = -EINVAL;
838				goto out_err;
839			}
840			break;
841		default:
842			R600_ERR("unsupported processor type %d\n", ctx.type);
843			r = -EINVAL;
844			goto out_err;
845		}
846	}
847	noutput += j;
848	/* add fake param output for vertex shader if no param is exported */
849	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
850		for (i = 0, pos0 = 0; i < noutput; i++) {
851			if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
852				pos0 = 1;
853				break;
854			}
855		}
856		if (!pos0) {
857			memset(&output[i], 0, sizeof(struct r600_bytecode_output));
858			output[i].gpr = 0;
859			output[i].elem_size = 3;
860			output[i].swizzle_x = 0;
861			output[i].swizzle_y = 1;
862			output[i].swizzle_z = 2;
863			output[i].swizzle_w = 3;
864			output[i].burst_count = 1;
865			output[i].barrier = 1;
866			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
867			output[i].array_base = 0;
868			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
869			noutput++;
870		}
871	}
872	/* add fake pixel export */
873	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
874		memset(&output[0], 0, sizeof(struct r600_bytecode_output));
875		output[0].gpr = 0;
876		output[0].elem_size = 3;
877		output[0].swizzle_x = 7;
878		output[0].swizzle_y = 7;
879		output[0].swizzle_z = 7;
880		output[0].swizzle_w = 7;
881		output[0].burst_count = 1;
882		output[0].barrier = 1;
883		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
884		output[0].array_base = 0;
885		output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
886		noutput++;
887	}
888	/* set export done on last export of each type */
889	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
890		if (ctx.bc->chip_class < CAYMAN) {
891			if (i == (noutput - 1)) {
892				output[i].end_of_program = 1;
893			}
894		}
895		if (!(output_done & (1 << output[i].type))) {
896			output_done |= (1 << output[i].type);
897			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
898		}
899	}
900	/* add output to bytecode */
901	for (i = 0; i < noutput; i++) {
902		r = r600_bytecode_add_output(ctx.bc, &output[i]);
903		if (r)
904			goto out_err;
905	}
906	/* add program end */
907	if (ctx.bc->chip_class == CAYMAN)
908		cm_bytecode_add_cf_end(ctx.bc);
909
910	free(ctx.literals);
911	tgsi_parse_free(&ctx.parse);
912	return 0;
913out_err:
914	free(ctx.literals);
915	tgsi_parse_free(&ctx.parse);
916	return r;
917}
918
919static int tgsi_unsupported(struct r600_shader_ctx *ctx)
920{
921	R600_ERR("%s tgsi opcode unsupported\n",
922		 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
923	return -EINVAL;
924}
925
/* Opcode handler that emits nothing and always succeeds. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;
	return 0;
}
930
931static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
932			const struct r600_shader_src *shader_src,
933			unsigned chan)
934{
935	bc_src->sel = shader_src->sel;
936	bc_src->chan = shader_src->swizzle[chan];
937	bc_src->neg = shader_src->neg;
938	bc_src->abs = shader_src->abs;
939	bc_src->rel = shader_src->rel;
940	bc_src->value = shader_src->value[bc_src->chan];
941}
942
943static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
944{
945	bc_src->abs = 1;
946	bc_src->neg = 0;
947}
948
949static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
950{
951	bc_src->neg = !bc_src->neg;
952}
953
954static void tgsi_dst(struct r600_shader_ctx *ctx,
955		     const struct tgsi_full_dst_register *tgsi_dst,
956		     unsigned swizzle,
957		     struct r600_bytecode_alu_dst *r600_dst)
958{
959	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
960
961	r600_dst->sel = tgsi_dst->Register.Index;
962	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
963	r600_dst->chan = swizzle;
964	r600_dst->write = 1;
965	if (tgsi_dst->Register.Indirect)
966		r600_dst->rel = V_SQ_REL_RELATIVE;
967	if (inst->Instruction.Saturate) {
968		r600_dst->clamp = 1;
969	}
970}
971
/*
 * Return the index (0..3) of the highest channel enabled in the low four
 * bits of writemask, or 0 when none of them is set.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan from w down to y; x (or an empty mask) falls through to 0 */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1u << chan))
			return chan;
	}
	return 0;
}
983
984static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
985{
986	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
987	struct r600_bytecode_alu alu;
988	int i, j, r;
989	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
990
991	for (i = 0; i < lasti + 1; i++) {
992		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
993			continue;
994
995		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
996		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
997
998		alu.inst = ctx->inst_info->r600_opcode;
999		if (!swap) {
1000			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1001				r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1002			}
1003		} else {
1004			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
1005			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1006		}
1007		/* handle some special cases */
1008		switch (ctx->inst_info->tgsi_opcode) {
1009		case TGSI_OPCODE_SUB:
1010			r600_bytecode_src_toggle_neg(&alu.src[1]);
1011			break;
1012		case TGSI_OPCODE_ABS:
1013			r600_bytecode_src_set_abs(&alu.src[0]);
1014			break;
1015		default:
1016			break;
1017		}
1018		if (i == lasti) {
1019			alu.last = 1;
1020		}
1021		r = r600_bytecode_add_alu(ctx->bc, &alu);
1022		if (r)
1023			return r;
1024	}
1025	return 0;
1026}
1027
/* Per-channel emission with the sources in TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
1032
/* Per-channel emission with the first two sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
1037
1038static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
1039{
1040	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1041	int i, j, r;
1042	struct r600_bytecode_alu alu;
1043	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1044
1045	for (i = 0 ; i < last_slot; i++) {
1046		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1047		alu.inst = ctx->inst_info->r600_opcode;
1048		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1049			r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);
1050		}
1051		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1052		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1053
1054		if (i == last_slot - 1)
1055			alu.last = 1;
1056		r = r600_bytecode_add_alu(ctx->bc, &alu);
1057		if (r)
1058			return r;
1059	}
1060	return 0;
1061}
1062
1063/*
1064 * r600 - trunc to -PI..PI range
1065 * r700 - normalize by dividing by 2PI
1066 * see fdo bug 27901
1067 */
1068static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
1069{
1070	static float half_inv_pi = 1.0 /(3.1415926535 * 2);
1071	static float double_pi = 3.1415926535 * 2;
1072	static float neg_pi = -3.1415926535;
1073
1074	int r;
1075	struct r600_bytecode_alu alu;
1076
1077	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1078	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1079	alu.is_op3 = 1;
1080
1081	alu.dst.chan = 0;
1082	alu.dst.sel = ctx->temp_reg;
1083	alu.dst.write = 1;
1084
1085	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1086
1087	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1088	alu.src[1].chan = 0;
1089	alu.src[1].value = *(uint32_t *)&half_inv_pi;
1090	alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1091	alu.src[2].chan = 0;
1092	alu.last = 1;
1093	r = r600_bytecode_add_alu(ctx->bc, &alu);
1094	if (r)
1095		return r;
1096
1097	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1098	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1099
1100	alu.dst.chan = 0;
1101	alu.dst.sel = ctx->temp_reg;
1102	alu.dst.write = 1;
1103
1104	alu.src[0].sel = ctx->temp_reg;
1105	alu.src[0].chan = 0;
1106	alu.last = 1;
1107	r = r600_bytecode_add_alu(ctx->bc, &alu);
1108	if (r)
1109		return r;
1110
1111	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1112	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1113	alu.is_op3 = 1;
1114
1115	alu.dst.chan = 0;
1116	alu.dst.sel = ctx->temp_reg;
1117	alu.dst.write = 1;
1118
1119	alu.src[0].sel = ctx->temp_reg;
1120	alu.src[0].chan = 0;
1121
1122	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1123	alu.src[1].chan = 0;
1124	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1125	alu.src[2].chan = 0;
1126
1127	if (ctx->bc->chip_class == R600) {
1128		alu.src[1].value = *(uint32_t *)&double_pi;
1129		alu.src[2].value = *(uint32_t *)&neg_pi;
1130	} else {
1131		alu.src[1].sel = V_SQ_ALU_SRC_1;
1132		alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1133		alu.src[2].neg = 1;
1134	}
1135
1136	alu.last = 1;
1137	r = r600_bytecode_add_alu(ctx->bc, &alu);
1138	if (r)
1139		return r;
1140	return 0;
1141}
1142
1143static int cayman_trig(struct r600_shader_ctx *ctx)
1144{
1145	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1146	struct r600_bytecode_alu alu;
1147	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1148	int i, r;
1149
1150	r = tgsi_setup_trig(ctx);
1151	if (r)
1152		return r;
1153
1154
1155	for (i = 0; i < last_slot; i++) {
1156		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1157		alu.inst = ctx->inst_info->r600_opcode;
1158		alu.dst.chan = i;
1159
1160		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1161		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1162
1163		alu.src[0].sel = ctx->temp_reg;
1164		alu.src[0].chan = 0;
1165		if (i == last_slot - 1)
1166			alu.last = 1;
1167		r = r600_bytecode_add_alu(ctx->bc, &alu);
1168		if (r)
1169			return r;
1170	}
1171	return 0;
1172}
1173
1174static int tgsi_trig(struct r600_shader_ctx *ctx)
1175{
1176	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1177	struct r600_bytecode_alu alu;
1178	int i, r;
1179	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1180
1181	r = tgsi_setup_trig(ctx);
1182	if (r)
1183		return r;
1184
1185	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1186	alu.inst = ctx->inst_info->r600_opcode;
1187	alu.dst.chan = 0;
1188	alu.dst.sel = ctx->temp_reg;
1189	alu.dst.write = 1;
1190
1191	alu.src[0].sel = ctx->temp_reg;
1192	alu.src[0].chan = 0;
1193	alu.last = 1;
1194	r = r600_bytecode_add_alu(ctx->bc, &alu);
1195	if (r)
1196		return r;
1197
1198	/* replicate result */
1199	for (i = 0; i < lasti + 1; i++) {
1200		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1201			continue;
1202
1203		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1204		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1205
1206		alu.src[0].sel = ctx->temp_reg;
1207		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1208		if (i == lasti)
1209			alu.last = 1;
1210		r = r600_bytecode_add_alu(ctx->bc, &alu);
1211		if (r)
1212			return r;
1213	}
1214	return 0;
1215}
1216
1217static int tgsi_scs(struct r600_shader_ctx *ctx)
1218{
1219	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1220	struct r600_bytecode_alu alu;
1221	int i, r;
1222
1223	/* We'll only need the trig stuff if we are going to write to the
1224	 * X or Y components of the destination vector.
1225	 */
1226	if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1227		r = tgsi_setup_trig(ctx);
1228		if (r)
1229			return r;
1230	}
1231
1232	/* dst.x = COS */
1233	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1234		if (ctx->bc->chip_class == CAYMAN) {
1235			for (i = 0 ; i < 3; i++) {
1236				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1237				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1238				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1239
1240				if (i == 0)
1241					alu.dst.write = 1;
1242				else
1243					alu.dst.write = 0;
1244				alu.src[0].sel = ctx->temp_reg;
1245				alu.src[0].chan = 0;
1246				if (i == 2)
1247					alu.last = 1;
1248				r = r600_bytecode_add_alu(ctx->bc, &alu);
1249				if (r)
1250					return r;
1251			}
1252		} else {
1253			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1254			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1255			tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1256
1257			alu.src[0].sel = ctx->temp_reg;
1258			alu.src[0].chan = 0;
1259			alu.last = 1;
1260			r = r600_bytecode_add_alu(ctx->bc, &alu);
1261			if (r)
1262				return r;
1263		}
1264	}
1265
1266	/* dst.y = SIN */
1267	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1268		if (ctx->bc->chip_class == CAYMAN) {
1269			for (i = 0 ; i < 3; i++) {
1270				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1271				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1272				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1273				if (i == 1)
1274					alu.dst.write = 1;
1275				else
1276					alu.dst.write = 0;
1277				alu.src[0].sel = ctx->temp_reg;
1278				alu.src[0].chan = 0;
1279				if (i == 2)
1280					alu.last = 1;
1281				r = r600_bytecode_add_alu(ctx->bc, &alu);
1282				if (r)
1283					return r;
1284			}
1285		} else {
1286			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1287			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1288			tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1289
1290			alu.src[0].sel = ctx->temp_reg;
1291			alu.src[0].chan = 0;
1292			alu.last = 1;
1293			r = r600_bytecode_add_alu(ctx->bc, &alu);
1294			if (r)
1295				return r;
1296		}
1297	}
1298
1299	/* dst.z = 0.0; */
1300	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1301		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1302
1303		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1304
1305		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1306
1307		alu.src[0].sel = V_SQ_ALU_SRC_0;
1308		alu.src[0].chan = 0;
1309
1310		alu.last = 1;
1311
1312		r = r600_bytecode_add_alu(ctx->bc, &alu);
1313		if (r)
1314			return r;
1315	}
1316
1317	/* dst.w = 1.0; */
1318	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1319		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1320
1321		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1322
1323		tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1324
1325		alu.src[0].sel = V_SQ_ALU_SRC_1;
1326		alu.src[0].chan = 0;
1327
1328		alu.last = 1;
1329
1330		r = r600_bytecode_add_alu(ctx->bc, &alu);
1331		if (r)
1332			return r;
1333	}
1334
1335	return 0;
1336}
1337
1338static int tgsi_kill(struct r600_shader_ctx *ctx)
1339{
1340	struct r600_bytecode_alu alu;
1341	int i, r;
1342
1343	for (i = 0; i < 4; i++) {
1344		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1345		alu.inst = ctx->inst_info->r600_opcode;
1346
1347		alu.dst.chan = i;
1348
1349		alu.src[0].sel = V_SQ_ALU_SRC_0;
1350
1351		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1352			alu.src[1].sel = V_SQ_ALU_SRC_1;
1353			alu.src[1].neg = 1;
1354		} else {
1355			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1356		}
1357		if (i == 3) {
1358			alu.last = 1;
1359		}
1360		r = r600_bytecode_add_alu(ctx->bc, &alu);
1361		if (r)
1362			return r;
1363	}
1364
1365	/* kill must be last in ALU */
1366	ctx->bc->force_add_cf = 1;
1367	ctx->shader->uses_kill = TRUE;
1368	return 0;
1369}
1370
1371static int tgsi_lit(struct r600_shader_ctx *ctx)
1372{
1373	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1374	struct r600_bytecode_alu alu;
1375	int r;
1376
1377	/* tmp.x = max(src.y, 0.0) */
1378	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1379	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1380	r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
1381	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1382	alu.src[1].chan = 1;
1383
1384	alu.dst.sel = ctx->temp_reg;
1385	alu.dst.chan = 0;
1386	alu.dst.write = 1;
1387
1388	alu.last = 1;
1389	r = r600_bytecode_add_alu(ctx->bc, &alu);
1390	if (r)
1391		return r;
1392
1393	if (inst->Dst[0].Register.WriteMask & (1 << 2))
1394	{
1395		int chan;
1396		int sel;
1397		int i;
1398
1399		if (ctx->bc->chip_class == CAYMAN) {
1400			for (i = 0; i < 3; i++) {
1401				/* tmp.z = log(tmp.x) */
1402				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1403				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1404				alu.src[0].sel = ctx->temp_reg;
1405				alu.src[0].chan = 0;
1406				alu.dst.sel = ctx->temp_reg;
1407				alu.dst.chan = i;
1408				if (i == 2) {
1409					alu.dst.write = 1;
1410					alu.last = 1;
1411				} else
1412					alu.dst.write = 0;
1413
1414				r = r600_bytecode_add_alu(ctx->bc, &alu);
1415				if (r)
1416					return r;
1417			}
1418		} else {
1419			/* tmp.z = log(tmp.x) */
1420			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1421			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1422			alu.src[0].sel = ctx->temp_reg;
1423			alu.src[0].chan = 0;
1424			alu.dst.sel = ctx->temp_reg;
1425			alu.dst.chan = 2;
1426			alu.dst.write = 1;
1427			alu.last = 1;
1428			r = r600_bytecode_add_alu(ctx->bc, &alu);
1429			if (r)
1430				return r;
1431		}
1432
1433		chan = alu.dst.chan;
1434		sel = alu.dst.sel;
1435
1436		/* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
1437		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1438		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1439		alu.src[0].sel  = sel;
1440		alu.src[0].chan = chan;
1441		r600_bytecode_src(&alu.src[1], &ctx->src[0], 3);
1442		r600_bytecode_src(&alu.src[2], &ctx->src[0], 0);
1443		alu.dst.sel = ctx->temp_reg;
1444		alu.dst.chan = 0;
1445		alu.dst.write = 1;
1446		alu.is_op3 = 1;
1447		alu.last = 1;
1448		r = r600_bytecode_add_alu(ctx->bc, &alu);
1449		if (r)
1450			return r;
1451
1452		if (ctx->bc->chip_class == CAYMAN) {
1453			for (i = 0; i < 3; i++) {
1454				/* dst.z = exp(tmp.x) */
1455				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1456				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1457				alu.src[0].sel = ctx->temp_reg;
1458				alu.src[0].chan = 0;
1459				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1460				if (i == 2) {
1461					alu.dst.write = 1;
1462					alu.last = 1;
1463				} else
1464					alu.dst.write = 0;
1465				r = r600_bytecode_add_alu(ctx->bc, &alu);
1466				if (r)
1467					return r;
1468			}
1469		} else {
1470			/* dst.z = exp(tmp.x) */
1471			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1472			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1473			alu.src[0].sel = ctx->temp_reg;
1474			alu.src[0].chan = 0;
1475			tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1476			alu.last = 1;
1477			r = r600_bytecode_add_alu(ctx->bc, &alu);
1478			if (r)
1479				return r;
1480		}
1481	}
1482
1483	/* dst.x, <- 1.0  */
1484	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1485	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1486	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
1487	alu.src[0].chan = 0;
1488	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1489	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1490	r = r600_bytecode_add_alu(ctx->bc, &alu);
1491	if (r)
1492		return r;
1493
1494	/* dst.y = max(src.x, 0.0) */
1495	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1496	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1497	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1498	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1499	alu.src[1].chan = 0;
1500	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1501	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1502	r = r600_bytecode_add_alu(ctx->bc, &alu);
1503	if (r)
1504		return r;
1505
1506	/* dst.w, <- 1.0  */
1507	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1508	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1509	alu.src[0].sel  = V_SQ_ALU_SRC_1;
1510	alu.src[0].chan = 0;
1511	tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1512	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1513	alu.last = 1;
1514	r = r600_bytecode_add_alu(ctx->bc, &alu);
1515	if (r)
1516		return r;
1517
1518	return 0;
1519}
1520
1521static int tgsi_rsq(struct r600_shader_ctx *ctx)
1522{
1523	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1524	struct r600_bytecode_alu alu;
1525	int i, r;
1526
1527	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1528
1529	/* FIXME:
1530	 * For state trackers other than OpenGL, we'll want to use
1531	 * _RECIPSQRT_IEEE instead.
1532	 */
1533	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1534
1535	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1536		r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
1537		r600_bytecode_src_set_abs(&alu.src[i]);
1538	}
1539	alu.dst.sel = ctx->temp_reg;
1540	alu.dst.write = 1;
1541	alu.last = 1;
1542	r = r600_bytecode_add_alu(ctx->bc, &alu);
1543	if (r)
1544		return r;
1545	/* replicate result */
1546	return tgsi_helper_tempx_replicate(ctx);
1547}
1548
1549static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1550{
1551	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1552	struct r600_bytecode_alu alu;
1553	int i, r;
1554
1555	for (i = 0; i < 4; i++) {
1556		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1557		alu.src[0].sel = ctx->temp_reg;
1558		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1559		alu.dst.chan = i;
1560		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1561		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1562		if (i == 3)
1563			alu.last = 1;
1564		r = r600_bytecode_add_alu(ctx->bc, &alu);
1565		if (r)
1566			return r;
1567	}
1568	return 0;
1569}
1570
1571static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1572{
1573	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1574	struct r600_bytecode_alu alu;
1575	int i, r;
1576
1577	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1578	alu.inst = ctx->inst_info->r600_opcode;
1579	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1580		r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
1581	}
1582	alu.dst.sel = ctx->temp_reg;
1583	alu.dst.write = 1;
1584	alu.last = 1;
1585	r = r600_bytecode_add_alu(ctx->bc, &alu);
1586	if (r)
1587		return r;
1588	/* replicate result */
1589	return tgsi_helper_tempx_replicate(ctx);
1590}
1591
1592static int cayman_pow(struct r600_shader_ctx *ctx)
1593{
1594	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1595	int i, r;
1596	struct r600_bytecode_alu alu;
1597	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1598
1599	for (i = 0; i < 3; i++) {
1600		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1601		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1602		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1603		alu.dst.sel = ctx->temp_reg;
1604		alu.dst.chan = i;
1605		alu.dst.write = 1;
1606		if (i == 2)
1607			alu.last = 1;
1608		r = r600_bytecode_add_alu(ctx->bc, &alu);
1609		if (r)
1610			return r;
1611	}
1612
1613	/* b * LOG2(a) */
1614	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1615	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1616	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
1617	alu.src[1].sel = ctx->temp_reg;
1618	alu.dst.sel = ctx->temp_reg;
1619	alu.dst.write = 1;
1620	alu.last = 1;
1621	r = r600_bytecode_add_alu(ctx->bc, &alu);
1622	if (r)
1623		return r;
1624
1625	for (i = 0; i < last_slot; i++) {
1626		/* POW(a,b) = EXP2(b * LOG2(a))*/
1627		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1628		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1629		alu.src[0].sel = ctx->temp_reg;
1630
1631		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1632		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1633		if (i == last_slot - 1)
1634			alu.last = 1;
1635		r = r600_bytecode_add_alu(ctx->bc, &alu);
1636		if (r)
1637			return r;
1638	}
1639	return 0;
1640}
1641
1642static int tgsi_pow(struct r600_shader_ctx *ctx)
1643{
1644	struct r600_bytecode_alu alu;
1645	int r;
1646
1647	/* LOG2(a) */
1648	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1649	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1650	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1651	alu.dst.sel = ctx->temp_reg;
1652	alu.dst.write = 1;
1653	alu.last = 1;
1654	r = r600_bytecode_add_alu(ctx->bc, &alu);
1655	if (r)
1656		return r;
1657	/* b * LOG2(a) */
1658	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1659	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1660	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
1661	alu.src[1].sel = ctx->temp_reg;
1662	alu.dst.sel = ctx->temp_reg;
1663	alu.dst.write = 1;
1664	alu.last = 1;
1665	r = r600_bytecode_add_alu(ctx->bc, &alu);
1666	if (r)
1667		return r;
1668	/* POW(a,b) = EXP2(b * LOG2(a))*/
1669	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1670	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1671	alu.src[0].sel = ctx->temp_reg;
1672	alu.dst.sel = ctx->temp_reg;
1673	alu.dst.write = 1;
1674	alu.last = 1;
1675	r = r600_bytecode_add_alu(ctx->bc, &alu);
1676	if (r)
1677		return r;
1678	return tgsi_helper_tempx_replicate(ctx);
1679}
1680
1681static int tgsi_ssg(struct r600_shader_ctx *ctx)
1682{
1683	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1684	struct r600_bytecode_alu alu;
1685	int i, r;
1686
1687	/* tmp = (src > 0 ? 1 : src) */
1688	for (i = 0; i < 4; i++) {
1689		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1690		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1691		alu.is_op3 = 1;
1692
1693		alu.dst.sel = ctx->temp_reg;
1694		alu.dst.chan = i;
1695
1696		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
1697		alu.src[1].sel = V_SQ_ALU_SRC_1;
1698		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
1699
1700		if (i == 3)
1701			alu.last = 1;
1702		r = r600_bytecode_add_alu(ctx->bc, &alu);
1703		if (r)
1704			return r;
1705	}
1706
1707	/* dst = (-tmp > 0 ? -1 : tmp) */
1708	for (i = 0; i < 4; i++) {
1709		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1710		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1711		alu.is_op3 = 1;
1712		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1713
1714		alu.src[0].sel = ctx->temp_reg;
1715		alu.src[0].chan = i;
1716		alu.src[0].neg = 1;
1717
1718		alu.src[1].sel = V_SQ_ALU_SRC_1;
1719		alu.src[1].neg = 1;
1720
1721		alu.src[2].sel = ctx->temp_reg;
1722		alu.src[2].chan = i;
1723
1724		if (i == 3)
1725			alu.last = 1;
1726		r = r600_bytecode_add_alu(ctx->bc, &alu);
1727		if (r)
1728			return r;
1729	}
1730	return 0;
1731}
1732
1733static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1734{
1735	struct r600_bytecode_alu alu;
1736	int i, r;
1737
1738	for (i = 0; i < 4; i++) {
1739		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1740		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1741			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1742			alu.dst.chan = i;
1743		} else {
1744			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1745			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1746			alu.src[0].sel = ctx->temp_reg;
1747			alu.src[0].chan = i;
1748		}
1749		if (i == 3) {
1750			alu.last = 1;
1751		}
1752		r = r600_bytecode_add_alu(ctx->bc, &alu);
1753		if (r)
1754			return r;
1755	}
1756	return 0;
1757}
1758
1759static int tgsi_op3(struct r600_shader_ctx *ctx)
1760{
1761	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1762	struct r600_bytecode_alu alu;
1763	int i, j, r;
1764	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1765
1766	for (i = 0; i < lasti + 1; i++) {
1767		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1768			continue;
1769
1770		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1771		alu.inst = ctx->inst_info->r600_opcode;
1772		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1773			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1774		}
1775
1776		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1777		alu.dst.chan = i;
1778		alu.dst.write = 1;
1779		alu.is_op3 = 1;
1780		if (i == lasti) {
1781			alu.last = 1;
1782		}
1783		r = r600_bytecode_add_alu(ctx->bc, &alu);
1784		if (r)
1785			return r;
1786	}
1787	return 0;
1788}
1789
1790static int tgsi_dp(struct r600_shader_ctx *ctx)
1791{
1792	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1793	struct r600_bytecode_alu alu;
1794	int i, j, r;
1795
1796	for (i = 0; i < 4; i++) {
1797		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1798		alu.inst = ctx->inst_info->r600_opcode;
1799		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1800			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1801		}
1802
1803		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1804		alu.dst.chan = i;
1805		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1806		/* handle some special cases */
1807		switch (ctx->inst_info->tgsi_opcode) {
1808		case TGSI_OPCODE_DP2:
1809			if (i > 1) {
1810				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1811				alu.src[0].chan = alu.src[1].chan = 0;
1812			}
1813			break;
1814		case TGSI_OPCODE_DP3:
1815			if (i > 2) {
1816				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1817				alu.src[0].chan = alu.src[1].chan = 0;
1818			}
1819			break;
1820		case TGSI_OPCODE_DPH:
1821			if (i == 3) {
1822				alu.src[0].sel = V_SQ_ALU_SRC_1;
1823				alu.src[0].chan = 0;
1824				alu.src[0].neg = 0;
1825			}
1826			break;
1827		default:
1828			break;
1829		}
1830		if (i == 3) {
1831			alu.last = 1;
1832		}
1833		r = r600_bytecode_add_alu(ctx->bc, &alu);
1834		if (r)
1835			return r;
1836	}
1837	return 0;
1838}
1839
1840static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
1841						    unsigned index)
1842{
1843	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1844	return 	(inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
1845		inst->Src[index].Register.File != TGSI_FILE_INPUT) ||
1846		ctx->src[index].neg || ctx->src[index].abs;
1847}
1848
1849static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
1850					unsigned index)
1851{
1852	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1853	return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
1854}
1855
1856static int tgsi_tex(struct r600_shader_ctx *ctx)
1857{
1858	static float one_point_five = 1.5f;
1859	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1860	struct r600_bytecode_tex tex;
1861	struct r600_bytecode_alu alu;
1862	unsigned src_gpr;
1863	int r, i, j;
1864	int opcode;
1865	/* Texture fetch instructions can only use gprs as source.
1866	 * Also they cannot negate the source or take the absolute value */
1867	const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0);
1868	boolean src_loaded = FALSE;
1869	unsigned sampler_src_reg = 1;
1870
1871	src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
1872
1873	if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
1874		/* TGSI moves the sampler to src reg 3 for TXD */
1875		sampler_src_reg = 3;
1876
1877		for (i = 1; i < 3; i++) {
1878			/* set gradients h/v */
1879			memset(&tex, 0, sizeof(struct r600_bytecode_tex));
1880			tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H :
1881				SQ_TEX_INST_SET_GRADIENTS_V;
1882			tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
1883			tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
1884
1885			if (tgsi_tex_src_requires_loading(ctx, i)) {
1886				tex.src_gpr = r600_get_temp(ctx);
1887				tex.src_sel_x = 0;
1888				tex.src_sel_y = 1;
1889				tex.src_sel_z = 2;
1890				tex.src_sel_w = 3;
1891
1892				for (j = 0; j < 4; j++) {
1893					memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1894					alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1895                                        r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
1896                                        alu.dst.sel = tex.src_gpr;
1897                                        alu.dst.chan = j;
1898                                        if (j == 3)
1899                                                alu.last = 1;
1900                                        alu.dst.write = 1;
1901                                        r = r600_bytecode_add_alu(ctx->bc, &alu);
1902                                        if (r)
1903                                                return r;
1904				}
1905
1906			} else {
1907				tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i);
1908				tex.src_sel_x = ctx->src[i].swizzle[0];
1909				tex.src_sel_y = ctx->src[i].swizzle[1];
1910				tex.src_sel_z = ctx->src[i].swizzle[2];
1911				tex.src_sel_w = ctx->src[i].swizzle[3];
1912				tex.src_rel = ctx->src[i].rel;
1913			}
1914			tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */
1915			tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
1916			if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1917				tex.coord_type_x = 1;
1918				tex.coord_type_y = 1;
1919				tex.coord_type_z = 1;
1920				tex.coord_type_w = 1;
1921			}
1922			r = r600_bytecode_add_tex(ctx->bc, &tex);
1923			if (r)
1924				return r;
1925		}
1926	} else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1927		int out_chan;
1928		/* Add perspective divide */
1929		if (ctx->bc->chip_class == CAYMAN) {
1930			out_chan = 2;
1931			for (i = 0; i < 3; i++) {
1932				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1933				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1934				r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
1935
1936				alu.dst.sel = ctx->temp_reg;
1937				alu.dst.chan = i;
1938				if (i == 2)
1939					alu.last = 1;
1940				if (out_chan == i)
1941					alu.dst.write = 1;
1942				r = r600_bytecode_add_alu(ctx->bc, &alu);
1943				if (r)
1944					return r;
1945			}
1946
1947		} else {
1948			out_chan = 3;
1949			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1950			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1951			r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
1952
1953			alu.dst.sel = ctx->temp_reg;
1954			alu.dst.chan = out_chan;
1955			alu.last = 1;
1956			alu.dst.write = 1;
1957			r = r600_bytecode_add_alu(ctx->bc, &alu);
1958			if (r)
1959				return r;
1960		}
1961
1962		for (i = 0; i < 3; i++) {
1963			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1964			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1965			alu.src[0].sel = ctx->temp_reg;
1966			alu.src[0].chan = out_chan;
1967			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1968			alu.dst.sel = ctx->temp_reg;
1969			alu.dst.chan = i;
1970			alu.dst.write = 1;
1971			r = r600_bytecode_add_alu(ctx->bc, &alu);
1972			if (r)
1973				return r;
1974		}
1975		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1976		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1977		alu.src[0].sel = V_SQ_ALU_SRC_1;
1978		alu.src[0].chan = 0;
1979		alu.dst.sel = ctx->temp_reg;
1980		alu.dst.chan = 3;
1981		alu.last = 1;
1982		alu.dst.write = 1;
1983		r = r600_bytecode_add_alu(ctx->bc, &alu);
1984		if (r)
1985			return r;
1986		src_loaded = TRUE;
1987		src_gpr = ctx->temp_reg;
1988	}
1989
1990	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1991		static const unsigned src0_swizzle[] = {2, 2, 0, 1};
1992		static const unsigned src1_swizzle[] = {1, 0, 2, 2};
1993
1994		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1995		for (i = 0; i < 4; i++) {
1996			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1997			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1998			r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
1999			r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
2000			alu.dst.sel = ctx->temp_reg;
2001			alu.dst.chan = i;
2002			if (i == 3)
2003				alu.last = 1;
2004			alu.dst.write = 1;
2005			r = r600_bytecode_add_alu(ctx->bc, &alu);
2006			if (r)
2007				return r;
2008		}
2009
2010		/* tmp1.z = RCP_e(|tmp1.z|) */
2011		if (ctx->bc->chip_class == CAYMAN) {
2012			for (i = 0; i < 3; i++) {
2013				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2014				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2015				alu.src[0].sel = ctx->temp_reg;
2016				alu.src[0].chan = 2;
2017				alu.src[0].abs = 1;
2018				alu.dst.sel = ctx->temp_reg;
2019				alu.dst.chan = i;
2020				if (i == 2)
2021					alu.dst.write = 1;
2022				if (i == 2)
2023					alu.last = 1;
2024				r = r600_bytecode_add_alu(ctx->bc, &alu);
2025				if (r)
2026					return r;
2027			}
2028		} else {
2029			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2030			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2031			alu.src[0].sel = ctx->temp_reg;
2032			alu.src[0].chan = 2;
2033			alu.src[0].abs = 1;
2034			alu.dst.sel = ctx->temp_reg;
2035			alu.dst.chan = 2;
2036			alu.dst.write = 1;
2037			alu.last = 1;
2038			r = r600_bytecode_add_alu(ctx->bc, &alu);
2039			if (r)
2040				return r;
2041		}
2042
2043		/* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
2044		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
2045		 * muladd has no writemask, have to use another temp
2046		 */
2047		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2048		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2049		alu.is_op3 = 1;
2050
2051		alu.src[0].sel = ctx->temp_reg;
2052		alu.src[0].chan = 0;
2053		alu.src[1].sel = ctx->temp_reg;
2054		alu.src[1].chan = 2;
2055
2056		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
2057		alu.src[2].chan = 0;
2058		alu.src[2].value = *(uint32_t *)&one_point_five;
2059
2060		alu.dst.sel = ctx->temp_reg;
2061		alu.dst.chan = 0;
2062		alu.dst.write = 1;
2063
2064		r = r600_bytecode_add_alu(ctx->bc, &alu);
2065		if (r)
2066			return r;
2067
2068		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2069		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2070		alu.is_op3 = 1;
2071
2072		alu.src[0].sel = ctx->temp_reg;
2073		alu.src[0].chan = 1;
2074		alu.src[1].sel = ctx->temp_reg;
2075		alu.src[1].chan = 2;
2076
2077		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
2078		alu.src[2].chan = 0;
2079		alu.src[2].value = *(uint32_t *)&one_point_five;
2080
2081		alu.dst.sel = ctx->temp_reg;
2082		alu.dst.chan = 1;
2083		alu.dst.write = 1;
2084
2085		alu.last = 1;
2086		r = r600_bytecode_add_alu(ctx->bc, &alu);
2087		if (r)
2088			return r;
2089
2090		src_loaded = TRUE;
2091		src_gpr = ctx->temp_reg;
2092	}
2093
2094	if (src_requires_loading && !src_loaded) {
2095		for (i = 0; i < 4; i++) {
2096			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2097			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2098			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2099			alu.dst.sel = ctx->temp_reg;
2100			alu.dst.chan = i;
2101			if (i == 3)
2102				alu.last = 1;
2103			alu.dst.write = 1;
2104			r = r600_bytecode_add_alu(ctx->bc, &alu);
2105			if (r)
2106				return r;
2107		}
2108		src_loaded = TRUE;
2109		src_gpr = ctx->temp_reg;
2110	}
2111
2112	opcode = ctx->inst_info->r600_opcode;
2113	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) {
2114		switch (opcode) {
2115		case SQ_TEX_INST_SAMPLE:
2116			opcode = SQ_TEX_INST_SAMPLE_C;
2117			break;
2118		case SQ_TEX_INST_SAMPLE_L:
2119			opcode = SQ_TEX_INST_SAMPLE_C_L;
2120			break;
2121		case SQ_TEX_INST_SAMPLE_G:
2122			opcode = SQ_TEX_INST_SAMPLE_C_G;
2123			break;
2124		}
2125	}
2126
2127	memset(&tex, 0, sizeof(struct r600_bytecode_tex));
2128	tex.inst = opcode;
2129
2130	tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
2131	tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
2132	tex.src_gpr = src_gpr;
2133	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
2134	tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
2135	tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
2136	tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
2137	tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
2138	if (src_loaded) {
2139		tex.src_sel_x = 0;
2140		tex.src_sel_y = 1;
2141		tex.src_sel_z = 2;
2142		tex.src_sel_w = 3;
2143	} else {
2144		tex.src_sel_x = ctx->src[0].swizzle[0];
2145		tex.src_sel_y = ctx->src[0].swizzle[1];
2146		tex.src_sel_z = ctx->src[0].swizzle[2];
2147		tex.src_sel_w = ctx->src[0].swizzle[3];
2148		tex.src_rel = ctx->src[0].rel;
2149	}
2150
2151	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
2152		tex.src_sel_x = 1;
2153		tex.src_sel_y = 0;
2154		tex.src_sel_z = 3;
2155		tex.src_sel_w = 1;
2156	}
2157
2158	if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
2159		tex.coord_type_x = 1;
2160		tex.coord_type_y = 1;
2161		tex.coord_type_z = 1;
2162		tex.coord_type_w = 1;
2163	}
2164
2165	if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) {
2166		tex.coord_type_z = 0;
2167		tex.src_sel_z = tex.src_sel_y;
2168	} else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY)
2169		tex.coord_type_z = 0;
2170
2171	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
2172		tex.src_sel_w = tex.src_sel_z;
2173
2174	r = r600_bytecode_add_tex(ctx->bc, &tex);
2175	if (r)
2176		return r;
2177
2178	/* add shadow ambient support  - gallium doesn't do it yet */
2179	return 0;
2180}
2181
2182static int tgsi_lrp(struct r600_shader_ctx *ctx)
2183{
2184	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2185	struct r600_bytecode_alu alu;
2186	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2187	unsigned i;
2188	int r;
2189
2190	/* optimize if it's just an equal balance */
2191	if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
2192		for (i = 0; i < lasti + 1; i++) {
2193			if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2194				continue;
2195
2196			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2197			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2198			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2199			r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
2200			alu.omod = 3;
2201			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2202			alu.dst.chan = i;
2203			if (i == lasti) {
2204				alu.last = 1;
2205			}
2206			r = r600_bytecode_add_alu(ctx->bc, &alu);
2207			if (r)
2208				return r;
2209		}
2210		return 0;
2211	}
2212
2213	/* 1 - src0 */
2214	for (i = 0; i < lasti + 1; i++) {
2215		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2216			continue;
2217
2218		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2219		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2220		alu.src[0].sel = V_SQ_ALU_SRC_1;
2221		alu.src[0].chan = 0;
2222		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2223		r600_bytecode_src_toggle_neg(&alu.src[1]);
2224		alu.dst.sel = ctx->temp_reg;
2225		alu.dst.chan = i;
2226		if (i == lasti) {
2227			alu.last = 1;
2228		}
2229		alu.dst.write = 1;
2230		r = r600_bytecode_add_alu(ctx->bc, &alu);
2231		if (r)
2232			return r;
2233	}
2234
2235	/* (1 - src0) * src2 */
2236	for (i = 0; i < lasti + 1; i++) {
2237		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2238			continue;
2239
2240		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2241		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2242		alu.src[0].sel = ctx->temp_reg;
2243		alu.src[0].chan = i;
2244		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
2245		alu.dst.sel = ctx->temp_reg;
2246		alu.dst.chan = i;
2247		if (i == lasti) {
2248			alu.last = 1;
2249		}
2250		alu.dst.write = 1;
2251		r = r600_bytecode_add_alu(ctx->bc, &alu);
2252		if (r)
2253			return r;
2254	}
2255
2256	/* src0 * src1 + (1 - src0) * src2 */
2257	for (i = 0; i < lasti + 1; i++) {
2258		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2259			continue;
2260
2261		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2262		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2263		alu.is_op3 = 1;
2264		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2265		r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2266		alu.src[2].sel = ctx->temp_reg;
2267		alu.src[2].chan = i;
2268
2269		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2270		alu.dst.chan = i;
2271		if (i == lasti) {
2272			alu.last = 1;
2273		}
2274		r = r600_bytecode_add_alu(ctx->bc, &alu);
2275		if (r)
2276			return r;
2277	}
2278	return 0;
2279}
2280
2281static int tgsi_cmp(struct r600_shader_ctx *ctx)
2282{
2283	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2284	struct r600_bytecode_alu alu;
2285	int i, r;
2286	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2287
2288	for (i = 0; i < lasti + 1; i++) {
2289		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2290			continue;
2291
2292		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2293		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
2294		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2295		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
2296		r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
2297		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2298		alu.dst.chan = i;
2299		alu.dst.write = 1;
2300		alu.is_op3 = 1;
2301		if (i == lasti)
2302			alu.last = 1;
2303		r = r600_bytecode_add_alu(ctx->bc, &alu);
2304		if (r)
2305			return r;
2306	}
2307	return 0;
2308}
2309
2310static int tgsi_xpd(struct r600_shader_ctx *ctx)
2311{
2312	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2313	static const unsigned int src0_swizzle[] = {2, 0, 1};
2314	static const unsigned int src1_swizzle[] = {1, 2, 0};
2315	struct r600_bytecode_alu alu;
2316	uint32_t use_temp = 0;
2317	int i, r;
2318
2319	if (inst->Dst[0].Register.WriteMask != 0xf)
2320		use_temp = 1;
2321
2322	for (i = 0; i < 4; i++) {
2323		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2324		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2325		if (i < 3) {
2326			r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
2327			r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
2328		} else {
2329			alu.src[0].sel = V_SQ_ALU_SRC_0;
2330			alu.src[0].chan = i;
2331			alu.src[1].sel = V_SQ_ALU_SRC_0;
2332			alu.src[1].chan = i;
2333		}
2334
2335		alu.dst.sel = ctx->temp_reg;
2336		alu.dst.chan = i;
2337		alu.dst.write = 1;
2338
2339		if (i == 3)
2340			alu.last = 1;
2341		r = r600_bytecode_add_alu(ctx->bc, &alu);
2342		if (r)
2343			return r;
2344	}
2345
2346	for (i = 0; i < 4; i++) {
2347		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2348		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2349
2350		if (i < 3) {
2351			r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
2352			r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
2353		} else {
2354			alu.src[0].sel = V_SQ_ALU_SRC_0;
2355			alu.src[0].chan = i;
2356			alu.src[1].sel = V_SQ_ALU_SRC_0;
2357			alu.src[1].chan = i;
2358		}
2359
2360		alu.src[2].sel = ctx->temp_reg;
2361		alu.src[2].neg = 1;
2362		alu.src[2].chan = i;
2363
2364		if (use_temp)
2365			alu.dst.sel = ctx->temp_reg;
2366		else
2367			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2368		alu.dst.chan = i;
2369		alu.dst.write = 1;
2370		alu.is_op3 = 1;
2371		if (i == 3)
2372			alu.last = 1;
2373		r = r600_bytecode_add_alu(ctx->bc, &alu);
2374		if (r)
2375			return r;
2376	}
2377	if (use_temp)
2378		return tgsi_helper_copy(ctx, inst);
2379	return 0;
2380}
2381
2382static int tgsi_exp(struct r600_shader_ctx *ctx)
2383{
2384	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2385	struct r600_bytecode_alu alu;
2386	int r;
2387	int i;
2388
2389	/* result.x = 2^floor(src); */
2390	if (inst->Dst[0].Register.WriteMask & 1) {
2391		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2392
2393		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2394		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2395
2396		alu.dst.sel = ctx->temp_reg;
2397		alu.dst.chan = 0;
2398		alu.dst.write = 1;
2399		alu.last = 1;
2400		r = r600_bytecode_add_alu(ctx->bc, &alu);
2401		if (r)
2402			return r;
2403
2404		if (ctx->bc->chip_class == CAYMAN) {
2405			for (i = 0; i < 3; i++) {
2406				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2407				alu.src[0].sel = ctx->temp_reg;
2408				alu.src[0].chan = 0;
2409
2410				alu.dst.sel = ctx->temp_reg;
2411				alu.dst.chan = i;
2412				if (i == 0)
2413					alu.dst.write = 1;
2414				if (i == 2)
2415					alu.last = 1;
2416				r = r600_bytecode_add_alu(ctx->bc, &alu);
2417				if (r)
2418					return r;
2419			}
2420		} else {
2421			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2422			alu.src[0].sel = ctx->temp_reg;
2423			alu.src[0].chan = 0;
2424
2425			alu.dst.sel = ctx->temp_reg;
2426			alu.dst.chan = 0;
2427			alu.dst.write = 1;
2428			alu.last = 1;
2429			r = r600_bytecode_add_alu(ctx->bc, &alu);
2430			if (r)
2431				return r;
2432		}
2433	}
2434
2435	/* result.y = tmp - floor(tmp); */
2436	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2437		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2438
2439		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2440		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2441
2442		alu.dst.sel = ctx->temp_reg;
2443#if 0
2444		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2445		if (r)
2446			return r;
2447#endif
2448		alu.dst.write = 1;
2449		alu.dst.chan = 1;
2450
2451		alu.last = 1;
2452
2453		r = r600_bytecode_add_alu(ctx->bc, &alu);
2454		if (r)
2455			return r;
2456	}
2457
2458	/* result.z = RoughApprox2ToX(tmp);*/
2459	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2460		if (ctx->bc->chip_class == CAYMAN) {
2461			for (i = 0; i < 3; i++) {
2462				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2463				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2464				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2465
2466				alu.dst.sel = ctx->temp_reg;
2467				alu.dst.chan = i;
2468				if (i == 2) {
2469					alu.dst.write = 1;
2470					alu.last = 1;
2471				}
2472
2473				r = r600_bytecode_add_alu(ctx->bc, &alu);
2474				if (r)
2475					return r;
2476			}
2477		} else {
2478			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2479			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2480			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2481
2482			alu.dst.sel = ctx->temp_reg;
2483			alu.dst.write = 1;
2484			alu.dst.chan = 2;
2485
2486			alu.last = 1;
2487
2488			r = r600_bytecode_add_alu(ctx->bc, &alu);
2489			if (r)
2490				return r;
2491		}
2492	}
2493
2494	/* result.w = 1.0;*/
2495	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2496		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2497
2498		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2499		alu.src[0].sel = V_SQ_ALU_SRC_1;
2500		alu.src[0].chan = 0;
2501
2502		alu.dst.sel = ctx->temp_reg;
2503		alu.dst.chan = 3;
2504		alu.dst.write = 1;
2505		alu.last = 1;
2506		r = r600_bytecode_add_alu(ctx->bc, &alu);
2507		if (r)
2508			return r;
2509	}
2510	return tgsi_helper_copy(ctx, inst);
2511}
2512
2513static int tgsi_log(struct r600_shader_ctx *ctx)
2514{
2515	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2516	struct r600_bytecode_alu alu;
2517	int r;
2518	int i;
2519
2520	/* result.x = floor(log2(|src|)); */
2521	if (inst->Dst[0].Register.WriteMask & 1) {
2522		if (ctx->bc->chip_class == CAYMAN) {
2523			for (i = 0; i < 3; i++) {
2524				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2525
2526				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2527				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2528				r600_bytecode_src_set_abs(&alu.src[0]);
2529
2530				alu.dst.sel = ctx->temp_reg;
2531				alu.dst.chan = i;
2532				if (i == 0)
2533					alu.dst.write = 1;
2534				if (i == 2)
2535					alu.last = 1;
2536				r = r600_bytecode_add_alu(ctx->bc, &alu);
2537				if (r)
2538					return r;
2539			}
2540
2541		} else {
2542			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2543
2544			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2545			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2546			r600_bytecode_src_set_abs(&alu.src[0]);
2547
2548			alu.dst.sel = ctx->temp_reg;
2549			alu.dst.chan = 0;
2550			alu.dst.write = 1;
2551			alu.last = 1;
2552			r = r600_bytecode_add_alu(ctx->bc, &alu);
2553			if (r)
2554				return r;
2555		}
2556
2557		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2558		alu.src[0].sel = ctx->temp_reg;
2559		alu.src[0].chan = 0;
2560
2561		alu.dst.sel = ctx->temp_reg;
2562		alu.dst.chan = 0;
2563		alu.dst.write = 1;
2564		alu.last = 1;
2565
2566		r = r600_bytecode_add_alu(ctx->bc, &alu);
2567		if (r)
2568			return r;
2569	}
2570
2571	/* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
2572	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2573
2574		if (ctx->bc->chip_class == CAYMAN) {
2575			for (i = 0; i < 3; i++) {
2576				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2577
2578				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2579				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2580				r600_bytecode_src_set_abs(&alu.src[0]);
2581
2582				alu.dst.sel = ctx->temp_reg;
2583				alu.dst.chan = i;
2584				if (i == 1)
2585					alu.dst.write = 1;
2586				if (i == 2)
2587					alu.last = 1;
2588
2589				r = r600_bytecode_add_alu(ctx->bc, &alu);
2590				if (r)
2591					return r;
2592			}
2593		} else {
2594			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2595
2596			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2597			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2598			r600_bytecode_src_set_abs(&alu.src[0]);
2599
2600			alu.dst.sel = ctx->temp_reg;
2601			alu.dst.chan = 1;
2602			alu.dst.write = 1;
2603			alu.last = 1;
2604
2605			r = r600_bytecode_add_alu(ctx->bc, &alu);
2606			if (r)
2607				return r;
2608		}
2609
2610		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2611
2612		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2613		alu.src[0].sel = ctx->temp_reg;
2614		alu.src[0].chan = 1;
2615
2616		alu.dst.sel = ctx->temp_reg;
2617		alu.dst.chan = 1;
2618		alu.dst.write = 1;
2619		alu.last = 1;
2620
2621		r = r600_bytecode_add_alu(ctx->bc, &alu);
2622		if (r)
2623			return r;
2624
2625		if (ctx->bc->chip_class == CAYMAN) {
2626			for (i = 0; i < 3; i++) {
2627				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2628				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2629				alu.src[0].sel = ctx->temp_reg;
2630				alu.src[0].chan = 1;
2631
2632				alu.dst.sel = ctx->temp_reg;
2633				alu.dst.chan = i;
2634				if (i == 1)
2635					alu.dst.write = 1;
2636				if (i == 2)
2637					alu.last = 1;
2638
2639				r = r600_bytecode_add_alu(ctx->bc, &alu);
2640				if (r)
2641					return r;
2642			}
2643		} else {
2644			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2645			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2646			alu.src[0].sel = ctx->temp_reg;
2647			alu.src[0].chan = 1;
2648
2649			alu.dst.sel = ctx->temp_reg;
2650			alu.dst.chan = 1;
2651			alu.dst.write = 1;
2652			alu.last = 1;
2653
2654			r = r600_bytecode_add_alu(ctx->bc, &alu);
2655			if (r)
2656				return r;
2657		}
2658
2659		if (ctx->bc->chip_class == CAYMAN) {
2660			for (i = 0; i < 3; i++) {
2661				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2662				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2663				alu.src[0].sel = ctx->temp_reg;
2664				alu.src[0].chan = 1;
2665
2666				alu.dst.sel = ctx->temp_reg;
2667				alu.dst.chan = i;
2668				if (i == 1)
2669					alu.dst.write = 1;
2670				if (i == 2)
2671					alu.last = 1;
2672
2673				r = r600_bytecode_add_alu(ctx->bc, &alu);
2674				if (r)
2675					return r;
2676			}
2677		} else {
2678			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2679			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2680			alu.src[0].sel = ctx->temp_reg;
2681			alu.src[0].chan = 1;
2682
2683			alu.dst.sel = ctx->temp_reg;
2684			alu.dst.chan = 1;
2685			alu.dst.write = 1;
2686			alu.last = 1;
2687
2688			r = r600_bytecode_add_alu(ctx->bc, &alu);
2689			if (r)
2690				return r;
2691		}
2692
2693		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2694
2695		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2696
2697		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2698		r600_bytecode_src_set_abs(&alu.src[0]);
2699
2700		alu.src[1].sel = ctx->temp_reg;
2701		alu.src[1].chan = 1;
2702
2703		alu.dst.sel = ctx->temp_reg;
2704		alu.dst.chan = 1;
2705		alu.dst.write = 1;
2706		alu.last = 1;
2707
2708		r = r600_bytecode_add_alu(ctx->bc, &alu);
2709		if (r)
2710			return r;
2711	}
2712
2713	/* result.z = log2(|src|);*/
2714	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2715		if (ctx->bc->chip_class == CAYMAN) {
2716			for (i = 0; i < 3; i++) {
2717				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2718
2719				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2720				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2721				r600_bytecode_src_set_abs(&alu.src[0]);
2722
2723				alu.dst.sel = ctx->temp_reg;
2724				if (i == 2)
2725					alu.dst.write = 1;
2726				alu.dst.chan = i;
2727				if (i == 2)
2728					alu.last = 1;
2729
2730				r = r600_bytecode_add_alu(ctx->bc, &alu);
2731				if (r)
2732					return r;
2733			}
2734		} else {
2735			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2736
2737			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2738			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2739			r600_bytecode_src_set_abs(&alu.src[0]);
2740
2741			alu.dst.sel = ctx->temp_reg;
2742			alu.dst.write = 1;
2743			alu.dst.chan = 2;
2744			alu.last = 1;
2745
2746			r = r600_bytecode_add_alu(ctx->bc, &alu);
2747			if (r)
2748				return r;
2749		}
2750	}
2751
2752	/* result.w = 1.0; */
2753	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2754		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2755
2756		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2757		alu.src[0].sel = V_SQ_ALU_SRC_1;
2758		alu.src[0].chan = 0;
2759
2760		alu.dst.sel = ctx->temp_reg;
2761		alu.dst.chan = 3;
2762		alu.dst.write = 1;
2763		alu.last = 1;
2764
2765		r = r600_bytecode_add_alu(ctx->bc, &alu);
2766		if (r)
2767			return r;
2768	}
2769
2770	return tgsi_helper_copy(ctx, inst);
2771}
2772
2773static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2774{
2775	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2776	struct r600_bytecode_alu alu;
2777	int r;
2778
2779	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2780
2781	switch (inst->Instruction.Opcode) {
2782	case TGSI_OPCODE_ARL:
2783		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2784		break;
2785	case TGSI_OPCODE_ARR:
2786		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2787		break;
2788	default:
2789		assert(0);
2790		return -1;
2791	}
2792
2793	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2794	alu.last = 1;
2795	alu.dst.sel = ctx->ar_reg;
2796	alu.dst.write = 1;
2797	r = r600_bytecode_add_alu(ctx->bc, &alu);
2798	if (r)
2799		return r;
2800
2801	/* TODO: Note that the MOVA can be avoided if we never use AR for
2802	 * indexing non-CB registers in the current ALU clause. Similarly, we
2803	 * need to load AR from ar_reg again if we started a new clause
2804	 * between ARL and AR usage. The easy way to do that is to remove
2805	 * the MOVA here, and load it for the first AR access after ar_reg
2806	 * has been modified in each clause. */
2807	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2808	alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2809	alu.src[0].sel = ctx->ar_reg;
2810	alu.src[0].chan = 0;
2811	alu.last = 1;
2812	r = r600_bytecode_add_alu(ctx->bc, &alu);
2813	if (r)
2814		return r;
2815	return 0;
2816}
2817static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2818{
2819	/* TODO from r600c, ar values don't persist between clauses */
2820	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2821	struct r600_bytecode_alu alu;
2822	int r;
2823
2824	switch (inst->Instruction.Opcode) {
2825	case TGSI_OPCODE_ARL:
2826		memset(&alu, 0, sizeof(alu));
2827		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
2828		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2829		alu.dst.sel = ctx->ar_reg;
2830		alu.dst.write = 1;
2831		alu.last = 1;
2832
2833		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2834			return r;
2835
2836		memset(&alu, 0, sizeof(alu));
2837		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2838		alu.src[0].sel = ctx->ar_reg;
2839		alu.dst.sel = ctx->ar_reg;
2840		alu.dst.write = 1;
2841		alu.last = 1;
2842
2843		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2844			return r;
2845		break;
2846	case TGSI_OPCODE_ARR:
2847		memset(&alu, 0, sizeof(alu));
2848		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2849		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2850		alu.dst.sel = ctx->ar_reg;
2851		alu.dst.write = 1;
2852		alu.last = 1;
2853
2854		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
2855			return r;
2856		break;
2857	default:
2858		assert(0);
2859		return -1;
2860	}
2861
2862	memset(&alu, 0, sizeof(alu));
2863	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2864	alu.src[0].sel = ctx->ar_reg;
2865	alu.last = 1;
2866
2867	r = r600_bytecode_add_alu(ctx->bc, &alu);
2868	if (r)
2869		return r;
2870	ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2871	return 0;
2872}
2873
2874static int tgsi_opdst(struct r600_shader_ctx *ctx)
2875{
2876	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2877	struct r600_bytecode_alu alu;
2878	int i, r = 0;
2879
2880	for (i = 0; i < 4; i++) {
2881		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2882
2883		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2884		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2885
2886		if (i == 0 || i == 3) {
2887			alu.src[0].sel = V_SQ_ALU_SRC_1;
2888		} else {
2889			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2890		}
2891
2892		if (i == 0 || i == 2) {
2893			alu.src[1].sel = V_SQ_ALU_SRC_1;
2894		} else {
2895			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2896		}
2897		if (i == 3)
2898			alu.last = 1;
2899		r = r600_bytecode_add_alu(ctx->bc, &alu);
2900		if (r)
2901			return r;
2902	}
2903	return 0;
2904}
2905
2906static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2907{
2908	struct r600_bytecode_alu alu;
2909	int r;
2910
2911	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2912	alu.inst = opcode;
2913	alu.predicate = 1;
2914
2915	alu.dst.sel = ctx->temp_reg;
2916	alu.dst.write = 1;
2917	alu.dst.chan = 0;
2918
2919	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2920	alu.src[1].sel = V_SQ_ALU_SRC_0;
2921	alu.src[1].chan = 0;
2922
2923	alu.last = 1;
2924
2925	r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2926	if (r)
2927		return r;
2928	return 0;
2929}
2930
2931static int pops(struct r600_shader_ctx *ctx, int pops)
2932{
2933	unsigned force_pop = ctx->bc->force_add_cf;
2934
2935	if (!force_pop) {
2936		int alu_pop = 3;
2937		if (ctx->bc->cf_last) {
2938			if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
2939				alu_pop = 0;
2940			else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
2941				alu_pop = 1;
2942		}
2943		alu_pop += pops;
2944		if (alu_pop == 1) {
2945			ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
2946			ctx->bc->force_add_cf = 1;
2947		} else if (alu_pop == 2) {
2948			ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
2949			ctx->bc->force_add_cf = 1;
2950		} else {
2951			force_pop = 1;
2952		}
2953	}
2954
2955	if (force_pop) {
2956		r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2957		ctx->bc->cf_last->pop_count = pops;
2958		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
2959	}
2960
2961	return 0;
2962}
2963
2964static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2965{
2966	switch(reason) {
2967	case FC_PUSH_VPM:
2968		ctx->bc->callstack[ctx->bc->call_sp].current--;
2969		break;
2970	case FC_PUSH_WQM:
2971	case FC_LOOP:
2972		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2973		break;
2974	case FC_REP:
2975		/* TOODO : for 16 vp asic should -= 2; */
2976		ctx->bc->callstack[ctx->bc->call_sp].current --;
2977		break;
2978	}
2979}
2980
2981static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2982{
2983	if (check_max_only) {
2984		int diff;
2985		switch (reason) {
2986		case FC_PUSH_VPM:
2987			diff = 1;
2988			break;
2989		case FC_PUSH_WQM:
2990			diff = 4;
2991			break;
2992		default:
2993			assert(0);
2994			diff = 0;
2995		}
2996		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2997		    ctx->bc->callstack[ctx->bc->call_sp].max) {
2998			ctx->bc->callstack[ctx->bc->call_sp].max =
2999				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
3000		}
3001		return;
3002	}
3003	switch (reason) {
3004	case FC_PUSH_VPM:
3005		ctx->bc->callstack[ctx->bc->call_sp].current++;
3006		break;
3007	case FC_PUSH_WQM:
3008	case FC_LOOP:
3009		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
3010		break;
3011	case FC_REP:
3012		ctx->bc->callstack[ctx->bc->call_sp].current++;
3013		break;
3014	}
3015
3016	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
3017	    ctx->bc->callstack[ctx->bc->call_sp].max) {
3018		ctx->bc->callstack[ctx->bc->call_sp].max =
3019			ctx->bc->callstack[ctx->bc->call_sp].current;
3020	}
3021}
3022
3023static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
3024{
3025	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
3026
3027	sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid,
3028						sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1));
3029	sp->mid[sp->num_mid] = ctx->bc->cf_last;
3030	sp->num_mid++;
3031}
3032
3033static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
3034{
3035	ctx->bc->fc_sp++;
3036	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
3037	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
3038}
3039
3040static void fc_poplevel(struct r600_shader_ctx *ctx)
3041{
3042	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
3043	if (sp->mid) {
3044		free(sp->mid);
3045		sp->mid = NULL;
3046	}
3047	sp->num_mid = 0;
3048	sp->start = NULL;
3049	sp->type = 0;
3050	ctx->bc->fc_sp--;
3051}
3052
#if 0
/*
 * Disabled helpers for return / break-on-flag handling.  None of this is
 * compiled; several of the helpers are empty stubs.
 */

/* Add a RETURN CF instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/* Add a JUMP CF instruction popping `pops` entries; `offset` is not used yet. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	r600_bytecode_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: emits nothing. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/* Test the flag, jump over the return sequence, otherwise pop and return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/* Test the flag and add the current instruction's CF opcode as a mid entry of level fc_sp. */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
3100
3101static int tgsi_if(struct r600_shader_ctx *ctx)
3102{
3103	emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
3104
3105	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
3106
3107	fc_pushlevel(ctx, FC_IF);
3108
3109	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
3110	return 0;
3111}
3112
3113static int tgsi_else(struct r600_shader_ctx *ctx)
3114{
3115	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
3116	ctx->bc->cf_last->pop_count = 1;
3117
3118	fc_set_mid(ctx, ctx->bc->fc_sp);
3119	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
3120	return 0;
3121}
3122
3123static int tgsi_endif(struct r600_shader_ctx *ctx)
3124{
3125	pops(ctx, 1);
3126	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
3127		R600_ERR("if/endif unbalanced in shader\n");
3128		return -1;
3129	}
3130
3131	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
3132		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3133		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
3134	} else {
3135		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
3136	}
3137	fc_poplevel(ctx);
3138
3139	callstack_decrease_current(ctx, FC_PUSH_VPM);
3140	return 0;
3141}
3142
3143static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
3144{
3145	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
3146
3147	fc_pushlevel(ctx, FC_LOOP);
3148
3149	/* check stack depth */
3150	callstack_check_depth(ctx, FC_LOOP, 0);
3151	return 0;
3152}
3153
3154static int tgsi_endloop(struct r600_shader_ctx *ctx)
3155{
3156	int i;
3157
3158	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
3159
3160	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
3161		R600_ERR("loop/endloop in shader code are not paired.\n");
3162		return -EINVAL;
3163	}
3164
3165	/* fixup loop pointers - from r600isa
3166	   LOOP END points to CF after LOOP START,
3167	   LOOP START point to CF after LOOP END
3168	   BRK/CONT point to LOOP END CF
3169	*/
3170	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
3171
3172	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3173
3174	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
3175		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
3176	}
3177	/* TODO add LOOPRET support */
3178	fc_poplevel(ctx);
3179	callstack_decrease_current(ctx, FC_LOOP);
3180	return 0;
3181}
3182
3183static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
3184{
3185	unsigned int fscp;
3186
3187	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
3188	{
3189		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
3190			break;
3191	}
3192
3193	if (fscp == 0) {
3194		R600_ERR("Break not inside loop/endloop pair\n");
3195		return -EINVAL;
3196	}
3197
3198	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
3199	ctx->bc->cf_last->pop_count = 1;
3200
3201	fc_set_mid(ctx, fscp);
3202
3203	pops(ctx, 1);
3204	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
3205	return 0;
3206}
3207
/*
 * TGSI translation table using the unprefixed V_SQ_* opcode constants.
 *
 * Each entry holds, in order: the TGSI opcode (a bare number marks an
 * unassigned opcode value, see the "gap" comments), a flag that is 1 only
 * for MAD - the single entry that uses an OP3 opcode -, the hardware
 * opcode (ALU, CF or texture) and the handler that emits the code.
 * Opcodes without an implementation are routed to tgsi_unsupported.
 *
 * NOTE(review): the entries are listed in ascending opcode order with
 * explicit placeholders for gaps, so the table is presumably indexed
 * directly by opcode value; keep the ordering intact - confirm against
 * the lookup code.
 * NOTE(review): TXB and TXL both map to SQ_TEX_INST_SAMPLE_L - confirm
 * that the bias variant is really meant to share the explicit-LOD opcode.
 */
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},

	/* FIXME:
	 * For state trackers other than OpenGL, we'll want to use
	 * _RECIP_IEEE instead.
	 */
	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},

	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
	/* gap */
	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_TXD,	0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
	/* gap */
	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
	/* gap */
	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
	/* gap */
	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_LOAD,      0, 0, tgsi_unsupported},
	{TGSI_OPCODE_LOAD_MS,   0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE,    0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_B,  0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_C,  0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_D,  0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_L,  0, 0, tgsi_unsupported},
	{TGSI_OPCODE_GATHER4,   0, 0, tgsi_unsupported},
	{TGSI_OPCODE_RESINFO,	0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
};
3383
/*
 * TGSI translation table using the EG_-prefixed opcode constants
 * (presumably the Evergreen generation - confirm).
 *
 * Same layout as r600_shader_tgsi_instruction: TGSI opcode (or a bare
 * number for an unassigned value), a flag that is 1 only for the OP3
 * entry MAD, the hardware opcode, and the emitting handler.
 *
 * Visible differences from the r600 table: ALU and CF opcodes carry the
 * EG_ prefix, ARL/ARR are handled by tgsi_eg_arl, RCP uses RECIP_IEEE
 * rather than RECIP_CLAMPED, and RSQ carries RECIPSQRT_IEEE instead of a
 * NOP placeholder.
 *
 * NOTE(review): entries are in ascending opcode order with explicit gap
 * placeholders, so the table is presumably indexed by opcode value; keep
 * the ordering intact - confirm against the lookup code.
 * NOTE(review): TXB and TXL both map to SQ_TEX_INST_SAMPLE_L - confirm
 * that the bias variant is really meant to share the explicit-LOD opcode.
 */
static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq},
	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
	/* gap */
	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_TXD,	0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
	/* gap */
	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
	/* gap */
	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
	/* gap */
	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_LOAD,      0, 0, tgsi_unsupported},
	{TGSI_OPCODE_LOAD_MS,   0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE,    0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_B,  0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_C,  0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_D,  0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_L,  0, 0, tgsi_unsupported},
	{TGSI_OPCODE_GATHER4,   0, 0, tgsi_unsupported},
	{TGSI_OPCODE_RESINFO,	0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
};
3553
3554static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
3555	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3556	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3557	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3558	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr},
3559	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr},
3560	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3561	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3562	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3563	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3564	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3565	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3566	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3567	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3568	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3569	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3570	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3571	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3572	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3573	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3574	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3575	/* gap */
3576	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3577	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3578	/* gap */
3579	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3580	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3581	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3582	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3583	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3584	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
3585	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr},
3586	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr},
3587	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow},
3588	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3589	/* gap */
3590	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3591	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3592	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3593	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3594	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig},
3595	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3596	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3597	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3598	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3599	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3600	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3601	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3602	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3603	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3604	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3605	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3606	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig},
3607	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3608	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3609	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3610	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3611	{TGSI_OPCODE_TXD,	0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3612	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3613	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3614	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3615	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3616	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3617	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3618	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3619	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3620	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3621	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3622	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3623	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3624	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3625	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3626	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3627	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3628	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3629	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3630	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3631	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3632	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3633	/* gap */
3634	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3635	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3636	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3637	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3638	/* gap */
3639	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3640	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3641	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3642	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3643	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3644	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3645	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3646	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3647	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3648	/* gap */
3649	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3650	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3651	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3652	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3653	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3654	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3655	{TGSI_OPCODE_TXF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3656	{TGSI_OPCODE_TXQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3657	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3658	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3659	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3660	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3661	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3662	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3663	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3664	/* gap */
3665	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3666	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3667	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3668	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3669	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3670	/* gap */
3671	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3672	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3673	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3674	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3675	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3676	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3677	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3678	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3679	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3680	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3681	/* gap */
3682	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3683	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3684	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3685	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3686	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3687	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3688	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3689	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3690	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3691	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3692	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3693	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3694	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3695	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3696	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3697	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3698	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3699	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3700	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3701	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3702	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3703	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3704	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3705	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3706	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3707	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3708	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3709	{TGSI_OPCODE_LOAD,      0, 0, tgsi_unsupported},
3710	{TGSI_OPCODE_LOAD_MS,   0, 0, tgsi_unsupported},
3711	{TGSI_OPCODE_SAMPLE,    0, 0, tgsi_unsupported},
3712	{TGSI_OPCODE_SAMPLE_B,  0, 0, tgsi_unsupported},
3713	{TGSI_OPCODE_SAMPLE_C,  0, 0, tgsi_unsupported},
3714	{TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
3715	{TGSI_OPCODE_SAMPLE_D,  0, 0, tgsi_unsupported},
3716	{TGSI_OPCODE_SAMPLE_L,  0, 0, tgsi_unsupported},
3717	{TGSI_OPCODE_GATHER4,   0, 0, tgsi_unsupported},
3718	{TGSI_OPCODE_RESINFO,	0, 0, tgsi_unsupported},
3719	{TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
3720	{TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
3721	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3722};
3723