r600_shader.c revision d84ab821c5f5bfe9f6a57e434af9ca06d54f45b3
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "pipe/p_shader_tokens.h"
24#include "tgsi/tgsi_info.h"
25#include "tgsi/tgsi_parse.h"
26#include "tgsi/tgsi_scan.h"
27#include "tgsi/tgsi_dump.h"
28#include "util/u_format.h"
29#include "r600_pipe.h"
30#include "r600_asm.h"
31#include "r600_sq.h"
32#include "r600_formats.h"
33#include "r600_opcodes.h"
34#include "r600d.h"
35#include <stdio.h>
36#include <errno.h>
37#include <byteswap.h>
38
39/* CAYMAN notes
40Why CAYMAN got loops for lots of instructions is explained here.
41
42-These 8xx t-slot only ops are implemented in all vector slots.
43MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
44These 8xx t-slot only opcodes become vector ops, with all four
45slots expecting the arguments on sources a and b. Result is
46broadcast to all channels.
47MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
48These 8xx t-slot only opcodes become vector ops in the z, y, and
49x slots.
50EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
51RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
52SQRT_IEEE/_64
53SIN/COS
54The w slot may have an independent co-issued operation, or if the
55result is required to be in the w slot, the opcode above may be
56issued in the w slot as well.
57The compiler must issue the source argument to slots z, y, and x
58*/
59
60static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
61{
62	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
63	struct r600_shader *rshader = &shader->shader;
64	uint32_t *ptr;
65	int	i;
66
67	/* copy new shader */
68	if (shader->bo == NULL) {
69		shader->bo = (struct r600_resource*)
70			pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, rshader->bc.ndw * 4);
71		if (shader->bo == NULL) {
72			return -ENOMEM;
73		}
74		ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->buf, rctx->ctx.cs, PIPE_TRANSFER_WRITE);
75		if (R600_BIG_ENDIAN) {
76			for (i = 0; i < rshader->bc.ndw; ++i) {
77				ptr[i] = bswap_32(rshader->bc.bytecode[i]);
78			}
79		} else {
80			memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr));
81		}
82		rctx->ws->buffer_unmap(shader->bo->buf);
83	}
84	/* build state */
85	switch (rshader->processor_type) {
86	case TGSI_PROCESSOR_VERTEX:
87		if (rctx->chip_class >= EVERGREEN) {
88			evergreen_pipe_shader_vs(ctx, shader);
89		} else {
90			r600_pipe_shader_vs(ctx, shader);
91		}
92		break;
93	case TGSI_PROCESSOR_FRAGMENT:
94		if (rctx->chip_class >= EVERGREEN) {
95			evergreen_pipe_shader_ps(ctx, shader);
96		} else {
97			r600_pipe_shader_ps(ctx, shader);
98		}
99		break;
100	default:
101		return -EINVAL;
102	}
103	return 0;
104}
105
106static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader);
107
108int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader)
109{
110	static int dump_shaders = -1;
111	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
112	int r;
113
114	/* Would like some magic "get_bool_option_once" routine.
115	*/
116	if (dump_shaders == -1)
117		dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
118
119	if (dump_shaders) {
120		fprintf(stderr, "--------------------------------------------------------------\n");
121		tgsi_dump(shader->tokens, 0);
122
123		if (shader->so.num_outputs) {
124			unsigned i;
125			fprintf(stderr, "STREAMOUT\n");
126			for (i = 0; i < shader->so.num_outputs; i++) {
127				unsigned mask = ((1 << shader->so.output[i].num_components) - 1) <<
128						shader->so.output[i].start_component;
129				fprintf(stderr, "  %i: MEM_STREAM0_BUF%i OUT[%i].%s%s%s%s\n", i,
130					shader->so.output[i].output_buffer, shader->so.output[i].register_index,
131				        mask & 1 ? "x" : "_",
132				        (mask >> 1) & 1 ? "y" : "_",
133				        (mask >> 2) & 1 ? "z" : "_",
134				        (mask >> 3) & 1 ? "w" : "_");
135			}
136		}
137	}
138	r = r600_shader_from_tgsi(rctx, shader);
139	if (r) {
140		R600_ERR("translation from TGSI failed !\n");
141		return r;
142	}
143	r = r600_bytecode_build(&shader->shader.bc);
144	if (r) {
145		R600_ERR("building bytecode failed !\n");
146		return r;
147	}
148	if (dump_shaders) {
149		r600_bytecode_dump(&shader->shader.bc);
150		fprintf(stderr, "______________________________________________________________\n");
151	}
152	return r600_pipe_shader(ctx, shader);
153}
154
155void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
156{
157	pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);
158	r600_bytecode_clear(&shader->shader.bc);
159
160	memset(&shader->shader,0,sizeof(struct r600_shader));
161}
162
163/*
164 * tgsi -> r600 shader
165 */
166struct r600_shader_tgsi_instruction;
167
/*
 * One source operand of the instruction being translated, as filled in by
 * tgsi_src() from a TGSI source register.
 */
struct r600_shader_src {
	/* register/constant selector: TGSI index plus file offset, or a special
	 * value such as V_SQ_ALU_SRC_LITERAL */
	unsigned				sel;
	/* per-channel source swizzle (x, y, z, w) */
	unsigned				swizzle[4];
	/* copied from the TGSI Negate flag (may be updated for special constants) */
	unsigned				neg;
	/* copied from the TGSI Absolute flag */
	unsigned				abs;
	/* V_SQ_REL_RELATIVE when the TGSI register is indirect, 0 otherwise */
	unsigned				rel;
	/* the four dwords of the immediate when sel is V_SQ_ALU_SRC_LITERAL */
	uint32_t				value[4];
};
176
/*
 * Working state used while translating one TGSI shader to r600 bytecode.
 */
struct r600_shader_ctx {
	/* shader summary filled in by tgsi_scan_shader() */
	struct tgsi_shader_info			info;
	/* token parser state; holds the token currently being processed */
	struct tgsi_parse_context		parse;
	const struct tgsi_token			*tokens;
	/* TGSI processor type taken from the token header */
	unsigned				type;
	/* base added to a TGSI register index, per register file */
	unsigned				file_offset[TGSI_FILE_COUNT];
	/* first register handed out by r600_get_temp() */
	unsigned				temp_reg;
	/* opcode table entry of the instruction being translated */
	struct r600_shader_tgsi_instruction	*inst_info;
	struct r600_bytecode				*bc;
	struct r600_shader			*shader;
	/* translated sources of the current instruction */
	struct r600_shader_src			src[4];
	/* immediates seen so far, four dwords per TGSI immediate */
	u32					*literals;
	/* number of immediates stored in literals */
	u32					nliterals;
	/* number of driver temporaries handed out for the current instruction */
	u32					max_driver_temp_used;
	/* needed for evergreen interpolation */
	boolean                                 input_centroid;
	boolean                                 input_linear;
	boolean                                 input_perspective;
	/* GPR count computed by evergreen_gpr_count() */
	int					num_interp_gpr;
};
197
/*
 * One entry of the per-chip opcode translation tables. The tables are
 * indexed by TGSI opcode and the selected entry's process() callback is
 * invoked for each instruction.
 */
struct r600_shader_tgsi_instruction {
	unsigned	tgsi_opcode;
	/* NOTE(review): presumably flags three-operand (OP3) ALU encodings - confirm */
	unsigned	is_op3;
	unsigned	r600_opcode;
	/* emits the bytecode for the instruction; non-zero return aborts translation */
	int (*process)(struct r600_shader_ctx *ctx);
};
204
205static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
206static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
207
208static int tgsi_is_supported(struct r600_shader_ctx *ctx)
209{
210	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
211	int j;
212
213	if (i->Instruction.NumDstRegs > 1) {
214		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
215		return -EINVAL;
216	}
217	if (i->Instruction.Predicate) {
218		R600_ERR("predicate unsupported\n");
219		return -EINVAL;
220	}
221#if 0
222	if (i->Instruction.Label) {
223		R600_ERR("label unsupported\n");
224		return -EINVAL;
225	}
226#endif
227	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
228		if (i->Src[j].Register.Dimension) {
229			R600_ERR("unsupported src %d (dimension %d)\n", j,
230				 i->Src[j].Register.Dimension);
231			return -EINVAL;
232		}
233	}
234	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
235		if (i->Dst[j].Register.Dimension) {
236			R600_ERR("unsupported dst (dimension)\n");
237			return -EINVAL;
238		}
239	}
240	return 0;
241}
242
243static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
244{
245	int i, r;
246	struct r600_bytecode_alu alu;
247	int gpr = 0, base_chan = 0;
248	int ij_index = 0;
249
250	if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
251		ij_index = 0;
252		if (ctx->shader->input[input].centroid)
253			ij_index++;
254	} else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
255		ij_index = 0;
256		/* if we have perspective add one */
257		if (ctx->input_perspective)  {
258			ij_index++;
259			/* if we have perspective centroid */
260			if (ctx->input_centroid)
261				ij_index++;
262		}
263		if (ctx->shader->input[input].centroid)
264			ij_index++;
265	}
266
267	/* work out gpr and base_chan from index */
268	gpr = ij_index / 2;
269	base_chan = (2 * (ij_index % 2)) + 1;
270
271	for (i = 0; i < 8; i++) {
272		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
273
274		if (i < 4)
275			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
276		else
277			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
278
279		if ((i > 1) && (i < 6)) {
280			alu.dst.sel = ctx->shader->input[input].gpr;
281			alu.dst.write = 1;
282		}
283
284		alu.dst.chan = i % 4;
285
286		alu.src[0].sel = gpr;
287		alu.src[0].chan = (base_chan - (i % 2));
288
289		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
290
291		alu.bank_swizzle_force = SQ_ALU_VEC_210;
292		if ((i % 4) == 3)
293			alu.last = 1;
294		r = r600_bytecode_add_alu(ctx->bc, &alu);
295		if (r)
296			return r;
297	}
298	return 0;
299}
300
301static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input)
302{
303	int i, r;
304	struct r600_bytecode_alu alu;
305
306	for (i = 0; i < 4; i++) {
307		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
308
309		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_LOAD_P0;
310
311		alu.dst.sel = ctx->shader->input[input].gpr;
312		alu.dst.write = 1;
313
314		alu.dst.chan = i;
315
316		alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
317		alu.src[0].chan = i;
318
319		if (i == 3)
320			alu.last = 1;
321		r = r600_bytecode_add_alu(ctx->bc, &alu);
322		if (r)
323			return r;
324	}
325	return 0;
326}
327
328/*
329 * Special export handling in shaders
330 *
331 * shader export ARRAY_BASE for EXPORT_POS:
332 * 60 is position
333 * 61 is misc vector
334 * 62, 63 are clip distance vectors
335 *
336 * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL:
337 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61
338 * USE_VTX_POINT_SIZE - point size in the X channel of export 61
339 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61
340 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61
341 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61
342 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually
343 * exclusive from render target index)
344 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors
345 *
346 *
347 * shader export ARRAY_BASE for EXPORT_PIXEL:
348 * 0-7 CB targets
349 * 61 computed Z vector
350 *
351 * The use of the values exported in the computed Z vector are controlled
352 * by DB_SHADER_CONTROL:
353 * Z_EXPORT_ENABLE - Z as a float in RED
354 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN
355 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA
356 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE
357 * DB_SOURCE_FORMAT - export control restrictions
358 *
359 */
360
361
362/* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */
363static int r600_spi_sid(struct r600_shader_io * io)
364{
365	int index, name = io->name;
366
367	/* These params are handled differently, they don't need
368	 * semantic indices, so we'll use 0 for them.
369	 */
370	if (name == TGSI_SEMANTIC_POSITION ||
371		name == TGSI_SEMANTIC_PSIZE ||
372		name == TGSI_SEMANTIC_FACE)
373		index = 0;
374	else {
375		if (name == TGSI_SEMANTIC_GENERIC) {
376			/* For generic params simply use sid from tgsi */
377			index = io->sid;
378		} else {
379
380			/* FIXME: two-side rendering is broken in r600g, this will
381			 * keep old functionality */
382			if (name == TGSI_SEMANTIC_BCOLOR)
383				name = TGSI_SEMANTIC_COLOR;
384
385			/* For non-generic params - pack name and sid into 8 bits */
386			index = 0x80 | (name<<3) | (io->sid);
387		}
388
389		/* Make sure that all really used indices have nonzero value, so
390		 * we can just compare it to 0 later instead of comparing the name
391		 * with different values to detect special cases. */
392		index++;
393	}
394
395	return index;
396};
397
398static int tgsi_declaration(struct r600_shader_ctx *ctx)
399{
400	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
401	unsigned i;
402	int r;
403
404	switch (d->Declaration.File) {
405	case TGSI_FILE_INPUT:
406		i = ctx->shader->ninput++;
407		ctx->shader->input[i].name = d->Semantic.Name;
408		ctx->shader->input[i].sid = d->Semantic.Index;
409		ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
410		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
411		ctx->shader->input[i].centroid = d->Declaration.Centroid;
412		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
413		if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chip_class >= EVERGREEN) {
414			/* turn input into interpolate on EG */
415			if (ctx->shader->input[i].spi_sid) {
416				ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
417				if (ctx->shader->input[i].interpolate > 0) {
418					evergreen_interp_alu(ctx, i);
419				} else {
420					evergreen_interp_flat(ctx, i);
421				}
422			}
423		}
424		break;
425	case TGSI_FILE_OUTPUT:
426		i = ctx->shader->noutput++;
427		ctx->shader->output[i].name = d->Semantic.Name;
428		ctx->shader->output[i].sid = d->Semantic.Index;
429		ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
430		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First;
431		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
432		break;
433	case TGSI_FILE_CONSTANT:
434	case TGSI_FILE_TEMPORARY:
435	case TGSI_FILE_SAMPLER:
436	case TGSI_FILE_ADDRESS:
437		break;
438
439	case TGSI_FILE_SYSTEM_VALUE:
440		if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
441			struct r600_bytecode_alu alu;
442			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
443
444			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
445			alu.src[0].sel = 0;
446			alu.src[0].chan = 3;
447
448			alu.dst.sel = 0;
449			alu.dst.chan = 3;
450			alu.dst.write = 1;
451			alu.last = 1;
452
453			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
454				return r;
455			break;
456		} else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
457			break;
458	default:
459		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
460		return -EINVAL;
461	}
462	return 0;
463}
464
465static int r600_get_temp(struct r600_shader_ctx *ctx)
466{
467	return ctx->temp_reg + ctx->max_driver_temp_used++;
468}
469
470/*
471 * for evergreen we need to scan the shader to find the number of GPRs we need to
472 * reserve for interpolation.
473 *
474 * we need to know if we are going to emit
475 * any centroid inputs
476 * if perspective and linear are required
477*/
478static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
479{
480	int i;
481	int num_baryc;
482
483	ctx->input_linear = FALSE;
484	ctx->input_perspective = FALSE;
485	ctx->input_centroid = FALSE;
486	ctx->num_interp_gpr = 1;
487
488	/* any centroid inputs */
489	for (i = 0; i < ctx->info.num_inputs; i++) {
490		/* skip position/face */
491		if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
492		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
493			continue;
494		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
495			ctx->input_linear = TRUE;
496		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
497			ctx->input_perspective = TRUE;
498		if (ctx->info.input_centroid[i])
499			ctx->input_centroid = TRUE;
500	}
501
502	num_baryc = 0;
503	/* ignoring sample for now */
504	if (ctx->input_perspective)
505		num_baryc++;
506	if (ctx->input_linear)
507		num_baryc++;
508	if (ctx->input_centroid)
509		num_baryc *= 2;
510
511	ctx->num_interp_gpr += (num_baryc + 1) >> 1;
512
513	/* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
514	return ctx->num_interp_gpr;
515}
516
517static void tgsi_src(struct r600_shader_ctx *ctx,
518		     const struct tgsi_full_src_register *tgsi_src,
519		     struct r600_shader_src *r600_src)
520{
521	memset(r600_src, 0, sizeof(*r600_src));
522	r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
523	r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
524	r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
525	r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
526	r600_src->neg = tgsi_src->Register.Negate;
527	r600_src->abs = tgsi_src->Register.Absolute;
528
529	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
530		int index;
531		if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
532			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
533			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
534
535			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
536			r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
537			if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
538				return;
539		}
540		index = tgsi_src->Register.Index;
541		r600_src->sel = V_SQ_ALU_SRC_LITERAL;
542		memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
543	} else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
544		if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) {
545			r600_src->swizzle[0] = 3;
546			r600_src->swizzle[1] = 3;
547			r600_src->swizzle[2] = 3;
548			r600_src->swizzle[3] = 3;
549			r600_src->sel = 0;
550		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) {
551			r600_src->swizzle[0] = 0;
552			r600_src->swizzle[1] = 0;
553			r600_src->swizzle[2] = 0;
554			r600_src->swizzle[3] = 0;
555			r600_src->sel = 0;
556		}
557	} else {
558		if (tgsi_src->Register.Indirect)
559			r600_src->rel = V_SQ_REL_RELATIVE;
560		r600_src->sel = tgsi_src->Register.Index;
561		r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
562	}
563}
564
565static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
566{
567	struct r600_bytecode_vtx vtx;
568	unsigned int ar_reg;
569	int r;
570
571	if (offset) {
572		struct r600_bytecode_alu alu;
573
574		memset(&alu, 0, sizeof(alu));
575
576		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
577		alu.src[0].sel = ctx->bc->ar_reg;
578
579		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
580		alu.src[1].value = offset;
581
582		alu.dst.sel = dst_reg;
583		alu.dst.write = 1;
584		alu.last = 1;
585
586		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
587			return r;
588
589		ar_reg = dst_reg;
590	} else {
591		ar_reg = ctx->bc->ar_reg;
592	}
593
594	memset(&vtx, 0, sizeof(vtx));
595	vtx.fetch_type = 2;		/* VTX_FETCH_NO_INDEX_OFFSET */
596	vtx.src_gpr = ar_reg;
597	vtx.mega_fetch_count = 16;
598	vtx.dst_gpr = dst_reg;
599	vtx.dst_sel_x = 0;		/* SEL_X */
600	vtx.dst_sel_y = 1;		/* SEL_Y */
601	vtx.dst_sel_z = 2;		/* SEL_Z */
602	vtx.dst_sel_w = 3;		/* SEL_W */
603	vtx.data_format = FMT_32_32_32_32_FLOAT;
604	vtx.num_format_all = 2;		/* NUM_FORMAT_SCALED */
605	vtx.format_comp_all = 1;	/* FORMAT_COMP_SIGNED */
606	vtx.srf_mode_all = 1;		/* SRF_MODE_NO_ZERO */
607	vtx.endian = r600_endian_swap(32);
608
609	if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
610		return r;
611
612	return 0;
613}
614
615static int tgsi_split_constant(struct r600_shader_ctx *ctx)
616{
617	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
618	struct r600_bytecode_alu alu;
619	int i, j, k, nconst, r;
620
621	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
622		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
623			nconst++;
624		}
625		tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
626	}
627	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
628		if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
629			continue;
630		}
631
632		if (ctx->src[i].rel) {
633			int treg = r600_get_temp(ctx);
634			if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
635				return r;
636
637			ctx->src[i].sel = treg;
638			ctx->src[i].rel = 0;
639			j--;
640		} else if (j > 0) {
641			int treg = r600_get_temp(ctx);
642			for (k = 0; k < 4; k++) {
643				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
644				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
645				alu.src[0].sel = ctx->src[i].sel;
646				alu.src[0].chan = k;
647				alu.src[0].rel = ctx->src[i].rel;
648				alu.dst.sel = treg;
649				alu.dst.chan = k;
650				alu.dst.write = 1;
651				if (k == 3)
652					alu.last = 1;
653				r = r600_bytecode_add_alu(ctx->bc, &alu);
654				if (r)
655					return r;
656			}
657			ctx->src[i].sel = treg;
658			ctx->src[i].rel =0;
659			j--;
660		}
661	}
662	return 0;
663}
664
665/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
666static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
667{
668	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
669	struct r600_bytecode_alu alu;
670	int i, j, k, nliteral, r;
671
672	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
673		if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
674			nliteral++;
675		}
676	}
677	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
678		if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
679			int treg = r600_get_temp(ctx);
680			for (k = 0; k < 4; k++) {
681				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
682				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
683				alu.src[0].sel = ctx->src[i].sel;
684				alu.src[0].chan = k;
685				alu.src[0].value = ctx->src[i].value[k];
686				alu.dst.sel = treg;
687				alu.dst.chan = k;
688				alu.dst.write = 1;
689				if (k == 3)
690					alu.last = 1;
691				r = r600_bytecode_add_alu(ctx->bc, &alu);
692				if (r)
693					return r;
694			}
695			ctx->src[i].sel = treg;
696			j--;
697		}
698	}
699	return 0;
700}
701
702static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader)
703{
704	struct r600_shader *shader = &pipeshader->shader;
705	struct tgsi_token *tokens = pipeshader->tokens;
706	struct pipe_stream_output_info so = pipeshader->so;
707	struct tgsi_full_immediate *immediate;
708	struct tgsi_full_property *property;
709	struct r600_shader_ctx ctx;
710	struct r600_bytecode_output output[32];
711	unsigned output_done, noutput;
712	unsigned opcode;
713	int i, j, r = 0, pos0;
714
715	ctx.bc = &shader->bc;
716	ctx.shader = shader;
717	r600_bytecode_init(ctx.bc, rctx->chip_class);
718	ctx.tokens = tokens;
719	tgsi_scan_shader(tokens, &ctx.info);
720	tgsi_parse_init(&ctx.parse, tokens);
721	ctx.type = ctx.parse.FullHeader.Processor.Processor;
722	shader->processor_type = ctx.type;
723	ctx.bc->type = shader->processor_type;
724
725	shader->clamp_color = (((ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->clamp_fragment_color) ||
726		((ctx.type == TGSI_PROCESSOR_VERTEX) && rctx->clamp_vertex_color));
727
728	shader->nr_cbufs = rctx->nr_cbufs;
729
730	/* register allocations */
731	/* Values [0,127] correspond to GPR[0..127].
732	 * Values [128,159] correspond to constant buffer bank 0
733	 * Values [160,191] correspond to constant buffer bank 1
734	 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
735	 * Values [256,287] correspond to constant buffer bank 2 (EG)
736	 * Values [288,319] correspond to constant buffer bank 3 (EG)
737	 * Other special values are shown in the list below.
738	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
739	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
740	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
741	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
742	 * 248	SQ_ALU_SRC_0: special constant 0.0.
743	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
744	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
745	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
746	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
747	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
748	 * 254	SQ_ALU_SRC_PV: previous vector result.
749	 * 255	SQ_ALU_SRC_PS: previous scalar result.
750	 */
751	for (i = 0; i < TGSI_FILE_COUNT; i++) {
752		ctx.file_offset[i] = 0;
753	}
754	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
755		ctx.file_offset[TGSI_FILE_INPUT] = 1;
756		if (ctx.bc->chip_class >= EVERGREEN) {
757			r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
758		} else {
759			r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
760		}
761	}
762	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
763		ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
764	}
765	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
766						ctx.info.file_max[TGSI_FILE_INPUT] + 1;
767	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
768						ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
769
770	/* Outside the GPR range. This will be translated to one of the
771	 * kcache banks later. */
772	ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
773
774	ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
775	ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
776			ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
777	ctx.temp_reg = ctx.bc->ar_reg + 1;
778
779	ctx.nliterals = 0;
780	ctx.literals = NULL;
781	shader->fs_write_all = FALSE;
782	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
783		tgsi_parse_token(&ctx.parse);
784		switch (ctx.parse.FullToken.Token.Type) {
785		case TGSI_TOKEN_TYPE_IMMEDIATE:
786			immediate = &ctx.parse.FullToken.FullImmediate;
787			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
788			if(ctx.literals == NULL) {
789				r = -ENOMEM;
790				goto out_err;
791			}
792			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
793			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
794			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
795			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
796			ctx.nliterals++;
797			break;
798		case TGSI_TOKEN_TYPE_DECLARATION:
799			r = tgsi_declaration(&ctx);
800			if (r)
801				goto out_err;
802			break;
803		case TGSI_TOKEN_TYPE_INSTRUCTION:
804			r = tgsi_is_supported(&ctx);
805			if (r)
806				goto out_err;
807			ctx.max_driver_temp_used = 0;
808			/* reserve first tmp for everyone */
809			r600_get_temp(&ctx);
810
811			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
812			if ((r = tgsi_split_constant(&ctx)))
813				goto out_err;
814			if ((r = tgsi_split_literal_constant(&ctx)))
815				goto out_err;
816			if (ctx.bc->chip_class == CAYMAN)
817				ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
818			else if (ctx.bc->chip_class >= EVERGREEN)
819				ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
820			else
821				ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
822			r = ctx.inst_info->process(&ctx);
823			if (r)
824				goto out_err;
825			break;
826		case TGSI_TOKEN_TYPE_PROPERTY:
827			property = &ctx.parse.FullToken.FullProperty;
828			if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
829				if (property->u[0].Data == 1)
830					shader->fs_write_all = TRUE;
831			}
832			break;
833		default:
834			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
835			r = -EINVAL;
836			goto out_err;
837		}
838	}
839
840	noutput = shader->noutput;
841
842	/* clamp color outputs */
843	if (shader->clamp_color) {
844		for (i = 0; i < noutput; i++) {
845			if (shader->output[i].name == TGSI_SEMANTIC_COLOR ||
846				shader->output[i].name == TGSI_SEMANTIC_BCOLOR) {
847
848				int j;
849				for (j = 0; j < 4; j++) {
850					struct r600_bytecode_alu alu;
851					memset(&alu, 0, sizeof(struct r600_bytecode_alu));
852
853					/* MOV_SAT R, R */
854					alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
855					alu.dst.sel = shader->output[i].gpr;
856					alu.dst.chan = j;
857					alu.dst.write = 1;
858					alu.dst.clamp = 1;
859					alu.src[0].sel = alu.dst.sel;
860					alu.src[0].chan = j;
861
862					if (j == 3) {
863						alu.last = 1;
864					}
865					r = r600_bytecode_add_alu(ctx.bc, &alu);
866					if (r)
867						return r;
868				}
869			}
870		}
871	}
872
873	/* Add stream outputs. */
874	if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) {
875		for (i = 0; i < so.num_outputs; i++) {
876			struct r600_bytecode_output output;
877
878			if (so.output[i].output_buffer >= 4) {
879				R600_ERR("exceeded the max number of stream output buffers, got: %d\n",
880					 so.output[i].output_buffer);
881				r = -EINVAL;
882				goto out_err;
883			}
884			if (so.output[i].start_component) {
885			   R600_ERR("stream_output - start_component cannot be non-zero\n");
886			   r = -EINVAL;
887			   goto out_err;
888			}
889
890			memset(&output, 0, sizeof(struct r600_bytecode_output));
891			output.gpr = shader->output[so.output[i].register_index].gpr;
892			output.elem_size = 0;
893			output.array_base = so.output[i].dst_offset;
894			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
895			output.burst_count = 1;
896			output.barrier = 1;
897			output.array_size = 0;
898			output.comp_mask = (1 << so.output[i].num_components) - 1;
899			if (ctx.bc->chip_class >= EVERGREEN) {
900				switch (so.output[i].output_buffer) {
901				case 0:
902					output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0;
903					break;
904				case 1:
905					output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1;
906					break;
907				case 2:
908					output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2;
909					break;
910				case 3:
911					output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3;
912					break;
913				}
914			} else {
915				switch (so.output[i].output_buffer) {
916				case 0:
917					output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0;
918					break;
919				case 1:
920					output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1;
921					break;
922				case 2:
923					output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2;
924					break;
925				case 3:
926					output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3;
927					break;
928				}
929			}
930			r = r600_bytecode_add_output(ctx.bc, &output);
931			if (r)
932				goto out_err;
933		}
934	}
935
936	/* export output */
937	j = 0;
938	for (i = 0, pos0 = 0; i < noutput; i++) {
939		memset(&output[i], 0, sizeof(struct r600_bytecode_output));
940		output[i + j].gpr = shader->output[i].gpr;
941		output[i + j].elem_size = 3;
942		output[i + j].swizzle_x = 0;
943		output[i + j].swizzle_y = 1;
944		output[i + j].swizzle_z = 2;
945		output[i + j].swizzle_w = 3;
946		output[i + j].burst_count = 1;
947		output[i + j].barrier = 1;
948		output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
949		output[i + j].array_base = i - pos0;
950		output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
951		switch (ctx.type) {
952		case TGSI_PROCESSOR_VERTEX:
953			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
954				output[i + j].array_base = 60;
955				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
956				/* position doesn't count in array_base */
957				pos0++;
958			}
959			if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
960				output[i + j].array_base = 61;
961				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
962				/* position doesn't count in array_base */
963				pos0++;
964			}
965			break;
966		case TGSI_PROCESSOR_FRAGMENT:
967			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
968				output[i + j].array_base = shader->output[i].sid;
969				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
970				if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) {
971					for (j = 1; j < shader->nr_cbufs; j++) {
972						memset(&output[i + j], 0, sizeof(struct r600_bytecode_output));
973						output[i + j].gpr = shader->output[i].gpr;
974						output[i + j].elem_size = 3;
975						output[i + j].swizzle_x = 0;
976						output[i + j].swizzle_y = 1;
977						output[i + j].swizzle_z = 2;
978						output[i + j].swizzle_w = 3;
979						output[i + j].burst_count = 1;
980						output[i + j].barrier = 1;
981						output[i + j].array_base = shader->output[i].sid + j;
982						output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
983						output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
984					}
985					j = shader->nr_cbufs-1;
986				}
987			} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
988				output[i + j].array_base = 61;
989				output[i + j].swizzle_x = 2;
990				output[i + j].swizzle_y = 7;
991				output[i + j].swizzle_z = output[i + j].swizzle_w = 7;
992				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
993			} else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
994				output[i + j].array_base = 61;
995				output[i + j].swizzle_x = 7;
996				output[i + j].swizzle_y = 1;
997				output[i + j].swizzle_z = output[i + j].swizzle_w = 7;
998				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
999			} else {
1000				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
1001				r = -EINVAL;
1002				goto out_err;
1003			}
1004			break;
1005		default:
1006			R600_ERR("unsupported processor type %d\n", ctx.type);
1007			r = -EINVAL;
1008			goto out_err;
1009		}
1010	}
1011	noutput += j;
1012	/* add fake param output for vertex shader if no param is exported */
1013	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
1014		for (i = 0, pos0 = 0; i < noutput; i++) {
1015			if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
1016				pos0 = 1;
1017				break;
1018			}
1019		}
1020		if (!pos0) {
1021			memset(&output[i], 0, sizeof(struct r600_bytecode_output));
1022			output[i].gpr = 0;
1023			output[i].elem_size = 3;
1024			output[i].swizzle_x = 7;
1025			output[i].swizzle_y = 7;
1026			output[i].swizzle_z = 7;
1027			output[i].swizzle_w = 7;
1028			output[i].burst_count = 1;
1029			output[i].barrier = 1;
1030			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1031			output[i].array_base = 0;
1032			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1033			noutput++;
1034		}
1035	}
1036	/* add fake pixel export */
1037	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
1038		memset(&output[0], 0, sizeof(struct r600_bytecode_output));
1039		output[0].gpr = 0;
1040		output[0].elem_size = 3;
1041		output[0].swizzle_x = 7;
1042		output[0].swizzle_y = 7;
1043		output[0].swizzle_z = 7;
1044		output[0].swizzle_w = 7;
1045		output[0].burst_count = 1;
1046		output[0].barrier = 1;
1047		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1048		output[0].array_base = 0;
1049		output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1050		noutput++;
1051	}
1052	/* set export done on last export of each type */
1053	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
1054		if (ctx.bc->chip_class < CAYMAN) {
1055			if (i == (noutput - 1)) {
1056				output[i].end_of_program = 1;
1057			}
1058		}
1059		if (!(output_done & (1 << output[i].type))) {
1060			output_done |= (1 << output[i].type);
1061			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
1062		}
1063	}
1064	/* add output to bytecode */
1065	for (i = 0; i < noutput; i++) {
1066		r = r600_bytecode_add_output(ctx.bc, &output[i]);
1067		if (r)
1068			goto out_err;
1069	}
1070	/* add program end */
1071	if (ctx.bc->chip_class == CAYMAN)
1072		cm_bytecode_add_cf_end(ctx.bc);
1073
1074	free(ctx.literals);
1075	tgsi_parse_free(&ctx.parse);
1076	return 0;
1077out_err:
1078	free(ctx.literals);
1079	tgsi_parse_free(&ctx.parse);
1080	return r;
1081}
1082
1083static int tgsi_unsupported(struct r600_shader_ctx *ctx)
1084{
1085	R600_ERR("%s tgsi opcode unsupported\n",
1086		 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
1087	return -EINVAL;
1088}
1089
/* Handler that emits no bytecode; it ignores ctx and always returns 0. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* intentionally unused */
	return 0;
}
1094
1095static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
1096			const struct r600_shader_src *shader_src,
1097			unsigned chan)
1098{
1099	bc_src->sel = shader_src->sel;
1100	bc_src->chan = shader_src->swizzle[chan];
1101	bc_src->neg = shader_src->neg;
1102	bc_src->abs = shader_src->abs;
1103	bc_src->rel = shader_src->rel;
1104	bc_src->value = shader_src->value[bc_src->chan];
1105}
1106
1107static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
1108{
1109	bc_src->abs = 1;
1110	bc_src->neg = 0;
1111}
1112
1113static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
1114{
1115	bc_src->neg = !bc_src->neg;
1116}
1117
1118static void tgsi_dst(struct r600_shader_ctx *ctx,
1119		     const struct tgsi_full_dst_register *tgsi_dst,
1120		     unsigned swizzle,
1121		     struct r600_bytecode_alu_dst *r600_dst)
1122{
1123	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1124
1125	r600_dst->sel = tgsi_dst->Register.Index;
1126	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
1127	r600_dst->chan = swizzle;
1128	r600_dst->write = 1;
1129	if (tgsi_dst->Register.Indirect)
1130		r600_dst->rel = V_SQ_REL_RELATIVE;
1131	if (inst->Instruction.Saturate) {
1132		r600_dst->clamp = 1;
1133	}
1134}
1135
/*
 * Return the index (0..3) of the highest channel enabled in the low four
 * bits of writemask, or 0 when none of those bits is set.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan from w down to y; x and "nothing set" both yield 0 */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1u << chan))
			return chan;
	}
	return 0;
}
1147
1148static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
1149{
1150	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1151	struct r600_bytecode_alu alu;
1152	int i, j, r;
1153	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1154
1155	for (i = 0; i < lasti + 1; i++) {
1156		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1157			continue;
1158
1159		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1160		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1161
1162		alu.inst = ctx->inst_info->r600_opcode;
1163		if (!swap) {
1164			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1165				r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1166			}
1167		} else {
1168			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
1169			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1170		}
1171		/* handle some special cases */
1172		switch (ctx->inst_info->tgsi_opcode) {
1173		case TGSI_OPCODE_SUB:
1174			r600_bytecode_src_toggle_neg(&alu.src[1]);
1175			break;
1176		case TGSI_OPCODE_ABS:
1177			r600_bytecode_src_set_abs(&alu.src[0]);
1178			break;
1179		default:
1180			break;
1181		}
1182		if (i == lasti || trans_only) {
1183			alu.last = 1;
1184		}
1185		r = r600_bytecode_add_alu(ctx->bc, &alu);
1186		if (r)
1187			return r;
1188	}
1189	return 0;
1190}
1191
/* tgsi_op2_s() with sources in TGSI order and only the final channel flagged last. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1196
/* tgsi_op2_s() with TGSI sources 0 and 1 exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap = 1;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1201
/* tgsi_op2_s() with every emitted instruction flagged last. */
static int tgsi_op2_trans(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 1;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1206
1207static int tgsi_ineg(struct r600_shader_ctx *ctx)
1208{
1209	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1210	struct r600_bytecode_alu alu;
1211	int i, r;
1212	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1213
1214	for (i = 0; i < lasti + 1; i++) {
1215
1216		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1217			continue;
1218		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1219		alu.inst = ctx->inst_info->r600_opcode;
1220
1221		alu.src[0].sel = V_SQ_ALU_SRC_0;
1222
1223		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1224
1225		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1226
1227		if (i == lasti) {
1228			alu.last = 1;
1229		}
1230		r = r600_bytecode_add_alu(ctx->bc, &alu);
1231		if (r)
1232			return r;
1233	}
1234	return 0;
1235
1236}
1237
1238static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
1239{
1240	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1241	int i, j, r;
1242	struct r600_bytecode_alu alu;
1243	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1244
1245	for (i = 0 ; i < last_slot; i++) {
1246		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1247		alu.inst = ctx->inst_info->r600_opcode;
1248		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1249			r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);
1250		}
1251		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1252		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1253
1254		if (i == last_slot - 1)
1255			alu.last = 1;
1256		r = r600_bytecode_add_alu(ctx->bc, &alu);
1257		if (r)
1258			return r;
1259	}
1260	return 0;
1261}
1262
1263/*
1264 * r600 - trunc to -PI..PI range
1265 * r700 - normalize by dividing by 2PI
1266 * see fdo bug 27901
1267 */
1268static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
1269{
1270	static float half_inv_pi = 1.0 /(3.1415926535 * 2);
1271	static float double_pi = 3.1415926535 * 2;
1272	static float neg_pi = -3.1415926535;
1273
1274	int r;
1275	struct r600_bytecode_alu alu;
1276
1277	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1278	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1279	alu.is_op3 = 1;
1280
1281	alu.dst.chan = 0;
1282	alu.dst.sel = ctx->temp_reg;
1283	alu.dst.write = 1;
1284
1285	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1286
1287	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1288	alu.src[1].chan = 0;
1289	alu.src[1].value = *(uint32_t *)&half_inv_pi;
1290	alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1291	alu.src[2].chan = 0;
1292	alu.last = 1;
1293	r = r600_bytecode_add_alu(ctx->bc, &alu);
1294	if (r)
1295		return r;
1296
1297	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1298	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1299
1300	alu.dst.chan = 0;
1301	alu.dst.sel = ctx->temp_reg;
1302	alu.dst.write = 1;
1303
1304	alu.src[0].sel = ctx->temp_reg;
1305	alu.src[0].chan = 0;
1306	alu.last = 1;
1307	r = r600_bytecode_add_alu(ctx->bc, &alu);
1308	if (r)
1309		return r;
1310
1311	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1312	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1313	alu.is_op3 = 1;
1314
1315	alu.dst.chan = 0;
1316	alu.dst.sel = ctx->temp_reg;
1317	alu.dst.write = 1;
1318
1319	alu.src[0].sel = ctx->temp_reg;
1320	alu.src[0].chan = 0;
1321
1322	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1323	alu.src[1].chan = 0;
1324	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1325	alu.src[2].chan = 0;
1326
1327	if (ctx->bc->chip_class == R600) {
1328		alu.src[1].value = *(uint32_t *)&double_pi;
1329		alu.src[2].value = *(uint32_t *)&neg_pi;
1330	} else {
1331		alu.src[1].sel = V_SQ_ALU_SRC_1;
1332		alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1333		alu.src[2].neg = 1;
1334	}
1335
1336	alu.last = 1;
1337	r = r600_bytecode_add_alu(ctx->bc, &alu);
1338	if (r)
1339		return r;
1340	return 0;
1341}
1342
1343static int cayman_trig(struct r600_shader_ctx *ctx)
1344{
1345	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1346	struct r600_bytecode_alu alu;
1347	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1348	int i, r;
1349
1350	r = tgsi_setup_trig(ctx);
1351	if (r)
1352		return r;
1353
1354
1355	for (i = 0; i < last_slot; i++) {
1356		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1357		alu.inst = ctx->inst_info->r600_opcode;
1358		alu.dst.chan = i;
1359
1360		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1361		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1362
1363		alu.src[0].sel = ctx->temp_reg;
1364		alu.src[0].chan = 0;
1365		if (i == last_slot - 1)
1366			alu.last = 1;
1367		r = r600_bytecode_add_alu(ctx->bc, &alu);
1368		if (r)
1369			return r;
1370	}
1371	return 0;
1372}
1373
1374static int tgsi_trig(struct r600_shader_ctx *ctx)
1375{
1376	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1377	struct r600_bytecode_alu alu;
1378	int i, r;
1379	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1380
1381	r = tgsi_setup_trig(ctx);
1382	if (r)
1383		return r;
1384
1385	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1386	alu.inst = ctx->inst_info->r600_opcode;
1387	alu.dst.chan = 0;
1388	alu.dst.sel = ctx->temp_reg;
1389	alu.dst.write = 1;
1390
1391	alu.src[0].sel = ctx->temp_reg;
1392	alu.src[0].chan = 0;
1393	alu.last = 1;
1394	r = r600_bytecode_add_alu(ctx->bc, &alu);
1395	if (r)
1396		return r;
1397
1398	/* replicate result */
1399	for (i = 0; i < lasti + 1; i++) {
1400		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1401			continue;
1402
1403		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1404		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1405
1406		alu.src[0].sel = ctx->temp_reg;
1407		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1408		if (i == lasti)
1409			alu.last = 1;
1410		r = r600_bytecode_add_alu(ctx->bc, &alu);
1411		if (r)
1412			return r;
1413	}
1414	return 0;
1415}
1416
1417static int tgsi_scs(struct r600_shader_ctx *ctx)
1418{
1419	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1420	struct r600_bytecode_alu alu;
1421	int i, r;
1422
1423	/* We'll only need the trig stuff if we are going to write to the
1424	 * X or Y components of the destination vector.
1425	 */
1426	if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1427		r = tgsi_setup_trig(ctx);
1428		if (r)
1429			return r;
1430	}
1431
1432	/* dst.x = COS */
1433	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1434		if (ctx->bc->chip_class == CAYMAN) {
1435			for (i = 0 ; i < 3; i++) {
1436				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1437				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1438				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1439
1440				if (i == 0)
1441					alu.dst.write = 1;
1442				else
1443					alu.dst.write = 0;
1444				alu.src[0].sel = ctx->temp_reg;
1445				alu.src[0].chan = 0;
1446				if (i == 2)
1447					alu.last = 1;
1448				r = r600_bytecode_add_alu(ctx->bc, &alu);
1449				if (r)
1450					return r;
1451			}
1452		} else {
1453			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1454			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1455			tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1456
1457			alu.src[0].sel = ctx->temp_reg;
1458			alu.src[0].chan = 0;
1459			alu.last = 1;
1460			r = r600_bytecode_add_alu(ctx->bc, &alu);
1461			if (r)
1462				return r;
1463		}
1464	}
1465
1466	/* dst.y = SIN */
1467	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1468		if (ctx->bc->chip_class == CAYMAN) {
1469			for (i = 0 ; i < 3; i++) {
1470				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1471				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1472				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1473				if (i == 1)
1474					alu.dst.write = 1;
1475				else
1476					alu.dst.write = 0;
1477				alu.src[0].sel = ctx->temp_reg;
1478				alu.src[0].chan = 0;
1479				if (i == 2)
1480					alu.last = 1;
1481				r = r600_bytecode_add_alu(ctx->bc, &alu);
1482				if (r)
1483					return r;
1484			}
1485		} else {
1486			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1487			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1488			tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1489
1490			alu.src[0].sel = ctx->temp_reg;
1491			alu.src[0].chan = 0;
1492			alu.last = 1;
1493			r = r600_bytecode_add_alu(ctx->bc, &alu);
1494			if (r)
1495				return r;
1496		}
1497	}
1498
1499	/* dst.z = 0.0; */
1500	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1501		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1502
1503		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1504
1505		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1506
1507		alu.src[0].sel = V_SQ_ALU_SRC_0;
1508		alu.src[0].chan = 0;
1509
1510		alu.last = 1;
1511
1512		r = r600_bytecode_add_alu(ctx->bc, &alu);
1513		if (r)
1514			return r;
1515	}
1516
1517	/* dst.w = 1.0; */
1518	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1519		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1520
1521		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1522
1523		tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1524
1525		alu.src[0].sel = V_SQ_ALU_SRC_1;
1526		alu.src[0].chan = 0;
1527
1528		alu.last = 1;
1529
1530		r = r600_bytecode_add_alu(ctx->bc, &alu);
1531		if (r)
1532			return r;
1533	}
1534
1535	return 0;
1536}
1537
1538static int tgsi_kill(struct r600_shader_ctx *ctx)
1539{
1540	struct r600_bytecode_alu alu;
1541	int i, r;
1542
1543	for (i = 0; i < 4; i++) {
1544		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1545		alu.inst = ctx->inst_info->r600_opcode;
1546
1547		alu.dst.chan = i;
1548
1549		alu.src[0].sel = V_SQ_ALU_SRC_0;
1550
1551		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1552			alu.src[1].sel = V_SQ_ALU_SRC_1;
1553			alu.src[1].neg = 1;
1554		} else {
1555			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1556		}
1557		if (i == 3) {
1558			alu.last = 1;
1559		}
1560		r = r600_bytecode_add_alu(ctx->bc, &alu);
1561		if (r)
1562			return r;
1563	}
1564
1565	/* kill must be last in ALU */
1566	ctx->bc->force_add_cf = 1;
1567	ctx->shader->uses_kill = TRUE;
1568	return 0;
1569}
1570
1571static int tgsi_lit(struct r600_shader_ctx *ctx)
1572{
1573	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1574	struct r600_bytecode_alu alu;
1575	int r;
1576
1577	/* tmp.x = max(src.y, 0.0) */
1578	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1579	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1580	r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
1581	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1582	alu.src[1].chan = 1;
1583
1584	alu.dst.sel = ctx->temp_reg;
1585	alu.dst.chan = 0;
1586	alu.dst.write = 1;
1587
1588	alu.last = 1;
1589	r = r600_bytecode_add_alu(ctx->bc, &alu);
1590	if (r)
1591		return r;
1592
1593	if (inst->Dst[0].Register.WriteMask & (1 << 2))
1594	{
1595		int chan;
1596		int sel;
1597		int i;
1598
1599		if (ctx->bc->chip_class == CAYMAN) {
1600			for (i = 0; i < 3; i++) {
1601				/* tmp.z = log(tmp.x) */
1602				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1603				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1604				alu.src[0].sel = ctx->temp_reg;
1605				alu.src[0].chan = 0;
1606				alu.dst.sel = ctx->temp_reg;
1607				alu.dst.chan = i;
1608				if (i == 2) {
1609					alu.dst.write = 1;
1610					alu.last = 1;
1611				} else
1612					alu.dst.write = 0;
1613
1614				r = r600_bytecode_add_alu(ctx->bc, &alu);
1615				if (r)
1616					return r;
1617			}
1618		} else {
1619			/* tmp.z = log(tmp.x) */
1620			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1621			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1622			alu.src[0].sel = ctx->temp_reg;
1623			alu.src[0].chan = 0;
1624			alu.dst.sel = ctx->temp_reg;
1625			alu.dst.chan = 2;
1626			alu.dst.write = 1;
1627			alu.last = 1;
1628			r = r600_bytecode_add_alu(ctx->bc, &alu);
1629			if (r)
1630				return r;
1631		}
1632
1633		chan = alu.dst.chan;
1634		sel = alu.dst.sel;
1635
1636		/* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
1637		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1638		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1639		alu.src[0].sel  = sel;
1640		alu.src[0].chan = chan;
1641		r600_bytecode_src(&alu.src[1], &ctx->src[0], 3);
1642		r600_bytecode_src(&alu.src[2], &ctx->src[0], 0);
1643		alu.dst.sel = ctx->temp_reg;
1644		alu.dst.chan = 0;
1645		alu.dst.write = 1;
1646		alu.is_op3 = 1;
1647		alu.last = 1;
1648		r = r600_bytecode_add_alu(ctx->bc, &alu);
1649		if (r)
1650			return r;
1651
1652		if (ctx->bc->chip_class == CAYMAN) {
1653			for (i = 0; i < 3; i++) {
1654				/* dst.z = exp(tmp.x) */
1655				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1656				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1657				alu.src[0].sel = ctx->temp_reg;
1658				alu.src[0].chan = 0;
1659				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1660				if (i == 2) {
1661					alu.dst.write = 1;
1662					alu.last = 1;
1663				} else
1664					alu.dst.write = 0;
1665				r = r600_bytecode_add_alu(ctx->bc, &alu);
1666				if (r)
1667					return r;
1668			}
1669		} else {
1670			/* dst.z = exp(tmp.x) */
1671			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1672			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1673			alu.src[0].sel = ctx->temp_reg;
1674			alu.src[0].chan = 0;
1675			tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1676			alu.last = 1;
1677			r = r600_bytecode_add_alu(ctx->bc, &alu);
1678			if (r)
1679				return r;
1680		}
1681	}
1682
1683	/* dst.x, <- 1.0  */
1684	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1685	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1686	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
1687	alu.src[0].chan = 0;
1688	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1689	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1690	r = r600_bytecode_add_alu(ctx->bc, &alu);
1691	if (r)
1692		return r;
1693
1694	/* dst.y = max(src.x, 0.0) */
1695	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1696	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1697	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1698	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1699	alu.src[1].chan = 0;
1700	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1701	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1702	r = r600_bytecode_add_alu(ctx->bc, &alu);
1703	if (r)
1704		return r;
1705
1706	/* dst.w, <- 1.0  */
1707	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1708	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1709	alu.src[0].sel  = V_SQ_ALU_SRC_1;
1710	alu.src[0].chan = 0;
1711	tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1712	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1713	alu.last = 1;
1714	r = r600_bytecode_add_alu(ctx->bc, &alu);
1715	if (r)
1716		return r;
1717
1718	return 0;
1719}
1720
1721static int tgsi_rsq(struct r600_shader_ctx *ctx)
1722{
1723	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1724	struct r600_bytecode_alu alu;
1725	int i, r;
1726
1727	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1728
1729	/* FIXME:
1730	 * For state trackers other than OpenGL, we'll want to use
1731	 * _RECIPSQRT_IEEE instead.
1732	 */
1733	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1734
1735	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1736		r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
1737		r600_bytecode_src_set_abs(&alu.src[i]);
1738	}
1739	alu.dst.sel = ctx->temp_reg;
1740	alu.dst.write = 1;
1741	alu.last = 1;
1742	r = r600_bytecode_add_alu(ctx->bc, &alu);
1743	if (r)
1744		return r;
1745	/* replicate result */
1746	return tgsi_helper_tempx_replicate(ctx);
1747}
1748
1749static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1750{
1751	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1752	struct r600_bytecode_alu alu;
1753	int i, r;
1754
1755	for (i = 0; i < 4; i++) {
1756		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1757		alu.src[0].sel = ctx->temp_reg;
1758		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1759		alu.dst.chan = i;
1760		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1761		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1762		if (i == 3)
1763			alu.last = 1;
1764		r = r600_bytecode_add_alu(ctx->bc, &alu);
1765		if (r)
1766			return r;
1767	}
1768	return 0;
1769}
1770
1771static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1772{
1773	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1774	struct r600_bytecode_alu alu;
1775	int i, r;
1776
1777	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1778	alu.inst = ctx->inst_info->r600_opcode;
1779	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1780		r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
1781	}
1782	alu.dst.sel = ctx->temp_reg;
1783	alu.dst.write = 1;
1784	alu.last = 1;
1785	r = r600_bytecode_add_alu(ctx->bc, &alu);
1786	if (r)
1787		return r;
1788	/* replicate result */
1789	return tgsi_helper_tempx_replicate(ctx);
1790}
1791
1792static int cayman_pow(struct r600_shader_ctx *ctx)
1793{
1794	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1795	int i, r;
1796	struct r600_bytecode_alu alu;
1797	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1798
1799	for (i = 0; i < 3; i++) {
1800		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1801		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1802		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1803		alu.dst.sel = ctx->temp_reg;
1804		alu.dst.chan = i;
1805		alu.dst.write = 1;
1806		if (i == 2)
1807			alu.last = 1;
1808		r = r600_bytecode_add_alu(ctx->bc, &alu);
1809		if (r)
1810			return r;
1811	}
1812
1813	/* b * LOG2(a) */
1814	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1815	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1816	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
1817	alu.src[1].sel = ctx->temp_reg;
1818	alu.dst.sel = ctx->temp_reg;
1819	alu.dst.write = 1;
1820	alu.last = 1;
1821	r = r600_bytecode_add_alu(ctx->bc, &alu);
1822	if (r)
1823		return r;
1824
1825	for (i = 0; i < last_slot; i++) {
1826		/* POW(a,b) = EXP2(b * LOG2(a))*/
1827		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1828		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1829		alu.src[0].sel = ctx->temp_reg;
1830
1831		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1832		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1833		if (i == last_slot - 1)
1834			alu.last = 1;
1835		r = r600_bytecode_add_alu(ctx->bc, &alu);
1836		if (r)
1837			return r;
1838	}
1839	return 0;
1840}
1841
1842static int tgsi_pow(struct r600_shader_ctx *ctx)
1843{
1844	struct r600_bytecode_alu alu;
1845	int r;
1846
1847	/* LOG2(a) */
1848	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1849	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1850	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1851	alu.dst.sel = ctx->temp_reg;
1852	alu.dst.write = 1;
1853	alu.last = 1;
1854	r = r600_bytecode_add_alu(ctx->bc, &alu);
1855	if (r)
1856		return r;
1857	/* b * LOG2(a) */
1858	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1859	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1860	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
1861	alu.src[1].sel = ctx->temp_reg;
1862	alu.dst.sel = ctx->temp_reg;
1863	alu.dst.write = 1;
1864	alu.last = 1;
1865	r = r600_bytecode_add_alu(ctx->bc, &alu);
1866	if (r)
1867		return r;
1868	/* POW(a,b) = EXP2(b * LOG2(a))*/
1869	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1870	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1871	alu.src[0].sel = ctx->temp_reg;
1872	alu.dst.sel = ctx->temp_reg;
1873	alu.dst.write = 1;
1874	alu.last = 1;
1875	r = r600_bytecode_add_alu(ctx->bc, &alu);
1876	if (r)
1877		return r;
1878	return tgsi_helper_tempx_replicate(ctx);
1879}
1880
1881static int tgsi_idiv(struct r600_shader_ctx *ctx)
1882{
1883	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1884	struct r600_bytecode_alu alu;
1885	int i, r;
1886	unsigned write_mask = inst->Dst[0].Register.WriteMask;
1887	int last_inst = tgsi_last_instruction(write_mask);
1888	int tmp0 = ctx->temp_reg;
1889	int tmp1 = r600_get_temp(ctx);
1890	int unsigned_op = (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_UDIV);
1891
1892	/* tmp0 = float(src0) */
1893	for (i = 0; i < 4; i++) {
1894		if (!(write_mask & (1<<i)))
1895			continue;
1896
1897		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1898
1899		if (unsigned_op)
1900			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT);
1901		else
1902			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
1903
1904		alu.dst.sel = tmp0;
1905		alu.dst.chan = i;
1906		alu.dst.write = 1;
1907
1908		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
1909		alu.last = 1;
1910		r = r600_bytecode_add_alu(ctx->bc, &alu);
1911		if (r)
1912			return r;
1913	}
1914
1915	if (!unsigned_op) {
1916		/* tmp1 = tmp0>=0 ? 0.5 : -0.5 for int*/
1917		for (i = 0; i < 4; i++) {
1918			if (!(write_mask & (1<<i)))
1919				continue;
1920
1921			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1922			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
1923			alu.is_op3 = 1;
1924
1925			alu.dst.sel = tmp1;
1926			alu.dst.chan = i;
1927			alu.dst.write = 1;
1928
1929			alu.src[0].sel = tmp0;
1930			alu.src[0].chan = i;
1931
1932			alu.src[1].sel = V_SQ_ALU_SRC_0_5;
1933
1934			if (unsigned_op)
1935				alu.src[2].sel = V_SQ_ALU_SRC_0;
1936			else {
1937			alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1938			alu.src[2].neg = 1;
1939			}
1940
1941			if (i == last_inst)
1942				alu.last = 1;
1943			r = r600_bytecode_add_alu(ctx->bc, &alu);
1944			if (r)
1945				return r;
1946		}
1947	}
1948
1949	/* tmp0 = tmp0 + tmp1 for int */
1950	/* tmp0 = tmp0 + 0.5 for uint */
1951	for (i = 0; i < 4; i++) {
1952		if (!(write_mask & (1<<i)))
1953			continue;
1954
1955		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1956		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1957
1958		alu.dst.sel = tmp0;
1959		alu.dst.chan = i;
1960		alu.dst.write = 1;
1961
1962		alu.src[0].sel = tmp0;
1963		alu.src[0].chan = i;
1964
1965		if (unsigned_op)
1966			alu.src[1].sel = V_SQ_ALU_SRC_0_5;
1967		else {
1968			alu.src[1].sel = tmp1;
1969			alu.src[1].chan = i;
1970		}
1971
1972		if (i == last_inst)
1973			alu.last = 1;
1974		r = r600_bytecode_add_alu(ctx->bc, &alu);
1975		if (r)
1976			return r;
1977	}
1978
1979	/* tmp1 = float(src1) */
1980	for (i = 0; i < 4; i++) {
1981		if (!(write_mask & (1<<i)))
1982			continue;
1983
1984		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1985
1986		if (unsigned_op)
1987			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT);
1988		else
1989			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
1990
1991		alu.dst.sel = tmp1;
1992		alu.dst.chan = i;
1993		alu.dst.write = 1;
1994
1995		r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
1996		alu.last = 1;
1997		r = r600_bytecode_add_alu(ctx->bc, &alu);
1998		if (r)
1999			return r;
2000	}
2001
2002	/* tmp1 = 1.0/src1 */
2003	for (i = 0; i < 4; i++) {
2004		if (!(write_mask & (1<<i)))
2005			continue;
2006
2007		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2008		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2009
2010		alu.dst.sel = tmp1;
2011		alu.dst.chan = i;
2012		alu.dst.write = 1;
2013
2014		alu.src[0].sel = tmp1;
2015		alu.src[0].chan = i;
2016
2017		alu.last = 1;
2018		r = r600_bytecode_add_alu(ctx->bc, &alu);
2019		if (r)
2020			return r;
2021	}
2022
2023	/* tmp1 = tmp0 * tmp1 */
2024	for (i = 0; i < 4; i++) {
2025		if (!(write_mask & (1<<i)))
2026			continue;
2027
2028		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2029		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2030
2031		alu.dst.sel = tmp1;
2032		alu.dst.chan = i;
2033		alu.dst.write = 1;
2034
2035		alu.src[0].sel = ctx->temp_reg;
2036		alu.src[0].chan = i;
2037
2038		alu.src[1].sel = tmp1;
2039		alu.src[1].chan = i;
2040
2041		if (i == last_inst)
2042			alu.last = 1;
2043		r = r600_bytecode_add_alu(ctx->bc, &alu);
2044		if (r)
2045			return r;
2046	}
2047
2048	/* tmp1 = trunc(tmp1) for evergreen+ */
2049	if (ctx->bc->chip_class >= EVERGREEN) {
2050		for (i = 0; i < 4; i++) {
2051			if (!(write_mask & (1<<i)))
2052				continue;
2053
2054			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2055			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC);
2056
2057			alu.dst.sel = tmp1;
2058			alu.dst.chan = i;
2059			alu.dst.write = 1;
2060
2061			alu.src[0].sel = tmp1;
2062			alu.src[0].chan = i;
2063
2064			if (i == last_inst)
2065				alu.last = 1;
2066			r = r600_bytecode_add_alu(ctx->bc, &alu);
2067			if (r)
2068				return r;
2069		}
2070	}
2071
2072	/* dst = int(tmp1) */
2073	for (i = 0; i < 4; i++) {
2074		if (!(write_mask & (1<<i)))
2075			continue;
2076
2077		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2078
2079		if (unsigned_op)
2080			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT);
2081		else
2082			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT);
2083
2084		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2085
2086		alu.src[0].sel = tmp1;
2087		alu.src[0].chan = i;
2088
2089		if ((ctx->bc->chip_class < EVERGREEN || unsigned_op) || i == last_inst)
2090			alu.last = 1;
2091		r = r600_bytecode_add_alu(ctx->bc, &alu);
2092		if (r)
2093			return r;
2094	}
2095
2096	return 0;
2097}
2098
2099static int tgsi_f2i(struct r600_shader_ctx *ctx)
2100{
2101	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2102	struct r600_bytecode_alu alu;
2103	int i, r;
2104	unsigned write_mask = inst->Dst[0].Register.WriteMask;
2105	int last_inst = tgsi_last_instruction(write_mask);
2106
2107	for (i = 0; i < 4; i++) {
2108		if (!(write_mask & (1<<i)))
2109			continue;
2110
2111		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2112		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC);
2113
2114		alu.dst.sel = ctx->temp_reg;
2115		alu.dst.chan = i;
2116		alu.dst.write = 1;
2117
2118		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2119		if (i == last_inst)
2120			alu.last = 1;
2121		r = r600_bytecode_add_alu(ctx->bc, &alu);
2122		if (r)
2123			return r;
2124	}
2125
2126	for (i = 0; i < 4; i++) {
2127		if (!(write_mask & (1<<i)))
2128			continue;
2129
2130		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2131		alu.inst = ctx->inst_info->r600_opcode;
2132
2133		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2134
2135		alu.src[0].sel = ctx->temp_reg;
2136		alu.src[0].chan = i;
2137
2138		if (i == last_inst)
2139			alu.last = 1;
2140		r = r600_bytecode_add_alu(ctx->bc, &alu);
2141		if (r)
2142			return r;
2143	}
2144
2145	return 0;
2146}
2147
2148static int tgsi_iabs(struct r600_shader_ctx *ctx)
2149{
2150	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2151	struct r600_bytecode_alu alu;
2152	int i, r;
2153	unsigned write_mask = inst->Dst[0].Register.WriteMask;
2154	int last_inst = tgsi_last_instruction(write_mask);
2155
2156	/* tmp = -src */
2157	for (i = 0; i < 4; i++) {
2158		if (!(write_mask & (1<<i)))
2159			continue;
2160
2161		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2162		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT);
2163
2164		alu.dst.sel = ctx->temp_reg;
2165		alu.dst.chan = i;
2166		alu.dst.write = 1;
2167
2168		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2169		alu.src[0].sel = V_SQ_ALU_SRC_0;
2170
2171		if (i == last_inst)
2172			alu.last = 1;
2173		r = r600_bytecode_add_alu(ctx->bc, &alu);
2174		if (r)
2175			return r;
2176	}
2177
2178	/* dst = (src >= 0 ? src : tmp) */
2179	for (i = 0; i < 4; i++) {
2180		if (!(write_mask & (1<<i)))
2181			continue;
2182
2183		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2184		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2185		alu.is_op3 = 1;
2186		alu.dst.write = 1;
2187
2188		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2189
2190		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2191		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2192		alu.src[2].sel = ctx->temp_reg;
2193		alu.src[2].chan = i;
2194
2195		if (i == last_inst)
2196			alu.last = 1;
2197		r = r600_bytecode_add_alu(ctx->bc, &alu);
2198		if (r)
2199			return r;
2200	}
2201	return 0;
2202}
2203
2204static int tgsi_issg(struct r600_shader_ctx *ctx)
2205{
2206	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2207	struct r600_bytecode_alu alu;
2208	int i, r;
2209	unsigned write_mask = inst->Dst[0].Register.WriteMask;
2210	int last_inst = tgsi_last_instruction(write_mask);
2211
2212	/* tmp = (src >= 0 ? src : -1) */
2213	for (i = 0; i < 4; i++) {
2214		if (!(write_mask & (1<<i)))
2215			continue;
2216
2217		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2218		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT);
2219		alu.is_op3 = 1;
2220
2221		alu.dst.sel = ctx->temp_reg;
2222		alu.dst.chan = i;
2223		alu.dst.write = 1;
2224
2225		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2226		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2227		alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT;
2228
2229		if (i == last_inst)
2230			alu.last = 1;
2231		r = r600_bytecode_add_alu(ctx->bc, &alu);
2232		if (r)
2233			return r;
2234	}
2235
2236	/* dst = (tmp > 0 ? 1 : tmp) */
2237	for (i = 0; i < 4; i++) {
2238		if (!(write_mask & (1<<i)))
2239			continue;
2240
2241		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2242		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT);
2243		alu.is_op3 = 1;
2244		alu.dst.write = 1;
2245
2246		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2247
2248		alu.src[0].sel = ctx->temp_reg;
2249		alu.src[0].chan = i;
2250
2251		alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
2252
2253		alu.src[2].sel = ctx->temp_reg;
2254		alu.src[2].chan = i;
2255
2256		if (i == last_inst)
2257			alu.last = 1;
2258		r = r600_bytecode_add_alu(ctx->bc, &alu);
2259		if (r)
2260			return r;
2261	}
2262	return 0;
2263}
2264
2265
2266
2267static int tgsi_ssg(struct r600_shader_ctx *ctx)
2268{
2269	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2270	struct r600_bytecode_alu alu;
2271	int i, r;
2272
2273	/* tmp = (src > 0 ? 1 : src) */
2274	for (i = 0; i < 4; i++) {
2275		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2276		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
2277		alu.is_op3 = 1;
2278
2279		alu.dst.sel = ctx->temp_reg;
2280		alu.dst.chan = i;
2281
2282		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2283		alu.src[1].sel = V_SQ_ALU_SRC_1;
2284		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
2285
2286		if (i == 3)
2287			alu.last = 1;
2288		r = r600_bytecode_add_alu(ctx->bc, &alu);
2289		if (r)
2290			return r;
2291	}
2292
2293	/* dst = (-tmp > 0 ? -1 : tmp) */
2294	for (i = 0; i < 4; i++) {
2295		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2296		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
2297		alu.is_op3 = 1;
2298		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2299
2300		alu.src[0].sel = ctx->temp_reg;
2301		alu.src[0].chan = i;
2302		alu.src[0].neg = 1;
2303
2304		alu.src[1].sel = V_SQ_ALU_SRC_1;
2305		alu.src[1].neg = 1;
2306
2307		alu.src[2].sel = ctx->temp_reg;
2308		alu.src[2].chan = i;
2309
2310		if (i == 3)
2311			alu.last = 1;
2312		r = r600_bytecode_add_alu(ctx->bc, &alu);
2313		if (r)
2314			return r;
2315	}
2316	return 0;
2317}
2318
2319static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
2320{
2321	struct r600_bytecode_alu alu;
2322	int i, r;
2323
2324	for (i = 0; i < 4; i++) {
2325		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2326		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
2327			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
2328			alu.dst.chan = i;
2329		} else {
2330			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2331			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2332			alu.src[0].sel = ctx->temp_reg;
2333			alu.src[0].chan = i;
2334		}
2335		if (i == 3) {
2336			alu.last = 1;
2337		}
2338		r = r600_bytecode_add_alu(ctx->bc, &alu);
2339		if (r)
2340			return r;
2341	}
2342	return 0;
2343}
2344
2345static int tgsi_op3(struct r600_shader_ctx *ctx)
2346{
2347	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2348	struct r600_bytecode_alu alu;
2349	int i, j, r;
2350	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2351
2352	for (i = 0; i < lasti + 1; i++) {
2353		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2354			continue;
2355
2356		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2357		alu.inst = ctx->inst_info->r600_opcode;
2358		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
2359			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
2360		}
2361
2362		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2363		alu.dst.chan = i;
2364		alu.dst.write = 1;
2365		alu.is_op3 = 1;
2366		if (i == lasti) {
2367			alu.last = 1;
2368		}
2369		r = r600_bytecode_add_alu(ctx->bc, &alu);
2370		if (r)
2371			return r;
2372	}
2373	return 0;
2374}
2375
2376static int tgsi_dp(struct r600_shader_ctx *ctx)
2377{
2378	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2379	struct r600_bytecode_alu alu;
2380	int i, j, r;
2381
2382	for (i = 0; i < 4; i++) {
2383		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2384		alu.inst = ctx->inst_info->r600_opcode;
2385		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
2386			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
2387		}
2388
2389		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2390		alu.dst.chan = i;
2391		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
2392		/* handle some special cases */
2393		switch (ctx->inst_info->tgsi_opcode) {
2394		case TGSI_OPCODE_DP2:
2395			if (i > 1) {
2396				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
2397				alu.src[0].chan = alu.src[1].chan = 0;
2398			}
2399			break;
2400		case TGSI_OPCODE_DP3:
2401			if (i > 2) {
2402				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
2403				alu.src[0].chan = alu.src[1].chan = 0;
2404			}
2405			break;
2406		case TGSI_OPCODE_DPH:
2407			if (i == 3) {
2408				alu.src[0].sel = V_SQ_ALU_SRC_1;
2409				alu.src[0].chan = 0;
2410				alu.src[0].neg = 0;
2411			}
2412			break;
2413		default:
2414			break;
2415		}
2416		if (i == 3) {
2417			alu.last = 1;
2418		}
2419		r = r600_bytecode_add_alu(ctx->bc, &alu);
2420		if (r)
2421			return r;
2422	}
2423	return 0;
2424}
2425
2426static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
2427						    unsigned index)
2428{
2429	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2430	return 	(inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
2431		inst->Src[index].Register.File != TGSI_FILE_INPUT) ||
2432		ctx->src[index].neg || ctx->src[index].abs;
2433}
2434
2435static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
2436					unsigned index)
2437{
2438	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2439	return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
2440}
2441
2442static int tgsi_tex(struct r600_shader_ctx *ctx)
2443{
2444	static float one_point_five = 1.5f;
2445	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2446	struct r600_bytecode_tex tex;
2447	struct r600_bytecode_alu alu;
2448	unsigned src_gpr;
2449	int r, i, j;
2450	int opcode;
2451	/* Texture fetch instructions can only use gprs as source.
2452	 * Also they cannot negate the source or take the absolute value */
2453	const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0);
2454	boolean src_loaded = FALSE;
2455	unsigned sampler_src_reg = 1;
2456	u8 offset_x = 0, offset_y = 0, offset_z = 0;
2457
2458	src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
2459
2460	if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
2461		/* get offset values */
2462		if (inst->Texture.NumOffsets) {
2463			assert(inst->Texture.NumOffsets == 1);
2464
2465			offset_x = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
2466			offset_y = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
2467			offset_z = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
2468		}
2469	} else if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
2470		/* TGSI moves the sampler to src reg 3 for TXD */
2471		sampler_src_reg = 3;
2472
2473		for (i = 1; i < 3; i++) {
2474			/* set gradients h/v */
2475			memset(&tex, 0, sizeof(struct r600_bytecode_tex));
2476			tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H :
2477				SQ_TEX_INST_SET_GRADIENTS_V;
2478			tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
2479			tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
2480
2481			if (tgsi_tex_src_requires_loading(ctx, i)) {
2482				tex.src_gpr = r600_get_temp(ctx);
2483				tex.src_sel_x = 0;
2484				tex.src_sel_y = 1;
2485				tex.src_sel_z = 2;
2486				tex.src_sel_w = 3;
2487
2488				for (j = 0; j < 4; j++) {
2489					memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2490					alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2491                                        r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
2492                                        alu.dst.sel = tex.src_gpr;
2493                                        alu.dst.chan = j;
2494                                        if (j == 3)
2495                                                alu.last = 1;
2496                                        alu.dst.write = 1;
2497                                        r = r600_bytecode_add_alu(ctx->bc, &alu);
2498                                        if (r)
2499                                                return r;
2500				}
2501
2502			} else {
2503				tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i);
2504				tex.src_sel_x = ctx->src[i].swizzle[0];
2505				tex.src_sel_y = ctx->src[i].swizzle[1];
2506				tex.src_sel_z = ctx->src[i].swizzle[2];
2507				tex.src_sel_w = ctx->src[i].swizzle[3];
2508				tex.src_rel = ctx->src[i].rel;
2509			}
2510			tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */
2511			tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
2512			if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
2513				tex.coord_type_x = 1;
2514				tex.coord_type_y = 1;
2515				tex.coord_type_z = 1;
2516				tex.coord_type_w = 1;
2517			}
2518			r = r600_bytecode_add_tex(ctx->bc, &tex);
2519			if (r)
2520				return r;
2521		}
2522	} else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
2523		int out_chan;
2524		/* Add perspective divide */
2525		if (ctx->bc->chip_class == CAYMAN) {
2526			out_chan = 2;
2527			for (i = 0; i < 3; i++) {
2528				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2529				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2530				r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
2531
2532				alu.dst.sel = ctx->temp_reg;
2533				alu.dst.chan = i;
2534				if (i == 2)
2535					alu.last = 1;
2536				if (out_chan == i)
2537					alu.dst.write = 1;
2538				r = r600_bytecode_add_alu(ctx->bc, &alu);
2539				if (r)
2540					return r;
2541			}
2542
2543		} else {
2544			out_chan = 3;
2545			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2546			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2547			r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
2548
2549			alu.dst.sel = ctx->temp_reg;
2550			alu.dst.chan = out_chan;
2551			alu.last = 1;
2552			alu.dst.write = 1;
2553			r = r600_bytecode_add_alu(ctx->bc, &alu);
2554			if (r)
2555				return r;
2556		}
2557
2558		for (i = 0; i < 3; i++) {
2559			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2560			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2561			alu.src[0].sel = ctx->temp_reg;
2562			alu.src[0].chan = out_chan;
2563			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2564			alu.dst.sel = ctx->temp_reg;
2565			alu.dst.chan = i;
2566			alu.dst.write = 1;
2567			r = r600_bytecode_add_alu(ctx->bc, &alu);
2568			if (r)
2569				return r;
2570		}
2571		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2572		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2573		alu.src[0].sel = V_SQ_ALU_SRC_1;
2574		alu.src[0].chan = 0;
2575		alu.dst.sel = ctx->temp_reg;
2576		alu.dst.chan = 3;
2577		alu.last = 1;
2578		alu.dst.write = 1;
2579		r = r600_bytecode_add_alu(ctx->bc, &alu);
2580		if (r)
2581			return r;
2582		src_loaded = TRUE;
2583		src_gpr = ctx->temp_reg;
2584	}
2585
2586	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
2587		static const unsigned src0_swizzle[] = {2, 2, 0, 1};
2588		static const unsigned src1_swizzle[] = {1, 0, 2, 2};
2589
2590		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
2591		for (i = 0; i < 4; i++) {
2592			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2593			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
2594			r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
2595			r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
2596			alu.dst.sel = ctx->temp_reg;
2597			alu.dst.chan = i;
2598			if (i == 3)
2599				alu.last = 1;
2600			alu.dst.write = 1;
2601			r = r600_bytecode_add_alu(ctx->bc, &alu);
2602			if (r)
2603				return r;
2604		}
2605
2606		/* tmp1.z = RCP_e(|tmp1.z|) */
2607		if (ctx->bc->chip_class == CAYMAN) {
2608			for (i = 0; i < 3; i++) {
2609				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2610				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2611				alu.src[0].sel = ctx->temp_reg;
2612				alu.src[0].chan = 2;
2613				alu.src[0].abs = 1;
2614				alu.dst.sel = ctx->temp_reg;
2615				alu.dst.chan = i;
2616				if (i == 2)
2617					alu.dst.write = 1;
2618				if (i == 2)
2619					alu.last = 1;
2620				r = r600_bytecode_add_alu(ctx->bc, &alu);
2621				if (r)
2622					return r;
2623			}
2624		} else {
2625			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2626			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2627			alu.src[0].sel = ctx->temp_reg;
2628			alu.src[0].chan = 2;
2629			alu.src[0].abs = 1;
2630			alu.dst.sel = ctx->temp_reg;
2631			alu.dst.chan = 2;
2632			alu.dst.write = 1;
2633			alu.last = 1;
2634			r = r600_bytecode_add_alu(ctx->bc, &alu);
2635			if (r)
2636				return r;
2637		}
2638
2639		/* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
2640		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
2641		 * muladd has no writemask, have to use another temp
2642		 */
2643		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2644		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2645		alu.is_op3 = 1;
2646
2647		alu.src[0].sel = ctx->temp_reg;
2648		alu.src[0].chan = 0;
2649		alu.src[1].sel = ctx->temp_reg;
2650		alu.src[1].chan = 2;
2651
2652		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
2653		alu.src[2].chan = 0;
2654		alu.src[2].value = *(uint32_t *)&one_point_five;
2655
2656		alu.dst.sel = ctx->temp_reg;
2657		alu.dst.chan = 0;
2658		alu.dst.write = 1;
2659
2660		r = r600_bytecode_add_alu(ctx->bc, &alu);
2661		if (r)
2662			return r;
2663
2664		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2665		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2666		alu.is_op3 = 1;
2667
2668		alu.src[0].sel = ctx->temp_reg;
2669		alu.src[0].chan = 1;
2670		alu.src[1].sel = ctx->temp_reg;
2671		alu.src[1].chan = 2;
2672
2673		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
2674		alu.src[2].chan = 0;
2675		alu.src[2].value = *(uint32_t *)&one_point_five;
2676
2677		alu.dst.sel = ctx->temp_reg;
2678		alu.dst.chan = 1;
2679		alu.dst.write = 1;
2680
2681		alu.last = 1;
2682		r = r600_bytecode_add_alu(ctx->bc, &alu);
2683		if (r)
2684			return r;
2685
2686		src_loaded = TRUE;
2687		src_gpr = ctx->temp_reg;
2688	}
2689
2690	if (src_requires_loading && !src_loaded) {
2691		for (i = 0; i < 4; i++) {
2692			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2693			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2694			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2695			alu.dst.sel = ctx->temp_reg;
2696			alu.dst.chan = i;
2697			if (i == 3)
2698				alu.last = 1;
2699			alu.dst.write = 1;
2700			r = r600_bytecode_add_alu(ctx->bc, &alu);
2701			if (r)
2702				return r;
2703		}
2704		src_loaded = TRUE;
2705		src_gpr = ctx->temp_reg;
2706	}
2707
2708	opcode = ctx->inst_info->r600_opcode;
2709	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
2710	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
2711	    inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
2712	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
2713	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) {
2714		switch (opcode) {
2715		case SQ_TEX_INST_SAMPLE:
2716			opcode = SQ_TEX_INST_SAMPLE_C;
2717			break;
2718		case SQ_TEX_INST_SAMPLE_L:
2719			opcode = SQ_TEX_INST_SAMPLE_C_L;
2720			break;
2721		case SQ_TEX_INST_SAMPLE_LB:
2722			opcode = SQ_TEX_INST_SAMPLE_C_LB;
2723			break;
2724		case SQ_TEX_INST_SAMPLE_G:
2725			opcode = SQ_TEX_INST_SAMPLE_C_G;
2726			break;
2727		}
2728	}
2729
2730	memset(&tex, 0, sizeof(struct r600_bytecode_tex));
2731	tex.inst = opcode;
2732
2733	tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
2734	tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
2735	tex.src_gpr = src_gpr;
2736	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
2737	tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
2738	tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
2739	tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
2740	tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
2741	if (src_loaded) {
2742		tex.src_sel_x = 0;
2743		tex.src_sel_y = 1;
2744		tex.src_sel_z = 2;
2745		tex.src_sel_w = 3;
2746	} else {
2747		tex.src_sel_x = ctx->src[0].swizzle[0];
2748		tex.src_sel_y = ctx->src[0].swizzle[1];
2749		tex.src_sel_z = ctx->src[0].swizzle[2];
2750		tex.src_sel_w = ctx->src[0].swizzle[3];
2751		tex.src_rel = ctx->src[0].rel;
2752	}
2753
2754	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
2755		tex.src_sel_x = 1;
2756		tex.src_sel_y = 0;
2757		tex.src_sel_z = 3;
2758		tex.src_sel_w = 1;
2759	}
2760
2761	if (inst->Texture.Texture != TGSI_TEXTURE_RECT &&
2762	    inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) {
2763		tex.coord_type_x = 1;
2764		tex.coord_type_y = 1;
2765	}
2766	tex.coord_type_z = 1;
2767	tex.coord_type_w = 1;
2768
2769	tex.offset_x = offset_x;
2770	tex.offset_y = offset_y;
2771	tex.offset_z = offset_z;
2772
2773	/* Put the depth for comparison in W.
2774	 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W.
2775	 * Some instructions expect the depth in Z. */
2776	if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
2777	     inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
2778	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
2779	     inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) &&
2780	    opcode != SQ_TEX_INST_SAMPLE_C_L &&
2781	    opcode != SQ_TEX_INST_SAMPLE_C_LB) {
2782		tex.src_sel_w = tex.src_sel_z;
2783	}
2784
2785	if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY ||
2786	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) {
2787		if (opcode == SQ_TEX_INST_SAMPLE_C_L ||
2788		    opcode == SQ_TEX_INST_SAMPLE_C_LB) {
2789			/* the array index is read from Y */
2790			tex.coord_type_y = 0;
2791		} else {
2792			/* the array index is read from Z */
2793			tex.coord_type_z = 0;
2794			tex.src_sel_z = tex.src_sel_y;
2795		}
2796	} else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
2797		   inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)
2798		/* the array index is read from Z */
2799		tex.coord_type_z = 0;
2800
2801	r = r600_bytecode_add_tex(ctx->bc, &tex);
2802	if (r)
2803		return r;
2804
2805	/* add shadow ambient support  - gallium doesn't do it yet */
2806	return 0;
2807}
2808
2809static int tgsi_lrp(struct r600_shader_ctx *ctx)
2810{
2811	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2812	struct r600_bytecode_alu alu;
2813	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2814	unsigned i;
2815	int r;
2816
2817	/* optimize if it's just an equal balance */
2818	if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
2819		for (i = 0; i < lasti + 1; i++) {
2820			if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2821				continue;
2822
2823			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2824			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2825			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2826			r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
2827			alu.omod = 3;
2828			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2829			alu.dst.chan = i;
2830			if (i == lasti) {
2831				alu.last = 1;
2832			}
2833			r = r600_bytecode_add_alu(ctx->bc, &alu);
2834			if (r)
2835				return r;
2836		}
2837		return 0;
2838	}
2839
2840	/* 1 - src0 */
2841	for (i = 0; i < lasti + 1; i++) {
2842		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2843			continue;
2844
2845		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2846		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2847		alu.src[0].sel = V_SQ_ALU_SRC_1;
2848		alu.src[0].chan = 0;
2849		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2850		r600_bytecode_src_toggle_neg(&alu.src[1]);
2851		alu.dst.sel = ctx->temp_reg;
2852		alu.dst.chan = i;
2853		if (i == lasti) {
2854			alu.last = 1;
2855		}
2856		alu.dst.write = 1;
2857		r = r600_bytecode_add_alu(ctx->bc, &alu);
2858		if (r)
2859			return r;
2860	}
2861
2862	/* (1 - src0) * src2 */
2863	for (i = 0; i < lasti + 1; i++) {
2864		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2865			continue;
2866
2867		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2868		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2869		alu.src[0].sel = ctx->temp_reg;
2870		alu.src[0].chan = i;
2871		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
2872		alu.dst.sel = ctx->temp_reg;
2873		alu.dst.chan = i;
2874		if (i == lasti) {
2875			alu.last = 1;
2876		}
2877		alu.dst.write = 1;
2878		r = r600_bytecode_add_alu(ctx->bc, &alu);
2879		if (r)
2880			return r;
2881	}
2882
2883	/* src0 * src1 + (1 - src0) * src2 */
2884	for (i = 0; i < lasti + 1; i++) {
2885		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2886			continue;
2887
2888		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2889		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2890		alu.is_op3 = 1;
2891		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2892		r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2893		alu.src[2].sel = ctx->temp_reg;
2894		alu.src[2].chan = i;
2895
2896		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2897		alu.dst.chan = i;
2898		if (i == lasti) {
2899			alu.last = 1;
2900		}
2901		r = r600_bytecode_add_alu(ctx->bc, &alu);
2902		if (r)
2903			return r;
2904	}
2905	return 0;
2906}
2907
2908static int tgsi_cmp(struct r600_shader_ctx *ctx)
2909{
2910	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2911	struct r600_bytecode_alu alu;
2912	int i, r;
2913	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2914
2915	for (i = 0; i < lasti + 1; i++) {
2916		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2917			continue;
2918
2919		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2920		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
2921		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2922		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
2923		r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
2924		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2925		alu.dst.chan = i;
2926		alu.dst.write = 1;
2927		alu.is_op3 = 1;
2928		if (i == lasti)
2929			alu.last = 1;
2930		r = r600_bytecode_add_alu(ctx->bc, &alu);
2931		if (r)
2932			return r;
2933	}
2934	return 0;
2935}
2936
2937static int tgsi_xpd(struct r600_shader_ctx *ctx)
2938{
2939	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2940	static const unsigned int src0_swizzle[] = {2, 0, 1};
2941	static const unsigned int src1_swizzle[] = {1, 2, 0};
2942	struct r600_bytecode_alu alu;
2943	uint32_t use_temp = 0;
2944	int i, r;
2945
2946	if (inst->Dst[0].Register.WriteMask != 0xf)
2947		use_temp = 1;
2948
2949	for (i = 0; i < 4; i++) {
2950		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2951		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2952		if (i < 3) {
2953			r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
2954			r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
2955		} else {
2956			alu.src[0].sel = V_SQ_ALU_SRC_0;
2957			alu.src[0].chan = i;
2958			alu.src[1].sel = V_SQ_ALU_SRC_0;
2959			alu.src[1].chan = i;
2960		}
2961
2962		alu.dst.sel = ctx->temp_reg;
2963		alu.dst.chan = i;
2964		alu.dst.write = 1;
2965
2966		if (i == 3)
2967			alu.last = 1;
2968		r = r600_bytecode_add_alu(ctx->bc, &alu);
2969		if (r)
2970			return r;
2971	}
2972
2973	for (i = 0; i < 4; i++) {
2974		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2975		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2976
2977		if (i < 3) {
2978			r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
2979			r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
2980		} else {
2981			alu.src[0].sel = V_SQ_ALU_SRC_0;
2982			alu.src[0].chan = i;
2983			alu.src[1].sel = V_SQ_ALU_SRC_0;
2984			alu.src[1].chan = i;
2985		}
2986
2987		alu.src[2].sel = ctx->temp_reg;
2988		alu.src[2].neg = 1;
2989		alu.src[2].chan = i;
2990
2991		if (use_temp)
2992			alu.dst.sel = ctx->temp_reg;
2993		else
2994			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2995		alu.dst.chan = i;
2996		alu.dst.write = 1;
2997		alu.is_op3 = 1;
2998		if (i == 3)
2999			alu.last = 1;
3000		r = r600_bytecode_add_alu(ctx->bc, &alu);
3001		if (r)
3002			return r;
3003	}
3004	if (use_temp)
3005		return tgsi_helper_copy(ctx, inst);
3006	return 0;
3007}
3008
3009static int tgsi_exp(struct r600_shader_ctx *ctx)
3010{
3011	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3012	struct r600_bytecode_alu alu;
3013	int r;
3014	int i;
3015
3016	/* result.x = 2^floor(src); */
3017	if (inst->Dst[0].Register.WriteMask & 1) {
3018		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3019
3020		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
3021		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3022
3023		alu.dst.sel = ctx->temp_reg;
3024		alu.dst.chan = 0;
3025		alu.dst.write = 1;
3026		alu.last = 1;
3027		r = r600_bytecode_add_alu(ctx->bc, &alu);
3028		if (r)
3029			return r;
3030
3031		if (ctx->bc->chip_class == CAYMAN) {
3032			for (i = 0; i < 3; i++) {
3033				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
3034				alu.src[0].sel = ctx->temp_reg;
3035				alu.src[0].chan = 0;
3036
3037				alu.dst.sel = ctx->temp_reg;
3038				alu.dst.chan = i;
3039				if (i == 0)
3040					alu.dst.write = 1;
3041				if (i == 2)
3042					alu.last = 1;
3043				r = r600_bytecode_add_alu(ctx->bc, &alu);
3044				if (r)
3045					return r;
3046			}
3047		} else {
3048			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
3049			alu.src[0].sel = ctx->temp_reg;
3050			alu.src[0].chan = 0;
3051
3052			alu.dst.sel = ctx->temp_reg;
3053			alu.dst.chan = 0;
3054			alu.dst.write = 1;
3055			alu.last = 1;
3056			r = r600_bytecode_add_alu(ctx->bc, &alu);
3057			if (r)
3058				return r;
3059		}
3060	}
3061
3062	/* result.y = tmp - floor(tmp); */
3063	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
3064		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3065
3066		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
3067		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3068
3069		alu.dst.sel = ctx->temp_reg;
3070#if 0
3071		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3072		if (r)
3073			return r;
3074#endif
3075		alu.dst.write = 1;
3076		alu.dst.chan = 1;
3077
3078		alu.last = 1;
3079
3080		r = r600_bytecode_add_alu(ctx->bc, &alu);
3081		if (r)
3082			return r;
3083	}
3084
3085	/* result.z = RoughApprox2ToX(tmp);*/
3086	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
3087		if (ctx->bc->chip_class == CAYMAN) {
3088			for (i = 0; i < 3; i++) {
3089				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3090				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
3091				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3092
3093				alu.dst.sel = ctx->temp_reg;
3094				alu.dst.chan = i;
3095				if (i == 2) {
3096					alu.dst.write = 1;
3097					alu.last = 1;
3098				}
3099
3100				r = r600_bytecode_add_alu(ctx->bc, &alu);
3101				if (r)
3102					return r;
3103			}
3104		} else {
3105			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3106			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
3107			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3108
3109			alu.dst.sel = ctx->temp_reg;
3110			alu.dst.write = 1;
3111			alu.dst.chan = 2;
3112
3113			alu.last = 1;
3114
3115			r = r600_bytecode_add_alu(ctx->bc, &alu);
3116			if (r)
3117				return r;
3118		}
3119	}
3120
3121	/* result.w = 1.0;*/
3122	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
3123		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3124
3125		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3126		alu.src[0].sel = V_SQ_ALU_SRC_1;
3127		alu.src[0].chan = 0;
3128
3129		alu.dst.sel = ctx->temp_reg;
3130		alu.dst.chan = 3;
3131		alu.dst.write = 1;
3132		alu.last = 1;
3133		r = r600_bytecode_add_alu(ctx->bc, &alu);
3134		if (r)
3135			return r;
3136	}
3137	return tgsi_helper_copy(ctx, inst);
3138}
3139
3140static int tgsi_log(struct r600_shader_ctx *ctx)
3141{
3142	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3143	struct r600_bytecode_alu alu;
3144	int r;
3145	int i;
3146
3147	/* result.x = floor(log2(|src|)); */
3148	if (inst->Dst[0].Register.WriteMask & 1) {
3149		if (ctx->bc->chip_class == CAYMAN) {
3150			for (i = 0; i < 3; i++) {
3151				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3152
3153				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
3154				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3155				r600_bytecode_src_set_abs(&alu.src[0]);
3156
3157				alu.dst.sel = ctx->temp_reg;
3158				alu.dst.chan = i;
3159				if (i == 0)
3160					alu.dst.write = 1;
3161				if (i == 2)
3162					alu.last = 1;
3163				r = r600_bytecode_add_alu(ctx->bc, &alu);
3164				if (r)
3165					return r;
3166			}
3167
3168		} else {
3169			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3170
3171			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
3172			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3173			r600_bytecode_src_set_abs(&alu.src[0]);
3174
3175			alu.dst.sel = ctx->temp_reg;
3176			alu.dst.chan = 0;
3177			alu.dst.write = 1;
3178			alu.last = 1;
3179			r = r600_bytecode_add_alu(ctx->bc, &alu);
3180			if (r)
3181				return r;
3182		}
3183
3184		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
3185		alu.src[0].sel = ctx->temp_reg;
3186		alu.src[0].chan = 0;
3187
3188		alu.dst.sel = ctx->temp_reg;
3189		alu.dst.chan = 0;
3190		alu.dst.write = 1;
3191		alu.last = 1;
3192
3193		r = r600_bytecode_add_alu(ctx->bc, &alu);
3194		if (r)
3195			return r;
3196	}
3197
3198	/* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
3199	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
3200
3201		if (ctx->bc->chip_class == CAYMAN) {
3202			for (i = 0; i < 3; i++) {
3203				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3204
3205				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
3206				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3207				r600_bytecode_src_set_abs(&alu.src[0]);
3208
3209				alu.dst.sel = ctx->temp_reg;
3210				alu.dst.chan = i;
3211				if (i == 1)
3212					alu.dst.write = 1;
3213				if (i == 2)
3214					alu.last = 1;
3215
3216				r = r600_bytecode_add_alu(ctx->bc, &alu);
3217				if (r)
3218					return r;
3219			}
3220		} else {
3221			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3222
3223			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
3224			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3225			r600_bytecode_src_set_abs(&alu.src[0]);
3226
3227			alu.dst.sel = ctx->temp_reg;
3228			alu.dst.chan = 1;
3229			alu.dst.write = 1;
3230			alu.last = 1;
3231
3232			r = r600_bytecode_add_alu(ctx->bc, &alu);
3233			if (r)
3234				return r;
3235		}
3236
3237		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3238
3239		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
3240		alu.src[0].sel = ctx->temp_reg;
3241		alu.src[0].chan = 1;
3242
3243		alu.dst.sel = ctx->temp_reg;
3244		alu.dst.chan = 1;
3245		alu.dst.write = 1;
3246		alu.last = 1;
3247
3248		r = r600_bytecode_add_alu(ctx->bc, &alu);
3249		if (r)
3250			return r;
3251
3252		if (ctx->bc->chip_class == CAYMAN) {
3253			for (i = 0; i < 3; i++) {
3254				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3255				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
3256				alu.src[0].sel = ctx->temp_reg;
3257				alu.src[0].chan = 1;
3258
3259				alu.dst.sel = ctx->temp_reg;
3260				alu.dst.chan = i;
3261				if (i == 1)
3262					alu.dst.write = 1;
3263				if (i == 2)
3264					alu.last = 1;
3265
3266				r = r600_bytecode_add_alu(ctx->bc, &alu);
3267				if (r)
3268					return r;
3269			}
3270		} else {
3271			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3272			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
3273			alu.src[0].sel = ctx->temp_reg;
3274			alu.src[0].chan = 1;
3275
3276			alu.dst.sel = ctx->temp_reg;
3277			alu.dst.chan = 1;
3278			alu.dst.write = 1;
3279			alu.last = 1;
3280
3281			r = r600_bytecode_add_alu(ctx->bc, &alu);
3282			if (r)
3283				return r;
3284		}
3285
3286		if (ctx->bc->chip_class == CAYMAN) {
3287			for (i = 0; i < 3; i++) {
3288				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3289				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3290				alu.src[0].sel = ctx->temp_reg;
3291				alu.src[0].chan = 1;
3292
3293				alu.dst.sel = ctx->temp_reg;
3294				alu.dst.chan = i;
3295				if (i == 1)
3296					alu.dst.write = 1;
3297				if (i == 2)
3298					alu.last = 1;
3299
3300				r = r600_bytecode_add_alu(ctx->bc, &alu);
3301				if (r)
3302					return r;
3303			}
3304		} else {
3305			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3306			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
3307			alu.src[0].sel = ctx->temp_reg;
3308			alu.src[0].chan = 1;
3309
3310			alu.dst.sel = ctx->temp_reg;
3311			alu.dst.chan = 1;
3312			alu.dst.write = 1;
3313			alu.last = 1;
3314
3315			r = r600_bytecode_add_alu(ctx->bc, &alu);
3316			if (r)
3317				return r;
3318		}
3319
3320		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3321
3322		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
3323
3324		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3325		r600_bytecode_src_set_abs(&alu.src[0]);
3326
3327		alu.src[1].sel = ctx->temp_reg;
3328		alu.src[1].chan = 1;
3329
3330		alu.dst.sel = ctx->temp_reg;
3331		alu.dst.chan = 1;
3332		alu.dst.write = 1;
3333		alu.last = 1;
3334
3335		r = r600_bytecode_add_alu(ctx->bc, &alu);
3336		if (r)
3337			return r;
3338	}
3339
3340	/* result.z = log2(|src|);*/
3341	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
3342		if (ctx->bc->chip_class == CAYMAN) {
3343			for (i = 0; i < 3; i++) {
3344				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3345
3346				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
3347				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3348				r600_bytecode_src_set_abs(&alu.src[0]);
3349
3350				alu.dst.sel = ctx->temp_reg;
3351				if (i == 2)
3352					alu.dst.write = 1;
3353				alu.dst.chan = i;
3354				if (i == 2)
3355					alu.last = 1;
3356
3357				r = r600_bytecode_add_alu(ctx->bc, &alu);
3358				if (r)
3359					return r;
3360			}
3361		} else {
3362			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3363
3364			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
3365			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3366			r600_bytecode_src_set_abs(&alu.src[0]);
3367
3368			alu.dst.sel = ctx->temp_reg;
3369			alu.dst.write = 1;
3370			alu.dst.chan = 2;
3371			alu.last = 1;
3372
3373			r = r600_bytecode_add_alu(ctx->bc, &alu);
3374			if (r)
3375				return r;
3376		}
3377	}
3378
3379	/* result.w = 1.0; */
3380	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
3381		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3382
3383		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
3384		alu.src[0].sel = V_SQ_ALU_SRC_1;
3385		alu.src[0].chan = 0;
3386
3387		alu.dst.sel = ctx->temp_reg;
3388		alu.dst.chan = 3;
3389		alu.dst.write = 1;
3390		alu.last = 1;
3391
3392		r = r600_bytecode_add_alu(ctx->bc, &alu);
3393		if (r)
3394			return r;
3395	}
3396
3397	return tgsi_helper_copy(ctx, inst);
3398}
3399
3400static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
3401{
3402	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3403	struct r600_bytecode_alu alu;
3404	int r;
3405
3406	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3407
3408	switch (inst->Instruction.Opcode) {
3409	case TGSI_OPCODE_ARL:
3410		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
3411		break;
3412	case TGSI_OPCODE_ARR:
3413		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
3414		break;
3415	case TGSI_OPCODE_UARL:
3416		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
3417		break;
3418	default:
3419		assert(0);
3420		return -1;
3421	}
3422
3423	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3424	alu.last = 1;
3425	alu.dst.sel = ctx->bc->ar_reg;
3426	alu.dst.write = 1;
3427	r = r600_bytecode_add_alu(ctx->bc, &alu);
3428	if (r)
3429		return r;
3430
3431	ctx->bc->ar_loaded = 0;
3432	return 0;
3433}
3434static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
3435{
3436	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3437	struct r600_bytecode_alu alu;
3438	int r;
3439
3440	switch (inst->Instruction.Opcode) {
3441	case TGSI_OPCODE_ARL:
3442		memset(&alu, 0, sizeof(alu));
3443		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
3444		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3445		alu.dst.sel = ctx->bc->ar_reg;
3446		alu.dst.write = 1;
3447		alu.last = 1;
3448
3449		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3450			return r;
3451
3452		memset(&alu, 0, sizeof(alu));
3453		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
3454		alu.src[0].sel = ctx->bc->ar_reg;
3455		alu.dst.sel = ctx->bc->ar_reg;
3456		alu.dst.write = 1;
3457		alu.last = 1;
3458
3459		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3460			return r;
3461		break;
3462	case TGSI_OPCODE_ARR:
3463		memset(&alu, 0, sizeof(alu));
3464		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
3465		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3466		alu.dst.sel = ctx->bc->ar_reg;
3467		alu.dst.write = 1;
3468		alu.last = 1;
3469
3470		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3471			return r;
3472		break;
3473	case TGSI_OPCODE_UARL:
3474		memset(&alu, 0, sizeof(alu));
3475		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
3476		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3477		alu.dst.sel = ctx->bc->ar_reg;
3478		alu.dst.write = 1;
3479		alu.last = 1;
3480
3481		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3482			return r;
3483		break;
3484	default:
3485		assert(0);
3486		return -1;
3487	}
3488
3489	ctx->bc->ar_loaded = 0;
3490	return 0;
3491}
3492
3493static int tgsi_opdst(struct r600_shader_ctx *ctx)
3494{
3495	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3496	struct r600_bytecode_alu alu;
3497	int i, r = 0;
3498
3499	for (i = 0; i < 4; i++) {
3500		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3501
3502		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
3503		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3504
3505		if (i == 0 || i == 3) {
3506			alu.src[0].sel = V_SQ_ALU_SRC_1;
3507		} else {
3508			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3509		}
3510
3511		if (i == 0 || i == 2) {
3512			alu.src[1].sel = V_SQ_ALU_SRC_1;
3513		} else {
3514			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3515		}
3516		if (i == 3)
3517			alu.last = 1;
3518		r = r600_bytecode_add_alu(ctx->bc, &alu);
3519		if (r)
3520			return r;
3521	}
3522	return 0;
3523}
3524
3525static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
3526{
3527	struct r600_bytecode_alu alu;
3528	int r;
3529
3530	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3531	alu.inst = opcode;
3532	alu.predicate = 1;
3533
3534	alu.dst.sel = ctx->temp_reg;
3535	alu.dst.write = 1;
3536	alu.dst.chan = 0;
3537
3538	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3539	alu.src[1].sel = V_SQ_ALU_SRC_0;
3540	alu.src[1].chan = 0;
3541
3542	alu.last = 1;
3543
3544	r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
3545	if (r)
3546		return r;
3547	return 0;
3548}
3549
3550static int pops(struct r600_shader_ctx *ctx, int pops)
3551{
3552	unsigned force_pop = ctx->bc->force_add_cf;
3553
3554	if (!force_pop) {
3555		int alu_pop = 3;
3556		if (ctx->bc->cf_last) {
3557			if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU))
3558				alu_pop = 0;
3559			else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER))
3560				alu_pop = 1;
3561		}
3562		alu_pop += pops;
3563		if (alu_pop == 1) {
3564			ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER);
3565			ctx->bc->force_add_cf = 1;
3566		} else if (alu_pop == 2) {
3567			ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER);
3568			ctx->bc->force_add_cf = 1;
3569		} else {
3570			force_pop = 1;
3571		}
3572	}
3573
3574	if (force_pop) {
3575		r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
3576		ctx->bc->cf_last->pop_count = pops;
3577		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
3578	}
3579
3580	return 0;
3581}
3582
3583static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
3584{
3585	switch(reason) {
3586	case FC_PUSH_VPM:
3587		ctx->bc->callstack[ctx->bc->call_sp].current--;
3588		break;
3589	case FC_PUSH_WQM:
3590	case FC_LOOP:
3591		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
3592		break;
3593	case FC_REP:
3594		/* TOODO : for 16 vp asic should -= 2; */
3595		ctx->bc->callstack[ctx->bc->call_sp].current --;
3596		break;
3597	}
3598}
3599
3600static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
3601{
3602	if (check_max_only) {
3603		int diff;
3604		switch (reason) {
3605		case FC_PUSH_VPM:
3606			diff = 1;
3607			break;
3608		case FC_PUSH_WQM:
3609			diff = 4;
3610			break;
3611		default:
3612			assert(0);
3613			diff = 0;
3614		}
3615		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
3616		    ctx->bc->callstack[ctx->bc->call_sp].max) {
3617			ctx->bc->callstack[ctx->bc->call_sp].max =
3618				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
3619		}
3620		return;
3621	}
3622	switch (reason) {
3623	case FC_PUSH_VPM:
3624		ctx->bc->callstack[ctx->bc->call_sp].current++;
3625		break;
3626	case FC_PUSH_WQM:
3627	case FC_LOOP:
3628		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
3629		break;
3630	case FC_REP:
3631		ctx->bc->callstack[ctx->bc->call_sp].current++;
3632		break;
3633	}
3634
3635	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
3636	    ctx->bc->callstack[ctx->bc->call_sp].max) {
3637		ctx->bc->callstack[ctx->bc->call_sp].max =
3638			ctx->bc->callstack[ctx->bc->call_sp].current;
3639	}
3640}
3641
3642static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
3643{
3644	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
3645
3646	sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid,
3647						sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1));
3648	sp->mid[sp->num_mid] = ctx->bc->cf_last;
3649	sp->num_mid++;
3650}
3651
3652static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
3653{
3654	ctx->bc->fc_sp++;
3655	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
3656	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
3657}
3658
3659static void fc_poplevel(struct r600_shader_ctx *ctx)
3660{
3661	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
3662	if (sp->mid) {
3663		free(sp->mid);
3664		sp->mid = NULL;
3665	}
3666	sp->num_mid = 0;
3667	sp->start = NULL;
3668	sp->type = 0;
3669	ctx->bc->fc_sp--;
3670}
3671
#if 0
/*
 * Everything up to the matching #endif is excluded from compilation.
 * It is unfinished scaffolding for RETURN / flag-driven loop exits; several
 * of the helpers are still empty stubs.
 */

/* Append a RETURN CF instruction. Always reports success. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN));
	return 0;
}

/* Append a JUMP CF instruction popping `pops` levels; `offset` is unused. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: emits nothing. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/* Test the flag, jump, reset the flag, pop ifidx + 1 levels, then RETURN. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Test the flag, then append the current instruction's CF opcode with
 * pop_count 1, register it as a mid point of level `fc_sp` and pop once.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
3719
3720static int tgsi_if(struct r600_shader_ctx *ctx)
3721{
3722	emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
3723
3724	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
3725
3726	fc_pushlevel(ctx, FC_IF);
3727
3728	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
3729	return 0;
3730}
3731
3732static int tgsi_else(struct r600_shader_ctx *ctx)
3733{
3734	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
3735	ctx->bc->cf_last->pop_count = 1;
3736
3737	fc_set_mid(ctx, ctx->bc->fc_sp);
3738	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
3739	return 0;
3740}
3741
3742static int tgsi_endif(struct r600_shader_ctx *ctx)
3743{
3744	pops(ctx, 1);
3745	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
3746		R600_ERR("if/endif unbalanced in shader\n");
3747		return -1;
3748	}
3749
3750	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
3751		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3752		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
3753	} else {
3754		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
3755	}
3756	fc_poplevel(ctx);
3757
3758	callstack_decrease_current(ctx, FC_PUSH_VPM);
3759	return 0;
3760}
3761
3762static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
3763{
3764	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
3765
3766	fc_pushlevel(ctx, FC_LOOP);
3767
3768	/* check stack depth */
3769	callstack_check_depth(ctx, FC_LOOP, 0);
3770	return 0;
3771}
3772
3773static int tgsi_endloop(struct r600_shader_ctx *ctx)
3774{
3775	int i;
3776
3777	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
3778
3779	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
3780		R600_ERR("loop/endloop in shader code are not paired.\n");
3781		return -EINVAL;
3782	}
3783
3784	/* fixup loop pointers - from r600isa
3785	   LOOP END points to CF after LOOP START,
3786	   LOOP START point to CF after LOOP END
3787	   BRK/CONT point to LOOP END CF
3788	*/
3789	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
3790
3791	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3792
3793	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
3794		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
3795	}
3796	/* TODO add LOOPRET support */
3797	fc_poplevel(ctx);
3798	callstack_decrease_current(ctx, FC_LOOP);
3799	return 0;
3800}
3801
3802static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
3803{
3804	unsigned int fscp;
3805
3806	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
3807	{
3808		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
3809			break;
3810	}
3811
3812	if (fscp == 0) {
3813		R600_ERR("Break not inside loop/endloop pair\n");
3814		return -EINVAL;
3815	}
3816
3817	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
3818	ctx->bc->cf_last->pop_count = 1;
3819
3820	fc_set_mid(ctx, fscp);
3821
3822	pops(ctx, 1);
3823	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
3824	return 0;
3825}
3826
3827static int tgsi_umad(struct r600_shader_ctx *ctx)
3828{
3829	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3830	struct r600_bytecode_alu alu;
3831	int i, j, r;
3832	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
3833
3834	/* src0 * src1 */
3835	for (i = 0; i < lasti + 1; i++) {
3836		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3837			continue;
3838
3839		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3840
3841		alu.dst.chan = i;
3842		alu.dst.sel = ctx->temp_reg;
3843		alu.dst.write = 1;
3844
3845		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
3846		for (j = 0; j < 2; j++) {
3847		        r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3848		}
3849
3850		alu.last = 1;
3851		r = r600_bytecode_add_alu(ctx->bc, &alu);
3852		if (r)
3853			return r;
3854	}
3855
3856
3857	for (i = 0; i < lasti + 1; i++) {
3858		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3859			continue;
3860
3861		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3862		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3863
3864		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3865
3866		alu.src[0].sel = ctx->temp_reg;
3867		alu.src[0].chan = i;
3868
3869		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
3870		if (i == lasti) {
3871			alu.last = 1;
3872		}
3873		r = r600_bytecode_add_alu(ctx->bc, &alu);
3874		if (r)
3875			return r;
3876	}
3877	return 0;
3878}
3879
3880static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
3881	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3882	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3883	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3884
3885	/* FIXME:
3886	 * For state trackers other than OpenGL, we'll want to use
3887	 * _RECIP_IEEE instead.
3888	 */
3889	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
3890
3891	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
3892	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3893	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3894	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3895	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3896	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3897	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3898	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3899	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3900	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3901	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3902	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3903	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3904	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3905	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3906	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3907	/* gap */
3908	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3909	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3910	/* gap */
3911	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3912	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3913	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3914	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3915	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3916	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
3917	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3918	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3919	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3920	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3921	/* gap */
3922	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3923	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3924	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3925	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3926	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3927	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3928	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3929	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3930	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3931	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3932	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3933	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3934	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3935	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3936	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3937	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3938	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3939	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3940	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3941	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3942	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3943	{TGSI_OPCODE_TXD,	0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3944	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3945	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3946	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3947	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3948	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3949	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3950	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3951	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3952	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3953	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3954	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3955	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3956	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3957	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3958	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
3959	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3960	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3961	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3962	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3963	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3964	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3965	/* gap */
3966	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3967	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3968	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3969	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3970	/* gap */
3971	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3972	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3973	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3974	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3975	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3976	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
3977	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
3978	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3979	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2_trans},
3980	/* gap */
3981	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3982	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
3983	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
3984	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3985	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
3986	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3987	{TGSI_OPCODE_TXF,	0, SQ_TEX_INST_LD, tgsi_tex},
3988	{TGSI_OPCODE_TXQ,	0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
3989	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3990	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3991	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3992	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3993	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3994	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3995	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3996	/* gap */
3997	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3998	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3999	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4000	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4001	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4002	/* gap */
4003	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4004	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4005	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4006	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4007	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4008	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4009	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4010	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4011	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
4012	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
4013	/* gap */
4014	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4015	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2_trans},
4016	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
4017	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
4018	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
4019	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_op2},
4020	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
4021	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2_trans},
4022	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2},
4023	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2},
4024	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
4025	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
4026	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
4027	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
4028	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
4029	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
4030	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4031	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2},
4032	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
4033	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
4034	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2_trans},
4035	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
4036	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2_swap},
4037	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4038	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4039	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4040	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4041	{TGSI_OPCODE_LOAD,      0, 0, tgsi_unsupported},
4042	{TGSI_OPCODE_LOAD_MS,   0, 0, tgsi_unsupported},
4043	{TGSI_OPCODE_SAMPLE,    0, 0, tgsi_unsupported},
4044	{TGSI_OPCODE_SAMPLE_B,  0, 0, tgsi_unsupported},
4045	{TGSI_OPCODE_SAMPLE_C,  0, 0, tgsi_unsupported},
4046	{TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
4047	{TGSI_OPCODE_SAMPLE_D,  0, 0, tgsi_unsupported},
4048	{TGSI_OPCODE_SAMPLE_L,  0, 0, tgsi_unsupported},
4049	{TGSI_OPCODE_GATHER4,   0, 0, tgsi_unsupported},
4050	{TGSI_OPCODE_RESINFO,	0, 0, tgsi_unsupported},
4051	{TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
4052	{TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
4053	{TGSI_OPCODE_UARL,      0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_r600_arl},
4054	{TGSI_OPCODE_UCMP,      0, 0, tgsi_unsupported},
4055	{TGSI_OPCODE_IABS,      0, 0, tgsi_iabs},
4056	{TGSI_OPCODE_ISSG,      0, 0, tgsi_issg},
4057	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4058};
4059
4060static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
4061	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
4062	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
4063	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
4064	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
4065	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq},
4066	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
4067	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
4068	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
4069	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
4070	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
4071	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
4072	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
4073	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
4074	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
4075	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
4076	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
4077	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
4078	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
4079	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
4080	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4081	/* gap */
4082	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4083	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4084	/* gap */
4085	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4086	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4087	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
4088	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4089	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
4090	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
4091	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
4092	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
4093	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
4094	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
4095	/* gap */
4096	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4097	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
4098	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4099	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
4100	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
4101	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
4102	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
4103	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
4104	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4105	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4106	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4107	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4108	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4109	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
4110	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4111	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
4112	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
4113	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
4114	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
4115	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4116	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
4117	{TGSI_OPCODE_TXD,	0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
4118	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
4119	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4120	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4121	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4122	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4123	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4124	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4125	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
4126	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4127	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4128	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4129	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
4130	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
4131	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
4132	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
4133	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4134	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4135	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
4136	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
4137	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
4138	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
4139	/* gap */
4140	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4141	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4142	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
4143	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
4144	/* gap */
4145	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4146	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4147	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4148	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4149	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4150	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
4151	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
4152	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
4153	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2},
4154	/* gap */
4155	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4156	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
4157	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
4158	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4159	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
4160	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4161	{TGSI_OPCODE_TXF,	0, SQ_TEX_INST_LD, tgsi_tex},
4162	{TGSI_OPCODE_TXQ,	0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
4163	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
4164	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4165	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4166	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
4167	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4168	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
4169	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4170	/* gap */
4171	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4172	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4173	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4174	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4175	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4176	/* gap */
4177	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4178	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4179	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4180	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4181	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4182	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4183	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4184	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4185	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
4186	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
4187	/* gap */
4188	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4189	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_f2i},
4190	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
4191	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
4192	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
4193	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
4194	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
4195	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2},
4196	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
4197	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_f2i},
4198	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2},
4199	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
4200	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
4201	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
4202	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
4203	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
4204	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4205	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
4206	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
4207	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
4208	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2},
4209	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
4210	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
4211	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4212	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4213	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4214	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4215	{TGSI_OPCODE_LOAD,      0, 0, tgsi_unsupported},
4216	{TGSI_OPCODE_LOAD_MS,   0, 0, tgsi_unsupported},
4217	{TGSI_OPCODE_SAMPLE,    0, 0, tgsi_unsupported},
4218	{TGSI_OPCODE_SAMPLE_B,  0, 0, tgsi_unsupported},
4219	{TGSI_OPCODE_SAMPLE_C,  0, 0, tgsi_unsupported},
4220	{TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
4221	{TGSI_OPCODE_SAMPLE_D,  0, 0, tgsi_unsupported},
4222	{TGSI_OPCODE_SAMPLE_L,  0, 0, tgsi_unsupported},
4223	{TGSI_OPCODE_GATHER4,   0, 0, tgsi_unsupported},
4224	{TGSI_OPCODE_RESINFO,	0, 0, tgsi_unsupported},
4225	{TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
4226	{TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
4227	{TGSI_OPCODE_UARL,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
4228	{TGSI_OPCODE_UCMP,      0, 0, tgsi_unsupported},
4229	{TGSI_OPCODE_IABS,      0, 0, tgsi_iabs},
4230	{TGSI_OPCODE_ISSG,      0, 0, tgsi_issg},
4231	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4232};
4233
4234static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
4235	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
4236	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
4237	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
4238	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr},
4239	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr},
4240	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
4241	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
4242	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
4243	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
4244	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
4245	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
4246	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
4247	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
4248	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
4249	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
4250	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
4251	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
4252	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
4253	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
4254	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4255	/* gap */
4256	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4257	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4258	/* gap */
4259	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4260	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4261	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
4262	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4263	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
4264	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
4265	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr},
4266	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr},
4267	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow},
4268	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
4269	/* gap */
4270	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4271	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
4272	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4273	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
4274	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig},
4275	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
4276	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
4277	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
4278	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4279	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4280	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4281	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4282	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4283	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
4284	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4285	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
4286	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig},
4287	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
4288	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
4289	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4290	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
4291	{TGSI_OPCODE_TXD,	0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
4292	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
4293	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4294	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4295	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4296	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4297	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4298	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4299	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
4300	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4301	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4302	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4303	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
4304	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
4305	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
4306	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
4307	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4308	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4309	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
4310	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
4311	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
4312	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
4313	/* gap */
4314	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4315	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4316	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
4317	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
4318	/* gap */
4319	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4320	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4321	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4322	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4323	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4324	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4325	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
4326	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
4327	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4328	/* gap */
4329	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4330	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4331	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4332	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4333	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
4334	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4335	{TGSI_OPCODE_TXF,	0, SQ_TEX_INST_LD, tgsi_tex},
4336	{TGSI_OPCODE_TXQ,	0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
4337	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
4338	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4339	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4340	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
4341	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4342	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
4343	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4344	/* gap */
4345	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4346	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4347	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4348	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4349	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4350	/* gap */
4351	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4352	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4353	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4354	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4355	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4356	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4357	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4358	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4359	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
4360	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
4361	/* gap */
4362	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4363	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4364	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4365	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
4366	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
4367	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4368	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4369	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4370	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4371	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4372	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4373	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4374	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4375	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4376	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4377	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4378	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4379	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4380	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4381	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4382	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4383	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4384	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4385	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4386	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4387	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4388	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4389	{TGSI_OPCODE_LOAD,      0, 0, tgsi_unsupported},
4390	{TGSI_OPCODE_LOAD_MS,   0, 0, tgsi_unsupported},
4391	{TGSI_OPCODE_SAMPLE,    0, 0, tgsi_unsupported},
4392	{TGSI_OPCODE_SAMPLE_B,  0, 0, tgsi_unsupported},
4393	{TGSI_OPCODE_SAMPLE_C,  0, 0, tgsi_unsupported},
4394	{TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
4395	{TGSI_OPCODE_SAMPLE_D,  0, 0, tgsi_unsupported},
4396	{TGSI_OPCODE_SAMPLE_L,  0, 0, tgsi_unsupported},
4397	{TGSI_OPCODE_GATHER4,   0, 0, tgsi_unsupported},
4398	{TGSI_OPCODE_RESINFO,	0, 0, tgsi_unsupported},
4399	{TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
4400	{TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
4401	{TGSI_OPCODE_UARL,      0, 0, tgsi_unsupported},
4402	{TGSI_OPCODE_UCMP,      0, 0, tgsi_unsupported},
4403	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4404};
4405