r600_shader.c revision 2449695e822421fdcaf1c66dffc12d7d705ea69d
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "pipe/p_shader_tokens.h"
24#include "tgsi/tgsi_info.h"
25#include "tgsi/tgsi_parse.h"
26#include "tgsi/tgsi_scan.h"
27#include "tgsi/tgsi_dump.h"
28#include "util/u_format.h"
29#include "r600_pipe.h"
30#include "r600_asm.h"
31#include "r600_sq.h"
32#include "r600_formats.h"
33#include "r600_opcodes.h"
34#include "r600d.h"
35#include <stdio.h>
36#include <errno.h>
37#include <byteswap.h>
38
39/* CAYMAN notes
40Why CAYMAN got loops for lots of instructions is explained here.
41
42-These 8xx t-slot only ops are implemented in all vector slots.
43MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
44These 8xx t-slot only opcodes become vector ops, with all four
45slots expecting the arguments on sources a and b. Result is
46broadcast to all channels.
47MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
48These 8xx t-slot only opcodes become vector ops in the z, y, and
49x slots.
50EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
51RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
52SQRT_IEEE/_64
53SIN/COS
54The w slot may have an independent co-issued operation, or if the
55result is required to be in the w slot, the opcode above may be
56issued in the w slot as well.
57The compiler must issue the source argument to slots z, y, and x
58*/
59
60static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
61{
62	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
63	struct r600_shader *rshader = &shader->shader;
64	uint32_t *ptr;
65	int	i;
66
67	/* copy new shader */
68	if (shader->bo == NULL) {
69		shader->bo = (struct r600_resource*)
70			pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, rshader->bc.ndw * 4);
71		if (shader->bo == NULL) {
72			return -ENOMEM;
73		}
74		ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->buf, rctx->ctx.cs, PIPE_TRANSFER_WRITE);
75		if (R600_BIG_ENDIAN) {
76			for (i = 0; i < rshader->bc.ndw; ++i) {
77				ptr[i] = bswap_32(rshader->bc.bytecode[i]);
78			}
79		} else {
80			memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr));
81		}
82		rctx->ws->buffer_unmap(shader->bo->buf);
83	}
84	/* build state */
85	switch (rshader->processor_type) {
86	case TGSI_PROCESSOR_VERTEX:
87		if (rctx->chip_class >= EVERGREEN) {
88			evergreen_pipe_shader_vs(ctx, shader);
89		} else {
90			r600_pipe_shader_vs(ctx, shader);
91		}
92		break;
93	case TGSI_PROCESSOR_FRAGMENT:
94		if (rctx->chip_class >= EVERGREEN) {
95			evergreen_pipe_shader_ps(ctx, shader);
96		} else {
97			r600_pipe_shader_ps(ctx, shader);
98		}
99		break;
100	default:
101		return -EINVAL;
102	}
103	return 0;
104}
105
106static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader);
107
108int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader)
109{
110	static int dump_shaders = -1;
111	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
112	int r;
113
114	/* Would like some magic "get_bool_option_once" routine.
115	*/
116	if (dump_shaders == -1)
117		dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
118
119	if (dump_shaders) {
120		fprintf(stderr, "--------------------------------------------------------------\n");
121		tgsi_dump(shader->tokens, 0);
122
123		if (shader->so.num_outputs) {
124			unsigned i;
125			fprintf(stderr, "STREAMOUT\n");
126			for (i = 0; i < shader->so.num_outputs; i++) {
127				unsigned mask = ((1 << shader->so.output[i].num_components) - 1) <<
128						shader->so.output[i].start_component;
129				fprintf(stderr, "  %i: MEM_STREAM0_BUF%i OUT[%i].%s%s%s%s\n", i,
130					shader->so.output[i].output_buffer, shader->so.output[i].register_index,
131				        mask & 1 ? "x" : "_",
132				        (mask >> 1) & 1 ? "y" : "_",
133				        (mask >> 2) & 1 ? "z" : "_",
134				        (mask >> 3) & 1 ? "w" : "_");
135			}
136		}
137	}
138	r = r600_shader_from_tgsi(rctx, shader);
139	if (r) {
140		R600_ERR("translation from TGSI failed !\n");
141		return r;
142	}
143	r = r600_bytecode_build(&shader->shader.bc);
144	if (r) {
145		R600_ERR("building bytecode failed !\n");
146		return r;
147	}
148	if (dump_shaders) {
149		r600_bytecode_dump(&shader->shader.bc);
150		fprintf(stderr, "______________________________________________________________\n");
151	}
152	return r600_pipe_shader(ctx, shader);
153}
154
155void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
156{
157	pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);
158	r600_bytecode_clear(&shader->shader.bc);
159
160	memset(&shader->shader,0,sizeof(struct r600_shader));
161}
162
163/*
164 * tgsi -> r600 shader
165 */
166struct r600_shader_tgsi_instruction;
167
/* One TGSI source operand translated to r600 terms (filled by tgsi_src()). */
struct r600_shader_src {
	unsigned sel;		/* register / constant selector */
	unsigned swizzle[4];	/* source channel for each of x, y, z, w */
	unsigned neg;		/* copied from the TGSI Negate flag */
	unsigned abs;		/* copied from the TGSI Absolute flag */
	unsigned rel;		/* V_SQ_REL_RELATIVE for indirect operands, else 0 */
	uint32_t value[4];	/* literal dwords when sel is V_SQ_ALU_SRC_LITERAL */
};
176
177struct r600_shader_ctx {
178	struct tgsi_shader_info			info;
179	struct tgsi_parse_context		parse;
180	const struct tgsi_token			*tokens;
181	unsigned				type;
182	unsigned				file_offset[TGSI_FILE_COUNT];
183	unsigned				temp_reg;
184	struct r600_shader_tgsi_instruction	*inst_info;
185	struct r600_bytecode				*bc;
186	struct r600_shader			*shader;
187	struct r600_shader_src			src[4];
188	u32					*literals;
189	u32					nliterals;
190	u32					max_driver_temp_used;
191	/* needed for evergreen interpolation */
192	boolean                                 input_centroid;
193	boolean                                 input_linear;
194	boolean                                 input_perspective;
195	int					num_interp_gpr;
196};
197
/*
 * One entry of the per-chip opcode tables: ties a TGSI opcode to the r600
 * opcode and to the routine that emits code for it.
 * Field order must not change; keep it in sync with the table initializers.
 */
struct r600_shader_tgsi_instruction {
	unsigned tgsi_opcode;
	unsigned is_op3;
	unsigned r600_opcode;
	/* emits the instruction; the caller treats a non-zero return as an error */
	int (*process)(struct r600_shader_ctx *ctx);
};
204
205static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
206static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
207
208static int tgsi_is_supported(struct r600_shader_ctx *ctx)
209{
210	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
211	int j;
212
213	if (i->Instruction.NumDstRegs > 1) {
214		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
215		return -EINVAL;
216	}
217	if (i->Instruction.Predicate) {
218		R600_ERR("predicate unsupported\n");
219		return -EINVAL;
220	}
221#if 0
222	if (i->Instruction.Label) {
223		R600_ERR("label unsupported\n");
224		return -EINVAL;
225	}
226#endif
227	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
228		if (i->Src[j].Register.Dimension) {
229			R600_ERR("unsupported src %d (dimension %d)\n", j,
230				 i->Src[j].Register.Dimension);
231			return -EINVAL;
232		}
233	}
234	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
235		if (i->Dst[j].Register.Dimension) {
236			R600_ERR("unsupported dst (dimension)\n");
237			return -EINVAL;
238		}
239	}
240	return 0;
241}
242
243static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
244{
245	int i, r;
246	struct r600_bytecode_alu alu;
247	int gpr = 0, base_chan = 0;
248	int ij_index = 0;
249
250	if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
251		ij_index = 0;
252		if (ctx->shader->input[input].centroid)
253			ij_index++;
254	} else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
255		ij_index = 0;
256		/* if we have perspective add one */
257		if (ctx->input_perspective)  {
258			ij_index++;
259			/* if we have perspective centroid */
260			if (ctx->input_centroid)
261				ij_index++;
262		}
263		if (ctx->shader->input[input].centroid)
264			ij_index++;
265	}
266
267	/* work out gpr and base_chan from index */
268	gpr = ij_index / 2;
269	base_chan = (2 * (ij_index % 2)) + 1;
270
271	for (i = 0; i < 8; i++) {
272		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
273
274		if (i < 4)
275			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
276		else
277			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
278
279		if ((i > 1) && (i < 6)) {
280			alu.dst.sel = ctx->shader->input[input].gpr;
281			alu.dst.write = 1;
282		}
283
284		alu.dst.chan = i % 4;
285
286		alu.src[0].sel = gpr;
287		alu.src[0].chan = (base_chan - (i % 2));
288
289		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
290
291		alu.bank_swizzle_force = SQ_ALU_VEC_210;
292		if ((i % 4) == 3)
293			alu.last = 1;
294		r = r600_bytecode_add_alu(ctx->bc, &alu);
295		if (r)
296			return r;
297	}
298	return 0;
299}
300
301static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input)
302{
303	int i, r;
304	struct r600_bytecode_alu alu;
305
306	for (i = 0; i < 4; i++) {
307		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
308
309		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_LOAD_P0;
310
311		alu.dst.sel = ctx->shader->input[input].gpr;
312		alu.dst.write = 1;
313
314		alu.dst.chan = i;
315
316		alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
317		alu.src[0].chan = i;
318
319		if (i == 3)
320			alu.last = 1;
321		r = r600_bytecode_add_alu(ctx->bc, &alu);
322		if (r)
323			return r;
324	}
325	return 0;
326}
327
/*
 * Special export handling in shaders
 *
 * shader export ARRAY_BASE for EXPORT_POS:
 * 60 is position
 * 61 is misc vector
 * 62, 63 are clip distance vectors
 *
 * The use of the values exported in 61-63 is controlled by PA_CL_VS_OUT_CNTL:
 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61
 * USE_VTX_POINT_SIZE - point size in the X channel of export 61
 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61
 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61
 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61
 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually
 * exclusive with render target index)
 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors
 *
 *
 * shader export ARRAY_BASE for EXPORT_PIXEL:
 * 0-7 CB targets
 * 61 computed Z vector
 *
 * The use of the values exported in the computed Z vector is controlled
 * by DB_SHADER_CONTROL:
 * Z_EXPORT_ENABLE - Z as a float in RED
 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN
 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA
 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE
 * DB_SOURCE_FORMAT - export control restrictions
 *
 */
360
361
362/* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */
363static int r600_spi_sid(struct r600_shader_io * io)
364{
365	int index, name = io->name;
366
367	/* These params are handled differently, they don't need
368	 * semantic indices, so we'll use 0 for them.
369	 */
370	if (name == TGSI_SEMANTIC_POSITION ||
371		name == TGSI_SEMANTIC_PSIZE ||
372		name == TGSI_SEMANTIC_FACE)
373		index = 0;
374	else {
375		if (name == TGSI_SEMANTIC_GENERIC) {
376			/* For generic params simply use sid from tgsi */
377			index = io->sid;
378		} else {
379
380			/* FIXME: two-side rendering is broken in r600g, this will
381			 * keep old functionality */
382			if (name == TGSI_SEMANTIC_BCOLOR)
383				name = TGSI_SEMANTIC_COLOR;
384
385			/* For non-generic params - pack name and sid into 8 bits */
386			index = 0x80 | (name<<3) | (io->sid);
387		}
388
389		/* Make sure that all really used indices have nonzero value, so
390		 * we can just compare it to 0 later instead of comparing the name
391		 * with different values to detect special cases. */
392		index++;
393	}
394
395	return index;
396};
397
398static int tgsi_declaration(struct r600_shader_ctx *ctx)
399{
400	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
401	unsigned i;
402	int r;
403
404	switch (d->Declaration.File) {
405	case TGSI_FILE_INPUT:
406		i = ctx->shader->ninput++;
407		ctx->shader->input[i].name = d->Semantic.Name;
408		ctx->shader->input[i].sid = d->Semantic.Index;
409		ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
410		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
411		ctx->shader->input[i].centroid = d->Declaration.Centroid;
412		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
413		if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chip_class >= EVERGREEN) {
414			/* turn input into interpolate on EG */
415			if (ctx->shader->input[i].spi_sid) {
416				ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
417				if (ctx->shader->input[i].interpolate > 0) {
418					evergreen_interp_alu(ctx, i);
419				} else {
420					evergreen_interp_flat(ctx, i);
421				}
422			}
423		}
424		break;
425	case TGSI_FILE_OUTPUT:
426		i = ctx->shader->noutput++;
427		ctx->shader->output[i].name = d->Semantic.Name;
428		ctx->shader->output[i].sid = d->Semantic.Index;
429		ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
430		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First;
431		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
432		break;
433	case TGSI_FILE_CONSTANT:
434	case TGSI_FILE_TEMPORARY:
435	case TGSI_FILE_SAMPLER:
436	case TGSI_FILE_ADDRESS:
437		break;
438
439	case TGSI_FILE_SYSTEM_VALUE:
440		if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
441			struct r600_bytecode_alu alu;
442			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
443
444			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
445			alu.src[0].sel = 0;
446			alu.src[0].chan = 3;
447
448			alu.dst.sel = 0;
449			alu.dst.chan = 3;
450			alu.dst.write = 1;
451			alu.last = 1;
452
453			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
454				return r;
455			break;
456		}
457
458	default:
459		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
460		return -EINVAL;
461	}
462	return 0;
463}
464
465static int r600_get_temp(struct r600_shader_ctx *ctx)
466{
467	return ctx->temp_reg + ctx->max_driver_temp_used++;
468}
469
470/*
471 * for evergreen we need to scan the shader to find the number of GPRs we need to
472 * reserve for interpolation.
473 *
474 * we need to know if we are going to emit
475 * any centroid inputs
476 * if perspective and linear are required
477*/
478static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
479{
480	int i;
481	int num_baryc;
482
483	ctx->input_linear = FALSE;
484	ctx->input_perspective = FALSE;
485	ctx->input_centroid = FALSE;
486	ctx->num_interp_gpr = 1;
487
488	/* any centroid inputs */
489	for (i = 0; i < ctx->info.num_inputs; i++) {
490		/* skip position/face */
491		if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
492		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
493			continue;
494		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
495			ctx->input_linear = TRUE;
496		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
497			ctx->input_perspective = TRUE;
498		if (ctx->info.input_centroid[i])
499			ctx->input_centroid = TRUE;
500	}
501
502	num_baryc = 0;
503	/* ignoring sample for now */
504	if (ctx->input_perspective)
505		num_baryc++;
506	if (ctx->input_linear)
507		num_baryc++;
508	if (ctx->input_centroid)
509		num_baryc *= 2;
510
511	ctx->num_interp_gpr += (num_baryc + 1) >> 1;
512
513	/* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
514	return ctx->num_interp_gpr;
515}
516
517static void tgsi_src(struct r600_shader_ctx *ctx,
518		     const struct tgsi_full_src_register *tgsi_src,
519		     struct r600_shader_src *r600_src)
520{
521	memset(r600_src, 0, sizeof(*r600_src));
522	r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
523	r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
524	r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
525	r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
526	r600_src->neg = tgsi_src->Register.Negate;
527	r600_src->abs = tgsi_src->Register.Absolute;
528
529	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
530		int index;
531		if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
532			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
533			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
534
535			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
536			r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
537			if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
538				return;
539		}
540		index = tgsi_src->Register.Index;
541		r600_src->sel = V_SQ_ALU_SRC_LITERAL;
542		memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
543	} else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
544		/* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
545		r600_src->swizzle[0] = 3;
546		r600_src->swizzle[1] = 3;
547		r600_src->swizzle[2] = 3;
548		r600_src->swizzle[3] = 3;
549		r600_src->sel = 0;
550	} else {
551		if (tgsi_src->Register.Indirect)
552			r600_src->rel = V_SQ_REL_RELATIVE;
553		r600_src->sel = tgsi_src->Register.Index;
554		r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
555	}
556}
557
558static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
559{
560	struct r600_bytecode_vtx vtx;
561	unsigned int ar_reg;
562	int r;
563
564	if (offset) {
565		struct r600_bytecode_alu alu;
566
567		memset(&alu, 0, sizeof(alu));
568
569		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
570		alu.src[0].sel = ctx->bc->ar_reg;
571
572		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
573		alu.src[1].value = offset;
574
575		alu.dst.sel = dst_reg;
576		alu.dst.write = 1;
577		alu.last = 1;
578
579		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
580			return r;
581
582		ar_reg = dst_reg;
583	} else {
584		ar_reg = ctx->bc->ar_reg;
585	}
586
587	memset(&vtx, 0, sizeof(vtx));
588	vtx.fetch_type = 2;		/* VTX_FETCH_NO_INDEX_OFFSET */
589	vtx.src_gpr = ar_reg;
590	vtx.mega_fetch_count = 16;
591	vtx.dst_gpr = dst_reg;
592	vtx.dst_sel_x = 0;		/* SEL_X */
593	vtx.dst_sel_y = 1;		/* SEL_Y */
594	vtx.dst_sel_z = 2;		/* SEL_Z */
595	vtx.dst_sel_w = 3;		/* SEL_W */
596	vtx.data_format = FMT_32_32_32_32_FLOAT;
597	vtx.num_format_all = 2;		/* NUM_FORMAT_SCALED */
598	vtx.format_comp_all = 1;	/* FORMAT_COMP_SIGNED */
599	vtx.srf_mode_all = 1;		/* SRF_MODE_NO_ZERO */
600	vtx.endian = r600_endian_swap(32);
601
602	if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
603		return r;
604
605	return 0;
606}
607
608static int tgsi_split_constant(struct r600_shader_ctx *ctx)
609{
610	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
611	struct r600_bytecode_alu alu;
612	int i, j, k, nconst, r;
613
614	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
615		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
616			nconst++;
617		}
618		tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
619	}
620	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
621		if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
622			continue;
623		}
624
625		if (ctx->src[i].rel) {
626			int treg = r600_get_temp(ctx);
627			if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
628				return r;
629
630			ctx->src[i].sel = treg;
631			ctx->src[i].rel = 0;
632			j--;
633		} else if (j > 0) {
634			int treg = r600_get_temp(ctx);
635			for (k = 0; k < 4; k++) {
636				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
637				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
638				alu.src[0].sel = ctx->src[i].sel;
639				alu.src[0].chan = k;
640				alu.src[0].rel = ctx->src[i].rel;
641				alu.dst.sel = treg;
642				alu.dst.chan = k;
643				alu.dst.write = 1;
644				if (k == 3)
645					alu.last = 1;
646				r = r600_bytecode_add_alu(ctx->bc, &alu);
647				if (r)
648					return r;
649			}
650			ctx->src[i].sel = treg;
651			ctx->src[i].rel =0;
652			j--;
653		}
654	}
655	return 0;
656}
657
658/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
659static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
660{
661	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
662	struct r600_bytecode_alu alu;
663	int i, j, k, nliteral, r;
664
665	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
666		if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
667			nliteral++;
668		}
669	}
670	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
671		if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
672			int treg = r600_get_temp(ctx);
673			for (k = 0; k < 4; k++) {
674				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
675				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
676				alu.src[0].sel = ctx->src[i].sel;
677				alu.src[0].chan = k;
678				alu.src[0].value = ctx->src[i].value[k];
679				alu.dst.sel = treg;
680				alu.dst.chan = k;
681				alu.dst.write = 1;
682				if (k == 3)
683					alu.last = 1;
684				r = r600_bytecode_add_alu(ctx->bc, &alu);
685				if (r)
686					return r;
687			}
688			ctx->src[i].sel = treg;
689			j--;
690		}
691	}
692	return 0;
693}
694
695static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader)
696{
697	struct r600_shader *shader = &pipeshader->shader;
698	struct tgsi_token *tokens = pipeshader->tokens;
699	struct pipe_stream_output_info so = pipeshader->so;
700	struct tgsi_full_immediate *immediate;
701	struct tgsi_full_property *property;
702	struct r600_shader_ctx ctx;
703	struct r600_bytecode_output output[32];
704	unsigned output_done, noutput;
705	unsigned opcode;
706	int i, j, r = 0, pos0;
707
708	ctx.bc = &shader->bc;
709	ctx.shader = shader;
710	r600_bytecode_init(ctx.bc, rctx->chip_class);
711	ctx.tokens = tokens;
712	tgsi_scan_shader(tokens, &ctx.info);
713	tgsi_parse_init(&ctx.parse, tokens);
714	ctx.type = ctx.parse.FullHeader.Processor.Processor;
715	shader->processor_type = ctx.type;
716	ctx.bc->type = shader->processor_type;
717
718	shader->clamp_color = (((ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->clamp_fragment_color) ||
719		((ctx.type == TGSI_PROCESSOR_VERTEX) && rctx->clamp_vertex_color));
720
721	shader->nr_cbufs = rctx->nr_cbufs;
722
723	/* register allocations */
724	/* Values [0,127] correspond to GPR[0..127].
725	 * Values [128,159] correspond to constant buffer bank 0
726	 * Values [160,191] correspond to constant buffer bank 1
727	 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
728	 * Values [256,287] correspond to constant buffer bank 2 (EG)
729	 * Values [288,319] correspond to constant buffer bank 3 (EG)
730	 * Other special values are shown in the list below.
731	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
732	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
733	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
734	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
735	 * 248	SQ_ALU_SRC_0: special constant 0.0.
736	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
737	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
738	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
739	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
740	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
741	 * 254	SQ_ALU_SRC_PV: previous vector result.
742	 * 255	SQ_ALU_SRC_PS: previous scalar result.
743	 */
744	for (i = 0; i < TGSI_FILE_COUNT; i++) {
745		ctx.file_offset[i] = 0;
746	}
747	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
748		ctx.file_offset[TGSI_FILE_INPUT] = 1;
749		if (ctx.bc->chip_class >= EVERGREEN) {
750			r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
751		} else {
752			r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
753		}
754	}
755	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
756		ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
757	}
758	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
759						ctx.info.file_max[TGSI_FILE_INPUT] + 1;
760	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
761						ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
762
763	/* Outside the GPR range. This will be translated to one of the
764	 * kcache banks later. */
765	ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
766
767	ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
768	ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
769			ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
770	ctx.temp_reg = ctx.bc->ar_reg + 1;
771
772	ctx.nliterals = 0;
773	ctx.literals = NULL;
774	shader->fs_write_all = FALSE;
775	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
776		tgsi_parse_token(&ctx.parse);
777		switch (ctx.parse.FullToken.Token.Type) {
778		case TGSI_TOKEN_TYPE_IMMEDIATE:
779			immediate = &ctx.parse.FullToken.FullImmediate;
780			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
781			if(ctx.literals == NULL) {
782				r = -ENOMEM;
783				goto out_err;
784			}
785			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
786			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
787			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
788			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
789			ctx.nliterals++;
790			break;
791		case TGSI_TOKEN_TYPE_DECLARATION:
792			r = tgsi_declaration(&ctx);
793			if (r)
794				goto out_err;
795			break;
796		case TGSI_TOKEN_TYPE_INSTRUCTION:
797			r = tgsi_is_supported(&ctx);
798			if (r)
799				goto out_err;
800			ctx.max_driver_temp_used = 0;
801			/* reserve first tmp for everyone */
802			r600_get_temp(&ctx);
803
804			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
805			if ((r = tgsi_split_constant(&ctx)))
806				goto out_err;
807			if ((r = tgsi_split_literal_constant(&ctx)))
808				goto out_err;
809			if (ctx.bc->chip_class == CAYMAN)
810				ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
811			else if (ctx.bc->chip_class >= EVERGREEN)
812				ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
813			else
814				ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
815			r = ctx.inst_info->process(&ctx);
816			if (r)
817				goto out_err;
818			break;
819		case TGSI_TOKEN_TYPE_PROPERTY:
820			property = &ctx.parse.FullToken.FullProperty;
821			if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
822				if (property->u[0].Data == 1)
823					shader->fs_write_all = TRUE;
824			}
825			break;
826		default:
827			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
828			r = -EINVAL;
829			goto out_err;
830		}
831	}
832
833	noutput = shader->noutput;
834
835	/* clamp color outputs */
836	if (shader->clamp_color) {
837		for (i = 0; i < noutput; i++) {
838			if (shader->output[i].name == TGSI_SEMANTIC_COLOR ||
839				shader->output[i].name == TGSI_SEMANTIC_BCOLOR) {
840
841				int j;
842				for (j = 0; j < 4; j++) {
843					struct r600_bytecode_alu alu;
844					memset(&alu, 0, sizeof(struct r600_bytecode_alu));
845
846					/* MOV_SAT R, R */
847					alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
848					alu.dst.sel = shader->output[i].gpr;
849					alu.dst.chan = j;
850					alu.dst.write = 1;
851					alu.dst.clamp = 1;
852					alu.src[0].sel = alu.dst.sel;
853					alu.src[0].chan = j;
854
855					if (j == 3) {
856						alu.last = 1;
857					}
858					r = r600_bytecode_add_alu(ctx.bc, &alu);
859					if (r)
860						return r;
861				}
862			}
863		}
864	}
865
866	/* Add stream outputs. */
867	if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) {
868		for (i = 0; i < so.num_outputs; i++) {
869			struct r600_bytecode_output output;
870
871			if (so.output[i].output_buffer >= 4) {
872				R600_ERR("exceeded the max number of stream output buffers, got: %d\n",
873					 so.output[i].output_buffer);
874				r = -EINVAL;
875				goto out_err;
876			}
877			if (so.output[i].start_component) {
878			   R600_ERR("stream_output - start_component cannot be non-zero\n");
879			   r = -EINVAL;
880			   goto out_err;
881			}
882
883			memset(&output, 0, sizeof(struct r600_bytecode_output));
884			output.gpr = shader->output[so.output[i].register_index].gpr;
885			output.elem_size = 0;
886			output.array_base = so.output[i].dst_offset;
887			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
888			output.burst_count = 1;
889			output.barrier = 1;
890			output.array_size = 0;
891			output.comp_mask = (1 << so.output[i].num_components) - 1;
892			if (ctx.bc->chip_class >= EVERGREEN) {
893				switch (so.output[i].output_buffer) {
894				case 0:
895					output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0;
896					break;
897				case 1:
898					output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1;
899					break;
900				case 2:
901					output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2;
902					break;
903				case 3:
904					output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3;
905					break;
906				}
907			} else {
908				switch (so.output[i].output_buffer) {
909				case 0:
910					output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0;
911					break;
912				case 1:
913					output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1;
914					break;
915				case 2:
916					output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2;
917					break;
918				case 3:
919					output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3;
920					break;
921				}
922			}
923			r = r600_bytecode_add_output(ctx.bc, &output);
924			if (r)
925				goto out_err;
926		}
927	}
928
929	/* export output */
930	j = 0;
931	for (i = 0, pos0 = 0; i < noutput; i++) {
932		memset(&output[i], 0, sizeof(struct r600_bytecode_output));
933		output[i + j].gpr = shader->output[i].gpr;
934		output[i + j].elem_size = 3;
935		output[i + j].swizzle_x = 0;
936		output[i + j].swizzle_y = 1;
937		output[i + j].swizzle_z = 2;
938		output[i + j].swizzle_w = 3;
939		output[i + j].burst_count = 1;
940		output[i + j].barrier = 1;
941		output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
942		output[i + j].array_base = i - pos0;
943		output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
944		switch (ctx.type) {
945		case TGSI_PROCESSOR_VERTEX:
946			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
947				output[i + j].array_base = 60;
948				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
949				/* position doesn't count in array_base */
950				pos0++;
951			}
952			if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
953				output[i + j].array_base = 61;
954				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
955				/* position doesn't count in array_base */
956				pos0++;
957			}
958			break;
959		case TGSI_PROCESSOR_FRAGMENT:
960			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
961				output[i + j].array_base = shader->output[i].sid;
962				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
963				if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) {
964					for (j = 1; j < shader->nr_cbufs; j++) {
965						memset(&output[i + j], 0, sizeof(struct r600_bytecode_output));
966						output[i + j].gpr = shader->output[i].gpr;
967						output[i + j].elem_size = 3;
968						output[i + j].swizzle_x = 0;
969						output[i + j].swizzle_y = 1;
970						output[i + j].swizzle_z = 2;
971						output[i + j].swizzle_w = 3;
972						output[i + j].burst_count = 1;
973						output[i + j].barrier = 1;
974						output[i + j].array_base = shader->output[i].sid + j;
975						output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
976						output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
977					}
978					j = shader->nr_cbufs-1;
979				}
980			} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
981				output[i + j].array_base = 61;
982				output[i + j].swizzle_x = 2;
983				output[i + j].swizzle_y = 7;
984				output[i + j].swizzle_z = output[i + j].swizzle_w = 7;
985				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
986			} else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
987				output[i + j].array_base = 61;
988				output[i + j].swizzle_x = 7;
989				output[i + j].swizzle_y = 1;
990				output[i + j].swizzle_z = output[i + j].swizzle_w = 7;
991				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
992			} else {
993				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
994				r = -EINVAL;
995				goto out_err;
996			}
997			break;
998		default:
999			R600_ERR("unsupported processor type %d\n", ctx.type);
1000			r = -EINVAL;
1001			goto out_err;
1002		}
1003	}
1004	noutput += j;
1005	/* add fake param output for vertex shader if no param is exported */
1006	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
1007		for (i = 0, pos0 = 0; i < noutput; i++) {
1008			if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
1009				pos0 = 1;
1010				break;
1011			}
1012		}
1013		if (!pos0) {
1014			memset(&output[i], 0, sizeof(struct r600_bytecode_output));
1015			output[i].gpr = 0;
1016			output[i].elem_size = 3;
1017			output[i].swizzle_x = 7;
1018			output[i].swizzle_y = 7;
1019			output[i].swizzle_z = 7;
1020			output[i].swizzle_w = 7;
1021			output[i].burst_count = 1;
1022			output[i].barrier = 1;
1023			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
1024			output[i].array_base = 0;
1025			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1026			noutput++;
1027		}
1028	}
1029	/* add fake pixel export */
1030	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
1031		memset(&output[0], 0, sizeof(struct r600_bytecode_output));
1032		output[0].gpr = 0;
1033		output[0].elem_size = 3;
1034		output[0].swizzle_x = 7;
1035		output[0].swizzle_y = 7;
1036		output[0].swizzle_z = 7;
1037		output[0].swizzle_w = 7;
1038		output[0].burst_count = 1;
1039		output[0].barrier = 1;
1040		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
1041		output[0].array_base = 0;
1042		output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
1043		noutput++;
1044	}
1045	/* set export done on last export of each type */
1046	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
1047		if (ctx.bc->chip_class < CAYMAN) {
1048			if (i == (noutput - 1)) {
1049				output[i].end_of_program = 1;
1050			}
1051		}
1052		if (!(output_done & (1 << output[i].type))) {
1053			output_done |= (1 << output[i].type);
1054			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
1055		}
1056	}
1057	/* add output to bytecode */
1058	for (i = 0; i < noutput; i++) {
1059		r = r600_bytecode_add_output(ctx.bc, &output[i]);
1060		if (r)
1061			goto out_err;
1062	}
1063	/* add program end */
1064	if (ctx.bc->chip_class == CAYMAN)
1065		cm_bytecode_add_cf_end(ctx.bc);
1066
1067	free(ctx.literals);
1068	tgsi_parse_free(&ctx.parse);
1069	return 0;
1070out_err:
1071	free(ctx.literals);
1072	tgsi_parse_free(&ctx.parse);
1073	return r;
1074}
1075
1076static int tgsi_unsupported(struct r600_shader_ctx *ctx)
1077{
1078	R600_ERR("%s tgsi opcode unsupported\n",
1079		 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
1080	return -EINVAL;
1081}
1082
/* No-op instruction handler: emits nothing and always reports success. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* intentionally unused */
	return 0;
}
1087
1088static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
1089			const struct r600_shader_src *shader_src,
1090			unsigned chan)
1091{
1092	bc_src->sel = shader_src->sel;
1093	bc_src->chan = shader_src->swizzle[chan];
1094	bc_src->neg = shader_src->neg;
1095	bc_src->abs = shader_src->abs;
1096	bc_src->rel = shader_src->rel;
1097	bc_src->value = shader_src->value[bc_src->chan];
1098}
1099
1100static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
1101{
1102	bc_src->abs = 1;
1103	bc_src->neg = 0;
1104}
1105
1106static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
1107{
1108	bc_src->neg = !bc_src->neg;
1109}
1110
1111static void tgsi_dst(struct r600_shader_ctx *ctx,
1112		     const struct tgsi_full_dst_register *tgsi_dst,
1113		     unsigned swizzle,
1114		     struct r600_bytecode_alu_dst *r600_dst)
1115{
1116	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1117
1118	r600_dst->sel = tgsi_dst->Register.Index;
1119	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
1120	r600_dst->chan = swizzle;
1121	r600_dst->write = 1;
1122	if (tgsi_dst->Register.Indirect)
1123		r600_dst->rel = V_SQ_REL_RELATIVE;
1124	if (inst->Instruction.Saturate) {
1125		r600_dst->clamp = 1;
1126	}
1127}
1128
/*
 * Return the index (0..3) of the highest channel enabled in the low four
 * bits of writemask, or 0 when none of those bits is set.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan from W down to Y; X (or an empty mask) falls through to 0 */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1 << chan))
			return chan;
	}
	return 0;
}
1140
1141static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
1142{
1143	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1144	struct r600_bytecode_alu alu;
1145	int i, j, r;
1146	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1147
1148	for (i = 0; i < lasti + 1; i++) {
1149		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1150			continue;
1151
1152		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1153		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1154
1155		alu.inst = ctx->inst_info->r600_opcode;
1156		if (!swap) {
1157			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1158				r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1159			}
1160		} else {
1161			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
1162			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1163		}
1164		/* handle some special cases */
1165		switch (ctx->inst_info->tgsi_opcode) {
1166		case TGSI_OPCODE_SUB:
1167			r600_bytecode_src_toggle_neg(&alu.src[1]);
1168			break;
1169		case TGSI_OPCODE_ABS:
1170			r600_bytecode_src_set_abs(&alu.src[0]);
1171			break;
1172		default:
1173			break;
1174		}
1175		if (i == lasti || trans_only) {
1176			alu.last = 1;
1177		}
1178		r = r600_bytecode_add_alu(ctx->bc, &alu);
1179		if (r)
1180			return r;
1181	}
1182	return 0;
1183}
1184
/* tgsi_op2_s() with sources in TGSI order and `last` only on the final channel. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1189
/* tgsi_op2_s() with the two sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap = 1;
	const int trans_only = 0;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1194
/* tgsi_op2_s() with every emitted instruction flagged `last`. */
static int tgsi_op2_trans(struct r600_shader_ctx *ctx)
{
	const int swap = 0;
	const int trans_only = 1;

	return tgsi_op2_s(ctx, swap, trans_only);
}
1199
1200static int tgsi_ineg(struct r600_shader_ctx *ctx)
1201{
1202	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1203	struct r600_bytecode_alu alu;
1204	int i, r;
1205	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1206
1207	for (i = 0; i < lasti + 1; i++) {
1208
1209		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1210			continue;
1211		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1212		alu.inst = ctx->inst_info->r600_opcode;
1213
1214		alu.src[0].sel = V_SQ_ALU_SRC_0;
1215
1216		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1217
1218		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1219
1220		if (i == lasti) {
1221			alu.last = 1;
1222		}
1223		r = r600_bytecode_add_alu(ctx->bc, &alu);
1224		if (r)
1225			return r;
1226	}
1227	return 0;
1228
1229}
1230
1231static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
1232{
1233	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1234	int i, j, r;
1235	struct r600_bytecode_alu alu;
1236	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1237
1238	for (i = 0 ; i < last_slot; i++) {
1239		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1240		alu.inst = ctx->inst_info->r600_opcode;
1241		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1242			r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);
1243		}
1244		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1245		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1246
1247		if (i == last_slot - 1)
1248			alu.last = 1;
1249		r = r600_bytecode_add_alu(ctx->bc, &alu);
1250		if (r)
1251			return r;
1252	}
1253	return 0;
1254}
1255
1256/*
1257 * r600 - trunc to -PI..PI range
1258 * r700 - normalize by dividing by 2PI
1259 * see fdo bug 27901
1260 */
1261static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
1262{
1263	static float half_inv_pi = 1.0 /(3.1415926535 * 2);
1264	static float double_pi = 3.1415926535 * 2;
1265	static float neg_pi = -3.1415926535;
1266
1267	int r;
1268	struct r600_bytecode_alu alu;
1269
1270	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1271	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1272	alu.is_op3 = 1;
1273
1274	alu.dst.chan = 0;
1275	alu.dst.sel = ctx->temp_reg;
1276	alu.dst.write = 1;
1277
1278	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1279
1280	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1281	alu.src[1].chan = 0;
1282	alu.src[1].value = *(uint32_t *)&half_inv_pi;
1283	alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1284	alu.src[2].chan = 0;
1285	alu.last = 1;
1286	r = r600_bytecode_add_alu(ctx->bc, &alu);
1287	if (r)
1288		return r;
1289
1290	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1291	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1292
1293	alu.dst.chan = 0;
1294	alu.dst.sel = ctx->temp_reg;
1295	alu.dst.write = 1;
1296
1297	alu.src[0].sel = ctx->temp_reg;
1298	alu.src[0].chan = 0;
1299	alu.last = 1;
1300	r = r600_bytecode_add_alu(ctx->bc, &alu);
1301	if (r)
1302		return r;
1303
1304	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1305	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1306	alu.is_op3 = 1;
1307
1308	alu.dst.chan = 0;
1309	alu.dst.sel = ctx->temp_reg;
1310	alu.dst.write = 1;
1311
1312	alu.src[0].sel = ctx->temp_reg;
1313	alu.src[0].chan = 0;
1314
1315	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1316	alu.src[1].chan = 0;
1317	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1318	alu.src[2].chan = 0;
1319
1320	if (ctx->bc->chip_class == R600) {
1321		alu.src[1].value = *(uint32_t *)&double_pi;
1322		alu.src[2].value = *(uint32_t *)&neg_pi;
1323	} else {
1324		alu.src[1].sel = V_SQ_ALU_SRC_1;
1325		alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1326		alu.src[2].neg = 1;
1327	}
1328
1329	alu.last = 1;
1330	r = r600_bytecode_add_alu(ctx->bc, &alu);
1331	if (r)
1332		return r;
1333	return 0;
1334}
1335
1336static int cayman_trig(struct r600_shader_ctx *ctx)
1337{
1338	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1339	struct r600_bytecode_alu alu;
1340	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1341	int i, r;
1342
1343	r = tgsi_setup_trig(ctx);
1344	if (r)
1345		return r;
1346
1347
1348	for (i = 0; i < last_slot; i++) {
1349		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1350		alu.inst = ctx->inst_info->r600_opcode;
1351		alu.dst.chan = i;
1352
1353		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1354		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1355
1356		alu.src[0].sel = ctx->temp_reg;
1357		alu.src[0].chan = 0;
1358		if (i == last_slot - 1)
1359			alu.last = 1;
1360		r = r600_bytecode_add_alu(ctx->bc, &alu);
1361		if (r)
1362			return r;
1363	}
1364	return 0;
1365}
1366
1367static int tgsi_trig(struct r600_shader_ctx *ctx)
1368{
1369	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1370	struct r600_bytecode_alu alu;
1371	int i, r;
1372	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1373
1374	r = tgsi_setup_trig(ctx);
1375	if (r)
1376		return r;
1377
1378	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1379	alu.inst = ctx->inst_info->r600_opcode;
1380	alu.dst.chan = 0;
1381	alu.dst.sel = ctx->temp_reg;
1382	alu.dst.write = 1;
1383
1384	alu.src[0].sel = ctx->temp_reg;
1385	alu.src[0].chan = 0;
1386	alu.last = 1;
1387	r = r600_bytecode_add_alu(ctx->bc, &alu);
1388	if (r)
1389		return r;
1390
1391	/* replicate result */
1392	for (i = 0; i < lasti + 1; i++) {
1393		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1394			continue;
1395
1396		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1397		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1398
1399		alu.src[0].sel = ctx->temp_reg;
1400		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1401		if (i == lasti)
1402			alu.last = 1;
1403		r = r600_bytecode_add_alu(ctx->bc, &alu);
1404		if (r)
1405			return r;
1406	}
1407	return 0;
1408}
1409
1410static int tgsi_scs(struct r600_shader_ctx *ctx)
1411{
1412	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1413	struct r600_bytecode_alu alu;
1414	int i, r;
1415
1416	/* We'll only need the trig stuff if we are going to write to the
1417	 * X or Y components of the destination vector.
1418	 */
1419	if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1420		r = tgsi_setup_trig(ctx);
1421		if (r)
1422			return r;
1423	}
1424
1425	/* dst.x = COS */
1426	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1427		if (ctx->bc->chip_class == CAYMAN) {
1428			for (i = 0 ; i < 3; i++) {
1429				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1430				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1431				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1432
1433				if (i == 0)
1434					alu.dst.write = 1;
1435				else
1436					alu.dst.write = 0;
1437				alu.src[0].sel = ctx->temp_reg;
1438				alu.src[0].chan = 0;
1439				if (i == 2)
1440					alu.last = 1;
1441				r = r600_bytecode_add_alu(ctx->bc, &alu);
1442				if (r)
1443					return r;
1444			}
1445		} else {
1446			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1447			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1448			tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1449
1450			alu.src[0].sel = ctx->temp_reg;
1451			alu.src[0].chan = 0;
1452			alu.last = 1;
1453			r = r600_bytecode_add_alu(ctx->bc, &alu);
1454			if (r)
1455				return r;
1456		}
1457	}
1458
1459	/* dst.y = SIN */
1460	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1461		if (ctx->bc->chip_class == CAYMAN) {
1462			for (i = 0 ; i < 3; i++) {
1463				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1464				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1465				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1466				if (i == 1)
1467					alu.dst.write = 1;
1468				else
1469					alu.dst.write = 0;
1470				alu.src[0].sel = ctx->temp_reg;
1471				alu.src[0].chan = 0;
1472				if (i == 2)
1473					alu.last = 1;
1474				r = r600_bytecode_add_alu(ctx->bc, &alu);
1475				if (r)
1476					return r;
1477			}
1478		} else {
1479			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1480			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1481			tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1482
1483			alu.src[0].sel = ctx->temp_reg;
1484			alu.src[0].chan = 0;
1485			alu.last = 1;
1486			r = r600_bytecode_add_alu(ctx->bc, &alu);
1487			if (r)
1488				return r;
1489		}
1490	}
1491
1492	/* dst.z = 0.0; */
1493	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1494		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1495
1496		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1497
1498		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1499
1500		alu.src[0].sel = V_SQ_ALU_SRC_0;
1501		alu.src[0].chan = 0;
1502
1503		alu.last = 1;
1504
1505		r = r600_bytecode_add_alu(ctx->bc, &alu);
1506		if (r)
1507			return r;
1508	}
1509
1510	/* dst.w = 1.0; */
1511	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1512		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1513
1514		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1515
1516		tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1517
1518		alu.src[0].sel = V_SQ_ALU_SRC_1;
1519		alu.src[0].chan = 0;
1520
1521		alu.last = 1;
1522
1523		r = r600_bytecode_add_alu(ctx->bc, &alu);
1524		if (r)
1525			return r;
1526	}
1527
1528	return 0;
1529}
1530
1531static int tgsi_kill(struct r600_shader_ctx *ctx)
1532{
1533	struct r600_bytecode_alu alu;
1534	int i, r;
1535
1536	for (i = 0; i < 4; i++) {
1537		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1538		alu.inst = ctx->inst_info->r600_opcode;
1539
1540		alu.dst.chan = i;
1541
1542		alu.src[0].sel = V_SQ_ALU_SRC_0;
1543
1544		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1545			alu.src[1].sel = V_SQ_ALU_SRC_1;
1546			alu.src[1].neg = 1;
1547		} else {
1548			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
1549		}
1550		if (i == 3) {
1551			alu.last = 1;
1552		}
1553		r = r600_bytecode_add_alu(ctx->bc, &alu);
1554		if (r)
1555			return r;
1556	}
1557
1558	/* kill must be last in ALU */
1559	ctx->bc->force_add_cf = 1;
1560	ctx->shader->uses_kill = TRUE;
1561	return 0;
1562}
1563
1564static int tgsi_lit(struct r600_shader_ctx *ctx)
1565{
1566	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1567	struct r600_bytecode_alu alu;
1568	int r;
1569
1570	/* tmp.x = max(src.y, 0.0) */
1571	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1572	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1573	r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
1574	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1575	alu.src[1].chan = 1;
1576
1577	alu.dst.sel = ctx->temp_reg;
1578	alu.dst.chan = 0;
1579	alu.dst.write = 1;
1580
1581	alu.last = 1;
1582	r = r600_bytecode_add_alu(ctx->bc, &alu);
1583	if (r)
1584		return r;
1585
1586	if (inst->Dst[0].Register.WriteMask & (1 << 2))
1587	{
1588		int chan;
1589		int sel;
1590		int i;
1591
1592		if (ctx->bc->chip_class == CAYMAN) {
1593			for (i = 0; i < 3; i++) {
1594				/* tmp.z = log(tmp.x) */
1595				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1596				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1597				alu.src[0].sel = ctx->temp_reg;
1598				alu.src[0].chan = 0;
1599				alu.dst.sel = ctx->temp_reg;
1600				alu.dst.chan = i;
1601				if (i == 2) {
1602					alu.dst.write = 1;
1603					alu.last = 1;
1604				} else
1605					alu.dst.write = 0;
1606
1607				r = r600_bytecode_add_alu(ctx->bc, &alu);
1608				if (r)
1609					return r;
1610			}
1611		} else {
1612			/* tmp.z = log(tmp.x) */
1613			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1614			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1615			alu.src[0].sel = ctx->temp_reg;
1616			alu.src[0].chan = 0;
1617			alu.dst.sel = ctx->temp_reg;
1618			alu.dst.chan = 2;
1619			alu.dst.write = 1;
1620			alu.last = 1;
1621			r = r600_bytecode_add_alu(ctx->bc, &alu);
1622			if (r)
1623				return r;
1624		}
1625
1626		chan = alu.dst.chan;
1627		sel = alu.dst.sel;
1628
1629		/* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
1630		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1631		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1632		alu.src[0].sel  = sel;
1633		alu.src[0].chan = chan;
1634		r600_bytecode_src(&alu.src[1], &ctx->src[0], 3);
1635		r600_bytecode_src(&alu.src[2], &ctx->src[0], 0);
1636		alu.dst.sel = ctx->temp_reg;
1637		alu.dst.chan = 0;
1638		alu.dst.write = 1;
1639		alu.is_op3 = 1;
1640		alu.last = 1;
1641		r = r600_bytecode_add_alu(ctx->bc, &alu);
1642		if (r)
1643			return r;
1644
1645		if (ctx->bc->chip_class == CAYMAN) {
1646			for (i = 0; i < 3; i++) {
1647				/* dst.z = exp(tmp.x) */
1648				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1649				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1650				alu.src[0].sel = ctx->temp_reg;
1651				alu.src[0].chan = 0;
1652				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1653				if (i == 2) {
1654					alu.dst.write = 1;
1655					alu.last = 1;
1656				} else
1657					alu.dst.write = 0;
1658				r = r600_bytecode_add_alu(ctx->bc, &alu);
1659				if (r)
1660					return r;
1661			}
1662		} else {
1663			/* dst.z = exp(tmp.x) */
1664			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1665			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1666			alu.src[0].sel = ctx->temp_reg;
1667			alu.src[0].chan = 0;
1668			tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1669			alu.last = 1;
1670			r = r600_bytecode_add_alu(ctx->bc, &alu);
1671			if (r)
1672				return r;
1673		}
1674	}
1675
1676	/* dst.x, <- 1.0  */
1677	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1678	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1679	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
1680	alu.src[0].chan = 0;
1681	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1682	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1683	r = r600_bytecode_add_alu(ctx->bc, &alu);
1684	if (r)
1685		return r;
1686
1687	/* dst.y = max(src.x, 0.0) */
1688	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1689	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1690	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1691	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1692	alu.src[1].chan = 0;
1693	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1694	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1695	r = r600_bytecode_add_alu(ctx->bc, &alu);
1696	if (r)
1697		return r;
1698
1699	/* dst.w, <- 1.0  */
1700	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1701	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1702	alu.src[0].sel  = V_SQ_ALU_SRC_1;
1703	alu.src[0].chan = 0;
1704	tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1705	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1706	alu.last = 1;
1707	r = r600_bytecode_add_alu(ctx->bc, &alu);
1708	if (r)
1709		return r;
1710
1711	return 0;
1712}
1713
1714static int tgsi_rsq(struct r600_shader_ctx *ctx)
1715{
1716	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1717	struct r600_bytecode_alu alu;
1718	int i, r;
1719
1720	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1721
1722	/* FIXME:
1723	 * For state trackers other than OpenGL, we'll want to use
1724	 * _RECIPSQRT_IEEE instead.
1725	 */
1726	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1727
1728	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1729		r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
1730		r600_bytecode_src_set_abs(&alu.src[i]);
1731	}
1732	alu.dst.sel = ctx->temp_reg;
1733	alu.dst.write = 1;
1734	alu.last = 1;
1735	r = r600_bytecode_add_alu(ctx->bc, &alu);
1736	if (r)
1737		return r;
1738	/* replicate result */
1739	return tgsi_helper_tempx_replicate(ctx);
1740}
1741
1742static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1743{
1744	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1745	struct r600_bytecode_alu alu;
1746	int i, r;
1747
1748	for (i = 0; i < 4; i++) {
1749		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1750		alu.src[0].sel = ctx->temp_reg;
1751		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1752		alu.dst.chan = i;
1753		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1754		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1755		if (i == 3)
1756			alu.last = 1;
1757		r = r600_bytecode_add_alu(ctx->bc, &alu);
1758		if (r)
1759			return r;
1760	}
1761	return 0;
1762}
1763
1764static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1765{
1766	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1767	struct r600_bytecode_alu alu;
1768	int i, r;
1769
1770	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1771	alu.inst = ctx->inst_info->r600_opcode;
1772	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1773		r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
1774	}
1775	alu.dst.sel = ctx->temp_reg;
1776	alu.dst.write = 1;
1777	alu.last = 1;
1778	r = r600_bytecode_add_alu(ctx->bc, &alu);
1779	if (r)
1780		return r;
1781	/* replicate result */
1782	return tgsi_helper_tempx_replicate(ctx);
1783}
1784
1785static int cayman_pow(struct r600_shader_ctx *ctx)
1786{
1787	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1788	int i, r;
1789	struct r600_bytecode_alu alu;
1790	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
1791
1792	for (i = 0; i < 3; i++) {
1793		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1794		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1795		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1796		alu.dst.sel = ctx->temp_reg;
1797		alu.dst.chan = i;
1798		alu.dst.write = 1;
1799		if (i == 2)
1800			alu.last = 1;
1801		r = r600_bytecode_add_alu(ctx->bc, &alu);
1802		if (r)
1803			return r;
1804	}
1805
1806	/* b * LOG2(a) */
1807	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1808	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1809	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
1810	alu.src[1].sel = ctx->temp_reg;
1811	alu.dst.sel = ctx->temp_reg;
1812	alu.dst.write = 1;
1813	alu.last = 1;
1814	r = r600_bytecode_add_alu(ctx->bc, &alu);
1815	if (r)
1816		return r;
1817
1818	for (i = 0; i < last_slot; i++) {
1819		/* POW(a,b) = EXP2(b * LOG2(a))*/
1820		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1821		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1822		alu.src[0].sel = ctx->temp_reg;
1823
1824		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1825		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1826		if (i == last_slot - 1)
1827			alu.last = 1;
1828		r = r600_bytecode_add_alu(ctx->bc, &alu);
1829		if (r)
1830			return r;
1831	}
1832	return 0;
1833}
1834
1835static int tgsi_pow(struct r600_shader_ctx *ctx)
1836{
1837	struct r600_bytecode_alu alu;
1838	int r;
1839
1840	/* LOG2(a) */
1841	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1842	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1843	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
1844	alu.dst.sel = ctx->temp_reg;
1845	alu.dst.write = 1;
1846	alu.last = 1;
1847	r = r600_bytecode_add_alu(ctx->bc, &alu);
1848	if (r)
1849		return r;
1850	/* b * LOG2(a) */
1851	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1852	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1853	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
1854	alu.src[1].sel = ctx->temp_reg;
1855	alu.dst.sel = ctx->temp_reg;
1856	alu.dst.write = 1;
1857	alu.last = 1;
1858	r = r600_bytecode_add_alu(ctx->bc, &alu);
1859	if (r)
1860		return r;
1861	/* POW(a,b) = EXP2(b * LOG2(a))*/
1862	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1863	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1864	alu.src[0].sel = ctx->temp_reg;
1865	alu.dst.sel = ctx->temp_reg;
1866	alu.dst.write = 1;
1867	alu.last = 1;
1868	r = r600_bytecode_add_alu(ctx->bc, &alu);
1869	if (r)
1870		return r;
1871	return tgsi_helper_tempx_replicate(ctx);
1872}
1873
1874static int tgsi_ssg(struct r600_shader_ctx *ctx)
1875{
1876	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1877	struct r600_bytecode_alu alu;
1878	int i, r;
1879
1880	/* tmp = (src > 0 ? 1 : src) */
1881	for (i = 0; i < 4; i++) {
1882		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1883		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1884		alu.is_op3 = 1;
1885
1886		alu.dst.sel = ctx->temp_reg;
1887		alu.dst.chan = i;
1888
1889		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
1890		alu.src[1].sel = V_SQ_ALU_SRC_1;
1891		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
1892
1893		if (i == 3)
1894			alu.last = 1;
1895		r = r600_bytecode_add_alu(ctx->bc, &alu);
1896		if (r)
1897			return r;
1898	}
1899
1900	/* dst = (-tmp > 0 ? -1 : tmp) */
1901	for (i = 0; i < 4; i++) {
1902		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1903		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1904		alu.is_op3 = 1;
1905		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1906
1907		alu.src[0].sel = ctx->temp_reg;
1908		alu.src[0].chan = i;
1909		alu.src[0].neg = 1;
1910
1911		alu.src[1].sel = V_SQ_ALU_SRC_1;
1912		alu.src[1].neg = 1;
1913
1914		alu.src[2].sel = ctx->temp_reg;
1915		alu.src[2].chan = i;
1916
1917		if (i == 3)
1918			alu.last = 1;
1919		r = r600_bytecode_add_alu(ctx->bc, &alu);
1920		if (r)
1921			return r;
1922	}
1923	return 0;
1924}
1925
1926static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1927{
1928	struct r600_bytecode_alu alu;
1929	int i, r;
1930
1931	for (i = 0; i < 4; i++) {
1932		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1933		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1934			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1935			alu.dst.chan = i;
1936		} else {
1937			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1938			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1939			alu.src[0].sel = ctx->temp_reg;
1940			alu.src[0].chan = i;
1941		}
1942		if (i == 3) {
1943			alu.last = 1;
1944		}
1945		r = r600_bytecode_add_alu(ctx->bc, &alu);
1946		if (r)
1947			return r;
1948	}
1949	return 0;
1950}
1951
1952static int tgsi_op3(struct r600_shader_ctx *ctx)
1953{
1954	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1955	struct r600_bytecode_alu alu;
1956	int i, j, r;
1957	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1958
1959	for (i = 0; i < lasti + 1; i++) {
1960		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1961			continue;
1962
1963		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1964		alu.inst = ctx->inst_info->r600_opcode;
1965		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1966			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1967		}
1968
1969		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1970		alu.dst.chan = i;
1971		alu.dst.write = 1;
1972		alu.is_op3 = 1;
1973		if (i == lasti) {
1974			alu.last = 1;
1975		}
1976		r = r600_bytecode_add_alu(ctx->bc, &alu);
1977		if (r)
1978			return r;
1979	}
1980	return 0;
1981}
1982
1983static int tgsi_dp(struct r600_shader_ctx *ctx)
1984{
1985	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1986	struct r600_bytecode_alu alu;
1987	int i, j, r;
1988
1989	for (i = 0; i < 4; i++) {
1990		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
1991		alu.inst = ctx->inst_info->r600_opcode;
1992		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1993			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
1994		}
1995
1996		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1997		alu.dst.chan = i;
1998		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1999		/* handle some special cases */
2000		switch (ctx->inst_info->tgsi_opcode) {
2001		case TGSI_OPCODE_DP2:
2002			if (i > 1) {
2003				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
2004				alu.src[0].chan = alu.src[1].chan = 0;
2005			}
2006			break;
2007		case TGSI_OPCODE_DP3:
2008			if (i > 2) {
2009				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
2010				alu.src[0].chan = alu.src[1].chan = 0;
2011			}
2012			break;
2013		case TGSI_OPCODE_DPH:
2014			if (i == 3) {
2015				alu.src[0].sel = V_SQ_ALU_SRC_1;
2016				alu.src[0].chan = 0;
2017				alu.src[0].neg = 0;
2018			}
2019			break;
2020		default:
2021			break;
2022		}
2023		if (i == 3) {
2024			alu.last = 1;
2025		}
2026		r = r600_bytecode_add_alu(ctx->bc, &alu);
2027		if (r)
2028			return r;
2029	}
2030	return 0;
2031}
2032
2033static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
2034						    unsigned index)
2035{
2036	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2037	return 	(inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
2038		inst->Src[index].Register.File != TGSI_FILE_INPUT) ||
2039		ctx->src[index].neg || ctx->src[index].abs;
2040}
2041
2042static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
2043					unsigned index)
2044{
2045	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2046	return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
2047}
2048
2049static int tgsi_tex(struct r600_shader_ctx *ctx)
2050{
2051	static float one_point_five = 1.5f;
2052	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2053	struct r600_bytecode_tex tex;
2054	struct r600_bytecode_alu alu;
2055	unsigned src_gpr;
2056	int r, i, j;
2057	int opcode;
2058	/* Texture fetch instructions can only use gprs as source.
2059	 * Also they cannot negate the source or take the absolute value */
2060	const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0);
2061	boolean src_loaded = FALSE;
2062	unsigned sampler_src_reg = 1;
2063	u8 offset_x = 0, offset_y = 0, offset_z = 0;
2064
2065	src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
2066
2067	if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
2068		/* get offset values */
2069		if (inst->Texture.NumOffsets) {
2070			assert(inst->Texture.NumOffsets == 1);
2071
2072			offset_x = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
2073			offset_y = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
2074			offset_z = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
2075		}
2076	} else if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
2077		/* TGSI moves the sampler to src reg 3 for TXD */
2078		sampler_src_reg = 3;
2079
2080		for (i = 1; i < 3; i++) {
2081			/* set gradients h/v */
2082			memset(&tex, 0, sizeof(struct r600_bytecode_tex));
2083			tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H :
2084				SQ_TEX_INST_SET_GRADIENTS_V;
2085			tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
2086			tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
2087
2088			if (tgsi_tex_src_requires_loading(ctx, i)) {
2089				tex.src_gpr = r600_get_temp(ctx);
2090				tex.src_sel_x = 0;
2091				tex.src_sel_y = 1;
2092				tex.src_sel_z = 2;
2093				tex.src_sel_w = 3;
2094
2095				for (j = 0; j < 4; j++) {
2096					memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2097					alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2098                                        r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
2099                                        alu.dst.sel = tex.src_gpr;
2100                                        alu.dst.chan = j;
2101                                        if (j == 3)
2102                                                alu.last = 1;
2103                                        alu.dst.write = 1;
2104                                        r = r600_bytecode_add_alu(ctx->bc, &alu);
2105                                        if (r)
2106                                                return r;
2107				}
2108
2109			} else {
2110				tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i);
2111				tex.src_sel_x = ctx->src[i].swizzle[0];
2112				tex.src_sel_y = ctx->src[i].swizzle[1];
2113				tex.src_sel_z = ctx->src[i].swizzle[2];
2114				tex.src_sel_w = ctx->src[i].swizzle[3];
2115				tex.src_rel = ctx->src[i].rel;
2116			}
2117			tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */
2118			tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
2119			if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
2120				tex.coord_type_x = 1;
2121				tex.coord_type_y = 1;
2122				tex.coord_type_z = 1;
2123				tex.coord_type_w = 1;
2124			}
2125			r = r600_bytecode_add_tex(ctx->bc, &tex);
2126			if (r)
2127				return r;
2128		}
2129	} else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
2130		int out_chan;
2131		/* Add perspective divide */
2132		if (ctx->bc->chip_class == CAYMAN) {
2133			out_chan = 2;
2134			for (i = 0; i < 3; i++) {
2135				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2136				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2137				r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
2138
2139				alu.dst.sel = ctx->temp_reg;
2140				alu.dst.chan = i;
2141				if (i == 2)
2142					alu.last = 1;
2143				if (out_chan == i)
2144					alu.dst.write = 1;
2145				r = r600_bytecode_add_alu(ctx->bc, &alu);
2146				if (r)
2147					return r;
2148			}
2149
2150		} else {
2151			out_chan = 3;
2152			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2153			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2154			r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
2155
2156			alu.dst.sel = ctx->temp_reg;
2157			alu.dst.chan = out_chan;
2158			alu.last = 1;
2159			alu.dst.write = 1;
2160			r = r600_bytecode_add_alu(ctx->bc, &alu);
2161			if (r)
2162				return r;
2163		}
2164
2165		for (i = 0; i < 3; i++) {
2166			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2167			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2168			alu.src[0].sel = ctx->temp_reg;
2169			alu.src[0].chan = out_chan;
2170			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2171			alu.dst.sel = ctx->temp_reg;
2172			alu.dst.chan = i;
2173			alu.dst.write = 1;
2174			r = r600_bytecode_add_alu(ctx->bc, &alu);
2175			if (r)
2176				return r;
2177		}
2178		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2179		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2180		alu.src[0].sel = V_SQ_ALU_SRC_1;
2181		alu.src[0].chan = 0;
2182		alu.dst.sel = ctx->temp_reg;
2183		alu.dst.chan = 3;
2184		alu.last = 1;
2185		alu.dst.write = 1;
2186		r = r600_bytecode_add_alu(ctx->bc, &alu);
2187		if (r)
2188			return r;
2189		src_loaded = TRUE;
2190		src_gpr = ctx->temp_reg;
2191	}
2192
2193	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
2194		static const unsigned src0_swizzle[] = {2, 2, 0, 1};
2195		static const unsigned src1_swizzle[] = {1, 0, 2, 2};
2196
2197		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
2198		for (i = 0; i < 4; i++) {
2199			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2200			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
2201			r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
2202			r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
2203			alu.dst.sel = ctx->temp_reg;
2204			alu.dst.chan = i;
2205			if (i == 3)
2206				alu.last = 1;
2207			alu.dst.write = 1;
2208			r = r600_bytecode_add_alu(ctx->bc, &alu);
2209			if (r)
2210				return r;
2211		}
2212
2213		/* tmp1.z = RCP_e(|tmp1.z|) */
2214		if (ctx->bc->chip_class == CAYMAN) {
2215			for (i = 0; i < 3; i++) {
2216				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2217				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2218				alu.src[0].sel = ctx->temp_reg;
2219				alu.src[0].chan = 2;
2220				alu.src[0].abs = 1;
2221				alu.dst.sel = ctx->temp_reg;
2222				alu.dst.chan = i;
2223				if (i == 2)
2224					alu.dst.write = 1;
2225				if (i == 2)
2226					alu.last = 1;
2227				r = r600_bytecode_add_alu(ctx->bc, &alu);
2228				if (r)
2229					return r;
2230			}
2231		} else {
2232			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2233			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2234			alu.src[0].sel = ctx->temp_reg;
2235			alu.src[0].chan = 2;
2236			alu.src[0].abs = 1;
2237			alu.dst.sel = ctx->temp_reg;
2238			alu.dst.chan = 2;
2239			alu.dst.write = 1;
2240			alu.last = 1;
2241			r = r600_bytecode_add_alu(ctx->bc, &alu);
2242			if (r)
2243				return r;
2244		}
2245
2246		/* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
2247		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
2248		 * muladd has no writemask, have to use another temp
2249		 */
2250		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2251		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2252		alu.is_op3 = 1;
2253
2254		alu.src[0].sel = ctx->temp_reg;
2255		alu.src[0].chan = 0;
2256		alu.src[1].sel = ctx->temp_reg;
2257		alu.src[1].chan = 2;
2258
2259		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
2260		alu.src[2].chan = 0;
2261		alu.src[2].value = *(uint32_t *)&one_point_five;
2262
2263		alu.dst.sel = ctx->temp_reg;
2264		alu.dst.chan = 0;
2265		alu.dst.write = 1;
2266
2267		r = r600_bytecode_add_alu(ctx->bc, &alu);
2268		if (r)
2269			return r;
2270
2271		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2272		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2273		alu.is_op3 = 1;
2274
2275		alu.src[0].sel = ctx->temp_reg;
2276		alu.src[0].chan = 1;
2277		alu.src[1].sel = ctx->temp_reg;
2278		alu.src[1].chan = 2;
2279
2280		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
2281		alu.src[2].chan = 0;
2282		alu.src[2].value = *(uint32_t *)&one_point_five;
2283
2284		alu.dst.sel = ctx->temp_reg;
2285		alu.dst.chan = 1;
2286		alu.dst.write = 1;
2287
2288		alu.last = 1;
2289		r = r600_bytecode_add_alu(ctx->bc, &alu);
2290		if (r)
2291			return r;
2292
2293		src_loaded = TRUE;
2294		src_gpr = ctx->temp_reg;
2295	}
2296
2297	if (src_requires_loading && !src_loaded) {
2298		for (i = 0; i < 4; i++) {
2299			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2300			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2301			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2302			alu.dst.sel = ctx->temp_reg;
2303			alu.dst.chan = i;
2304			if (i == 3)
2305				alu.last = 1;
2306			alu.dst.write = 1;
2307			r = r600_bytecode_add_alu(ctx->bc, &alu);
2308			if (r)
2309				return r;
2310		}
2311		src_loaded = TRUE;
2312		src_gpr = ctx->temp_reg;
2313	}
2314
2315	opcode = ctx->inst_info->r600_opcode;
2316	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
2317	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
2318	    inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
2319	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
2320	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) {
2321		switch (opcode) {
2322		case SQ_TEX_INST_SAMPLE:
2323			opcode = SQ_TEX_INST_SAMPLE_C;
2324			break;
2325		case SQ_TEX_INST_SAMPLE_L:
2326			opcode = SQ_TEX_INST_SAMPLE_C_L;
2327			break;
2328		case SQ_TEX_INST_SAMPLE_LB:
2329			opcode = SQ_TEX_INST_SAMPLE_C_LB;
2330			break;
2331		case SQ_TEX_INST_SAMPLE_G:
2332			opcode = SQ_TEX_INST_SAMPLE_C_G;
2333			break;
2334		}
2335	}
2336
2337	memset(&tex, 0, sizeof(struct r600_bytecode_tex));
2338	tex.inst = opcode;
2339
2340	tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
2341	tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
2342	tex.src_gpr = src_gpr;
2343	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
2344	tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
2345	tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
2346	tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
2347	tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
2348	if (src_loaded) {
2349		tex.src_sel_x = 0;
2350		tex.src_sel_y = 1;
2351		tex.src_sel_z = 2;
2352		tex.src_sel_w = 3;
2353	} else {
2354		tex.src_sel_x = ctx->src[0].swizzle[0];
2355		tex.src_sel_y = ctx->src[0].swizzle[1];
2356		tex.src_sel_z = ctx->src[0].swizzle[2];
2357		tex.src_sel_w = ctx->src[0].swizzle[3];
2358		tex.src_rel = ctx->src[0].rel;
2359	}
2360
2361	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
2362		tex.src_sel_x = 1;
2363		tex.src_sel_y = 0;
2364		tex.src_sel_z = 3;
2365		tex.src_sel_w = 1;
2366	}
2367
2368	if (inst->Texture.Texture != TGSI_TEXTURE_RECT &&
2369	    inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) {
2370		tex.coord_type_x = 1;
2371		tex.coord_type_y = 1;
2372	}
2373	tex.coord_type_z = 1;
2374	tex.coord_type_w = 1;
2375
2376	tex.offset_x = offset_x;
2377	tex.offset_y = offset_y;
2378	tex.offset_z = offset_z;
2379
2380	/* Put the depth for comparison in W.
2381	 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W.
2382	 * Some instructions expect the depth in Z. */
2383	if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
2384	     inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
2385	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
2386	     inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) &&
2387	    opcode != SQ_TEX_INST_SAMPLE_C_L &&
2388	    opcode != SQ_TEX_INST_SAMPLE_C_LB) {
2389		tex.src_sel_w = tex.src_sel_z;
2390	}
2391
2392	if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY ||
2393	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) {
2394		if (opcode == SQ_TEX_INST_SAMPLE_C_L ||
2395		    opcode == SQ_TEX_INST_SAMPLE_C_LB) {
2396			/* the array index is read from Y */
2397			tex.coord_type_y = 0;
2398		} else {
2399			/* the array index is read from Z */
2400			tex.coord_type_z = 0;
2401			tex.src_sel_z = tex.src_sel_y;
2402		}
2403	} else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
2404		   inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)
2405		/* the array index is read from Z */
2406		tex.coord_type_z = 0;
2407
2408	r = r600_bytecode_add_tex(ctx->bc, &tex);
2409	if (r)
2410		return r;
2411
2412	/* add shadow ambient support  - gallium doesn't do it yet */
2413	return 0;
2414}
2415
2416static int tgsi_lrp(struct r600_shader_ctx *ctx)
2417{
2418	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2419	struct r600_bytecode_alu alu;
2420	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2421	unsigned i;
2422	int r;
2423
2424	/* optimize if it's just an equal balance */
2425	if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
2426		for (i = 0; i < lasti + 1; i++) {
2427			if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2428				continue;
2429
2430			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2431			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2432			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
2433			r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
2434			alu.omod = 3;
2435			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2436			alu.dst.chan = i;
2437			if (i == lasti) {
2438				alu.last = 1;
2439			}
2440			r = r600_bytecode_add_alu(ctx->bc, &alu);
2441			if (r)
2442				return r;
2443		}
2444		return 0;
2445	}
2446
2447	/* 1 - src0 */
2448	for (i = 0; i < lasti + 1; i++) {
2449		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2450			continue;
2451
2452		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2453		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
2454		alu.src[0].sel = V_SQ_ALU_SRC_1;
2455		alu.src[0].chan = 0;
2456		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
2457		r600_bytecode_src_toggle_neg(&alu.src[1]);
2458		alu.dst.sel = ctx->temp_reg;
2459		alu.dst.chan = i;
2460		if (i == lasti) {
2461			alu.last = 1;
2462		}
2463		alu.dst.write = 1;
2464		r = r600_bytecode_add_alu(ctx->bc, &alu);
2465		if (r)
2466			return r;
2467	}
2468
2469	/* (1 - src0) * src2 */
2470	for (i = 0; i < lasti + 1; i++) {
2471		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2472			continue;
2473
2474		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2475		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2476		alu.src[0].sel = ctx->temp_reg;
2477		alu.src[0].chan = i;
2478		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
2479		alu.dst.sel = ctx->temp_reg;
2480		alu.dst.chan = i;
2481		if (i == lasti) {
2482			alu.last = 1;
2483		}
2484		alu.dst.write = 1;
2485		r = r600_bytecode_add_alu(ctx->bc, &alu);
2486		if (r)
2487			return r;
2488	}
2489
2490	/* src0 * src1 + (1 - src0) * src2 */
2491	for (i = 0; i < lasti + 1; i++) {
2492		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2493			continue;
2494
2495		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2496		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2497		alu.is_op3 = 1;
2498		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2499		r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
2500		alu.src[2].sel = ctx->temp_reg;
2501		alu.src[2].chan = i;
2502
2503		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2504		alu.dst.chan = i;
2505		if (i == lasti) {
2506			alu.last = 1;
2507		}
2508		r = r600_bytecode_add_alu(ctx->bc, &alu);
2509		if (r)
2510			return r;
2511	}
2512	return 0;
2513}
2514
2515static int tgsi_cmp(struct r600_shader_ctx *ctx)
2516{
2517	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2518	struct r600_bytecode_alu alu;
2519	int i, r;
2520	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2521
2522	for (i = 0; i < lasti + 1; i++) {
2523		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2524			continue;
2525
2526		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2527		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
2528		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
2529		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
2530		r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
2531		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2532		alu.dst.chan = i;
2533		alu.dst.write = 1;
2534		alu.is_op3 = 1;
2535		if (i == lasti)
2536			alu.last = 1;
2537		r = r600_bytecode_add_alu(ctx->bc, &alu);
2538		if (r)
2539			return r;
2540	}
2541	return 0;
2542}
2543
2544static int tgsi_xpd(struct r600_shader_ctx *ctx)
2545{
2546	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2547	static const unsigned int src0_swizzle[] = {2, 0, 1};
2548	static const unsigned int src1_swizzle[] = {1, 2, 0};
2549	struct r600_bytecode_alu alu;
2550	uint32_t use_temp = 0;
2551	int i, r;
2552
2553	if (inst->Dst[0].Register.WriteMask != 0xf)
2554		use_temp = 1;
2555
2556	for (i = 0; i < 4; i++) {
2557		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2558		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2559		if (i < 3) {
2560			r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
2561			r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
2562		} else {
2563			alu.src[0].sel = V_SQ_ALU_SRC_0;
2564			alu.src[0].chan = i;
2565			alu.src[1].sel = V_SQ_ALU_SRC_0;
2566			alu.src[1].chan = i;
2567		}
2568
2569		alu.dst.sel = ctx->temp_reg;
2570		alu.dst.chan = i;
2571		alu.dst.write = 1;
2572
2573		if (i == 3)
2574			alu.last = 1;
2575		r = r600_bytecode_add_alu(ctx->bc, &alu);
2576		if (r)
2577			return r;
2578	}
2579
2580	for (i = 0; i < 4; i++) {
2581		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2582		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2583
2584		if (i < 3) {
2585			r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
2586			r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
2587		} else {
2588			alu.src[0].sel = V_SQ_ALU_SRC_0;
2589			alu.src[0].chan = i;
2590			alu.src[1].sel = V_SQ_ALU_SRC_0;
2591			alu.src[1].chan = i;
2592		}
2593
2594		alu.src[2].sel = ctx->temp_reg;
2595		alu.src[2].neg = 1;
2596		alu.src[2].chan = i;
2597
2598		if (use_temp)
2599			alu.dst.sel = ctx->temp_reg;
2600		else
2601			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2602		alu.dst.chan = i;
2603		alu.dst.write = 1;
2604		alu.is_op3 = 1;
2605		if (i == 3)
2606			alu.last = 1;
2607		r = r600_bytecode_add_alu(ctx->bc, &alu);
2608		if (r)
2609			return r;
2610	}
2611	if (use_temp)
2612		return tgsi_helper_copy(ctx, inst);
2613	return 0;
2614}
2615
2616static int tgsi_exp(struct r600_shader_ctx *ctx)
2617{
2618	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2619	struct r600_bytecode_alu alu;
2620	int r;
2621	int i;
2622
2623	/* result.x = 2^floor(src); */
2624	if (inst->Dst[0].Register.WriteMask & 1) {
2625		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2626
2627		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2628		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2629
2630		alu.dst.sel = ctx->temp_reg;
2631		alu.dst.chan = 0;
2632		alu.dst.write = 1;
2633		alu.last = 1;
2634		r = r600_bytecode_add_alu(ctx->bc, &alu);
2635		if (r)
2636			return r;
2637
2638		if (ctx->bc->chip_class == CAYMAN) {
2639			for (i = 0; i < 3; i++) {
2640				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2641				alu.src[0].sel = ctx->temp_reg;
2642				alu.src[0].chan = 0;
2643
2644				alu.dst.sel = ctx->temp_reg;
2645				alu.dst.chan = i;
2646				if (i == 0)
2647					alu.dst.write = 1;
2648				if (i == 2)
2649					alu.last = 1;
2650				r = r600_bytecode_add_alu(ctx->bc, &alu);
2651				if (r)
2652					return r;
2653			}
2654		} else {
2655			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2656			alu.src[0].sel = ctx->temp_reg;
2657			alu.src[0].chan = 0;
2658
2659			alu.dst.sel = ctx->temp_reg;
2660			alu.dst.chan = 0;
2661			alu.dst.write = 1;
2662			alu.last = 1;
2663			r = r600_bytecode_add_alu(ctx->bc, &alu);
2664			if (r)
2665				return r;
2666		}
2667	}
2668
2669	/* result.y = tmp - floor(tmp); */
2670	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2671		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2672
2673		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2674		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2675
2676		alu.dst.sel = ctx->temp_reg;
2677#if 0
2678		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2679		if (r)
2680			return r;
2681#endif
2682		alu.dst.write = 1;
2683		alu.dst.chan = 1;
2684
2685		alu.last = 1;
2686
2687		r = r600_bytecode_add_alu(ctx->bc, &alu);
2688		if (r)
2689			return r;
2690	}
2691
2692	/* result.z = RoughApprox2ToX(tmp);*/
2693	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2694		if (ctx->bc->chip_class == CAYMAN) {
2695			for (i = 0; i < 3; i++) {
2696				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2697				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2698				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2699
2700				alu.dst.sel = ctx->temp_reg;
2701				alu.dst.chan = i;
2702				if (i == 2) {
2703					alu.dst.write = 1;
2704					alu.last = 1;
2705				}
2706
2707				r = r600_bytecode_add_alu(ctx->bc, &alu);
2708				if (r)
2709					return r;
2710			}
2711		} else {
2712			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2713			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2714			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2715
2716			alu.dst.sel = ctx->temp_reg;
2717			alu.dst.write = 1;
2718			alu.dst.chan = 2;
2719
2720			alu.last = 1;
2721
2722			r = r600_bytecode_add_alu(ctx->bc, &alu);
2723			if (r)
2724				return r;
2725		}
2726	}
2727
2728	/* result.w = 1.0;*/
2729	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2730		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2731
2732		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2733		alu.src[0].sel = V_SQ_ALU_SRC_1;
2734		alu.src[0].chan = 0;
2735
2736		alu.dst.sel = ctx->temp_reg;
2737		alu.dst.chan = 3;
2738		alu.dst.write = 1;
2739		alu.last = 1;
2740		r = r600_bytecode_add_alu(ctx->bc, &alu);
2741		if (r)
2742			return r;
2743	}
2744	return tgsi_helper_copy(ctx, inst);
2745}
2746
2747static int tgsi_log(struct r600_shader_ctx *ctx)
2748{
2749	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2750	struct r600_bytecode_alu alu;
2751	int r;
2752	int i;
2753
2754	/* result.x = floor(log2(|src|)); */
2755	if (inst->Dst[0].Register.WriteMask & 1) {
2756		if (ctx->bc->chip_class == CAYMAN) {
2757			for (i = 0; i < 3; i++) {
2758				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2759
2760				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2761				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2762				r600_bytecode_src_set_abs(&alu.src[0]);
2763
2764				alu.dst.sel = ctx->temp_reg;
2765				alu.dst.chan = i;
2766				if (i == 0)
2767					alu.dst.write = 1;
2768				if (i == 2)
2769					alu.last = 1;
2770				r = r600_bytecode_add_alu(ctx->bc, &alu);
2771				if (r)
2772					return r;
2773			}
2774
2775		} else {
2776			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2777
2778			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2779			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2780			r600_bytecode_src_set_abs(&alu.src[0]);
2781
2782			alu.dst.sel = ctx->temp_reg;
2783			alu.dst.chan = 0;
2784			alu.dst.write = 1;
2785			alu.last = 1;
2786			r = r600_bytecode_add_alu(ctx->bc, &alu);
2787			if (r)
2788				return r;
2789		}
2790
2791		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2792		alu.src[0].sel = ctx->temp_reg;
2793		alu.src[0].chan = 0;
2794
2795		alu.dst.sel = ctx->temp_reg;
2796		alu.dst.chan = 0;
2797		alu.dst.write = 1;
2798		alu.last = 1;
2799
2800		r = r600_bytecode_add_alu(ctx->bc, &alu);
2801		if (r)
2802			return r;
2803	}
2804
2805	/* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
2806	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2807
2808		if (ctx->bc->chip_class == CAYMAN) {
2809			for (i = 0; i < 3; i++) {
2810				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2811
2812				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2813				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2814				r600_bytecode_src_set_abs(&alu.src[0]);
2815
2816				alu.dst.sel = ctx->temp_reg;
2817				alu.dst.chan = i;
2818				if (i == 1)
2819					alu.dst.write = 1;
2820				if (i == 2)
2821					alu.last = 1;
2822
2823				r = r600_bytecode_add_alu(ctx->bc, &alu);
2824				if (r)
2825					return r;
2826			}
2827		} else {
2828			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2829
2830			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2831			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2832			r600_bytecode_src_set_abs(&alu.src[0]);
2833
2834			alu.dst.sel = ctx->temp_reg;
2835			alu.dst.chan = 1;
2836			alu.dst.write = 1;
2837			alu.last = 1;
2838
2839			r = r600_bytecode_add_alu(ctx->bc, &alu);
2840			if (r)
2841				return r;
2842		}
2843
2844		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2845
2846		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2847		alu.src[0].sel = ctx->temp_reg;
2848		alu.src[0].chan = 1;
2849
2850		alu.dst.sel = ctx->temp_reg;
2851		alu.dst.chan = 1;
2852		alu.dst.write = 1;
2853		alu.last = 1;
2854
2855		r = r600_bytecode_add_alu(ctx->bc, &alu);
2856		if (r)
2857			return r;
2858
2859		if (ctx->bc->chip_class == CAYMAN) {
2860			for (i = 0; i < 3; i++) {
2861				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2862				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2863				alu.src[0].sel = ctx->temp_reg;
2864				alu.src[0].chan = 1;
2865
2866				alu.dst.sel = ctx->temp_reg;
2867				alu.dst.chan = i;
2868				if (i == 1)
2869					alu.dst.write = 1;
2870				if (i == 2)
2871					alu.last = 1;
2872
2873				r = r600_bytecode_add_alu(ctx->bc, &alu);
2874				if (r)
2875					return r;
2876			}
2877		} else {
2878			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2879			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2880			alu.src[0].sel = ctx->temp_reg;
2881			alu.src[0].chan = 1;
2882
2883			alu.dst.sel = ctx->temp_reg;
2884			alu.dst.chan = 1;
2885			alu.dst.write = 1;
2886			alu.last = 1;
2887
2888			r = r600_bytecode_add_alu(ctx->bc, &alu);
2889			if (r)
2890				return r;
2891		}
2892
2893		if (ctx->bc->chip_class == CAYMAN) {
2894			for (i = 0; i < 3; i++) {
2895				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2896				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2897				alu.src[0].sel = ctx->temp_reg;
2898				alu.src[0].chan = 1;
2899
2900				alu.dst.sel = ctx->temp_reg;
2901				alu.dst.chan = i;
2902				if (i == 1)
2903					alu.dst.write = 1;
2904				if (i == 2)
2905					alu.last = 1;
2906
2907				r = r600_bytecode_add_alu(ctx->bc, &alu);
2908				if (r)
2909					return r;
2910			}
2911		} else {
2912			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2913			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2914			alu.src[0].sel = ctx->temp_reg;
2915			alu.src[0].chan = 1;
2916
2917			alu.dst.sel = ctx->temp_reg;
2918			alu.dst.chan = 1;
2919			alu.dst.write = 1;
2920			alu.last = 1;
2921
2922			r = r600_bytecode_add_alu(ctx->bc, &alu);
2923			if (r)
2924				return r;
2925		}
2926
2927		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2928
2929		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2930
2931		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2932		r600_bytecode_src_set_abs(&alu.src[0]);
2933
2934		alu.src[1].sel = ctx->temp_reg;
2935		alu.src[1].chan = 1;
2936
2937		alu.dst.sel = ctx->temp_reg;
2938		alu.dst.chan = 1;
2939		alu.dst.write = 1;
2940		alu.last = 1;
2941
2942		r = r600_bytecode_add_alu(ctx->bc, &alu);
2943		if (r)
2944			return r;
2945	}
2946
2947	/* result.z = log2(|src|);*/
2948	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2949		if (ctx->bc->chip_class == CAYMAN) {
2950			for (i = 0; i < 3; i++) {
2951				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2952
2953				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2954				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2955				r600_bytecode_src_set_abs(&alu.src[0]);
2956
2957				alu.dst.sel = ctx->temp_reg;
2958				if (i == 2)
2959					alu.dst.write = 1;
2960				alu.dst.chan = i;
2961				if (i == 2)
2962					alu.last = 1;
2963
2964				r = r600_bytecode_add_alu(ctx->bc, &alu);
2965				if (r)
2966					return r;
2967			}
2968		} else {
2969			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2970
2971			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2972			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
2973			r600_bytecode_src_set_abs(&alu.src[0]);
2974
2975			alu.dst.sel = ctx->temp_reg;
2976			alu.dst.write = 1;
2977			alu.dst.chan = 2;
2978			alu.last = 1;
2979
2980			r = r600_bytecode_add_alu(ctx->bc, &alu);
2981			if (r)
2982				return r;
2983		}
2984	}
2985
2986	/* result.w = 1.0; */
2987	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2988		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
2989
2990		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2991		alu.src[0].sel = V_SQ_ALU_SRC_1;
2992		alu.src[0].chan = 0;
2993
2994		alu.dst.sel = ctx->temp_reg;
2995		alu.dst.chan = 3;
2996		alu.dst.write = 1;
2997		alu.last = 1;
2998
2999		r = r600_bytecode_add_alu(ctx->bc, &alu);
3000		if (r)
3001			return r;
3002	}
3003
3004	return tgsi_helper_copy(ctx, inst);
3005}
3006
3007static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
3008{
3009	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3010	struct r600_bytecode_alu alu;
3011	int r;
3012
3013	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3014
3015	switch (inst->Instruction.Opcode) {
3016	case TGSI_OPCODE_ARL:
3017		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
3018		break;
3019	case TGSI_OPCODE_ARR:
3020		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
3021		break;
3022	case TGSI_OPCODE_UARL:
3023		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
3024		break;
3025	default:
3026		assert(0);
3027		return -1;
3028	}
3029
3030	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3031	alu.last = 1;
3032	alu.dst.sel = ctx->bc->ar_reg;
3033	alu.dst.write = 1;
3034	r = r600_bytecode_add_alu(ctx->bc, &alu);
3035	if (r)
3036		return r;
3037
3038	ctx->bc->ar_loaded = 0;
3039	return 0;
3040}
3041static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
3042{
3043	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3044	struct r600_bytecode_alu alu;
3045	int r;
3046
3047	switch (inst->Instruction.Opcode) {
3048	case TGSI_OPCODE_ARL:
3049		memset(&alu, 0, sizeof(alu));
3050		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
3051		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3052		alu.dst.sel = ctx->bc->ar_reg;
3053		alu.dst.write = 1;
3054		alu.last = 1;
3055
3056		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3057			return r;
3058
3059		memset(&alu, 0, sizeof(alu));
3060		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
3061		alu.src[0].sel = ctx->bc->ar_reg;
3062		alu.dst.sel = ctx->bc->ar_reg;
3063		alu.dst.write = 1;
3064		alu.last = 1;
3065
3066		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3067			return r;
3068		break;
3069	case TGSI_OPCODE_ARR:
3070		memset(&alu, 0, sizeof(alu));
3071		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
3072		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3073		alu.dst.sel = ctx->bc->ar_reg;
3074		alu.dst.write = 1;
3075		alu.last = 1;
3076
3077		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3078			return r;
3079		break;
3080	case TGSI_OPCODE_UARL:
3081		memset(&alu, 0, sizeof(alu));
3082		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
3083		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3084		alu.dst.sel = ctx->bc->ar_reg;
3085		alu.dst.write = 1;
3086		alu.last = 1;
3087
3088		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
3089			return r;
3090		break;
3091	default:
3092		assert(0);
3093		return -1;
3094	}
3095
3096	ctx->bc->ar_loaded = 0;
3097	return 0;
3098}
3099
3100static int tgsi_opdst(struct r600_shader_ctx *ctx)
3101{
3102	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3103	struct r600_bytecode_alu alu;
3104	int i, r = 0;
3105
3106	for (i = 0; i < 4; i++) {
3107		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3108
3109		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
3110		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3111
3112		if (i == 0 || i == 3) {
3113			alu.src[0].sel = V_SQ_ALU_SRC_1;
3114		} else {
3115			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
3116		}
3117
3118		if (i == 0 || i == 2) {
3119			alu.src[1].sel = V_SQ_ALU_SRC_1;
3120		} else {
3121			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
3122		}
3123		if (i == 3)
3124			alu.last = 1;
3125		r = r600_bytecode_add_alu(ctx->bc, &alu);
3126		if (r)
3127			return r;
3128	}
3129	return 0;
3130}
3131
3132static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
3133{
3134	struct r600_bytecode_alu alu;
3135	int r;
3136
3137	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3138	alu.inst = opcode;
3139	alu.predicate = 1;
3140
3141	alu.dst.sel = ctx->temp_reg;
3142	alu.dst.write = 1;
3143	alu.dst.chan = 0;
3144
3145	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
3146	alu.src[1].sel = V_SQ_ALU_SRC_0;
3147	alu.src[1].chan = 0;
3148
3149	alu.last = 1;
3150
3151	r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
3152	if (r)
3153		return r;
3154	return 0;
3155}
3156
3157static int pops(struct r600_shader_ctx *ctx, int pops)
3158{
3159	unsigned force_pop = ctx->bc->force_add_cf;
3160
3161	if (!force_pop) {
3162		int alu_pop = 3;
3163		if (ctx->bc->cf_last) {
3164			if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU))
3165				alu_pop = 0;
3166			else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER))
3167				alu_pop = 1;
3168		}
3169		alu_pop += pops;
3170		if (alu_pop == 1) {
3171			ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER);
3172			ctx->bc->force_add_cf = 1;
3173		} else if (alu_pop == 2) {
3174			ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER);
3175			ctx->bc->force_add_cf = 1;
3176		} else {
3177			force_pop = 1;
3178		}
3179	}
3180
3181	if (force_pop) {
3182		r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
3183		ctx->bc->cf_last->pop_count = pops;
3184		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
3185	}
3186
3187	return 0;
3188}
3189
3190static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
3191{
3192	switch(reason) {
3193	case FC_PUSH_VPM:
3194		ctx->bc->callstack[ctx->bc->call_sp].current--;
3195		break;
3196	case FC_PUSH_WQM:
3197	case FC_LOOP:
3198		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
3199		break;
3200	case FC_REP:
3201		/* TOODO : for 16 vp asic should -= 2; */
3202		ctx->bc->callstack[ctx->bc->call_sp].current --;
3203		break;
3204	}
3205}
3206
3207static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
3208{
3209	if (check_max_only) {
3210		int diff;
3211		switch (reason) {
3212		case FC_PUSH_VPM:
3213			diff = 1;
3214			break;
3215		case FC_PUSH_WQM:
3216			diff = 4;
3217			break;
3218		default:
3219			assert(0);
3220			diff = 0;
3221		}
3222		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
3223		    ctx->bc->callstack[ctx->bc->call_sp].max) {
3224			ctx->bc->callstack[ctx->bc->call_sp].max =
3225				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
3226		}
3227		return;
3228	}
3229	switch (reason) {
3230	case FC_PUSH_VPM:
3231		ctx->bc->callstack[ctx->bc->call_sp].current++;
3232		break;
3233	case FC_PUSH_WQM:
3234	case FC_LOOP:
3235		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
3236		break;
3237	case FC_REP:
3238		ctx->bc->callstack[ctx->bc->call_sp].current++;
3239		break;
3240	}
3241
3242	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
3243	    ctx->bc->callstack[ctx->bc->call_sp].max) {
3244		ctx->bc->callstack[ctx->bc->call_sp].max =
3245			ctx->bc->callstack[ctx->bc->call_sp].current;
3246	}
3247}
3248
3249static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
3250{
3251	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
3252
3253	sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid,
3254						sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1));
3255	sp->mid[sp->num_mid] = ctx->bc->cf_last;
3256	sp->num_mid++;
3257}
3258
3259static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
3260{
3261	ctx->bc->fc_sp++;
3262	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
3263	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
3264}
3265
3266static void fc_poplevel(struct r600_shader_ctx *ctx)
3267{
3268	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
3269	if (sp->mid) {
3270		free(sp->mid);
3271		sp->mid = NULL;
3272	}
3273	sp->num_mid = 0;
3274	sp->start = NULL;
3275	sp->type = 0;
3276	ctx->bc->fc_sp--;
3277}
3278
/*
 * Disabled (compiled out) sketches of subroutine-return / loop-flag helpers.
 * Several of them are empty stubs or carry TODOs; nothing in this region is
 * built.
 */
#if 0
/* Emit a CF RETURN instruction; always reports success. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN));
	return 0;
}

/* Emit a CF JUMP with the given pop count; `offset` is not applied yet. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: emits nothing. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

/* Test the flag, jump, clear the flag, pop ifidx + 1 levels, then RETURN. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Test the flag, emit the current instruction's CF opcode with pop_count 1,
 * record it as a fixup point of level fc_sp, then pop one level.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
3326
3327static int tgsi_if(struct r600_shader_ctx *ctx)
3328{
3329	emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT));
3330
3331	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
3332
3333	fc_pushlevel(ctx, FC_IF);
3334
3335	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
3336	return 0;
3337}
3338
3339static int tgsi_else(struct r600_shader_ctx *ctx)
3340{
3341	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
3342	ctx->bc->cf_last->pop_count = 1;
3343
3344	fc_set_mid(ctx, ctx->bc->fc_sp);
3345	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
3346	return 0;
3347}
3348
3349static int tgsi_endif(struct r600_shader_ctx *ctx)
3350{
3351	pops(ctx, 1);
3352	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
3353		R600_ERR("if/endif unbalanced in shader\n");
3354		return -1;
3355	}
3356
3357	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
3358		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3359		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
3360	} else {
3361		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
3362	}
3363	fc_poplevel(ctx);
3364
3365	callstack_decrease_current(ctx, FC_PUSH_VPM);
3366	return 0;
3367}
3368
3369static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
3370{
3371	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
3372
3373	fc_pushlevel(ctx, FC_LOOP);
3374
3375	/* check stack depth */
3376	callstack_check_depth(ctx, FC_LOOP, 0);
3377	return 0;
3378}
3379
3380static int tgsi_endloop(struct r600_shader_ctx *ctx)
3381{
3382	int i;
3383
3384	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
3385
3386	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
3387		R600_ERR("loop/endloop in shader code are not paired.\n");
3388		return -EINVAL;
3389	}
3390
3391	/* fixup loop pointers - from r600isa
3392	   LOOP END points to CF after LOOP START,
3393	   LOOP START point to CF after LOOP END
3394	   BRK/CONT point to LOOP END CF
3395	*/
3396	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
3397
3398	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
3399
3400	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
3401		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
3402	}
3403	/* TODO add LOOPRET support */
3404	fc_poplevel(ctx);
3405	callstack_decrease_current(ctx, FC_LOOP);
3406	return 0;
3407}
3408
3409static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
3410{
3411	unsigned int fscp;
3412
3413	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
3414	{
3415		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
3416			break;
3417	}
3418
3419	if (fscp == 0) {
3420		R600_ERR("Break not inside loop/endloop pair\n");
3421		return -EINVAL;
3422	}
3423
3424	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
3425	ctx->bc->cf_last->pop_count = 1;
3426
3427	fc_set_mid(ctx, fscp);
3428
3429	pops(ctx, 1);
3430	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
3431	return 0;
3432}
3433
3434static int tgsi_umad(struct r600_shader_ctx *ctx)
3435{
3436	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
3437	struct r600_bytecode_alu alu;
3438	int i, j, r;
3439	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
3440
3441	/* src0 * src1 */
3442	for (i = 0; i < lasti + 1; i++) {
3443		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3444			continue;
3445
3446		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3447
3448		alu.dst.chan = i;
3449		alu.dst.sel = ctx->temp_reg;
3450		alu.dst.write = 1;
3451
3452		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
3453		for (j = 0; j < 2; j++) {
3454		        r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
3455		}
3456
3457		alu.last = 1;
3458		r = r600_bytecode_add_alu(ctx->bc, &alu);
3459		if (r)
3460			return r;
3461	}
3462
3463
3464	for (i = 0; i < lasti + 1; i++) {
3465		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
3466			continue;
3467
3468		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
3469		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
3470
3471		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
3472
3473		alu.src[0].sel = ctx->temp_reg;
3474		alu.src[0].chan = i;
3475
3476		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
3477		if (i == lasti) {
3478			alu.last = 1;
3479		}
3480		r = r600_bytecode_add_alu(ctx->bc, &alu);
3481		if (r)
3482			return r;
3483	}
3484	return 0;
3485}
3486
3487static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
3488	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3489	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3490	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3491
3492	/* FIXME:
3493	 * For state trackers other than OpenGL, we'll want to use
3494	 * _RECIP_IEEE instead.
3495	 */
3496	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
3497
3498	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
3499	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3500	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3501	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3502	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3503	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3504	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3505	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3506	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3507	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3508	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3509	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3510	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3511	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3512	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3513	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3514	/* gap */
3515	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3516	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3517	/* gap */
3518	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3519	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3520	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3521	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3522	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3523	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
3524	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3525	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3526	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3527	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3528	/* gap */
3529	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3530	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3531	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3532	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3533	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3534	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3535	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3536	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3537	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3538	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3539	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3540	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3541	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3542	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3543	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3544	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3545	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3546	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3547	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3548	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3549	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3550	{TGSI_OPCODE_TXD,	0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3551	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3552	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3553	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3554	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3555	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3556	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3557	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3558	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3559	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3560	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3561	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3562	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3563	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3564	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3565	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
3566	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3567	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3568	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3569	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3570	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3571	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3572	/* gap */
3573	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3574	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3575	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3576	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3577	/* gap */
3578	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3579	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3580	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3581	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3582	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3583	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
3584	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
3585	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3586	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3587	/* gap */
3588	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3589	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
3590	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
3591	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3592	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
3593	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3594	{TGSI_OPCODE_TXF,	0, SQ_TEX_INST_LD, tgsi_tex},
3595	{TGSI_OPCODE_TXQ,	0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
3596	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3597	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3598	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3599	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3600	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3601	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3602	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3603	/* gap */
3604	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3605	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3606	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3607	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3608	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3609	/* gap */
3610	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3611	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3612	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3613	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3614	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3615	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3616	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3617	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3618	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3619	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3620	/* gap */
3621	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3622	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2_trans},
3623	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3624	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
3625	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
3626	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_op2},
3627	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
3628	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3629	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2},
3630	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2},
3631	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2},
3632	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
3633	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3634	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
3635	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
3636	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
3637	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3638	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2},
3639	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
3640	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
3641	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3642	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
3643	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2_swap},
3644	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3645	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3646	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3647	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3648	{TGSI_OPCODE_LOAD,      0, 0, tgsi_unsupported},
3649	{TGSI_OPCODE_LOAD_MS,   0, 0, tgsi_unsupported},
3650	{TGSI_OPCODE_SAMPLE,    0, 0, tgsi_unsupported},
3651	{TGSI_OPCODE_SAMPLE_B,  0, 0, tgsi_unsupported},
3652	{TGSI_OPCODE_SAMPLE_C,  0, 0, tgsi_unsupported},
3653	{TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
3654	{TGSI_OPCODE_SAMPLE_D,  0, 0, tgsi_unsupported},
3655	{TGSI_OPCODE_SAMPLE_L,  0, 0, tgsi_unsupported},
3656	{TGSI_OPCODE_GATHER4,   0, 0, tgsi_unsupported},
3657	{TGSI_OPCODE_RESINFO,	0, 0, tgsi_unsupported},
3658	{TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
3659	{TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
3660	{TGSI_OPCODE_UARL,      0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_r600_arl},
3661	{TGSI_OPCODE_UCMP,      0, 0, tgsi_unsupported},
3662	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3663};
3664
3665static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
3666	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3667	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3668	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3669	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
3670	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq},
3671	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3672	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3673	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3674	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3675	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3676	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3677	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3678	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3679	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3680	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3681	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3682	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3683	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3684	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3685	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3686	/* gap */
3687	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3688	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3689	/* gap */
3690	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3691	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3692	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3693	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3694	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3695	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
3696	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3697	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3698	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3699	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3700	/* gap */
3701	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3702	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3703	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3704	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3705	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3706	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3707	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3708	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3709	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3710	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3711	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3712	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3713	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3714	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3715	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3716	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3717	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3718	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3719	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3720	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3721	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3722	{TGSI_OPCODE_TXD,	0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3723	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3724	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3725	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3726	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3727	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3728	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3729	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3730	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3731	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3732	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3733	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3734	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3735	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3736	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3737	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
3738	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3739	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3740	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3741	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3742	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3743	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3744	/* gap */
3745	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3746	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3747	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3748	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3749	/* gap */
3750	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3751	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3752	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3753	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3754	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3755	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
3756	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
3757	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3758	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3759	/* gap */
3760	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3761	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2},
3762	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2},
3763	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3764	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
3765	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3766	{TGSI_OPCODE_TXF,	0, SQ_TEX_INST_LD, tgsi_tex},
3767	{TGSI_OPCODE_TXQ,	0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
3768	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3769	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3770	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3771	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3772	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3773	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3774	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3775	/* gap */
3776	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3777	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3778	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3779	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3780	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3781	/* gap */
3782	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3783	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3784	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3785	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3786	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3787	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3788	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3789	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3790	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3791	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3792	/* gap */
3793	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3794	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2},
3795	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3796	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
3797	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
3798	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
3799	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
3800	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3801	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
3802	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3803	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2},
3804	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
3805	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3806	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad},
3807	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2},
3808	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2},
3809	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3810	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans},
3811	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2},
3812	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2},
3813	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3814	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
3815	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2},
3816	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3817	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3818	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3819	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3820	{TGSI_OPCODE_LOAD,      0, 0, tgsi_unsupported},
3821	{TGSI_OPCODE_LOAD_MS,   0, 0, tgsi_unsupported},
3822	{TGSI_OPCODE_SAMPLE,    0, 0, tgsi_unsupported},
3823	{TGSI_OPCODE_SAMPLE_B,  0, 0, tgsi_unsupported},
3824	{TGSI_OPCODE_SAMPLE_C,  0, 0, tgsi_unsupported},
3825	{TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
3826	{TGSI_OPCODE_SAMPLE_D,  0, 0, tgsi_unsupported},
3827	{TGSI_OPCODE_SAMPLE_L,  0, 0, tgsi_unsupported},
3828	{TGSI_OPCODE_GATHER4,   0, 0, tgsi_unsupported},
3829	{TGSI_OPCODE_RESINFO,	0, 0, tgsi_unsupported},
3830	{TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
3831	{TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
3832	{TGSI_OPCODE_UARL,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
3833	{TGSI_OPCODE_UCMP,      0, 0, tgsi_unsupported},
3834	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3835};
3836
/*
 * TGSI opcode table whose entries reference the cayman_* handlers
 * (presumably the Cayman / "cm" chip class described in the CAYMAN notes
 * at the top of this file -- confirm against the code that selects a table).
 *
 * Each row is, by position:
 *   1. the TGSI opcode (or a bare number where that opcode value is unused),
 *   2. a flag that is 1 only for MAD, the single row using an OP3
 *      instruction encoding (presumably an "is three-source op" marker --
 *      confirm against the struct r600_shader_tgsi_instruction definition),
 *   3. the hardware instruction encoding handed to the handler,
 *   4. the handler that emits code for the opcode.
 *
 * Rows are listed in ascending opcode order and the unused opcode numbers
 * are filled with numbered placeholder rows, which suggests the table is
 * indexed directly by opcode value (the lookup is outside this view --
 * TODO confirm).  Keep the order and the placeholders intact when editing.
 *
 * Rows whose handler is tgsi_unsupported carry a NOP (or 0) encoding;
 * going by the handler name these opcodes are rejected.
 *
 * NOTE(review): compared with the table immediately preceding this one,
 * the following opcodes are wired to real handlers there but are
 * tgsi_unsupported here: I2F, AND, OR, F2I, INEG, ISGE, ISLT, U2F, UADD,
 * UMAD, UMAX, UMIN, UMUL, USEQ, USGE, USLT, USNE and UARL.  Confirm whether
 * that is intentional or just not implemented yet for this chip class.
 */
3837static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
3838	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3839	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3840	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3841	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr},
3842	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr},
3843	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3844	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
3845	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3846	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3847	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3848	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3849	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3850	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3851	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
3852	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3853	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
3854	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3855	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3856	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3857	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3858	/* gap */
3859	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3860	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3861	/* gap */
3862	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3863	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3864	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3865	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3866	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3867	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
3868	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr},
3869	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr},
3870	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow},
3871	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3872	/* gap */
3873	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3874	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3875	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3876	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3877	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig},
3878	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3879	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3880	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3881	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3882	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3883	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3884	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3885	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3886	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3887	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3888	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3889	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig},
3890	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3891	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3892	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3893	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3894	{TGSI_OPCODE_TXD,	0, SQ_TEX_INST_SAMPLE_G, tgsi_tex},
3895	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3896	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3897	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3898	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3899	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3900	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3901	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3902	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3903	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3904	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3905	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3906	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3907	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3908	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3909	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex},
3910	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3911	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3912	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3913	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3914	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3915	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3916	/* gap */
3917	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3918	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3919	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3920	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3921	/* gap */
3922	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3923	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3924	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3925	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3926	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3927	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3928	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
3929	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3930	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3931	/* gap */
3932	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* NOTE(review): AND and OR are rejected below although NOT and XOR in this same table go through tgsi_op2 -- confirm this is intentional. */
3933	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3934	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3935	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3936	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2},
3937	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3938	{TGSI_OPCODE_TXF,	0, SQ_TEX_INST_LD, tgsi_tex},
3939	{TGSI_OPCODE_TXQ,	0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex},
3940	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3941	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3942	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3943	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3944	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3945	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3946	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3947	/* gap */
3948	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3949	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3950	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3951	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3952	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3953	/* gap */
3954	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3955	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3956	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3957	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3958	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3959	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3960	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3961	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3962	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3963	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3964	/* gap */
3965	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3966	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3967	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3968	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
3969	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
3970	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3971	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3972	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3973	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3974	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3975	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3976	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3977	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3978	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3979	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3980	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3981	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3982	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3983	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3984	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3985	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3986	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3987	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3988	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3989	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3990	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3991	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3992	{TGSI_OPCODE_LOAD,      0, 0, tgsi_unsupported},
3993	{TGSI_OPCODE_LOAD_MS,   0, 0, tgsi_unsupported},
3994	{TGSI_OPCODE_SAMPLE,    0, 0, tgsi_unsupported},
3995	{TGSI_OPCODE_SAMPLE_B,  0, 0, tgsi_unsupported},
3996	{TGSI_OPCODE_SAMPLE_C,  0, 0, tgsi_unsupported},
3997	{TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
3998	{TGSI_OPCODE_SAMPLE_D,  0, 0, tgsi_unsupported},
3999	{TGSI_OPCODE_SAMPLE_L,  0, 0, tgsi_unsupported},
4000	{TGSI_OPCODE_GATHER4,   0, 0, tgsi_unsupported},
4001	{TGSI_OPCODE_RESINFO,	0, 0, tgsi_unsupported},
4002	{TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
4003	{TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
4004	{TGSI_OPCODE_UARL,      0, 0, tgsi_unsupported},
4005	{TGSI_OPCODE_UCMP,      0, 0, tgsi_unsupported},
4006	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4007};
4008