r600_shader.c revision 0ab7dcddb35560626c1aab4e8e6181dc4b4703a6
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "pipe/p_shader_tokens.h"
24#include "tgsi/tgsi_parse.h"
25#include "tgsi/tgsi_scan.h"
26#include "tgsi/tgsi_dump.h"
27#include "util/u_format.h"
28#include "r600_pipe.h"
29#include "r600_asm.h"
30#include "r600_sq.h"
31#include "r600_formats.h"
32#include "r600_opcodes.h"
33#include "r600d.h"
34#include <stdio.h>
35#include <errno.h>
36
37static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
38{
39	struct r600_pipe_state *rstate = &shader->rstate;
40	struct r600_shader *rshader = &shader->shader;
41	unsigned spi_vs_out_id[10];
42	unsigned i, tmp;
43
44	/* clear previous register */
45	rstate->nregs = 0;
46
47	/* so far never got proper semantic id from tgsi */
48	/* FIXME better to move this in config things so they get emited
49	 * only one time per cs
50	 */
51	for (i = 0; i < 10; i++) {
52		spi_vs_out_id[i] = 0;
53	}
54	for (i = 0; i < 32; i++) {
55		tmp = i << ((i & 3) * 8);
56		spi_vs_out_id[i / 4] |= tmp;
57	}
58	for (i = 0; i < 10; i++) {
59		r600_pipe_state_add_reg(rstate,
60					R_028614_SPI_VS_OUT_ID_0 + i * 4,
61					spi_vs_out_id[i], 0xFFFFFFFF, NULL);
62	}
63
64	r600_pipe_state_add_reg(rstate,
65			R_0286C4_SPI_VS_OUT_CONFIG,
66			S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
67			0xFFFFFFFF, NULL);
68	r600_pipe_state_add_reg(rstate,
69			R_028868_SQ_PGM_RESOURCES_VS,
70			S_028868_NUM_GPRS(rshader->bc.ngpr) |
71			S_028868_STACK_SIZE(rshader->bc.nstack),
72			0xFFFFFFFF, NULL);
73	r600_pipe_state_add_reg(rstate,
74			R_0288D0_SQ_PGM_CF_OFFSET_VS,
75			0x00000000, 0xFFFFFFFF, NULL);
76	r600_pipe_state_add_reg(rstate,
77			R_028858_SQ_PGM_START_VS,
78			r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
79
80	r600_pipe_state_add_reg(rstate,
81				R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
82				0xFFFFFFFF, NULL);
83
84}
85
86int r600_find_vs_semantic_index(struct r600_shader *vs,
87				struct r600_shader *ps, int id)
88{
89	struct r600_shader_io *input = &ps->input[id];
90
91	for (int i = 0; i < vs->noutput; i++) {
92		if (input->name == vs->output[i].name &&
93			input->sid == vs->output[i].sid) {
94			return i - 1;
95		}
96	}
97	return 0;
98}
99
100static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
101{
102	struct r600_pipe_state *rstate = &shader->rstate;
103	struct r600_shader *rshader = &shader->shader;
104	unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
105	int pos_index = -1, face_index = -1;
106
107	rstate->nregs = 0;
108
109	for (i = 0; i < rshader->ninput; i++) {
110		if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
111			pos_index = i;
112		if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
113			face_index = i;
114	}
115
116	for (i = 0; i < rshader->noutput; i++) {
117		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
118			r600_pipe_state_add_reg(rstate,
119						R_02880C_DB_SHADER_CONTROL,
120						S_02880C_Z_EXPORT_ENABLE(1),
121						S_02880C_Z_EXPORT_ENABLE(1), NULL);
122		if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
123			r600_pipe_state_add_reg(rstate,
124						R_02880C_DB_SHADER_CONTROL,
125						S_02880C_STENCIL_REF_EXPORT_ENABLE(1),
126						S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL);
127	}
128
129	exports_ps = 0;
130	num_cout = 0;
131	for (i = 0; i < rshader->noutput; i++) {
132		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
133			exports_ps |= 1;
134		else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
135			num_cout++;
136		}
137	}
138	exports_ps |= S_028854_EXPORT_COLORS(num_cout);
139	if (!exports_ps) {
140		/* always at least export 1 component per pixel */
141		exports_ps = 2;
142	}
143
144	spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
145				S_0286CC_PERSP_GRADIENT_ENA(1);
146	spi_input_z = 0;
147	if (pos_index != -1) {
148		spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
149					S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
150					S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
151					S_0286CC_BARYC_SAMPLE_CNTL(1));
152		spi_input_z |= 1;
153	}
154
155	spi_ps_in_control_1 = 0;
156	if (face_index != -1) {
157		spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
158			S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
159	}
160
161	r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
162	r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
163	r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
164	r600_pipe_state_add_reg(rstate,
165				R_028840_SQ_PGM_START_PS,
166				r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
167	r600_pipe_state_add_reg(rstate,
168				R_028850_SQ_PGM_RESOURCES_PS,
169				S_028868_NUM_GPRS(rshader->bc.ngpr) |
170				S_028868_STACK_SIZE(rshader->bc.nstack),
171				0xFFFFFFFF, NULL);
172	r600_pipe_state_add_reg(rstate,
173				R_028854_SQ_PGM_EXPORTS_PS,
174				exports_ps, 0xFFFFFFFF, NULL);
175	r600_pipe_state_add_reg(rstate,
176				R_0288CC_SQ_PGM_CF_OFFSET_PS,
177				0x00000000, 0xFFFFFFFF, NULL);
178
179	if (rshader->fs_write_all) {
180		r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
181					S_028808_MULTIWRITE_ENABLE(1),
182					S_028808_MULTIWRITE_ENABLE(1),
183					NULL);
184	}
185
186	if (rshader->uses_kill) {
187		/* only set some bits here, the other bits are set in the dsa state */
188		r600_pipe_state_add_reg(rstate,
189					R_02880C_DB_SHADER_CONTROL,
190					S_02880C_KILL_ENABLE(1),
191					S_02880C_KILL_ENABLE(1), NULL);
192	}
193	r600_pipe_state_add_reg(rstate,
194				R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
195				0xFFFFFFFF, NULL);
196}
197
198static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
199{
200	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
201	struct r600_shader *rshader = &shader->shader;
202	void *ptr;
203
204	/* copy new shader */
205	if (shader->bo == NULL) {
206		shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
207		if (shader->bo == NULL) {
208			return -ENOMEM;
209		}
210		ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
211		memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
212		r600_bo_unmap(rctx->radeon, shader->bo);
213	}
214	/* build state */
215	switch (rshader->processor_type) {
216	case TGSI_PROCESSOR_VERTEX:
217		if (rshader->family >= CHIP_CEDAR) {
218			evergreen_pipe_shader_vs(ctx, shader);
219		} else {
220			r600_pipe_shader_vs(ctx, shader);
221		}
222		break;
223	case TGSI_PROCESSOR_FRAGMENT:
224		if (rshader->family >= CHIP_CEDAR) {
225			evergreen_pipe_shader_ps(ctx, shader);
226		} else {
227			r600_pipe_shader_ps(ctx, shader);
228		}
229		break;
230	default:
231		return -EINVAL;
232	}
233	return 0;
234}
235
236static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
237
238int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
239{
240	static int dump_shaders = -1;
241	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
242	int r;
243
244        /* Would like some magic "get_bool_option_once" routine.
245         */
246        if (dump_shaders == -1)
247                dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
248
249	if (dump_shaders) {
250		fprintf(stderr, "--------------------------------------------------------------\n");
251		tgsi_dump(tokens, 0);
252	}
253	shader->shader.family = r600_get_family(rctx->radeon);
254	r = r600_shader_from_tgsi(tokens, &shader->shader);
255	if (r) {
256		R600_ERR("translation from TGSI failed !\n");
257		return r;
258	}
259	r = r600_bc_build(&shader->shader.bc);
260	if (r) {
261		R600_ERR("building bytecode failed !\n");
262		return r;
263	}
264	if (dump_shaders) {
265		r600_bc_dump(&shader->shader.bc);
266		fprintf(stderr, "______________________________________________________________\n");
267	}
268	return r600_pipe_shader(ctx, shader);
269}
270
271void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
272{
273	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
274
275	r600_bo_reference(rctx->radeon, &shader->bo, NULL);
276	r600_bc_clear(&shader->shader.bc);
277}
278
279/*
280 * tgsi -> r600 shader
281 */
282struct r600_shader_tgsi_instruction;
283
/*
 * One decoded TGSI source operand, as filled in by tgsi_src().
 */
struct r600_shader_src {
	/* selector: TGSI register index plus the per-file offset, or a
	 * special value such as V_SQ_ALU_SRC_LITERAL */
	unsigned sel;
	/* TGSI swizzle for the X, Y, Z and W channel, in that order */
	unsigned swizzle[4];
	/* copied from the TGSI Negate flag (may be rewritten by
	 * r600_bc_special_constants() for immediates) */
	unsigned neg;
	/* copied from the TGSI Absolute flag */
	unsigned abs;
	/* V_SQ_REL_RELATIVE for indirectly addressed registers, else 0 */
	unsigned rel;
	/* the four dwords of the immediate when sel is a literal */
	uint32_t value[4];
};
292
293struct r600_shader_ctx {
294	struct tgsi_shader_info			info;
295	struct tgsi_parse_context		parse;
296	const struct tgsi_token			*tokens;
297	unsigned				type;
298	unsigned				file_offset[TGSI_FILE_COUNT];
299	unsigned				temp_reg;
300	unsigned				ar_reg;
301	struct r600_shader_tgsi_instruction	*inst_info;
302	struct r600_bc				*bc;
303	struct r600_shader			*shader;
304	struct r600_shader_src			src[3];
305	u32					*literals;
306	u32					nliterals;
307	u32					max_driver_temp_used;
308	/* needed for evergreen interpolation */
309	boolean                                 input_centroid;
310	boolean                                 input_linear;
311	boolean                                 input_perspective;
312	int					num_interp_gpr;
313};
314
/*
 * One entry of the per-opcode translation tables; r600_shader_from_tgsi()
 * indexes those tables with the TGSI opcode.
 */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry belongs to (printed by tgsi_unsupported()) */
	unsigned tgsi_opcode;
	/* NOTE(review): presumably non-zero for three-operand ALU opcodes --
	 * not read anywhere in the visible code, confirm */
	unsigned is_op3;
	/* hardware opcode the handler emits */
	unsigned r600_opcode;
	/* handler emitting bytecode for the instruction; a non-zero result
	 * aborts the translation */
	int (*process)(struct r600_shader_ctx *ctx);
};
321
322static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[];
323static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
324
325static int tgsi_is_supported(struct r600_shader_ctx *ctx)
326{
327	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
328	int j;
329
330	if (i->Instruction.NumDstRegs > 1) {
331		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
332		return -EINVAL;
333	}
334	if (i->Instruction.Predicate) {
335		R600_ERR("predicate unsupported\n");
336		return -EINVAL;
337	}
338#if 0
339	if (i->Instruction.Label) {
340		R600_ERR("label unsupported\n");
341		return -EINVAL;
342	}
343#endif
344	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
345		if (i->Src[j].Register.Dimension) {
346			R600_ERR("unsupported src %d (dimension %d)\n", j,
347				 i->Src[j].Register.Dimension);
348			return -EINVAL;
349		}
350	}
351	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
352		if (i->Dst[j].Register.Dimension) {
353			R600_ERR("unsupported dst (dimension)\n");
354			return -EINVAL;
355		}
356	}
357	return 0;
358}
359
360static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
361{
362	int i, r;
363	struct r600_bc_alu alu;
364	int gpr = 0, base_chan = 0;
365	int ij_index = 0;
366
367	if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
368		ij_index = 0;
369		if (ctx->shader->input[input].centroid)
370			ij_index++;
371	} else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
372		ij_index = 0;
373		/* if we have perspective add one */
374		if (ctx->input_perspective)  {
375			ij_index++;
376			/* if we have perspective centroid */
377			if (ctx->input_centroid)
378				ij_index++;
379		}
380		if (ctx->shader->input[input].centroid)
381			ij_index++;
382	}
383
384	/* work out gpr and base_chan from index */
385	gpr = ij_index / 2;
386	base_chan = (2 * (ij_index % 2)) + 1;
387
388	for (i = 0; i < 8; i++) {
389		memset(&alu, 0, sizeof(struct r600_bc_alu));
390
391		if (i < 4)
392			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
393		else
394			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
395
396		if ((i > 1) && (i < 6)) {
397			alu.dst.sel = ctx->shader->input[input].gpr;
398			alu.dst.write = 1;
399		}
400
401		alu.dst.chan = i % 4;
402
403		alu.src[0].sel = gpr;
404		alu.src[0].chan = (base_chan - (i % 2));
405
406		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
407
408		alu.bank_swizzle_force = SQ_ALU_VEC_210;
409		if ((i % 4) == 3)
410			alu.last = 1;
411		r = r600_bc_add_alu(ctx->bc, &alu);
412		if (r)
413			return r;
414	}
415	return 0;
416}
417
418
419static int tgsi_declaration(struct r600_shader_ctx *ctx)
420{
421	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
422	unsigned i;
423
424	switch (d->Declaration.File) {
425	case TGSI_FILE_INPUT:
426		i = ctx->shader->ninput++;
427		ctx->shader->input[i].name = d->Semantic.Name;
428		ctx->shader->input[i].sid = d->Semantic.Index;
429		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
430		ctx->shader->input[i].centroid = d->Declaration.Centroid;
431		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
432		if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
433			/* turn input into interpolate on EG */
434			if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
435				if (ctx->shader->input[i].interpolate > 0) {
436					ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
437					evergreen_interp_alu(ctx, i);
438				}
439			}
440		}
441		break;
442	case TGSI_FILE_OUTPUT:
443		i = ctx->shader->noutput++;
444		ctx->shader->output[i].name = d->Semantic.Name;
445		ctx->shader->output[i].sid = d->Semantic.Index;
446		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
447		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
448		break;
449	case TGSI_FILE_CONSTANT:
450	case TGSI_FILE_TEMPORARY:
451	case TGSI_FILE_SAMPLER:
452	case TGSI_FILE_ADDRESS:
453		break;
454	default:
455		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
456		return -EINVAL;
457	}
458	return 0;
459}
460
461static int r600_get_temp(struct r600_shader_ctx *ctx)
462{
463	return ctx->temp_reg + ctx->max_driver_temp_used++;
464}
465
466/*
467 * for evergreen we need to scan the shader to find the number of GPRs we need to
468 * reserve for interpolation.
469 *
470 * we need to know if we are going to emit
471 * any centroid inputs
472 * if perspective and linear are required
473*/
474static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
475{
476	int i;
477	int num_baryc;
478
479	ctx->input_linear = FALSE;
480	ctx->input_perspective = FALSE;
481	ctx->input_centroid = FALSE;
482	ctx->num_interp_gpr = 1;
483
484	/* any centroid inputs */
485	for (i = 0; i < ctx->info.num_inputs; i++) {
486		/* skip position/face */
487		if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
488		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
489			continue;
490		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
491			ctx->input_linear = TRUE;
492		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
493			ctx->input_perspective = TRUE;
494		if (ctx->info.input_centroid[i])
495			ctx->input_centroid = TRUE;
496	}
497
498	num_baryc = 0;
499	/* ignoring sample for now */
500	if (ctx->input_perspective)
501		num_baryc++;
502	if (ctx->input_linear)
503		num_baryc++;
504	if (ctx->input_centroid)
505		num_baryc *= 2;
506
507	ctx->num_interp_gpr += (num_baryc + 1) >> 1;
508
509	/* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
510	return ctx->num_interp_gpr;
511}
512
513static void tgsi_src(struct r600_shader_ctx *ctx,
514		     const struct tgsi_full_src_register *tgsi_src,
515		     struct r600_shader_src *r600_src)
516{
517	memset(r600_src, 0, sizeof(*r600_src));
518	r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
519	r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
520	r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
521	r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
522	r600_src->neg = tgsi_src->Register.Negate;
523	r600_src->abs = tgsi_src->Register.Absolute;
524	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
525		int index;
526		if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
527			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
528			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
529
530			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
531			r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
532			if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
533				return;
534		}
535		index = tgsi_src->Register.Index;
536		r600_src->sel = V_SQ_ALU_SRC_LITERAL;
537		memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
538	} else {
539		if (tgsi_src->Register.Indirect)
540			r600_src->rel = V_SQ_REL_RELATIVE;
541		r600_src->sel = tgsi_src->Register.Index;
542		r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
543	}
544}
545
546static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
547{
548	struct r600_bc_vtx vtx;
549	unsigned int ar_reg;
550	int r;
551
552	if (offset) {
553		struct r600_bc_alu alu;
554
555		memset(&alu, 0, sizeof(alu));
556
557		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
558		alu.src[0].sel = ctx->ar_reg;
559
560		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
561		alu.src[1].value = offset;
562
563		alu.dst.sel = dst_reg;
564		alu.dst.write = 1;
565		alu.last = 1;
566
567		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
568			return r;
569
570		ar_reg = dst_reg;
571	} else {
572		ar_reg = ctx->ar_reg;
573	}
574
575	memset(&vtx, 0, sizeof(vtx));
576	vtx.fetch_type = 2;		/* VTX_FETCH_NO_INDEX_OFFSET */
577	vtx.src_gpr = ar_reg;
578	vtx.mega_fetch_count = 16;
579	vtx.dst_gpr = dst_reg;
580	vtx.dst_sel_x = 0;		/* SEL_X */
581	vtx.dst_sel_y = 1;		/* SEL_Y */
582	vtx.dst_sel_z = 2;		/* SEL_Z */
583	vtx.dst_sel_w = 3;		/* SEL_W */
584	vtx.data_format = FMT_32_32_32_32_FLOAT;
585	vtx.num_format_all = 2;		/* NUM_FORMAT_SCALED */
586	vtx.format_comp_all = 1;	/* FORMAT_COMP_SIGNED */
587	vtx.srf_mode_all = 1;		/* SRF_MODE_NO_ZERO */
588
589	if ((r = r600_bc_add_vtx(ctx->bc, &vtx)))
590		return r;
591
592	return 0;
593}
594
595static int tgsi_split_constant(struct r600_shader_ctx *ctx)
596{
597	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
598	struct r600_bc_alu alu;
599	int i, j, k, nconst, r;
600
601	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
602		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
603			nconst++;
604		}
605		tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
606	}
607	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
608		if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
609			continue;
610		}
611
612		if (ctx->src[i].rel) {
613			int treg = r600_get_temp(ctx);
614			if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
615				return r;
616
617			ctx->src[i].sel = treg;
618			ctx->src[i].rel = 0;
619			j--;
620		} else if (j > 0) {
621			int treg = r600_get_temp(ctx);
622			for (k = 0; k < 4; k++) {
623				memset(&alu, 0, sizeof(struct r600_bc_alu));
624				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
625				alu.src[0].sel = ctx->src[i].sel;
626				alu.src[0].chan = k;
627				alu.src[0].rel = ctx->src[i].rel;
628				alu.dst.sel = treg;
629				alu.dst.chan = k;
630				alu.dst.write = 1;
631				if (k == 3)
632					alu.last = 1;
633				r = r600_bc_add_alu(ctx->bc, &alu);
634				if (r)
635					return r;
636			}
637			ctx->src[i].sel = treg;
638			ctx->src[i].rel =0;
639			j--;
640		}
641	}
642	return 0;
643}
644
645/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
646static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
647{
648	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
649	struct r600_bc_alu alu;
650	int i, j, k, nliteral, r;
651
652	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
653		if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
654			nliteral++;
655		}
656	}
657	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
658		if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
659			int treg = r600_get_temp(ctx);
660			for (k = 0; k < 4; k++) {
661				memset(&alu, 0, sizeof(struct r600_bc_alu));
662				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
663				alu.src[0].sel = ctx->src[i].sel;
664				alu.src[0].chan = k;
665				alu.src[0].value = ctx->src[i].value[k];
666				alu.dst.sel = treg;
667				alu.dst.chan = k;
668				alu.dst.write = 1;
669				if (k == 3)
670					alu.last = 1;
671				r = r600_bc_add_alu(ctx->bc, &alu);
672				if (r)
673					return r;
674			}
675			ctx->src[i].sel = treg;
676			j--;
677		}
678	}
679	return 0;
680}
681
682static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
683{
684	struct tgsi_full_immediate *immediate;
685	struct tgsi_full_property *property;
686	struct r600_shader_ctx ctx;
687	struct r600_bc_output output[32];
688	unsigned output_done, noutput;
689	unsigned opcode;
690	int i, r = 0, pos0;
691
692	ctx.bc = &shader->bc;
693	ctx.shader = shader;
694	r = r600_bc_init(ctx.bc, shader->family);
695	if (r)
696		return r;
697	ctx.tokens = tokens;
698	tgsi_scan_shader(tokens, &ctx.info);
699	tgsi_parse_init(&ctx.parse, tokens);
700	ctx.type = ctx.parse.FullHeader.Processor.Processor;
701	shader->processor_type = ctx.type;
702	ctx.bc->type = shader->processor_type;
703
704	/* register allocations */
705	/* Values [0,127] correspond to GPR[0..127].
706	 * Values [128,159] correspond to constant buffer bank 0
707	 * Values [160,191] correspond to constant buffer bank 1
708	 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
709	 * Values [256,287] correspond to constant buffer bank 2 (EG)
710	 * Values [288,319] correspond to constant buffer bank 3 (EG)
711	 * Other special values are shown in the list below.
712	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
713	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
714	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
715	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
716	 * 248	SQ_ALU_SRC_0: special constant 0.0.
717	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
718	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
719	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
720	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
721	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
722	 * 254	SQ_ALU_SRC_PV: previous vector result.
723	 * 255	SQ_ALU_SRC_PS: previous scalar result.
724	 */
725	for (i = 0; i < TGSI_FILE_COUNT; i++) {
726		ctx.file_offset[i] = 0;
727	}
728	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
729		ctx.file_offset[TGSI_FILE_INPUT] = 1;
730		if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
731			r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
732		} else {
733			r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
734		}
735	}
736	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) {
737		ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
738	}
739	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
740						ctx.info.file_count[TGSI_FILE_INPUT];
741	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
742						ctx.info.file_count[TGSI_FILE_OUTPUT];
743
744	/* Outside the GPR range. This will be translated to one of the
745	 * kcache banks later. */
746	ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
747
748	ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
749	ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
750			ctx.info.file_count[TGSI_FILE_TEMPORARY];
751	ctx.temp_reg = ctx.ar_reg + 1;
752
753	ctx.nliterals = 0;
754	ctx.literals = NULL;
755	shader->fs_write_all = FALSE;
756	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
757		tgsi_parse_token(&ctx.parse);
758		switch (ctx.parse.FullToken.Token.Type) {
759		case TGSI_TOKEN_TYPE_IMMEDIATE:
760			immediate = &ctx.parse.FullToken.FullImmediate;
761			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
762			if(ctx.literals == NULL) {
763				r = -ENOMEM;
764				goto out_err;
765			}
766			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
767			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
768			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
769			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
770			ctx.nliterals++;
771			break;
772		case TGSI_TOKEN_TYPE_DECLARATION:
773			r = tgsi_declaration(&ctx);
774			if (r)
775				goto out_err;
776			break;
777		case TGSI_TOKEN_TYPE_INSTRUCTION:
778			r = tgsi_is_supported(&ctx);
779			if (r)
780				goto out_err;
781			ctx.max_driver_temp_used = 0;
782			/* reserve first tmp for everyone */
783			r600_get_temp(&ctx);
784
785			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
786			if ((r = tgsi_split_constant(&ctx)))
787				goto out_err;
788			if ((r = tgsi_split_literal_constant(&ctx)))
789				goto out_err;
790			if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
791				ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
792			else
793				ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
794			r = ctx.inst_info->process(&ctx);
795			if (r)
796				goto out_err;
797			break;
798		case TGSI_TOKEN_TYPE_PROPERTY:
799			property = &ctx.parse.FullToken.FullProperty;
800			if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
801				if (property->u[0].Data == 1)
802					shader->fs_write_all = TRUE;
803			}
804			break;
805		default:
806			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
807			r = -EINVAL;
808			goto out_err;
809		}
810	}
811	/* export output */
812	noutput = shader->noutput;
813	for (i = 0, pos0 = 0; i < noutput; i++) {
814		memset(&output[i], 0, sizeof(struct r600_bc_output));
815		output[i].gpr = shader->output[i].gpr;
816		output[i].elem_size = 3;
817		output[i].swizzle_x = 0;
818		output[i].swizzle_y = 1;
819		output[i].swizzle_z = 2;
820		output[i].swizzle_w = 3;
821		output[i].burst_count = 1;
822		output[i].barrier = 1;
823		output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
824		output[i].array_base = i - pos0;
825		output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
826		switch (ctx.type) {
827		case TGSI_PROCESSOR_VERTEX:
828			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
829				output[i].array_base = 60;
830				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
831				/* position doesn't count in array_base */
832				pos0++;
833			}
834			if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
835				output[i].array_base = 61;
836				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
837				/* position doesn't count in array_base */
838				pos0++;
839			}
840			break;
841		case TGSI_PROCESSOR_FRAGMENT:
842			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
843				output[i].array_base = shader->output[i].sid;
844				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
845			} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
846				output[i].array_base = 61;
847				output[i].swizzle_x = 2;
848				output[i].swizzle_y = 7;
849				output[i].swizzle_z = output[i].swizzle_w = 7;
850				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
851			} else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
852				output[i].array_base = 61;
853				output[i].swizzle_x = 7;
854				output[i].swizzle_y = 1;
855				output[i].swizzle_z = output[i].swizzle_w = 7;
856				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
857			} else {
858				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
859				r = -EINVAL;
860				goto out_err;
861			}
862			break;
863		default:
864			R600_ERR("unsupported processor type %d\n", ctx.type);
865			r = -EINVAL;
866			goto out_err;
867		}
868	}
869	/* add fake param output for vertex shader if no param is exported */
870	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
871		for (i = 0, pos0 = 0; i < noutput; i++) {
872			if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
873				pos0 = 1;
874				break;
875			}
876		}
877		if (!pos0) {
878			memset(&output[i], 0, sizeof(struct r600_bc_output));
879			output[i].gpr = 0;
880			output[i].elem_size = 3;
881			output[i].swizzle_x = 0;
882			output[i].swizzle_y = 1;
883			output[i].swizzle_z = 2;
884			output[i].swizzle_w = 3;
885			output[i].burst_count = 1;
886			output[i].barrier = 1;
887			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
888			output[i].array_base = 0;
889			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
890			noutput++;
891		}
892	}
893	/* add fake pixel export */
894	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
895		memset(&output[0], 0, sizeof(struct r600_bc_output));
896		output[0].gpr = 0;
897		output[0].elem_size = 3;
898		output[0].swizzle_x = 7;
899		output[0].swizzle_y = 7;
900		output[0].swizzle_z = 7;
901		output[0].swizzle_w = 7;
902		output[0].burst_count = 1;
903		output[0].barrier = 1;
904		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
905		output[0].array_base = 0;
906		output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
907		noutput++;
908	}
909	/* set export done on last export of each type */
910	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
911		if (i == (noutput - 1)) {
912			output[i].end_of_program = 1;
913		}
914		if (!(output_done & (1 << output[i].type))) {
915			output_done |= (1 << output[i].type);
916			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
917		}
918	}
919	/* add output to bytecode */
920	for (i = 0; i < noutput; i++) {
921		r = r600_bc_add_output(ctx.bc, &output[i]);
922		if (r)
923			goto out_err;
924	}
925	free(ctx.literals);
926	tgsi_parse_free(&ctx.parse);
927	return 0;
928out_err:
929	free(ctx.literals);
930	tgsi_parse_free(&ctx.parse);
931	return r;
932}
933
934static int tgsi_unsupported(struct r600_shader_ctx *ctx)
935{
936	R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
937	return -EINVAL;
938}
939
/*
 * Opcode handler that emits nothing and always succeeds.
 */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* nothing to translate */
	return 0;
}
944
945static void r600_bc_src(struct r600_bc_alu_src *bc_src,
946			const struct r600_shader_src *shader_src,
947			unsigned chan)
948{
949	bc_src->sel = shader_src->sel;
950	bc_src->chan = shader_src->swizzle[chan];
951	bc_src->neg = shader_src->neg;
952	bc_src->abs = shader_src->abs;
953	bc_src->rel = shader_src->rel;
954	bc_src->value = shader_src->value[bc_src->chan];
955}
956
957static void tgsi_dst(struct r600_shader_ctx *ctx,
958		     const struct tgsi_full_dst_register *tgsi_dst,
959		     unsigned swizzle,
960		     struct r600_bc_alu_dst *r600_dst)
961{
962	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
963
964	r600_dst->sel = tgsi_dst->Register.Index;
965	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
966	r600_dst->chan = swizzle;
967	r600_dst->write = 1;
968	if (tgsi_dst->Register.Indirect)
969		r600_dst->rel = V_SQ_REL_RELATIVE;
970	if (inst->Instruction.Saturate) {
971		r600_dst->clamp = 1;
972	}
973}
974
/*
 * Return the index (0..3) of the highest channel enabled in a 4-bit
 * writemask.  Returns 0 when no bit in the low four is set.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan from the top; channel 0 is also the fallback result */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1 << chan))
			return chan;
	}
	return 0;
}
986
987static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
988{
989	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
990	struct r600_bc_alu alu;
991	int i, j, r;
992	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
993
994	for (i = 0; i < lasti + 1; i++) {
995		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
996			continue;
997
998		memset(&alu, 0, sizeof(struct r600_bc_alu));
999		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1000
1001		alu.inst = ctx->inst_info->r600_opcode;
1002		if (!swap) {
1003			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1004				r600_bc_src(&alu.src[j], &ctx->src[j], i);
1005			}
1006		} else {
1007			r600_bc_src(&alu.src[0], &ctx->src[1], i);
1008			r600_bc_src(&alu.src[1], &ctx->src[0], i);
1009		}
1010		/* handle some special cases */
1011		switch (ctx->inst_info->tgsi_opcode) {
1012		case TGSI_OPCODE_SUB:
1013			alu.src[1].neg = 1;
1014			break;
1015		case TGSI_OPCODE_ABS:
1016			alu.src[0].abs = 1;
1017			/* negation is performed after absolute value is taken */
1018			alu.src[0].neg = 0;
1019			break;
1020		default:
1021			break;
1022		}
1023		if (i == lasti) {
1024			alu.last = 1;
1025		}
1026		r = r600_bc_add_alu(ctx->bc, &alu);
1027		if (r)
1028			return r;
1029	}
1030	return 0;
1031}
1032
/* Per-channel ALU op with sources in their TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
1037
/* Per-channel ALU op with the first two sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
1042
1043/*
1044 * r600 - trunc to -PI..PI range
1045 * r700 - normalize by dividing by 2PI
1046 * see fdo bug 27901
1047 */
1048static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
1049{
1050	static float half_inv_pi = 1.0 /(3.1415926535 * 2);
1051	static float double_pi = 3.1415926535 * 2;
1052	static float neg_pi = -3.1415926535;
1053
1054	int r;
1055	struct r600_bc_alu alu;
1056
1057	memset(&alu, 0, sizeof(struct r600_bc_alu));
1058	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1059	alu.is_op3 = 1;
1060
1061	alu.dst.chan = 0;
1062	alu.dst.sel = ctx->temp_reg;
1063	alu.dst.write = 1;
1064
1065	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1066
1067	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1068	alu.src[1].chan = 0;
1069	alu.src[1].value = *(uint32_t *)&half_inv_pi;
1070	alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1071	alu.src[2].chan = 0;
1072	alu.last = 1;
1073	r = r600_bc_add_alu(ctx->bc, &alu);
1074	if (r)
1075		return r;
1076
1077	memset(&alu, 0, sizeof(struct r600_bc_alu));
1078	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1079
1080	alu.dst.chan = 0;
1081	alu.dst.sel = ctx->temp_reg;
1082	alu.dst.write = 1;
1083
1084	alu.src[0].sel = ctx->temp_reg;
1085	alu.src[0].chan = 0;
1086	alu.last = 1;
1087	r = r600_bc_add_alu(ctx->bc, &alu);
1088	if (r)
1089		return r;
1090
1091	memset(&alu, 0, sizeof(struct r600_bc_alu));
1092	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1093	alu.is_op3 = 1;
1094
1095	alu.dst.chan = 0;
1096	alu.dst.sel = ctx->temp_reg;
1097	alu.dst.write = 1;
1098
1099	alu.src[0].sel = ctx->temp_reg;
1100	alu.src[0].chan = 0;
1101
1102	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1103	alu.src[1].chan = 0;
1104	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1105	alu.src[2].chan = 0;
1106
1107	if (ctx->bc->chiprev == CHIPREV_R600) {
1108		alu.src[1].value = *(uint32_t *)&double_pi;
1109		alu.src[2].value = *(uint32_t *)&neg_pi;
1110	} else {
1111		alu.src[1].sel = V_SQ_ALU_SRC_1;
1112		alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1113		alu.src[2].neg = 1;
1114	}
1115
1116	alu.last = 1;
1117	r = r600_bc_add_alu(ctx->bc, &alu);
1118	if (r)
1119		return r;
1120	return 0;
1121}
1122
1123static int tgsi_trig(struct r600_shader_ctx *ctx)
1124{
1125	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1126	struct r600_bc_alu alu;
1127	int i, r;
1128	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1129
1130	r = tgsi_setup_trig(ctx);
1131	if (r)
1132		return r;
1133
1134	memset(&alu, 0, sizeof(struct r600_bc_alu));
1135	alu.inst = ctx->inst_info->r600_opcode;
1136	alu.dst.chan = 0;
1137	alu.dst.sel = ctx->temp_reg;
1138	alu.dst.write = 1;
1139
1140	alu.src[0].sel = ctx->temp_reg;
1141	alu.src[0].chan = 0;
1142	alu.last = 1;
1143	r = r600_bc_add_alu(ctx->bc, &alu);
1144	if (r)
1145		return r;
1146
1147	/* replicate result */
1148	for (i = 0; i < lasti + 1; i++) {
1149		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1150			continue;
1151
1152		memset(&alu, 0, sizeof(struct r600_bc_alu));
1153		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1154
1155		alu.src[0].sel = ctx->temp_reg;
1156		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1157		if (i == lasti)
1158			alu.last = 1;
1159		r = r600_bc_add_alu(ctx->bc, &alu);
1160		if (r)
1161			return r;
1162	}
1163	return 0;
1164}
1165
1166static int tgsi_scs(struct r600_shader_ctx *ctx)
1167{
1168	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1169	struct r600_bc_alu alu;
1170	int r;
1171
1172	/* We'll only need the trig stuff if we are going to write to the
1173	 * X or Y components of the destination vector.
1174	 */
1175	if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1176		r = tgsi_setup_trig(ctx);
1177		if (r)
1178			return r;
1179	}
1180
1181	/* dst.x = COS */
1182	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1183		memset(&alu, 0, sizeof(struct r600_bc_alu));
1184		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1185		tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1186
1187		alu.src[0].sel = ctx->temp_reg;
1188		alu.src[0].chan = 0;
1189		alu.last = 1;
1190		r = r600_bc_add_alu(ctx->bc, &alu);
1191		if (r)
1192			return r;
1193	}
1194
1195	/* dst.y = SIN */
1196	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1197		memset(&alu, 0, sizeof(struct r600_bc_alu));
1198		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1199		tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1200
1201		alu.src[0].sel = ctx->temp_reg;
1202		alu.src[0].chan = 0;
1203		alu.last = 1;
1204		r = r600_bc_add_alu(ctx->bc, &alu);
1205		if (r)
1206			return r;
1207	}
1208
1209	/* dst.z = 0.0; */
1210	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1211		memset(&alu, 0, sizeof(struct r600_bc_alu));
1212
1213		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1214
1215		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1216
1217		alu.src[0].sel = V_SQ_ALU_SRC_0;
1218		alu.src[0].chan = 0;
1219
1220		alu.last = 1;
1221
1222		r = r600_bc_add_alu(ctx->bc, &alu);
1223		if (r)
1224			return r;
1225	}
1226
1227	/* dst.w = 1.0; */
1228	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1229		memset(&alu, 0, sizeof(struct r600_bc_alu));
1230
1231		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1232
1233		tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1234
1235		alu.src[0].sel = V_SQ_ALU_SRC_1;
1236		alu.src[0].chan = 0;
1237
1238		alu.last = 1;
1239
1240		r = r600_bc_add_alu(ctx->bc, &alu);
1241		if (r)
1242			return r;
1243	}
1244
1245	return 0;
1246}
1247
1248static int tgsi_kill(struct r600_shader_ctx *ctx)
1249{
1250	struct r600_bc_alu alu;
1251	int i, r;
1252
1253	for (i = 0; i < 4; i++) {
1254		memset(&alu, 0, sizeof(struct r600_bc_alu));
1255		alu.inst = ctx->inst_info->r600_opcode;
1256
1257		alu.dst.chan = i;
1258
1259		alu.src[0].sel = V_SQ_ALU_SRC_0;
1260
1261		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1262			alu.src[1].sel = V_SQ_ALU_SRC_1;
1263			alu.src[1].neg = 1;
1264		} else {
1265			r600_bc_src(&alu.src[1], &ctx->src[0], i);
1266		}
1267		if (i == 3) {
1268			alu.last = 1;
1269		}
1270		r = r600_bc_add_alu(ctx->bc, &alu);
1271		if (r)
1272			return r;
1273	}
1274
1275	/* kill must be last in ALU */
1276	ctx->bc->force_add_cf = 1;
1277	ctx->shader->uses_kill = TRUE;
1278	return 0;
1279}
1280
1281static int tgsi_lit(struct r600_shader_ctx *ctx)
1282{
1283	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1284	struct r600_bc_alu alu;
1285	int r;
1286
1287	/* dst.x, <- 1.0  */
1288	memset(&alu, 0, sizeof(struct r600_bc_alu));
1289	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1290	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
1291	alu.src[0].chan = 0;
1292	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1293	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1294	r = r600_bc_add_alu(ctx->bc, &alu);
1295	if (r)
1296		return r;
1297
1298	/* dst.y = max(src.x, 0.0) */
1299	memset(&alu, 0, sizeof(struct r600_bc_alu));
1300	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1301	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1302	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1303	alu.src[1].chan = 0;
1304	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1305	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1306	r = r600_bc_add_alu(ctx->bc, &alu);
1307	if (r)
1308		return r;
1309
1310	/* dst.w, <- 1.0  */
1311	memset(&alu, 0, sizeof(struct r600_bc_alu));
1312	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1313	alu.src[0].sel  = V_SQ_ALU_SRC_1;
1314	alu.src[0].chan = 0;
1315	tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1316	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1317	alu.last = 1;
1318	r = r600_bc_add_alu(ctx->bc, &alu);
1319	if (r)
1320		return r;
1321
1322	if (inst->Dst[0].Register.WriteMask & (1 << 2))
1323	{
1324		int chan;
1325		int sel;
1326
1327		/* dst.z = log(src.y) */
1328		memset(&alu, 0, sizeof(struct r600_bc_alu));
1329		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1330		r600_bc_src(&alu.src[0], &ctx->src[0], 1);
1331		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1332		alu.last = 1;
1333		r = r600_bc_add_alu(ctx->bc, &alu);
1334		if (r)
1335			return r;
1336
1337		chan = alu.dst.chan;
1338		sel = alu.dst.sel;
1339
1340		/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1341		memset(&alu, 0, sizeof(struct r600_bc_alu));
1342		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1343		r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1344		alu.src[1].sel  = sel;
1345		alu.src[1].chan = chan;
1346
1347		r600_bc_src(&alu.src[2], &ctx->src[0], 0);
1348		alu.dst.sel = ctx->temp_reg;
1349		alu.dst.chan = 0;
1350		alu.dst.write = 1;
1351		alu.is_op3 = 1;
1352		alu.last = 1;
1353		r = r600_bc_add_alu(ctx->bc, &alu);
1354		if (r)
1355			return r;
1356
1357		/* dst.z = exp(tmp.x) */
1358		memset(&alu, 0, sizeof(struct r600_bc_alu));
1359		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1360		alu.src[0].sel = ctx->temp_reg;
1361		alu.src[0].chan = 0;
1362		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1363		alu.last = 1;
1364		r = r600_bc_add_alu(ctx->bc, &alu);
1365		if (r)
1366			return r;
1367	}
1368	return 0;
1369}
1370
1371static int tgsi_rsq(struct r600_shader_ctx *ctx)
1372{
1373	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1374	struct r600_bc_alu alu;
1375	int i, r;
1376
1377	memset(&alu, 0, sizeof(struct r600_bc_alu));
1378
1379	/* FIXME:
1380	 * For state trackers other than OpenGL, we'll want to use
1381	 * _RECIPSQRT_IEEE instead.
1382	 */
1383	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1384
1385	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1386		r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1387		alu.src[i].abs = 1;
1388	}
1389	alu.dst.sel = ctx->temp_reg;
1390	alu.dst.write = 1;
1391	alu.last = 1;
1392	r = r600_bc_add_alu(ctx->bc, &alu);
1393	if (r)
1394		return r;
1395	/* replicate result */
1396	return tgsi_helper_tempx_replicate(ctx);
1397}
1398
1399static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1400{
1401	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1402	struct r600_bc_alu alu;
1403	int i, r;
1404
1405	for (i = 0; i < 4; i++) {
1406		memset(&alu, 0, sizeof(struct r600_bc_alu));
1407		alu.src[0].sel = ctx->temp_reg;
1408		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1409		alu.dst.chan = i;
1410		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1411		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1412		if (i == 3)
1413			alu.last = 1;
1414		r = r600_bc_add_alu(ctx->bc, &alu);
1415		if (r)
1416			return r;
1417	}
1418	return 0;
1419}
1420
1421static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1422{
1423	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1424	struct r600_bc_alu alu;
1425	int i, r;
1426
1427	memset(&alu, 0, sizeof(struct r600_bc_alu));
1428	alu.inst = ctx->inst_info->r600_opcode;
1429	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1430		r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1431	}
1432	alu.dst.sel = ctx->temp_reg;
1433	alu.dst.write = 1;
1434	alu.last = 1;
1435	r = r600_bc_add_alu(ctx->bc, &alu);
1436	if (r)
1437		return r;
1438	/* replicate result */
1439	return tgsi_helper_tempx_replicate(ctx);
1440}
1441
1442static int tgsi_pow(struct r600_shader_ctx *ctx)
1443{
1444	struct r600_bc_alu alu;
1445	int r;
1446
1447	/* LOG2(a) */
1448	memset(&alu, 0, sizeof(struct r600_bc_alu));
1449	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1450	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1451	alu.dst.sel = ctx->temp_reg;
1452	alu.dst.write = 1;
1453	alu.last = 1;
1454	r = r600_bc_add_alu(ctx->bc, &alu);
1455	if (r)
1456		return r;
1457	/* b * LOG2(a) */
1458	memset(&alu, 0, sizeof(struct r600_bc_alu));
1459	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1460	r600_bc_src(&alu.src[0], &ctx->src[1], 0);
1461	alu.src[1].sel = ctx->temp_reg;
1462	alu.dst.sel = ctx->temp_reg;
1463	alu.dst.write = 1;
1464	alu.last = 1;
1465	r = r600_bc_add_alu(ctx->bc, &alu);
1466	if (r)
1467		return r;
1468	/* POW(a,b) = EXP2(b * LOG2(a))*/
1469	memset(&alu, 0, sizeof(struct r600_bc_alu));
1470	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1471	alu.src[0].sel = ctx->temp_reg;
1472	alu.dst.sel = ctx->temp_reg;
1473	alu.dst.write = 1;
1474	alu.last = 1;
1475	r = r600_bc_add_alu(ctx->bc, &alu);
1476	if (r)
1477		return r;
1478	return tgsi_helper_tempx_replicate(ctx);
1479}
1480
1481static int tgsi_ssg(struct r600_shader_ctx *ctx)
1482{
1483	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1484	struct r600_bc_alu alu;
1485	int i, r;
1486
1487	/* tmp = (src > 0 ? 1 : src) */
1488	for (i = 0; i < 4; i++) {
1489		memset(&alu, 0, sizeof(struct r600_bc_alu));
1490		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1491		alu.is_op3 = 1;
1492
1493		alu.dst.sel = ctx->temp_reg;
1494		alu.dst.chan = i;
1495
1496		r600_bc_src(&alu.src[0], &ctx->src[0], i);
1497		alu.src[1].sel = V_SQ_ALU_SRC_1;
1498		r600_bc_src(&alu.src[2], &ctx->src[0], i);
1499
1500		if (i == 3)
1501			alu.last = 1;
1502		r = r600_bc_add_alu(ctx->bc, &alu);
1503		if (r)
1504			return r;
1505	}
1506
1507	/* dst = (-tmp > 0 ? -1 : tmp) */
1508	for (i = 0; i < 4; i++) {
1509		memset(&alu, 0, sizeof(struct r600_bc_alu));
1510		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1511		alu.is_op3 = 1;
1512		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1513
1514		alu.src[0].sel = ctx->temp_reg;
1515		alu.src[0].chan = i;
1516		alu.src[0].neg = 1;
1517
1518		alu.src[1].sel = V_SQ_ALU_SRC_1;
1519		alu.src[1].neg = 1;
1520
1521		alu.src[2].sel = ctx->temp_reg;
1522		alu.src[2].chan = i;
1523
1524		if (i == 3)
1525			alu.last = 1;
1526		r = r600_bc_add_alu(ctx->bc, &alu);
1527		if (r)
1528			return r;
1529	}
1530	return 0;
1531}
1532
1533static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1534{
1535	struct r600_bc_alu alu;
1536	int i, r;
1537
1538	for (i = 0; i < 4; i++) {
1539		memset(&alu, 0, sizeof(struct r600_bc_alu));
1540		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1541			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1542			alu.dst.chan = i;
1543		} else {
1544			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1545			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1546			alu.src[0].sel = ctx->temp_reg;
1547			alu.src[0].chan = i;
1548		}
1549		if (i == 3) {
1550			alu.last = 1;
1551		}
1552		r = r600_bc_add_alu(ctx->bc, &alu);
1553		if (r)
1554			return r;
1555	}
1556	return 0;
1557}
1558
1559static int tgsi_op3(struct r600_shader_ctx *ctx)
1560{
1561	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1562	struct r600_bc_alu alu;
1563	int i, j, r;
1564	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1565
1566	for (i = 0; i < lasti + 1; i++) {
1567		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1568			continue;
1569
1570		memset(&alu, 0, sizeof(struct r600_bc_alu));
1571		alu.inst = ctx->inst_info->r600_opcode;
1572		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1573			r600_bc_src(&alu.src[j], &ctx->src[j], i);
1574		}
1575
1576		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1577		alu.dst.chan = i;
1578		alu.dst.write = 1;
1579		alu.is_op3 = 1;
1580		if (i == lasti) {
1581			alu.last = 1;
1582		}
1583		r = r600_bc_add_alu(ctx->bc, &alu);
1584		if (r)
1585			return r;
1586	}
1587	return 0;
1588}
1589
1590static int tgsi_dp(struct r600_shader_ctx *ctx)
1591{
1592	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1593	struct r600_bc_alu alu;
1594	int i, j, r;
1595
1596	for (i = 0; i < 4; i++) {
1597		memset(&alu, 0, sizeof(struct r600_bc_alu));
1598		alu.inst = ctx->inst_info->r600_opcode;
1599		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1600			r600_bc_src(&alu.src[j], &ctx->src[j], i);
1601		}
1602
1603		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1604		alu.dst.chan = i;
1605		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1606		/* handle some special cases */
1607		switch (ctx->inst_info->tgsi_opcode) {
1608		case TGSI_OPCODE_DP2:
1609			if (i > 1) {
1610				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1611				alu.src[0].chan = alu.src[1].chan = 0;
1612			}
1613			break;
1614		case TGSI_OPCODE_DP3:
1615			if (i > 2) {
1616				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1617				alu.src[0].chan = alu.src[1].chan = 0;
1618			}
1619			break;
1620		case TGSI_OPCODE_DPH:
1621			if (i == 3) {
1622				alu.src[0].sel = V_SQ_ALU_SRC_1;
1623				alu.src[0].chan = 0;
1624				alu.src[0].neg = 0;
1625			}
1626			break;
1627		default:
1628			break;
1629		}
1630		if (i == 3) {
1631			alu.last = 1;
1632		}
1633		r = r600_bc_add_alu(ctx->bc, &alu);
1634		if (r)
1635			return r;
1636	}
1637	return 0;
1638}
1639
1640static int tgsi_tex(struct r600_shader_ctx *ctx)
1641{
1642	static float one_point_five = 1.5f;
1643	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1644	struct r600_bc_tex tex;
1645	struct r600_bc_alu alu;
1646	unsigned src_gpr;
1647	int r, i;
1648	int opcode;
1649	boolean src_not_temp =
1650		inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&
1651		inst->Src[0].Register.File != TGSI_FILE_INPUT;
1652
1653	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1654
1655	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1656		/* Add perspective divide */
1657		memset(&alu, 0, sizeof(struct r600_bc_alu));
1658		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1659		r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1660
1661		alu.dst.sel = ctx->temp_reg;
1662		alu.dst.chan = 3;
1663		alu.last = 1;
1664		alu.dst.write = 1;
1665		r = r600_bc_add_alu(ctx->bc, &alu);
1666		if (r)
1667			return r;
1668
1669		for (i = 0; i < 3; i++) {
1670			memset(&alu, 0, sizeof(struct r600_bc_alu));
1671			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1672			alu.src[0].sel = ctx->temp_reg;
1673			alu.src[0].chan = 3;
1674			r600_bc_src(&alu.src[1], &ctx->src[0], i);
1675			alu.dst.sel = ctx->temp_reg;
1676			alu.dst.chan = i;
1677			alu.dst.write = 1;
1678			r = r600_bc_add_alu(ctx->bc, &alu);
1679			if (r)
1680				return r;
1681		}
1682		memset(&alu, 0, sizeof(struct r600_bc_alu));
1683		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1684		alu.src[0].sel = V_SQ_ALU_SRC_1;
1685		alu.src[0].chan = 0;
1686		alu.dst.sel = ctx->temp_reg;
1687		alu.dst.chan = 3;
1688		alu.last = 1;
1689		alu.dst.write = 1;
1690		r = r600_bc_add_alu(ctx->bc, &alu);
1691		if (r)
1692			return r;
1693		src_not_temp = FALSE;
1694		src_gpr = ctx->temp_reg;
1695	}
1696
1697	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1698		int src_chan, src2_chan;
1699
1700		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1701		for (i = 0; i < 4; i++) {
1702			memset(&alu, 0, sizeof(struct r600_bc_alu));
1703			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1704			switch (i) {
1705			case 0:
1706				src_chan = 2;
1707				src2_chan = 1;
1708				break;
1709			case 1:
1710				src_chan = 2;
1711				src2_chan = 0;
1712				break;
1713			case 2:
1714				src_chan = 0;
1715				src2_chan = 2;
1716				break;
1717			case 3:
1718				src_chan = 1;
1719				src2_chan = 2;
1720				break;
1721			default:
1722				assert(0);
1723				src_chan = 0;
1724				src2_chan = 0;
1725				break;
1726			}
1727			r600_bc_src(&alu.src[0], &ctx->src[0], src_chan);
1728			r600_bc_src(&alu.src[1], &ctx->src[0], src2_chan);
1729			alu.dst.sel = ctx->temp_reg;
1730			alu.dst.chan = i;
1731			if (i == 3)
1732				alu.last = 1;
1733			alu.dst.write = 1;
1734			r = r600_bc_add_alu(ctx->bc, &alu);
1735			if (r)
1736				return r;
1737		}
1738
1739		/* tmp1.z = RCP_e(|tmp1.z|) */
1740		memset(&alu, 0, sizeof(struct r600_bc_alu));
1741		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1742		alu.src[0].sel = ctx->temp_reg;
1743		alu.src[0].chan = 2;
1744		alu.src[0].abs = 1;
1745		alu.dst.sel = ctx->temp_reg;
1746		alu.dst.chan = 2;
1747		alu.dst.write = 1;
1748		alu.last = 1;
1749		r = r600_bc_add_alu(ctx->bc, &alu);
1750		if (r)
1751			return r;
1752
1753		/* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
1754		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
1755		 * muladd has no writemask, have to use another temp
1756		 */
1757		memset(&alu, 0, sizeof(struct r600_bc_alu));
1758		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1759		alu.is_op3 = 1;
1760
1761		alu.src[0].sel = ctx->temp_reg;
1762		alu.src[0].chan = 0;
1763		alu.src[1].sel = ctx->temp_reg;
1764		alu.src[1].chan = 2;
1765
1766		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1767		alu.src[2].chan = 0;
1768		alu.src[2].value = *(uint32_t *)&one_point_five;
1769
1770		alu.dst.sel = ctx->temp_reg;
1771		alu.dst.chan = 0;
1772		alu.dst.write = 1;
1773
1774		r = r600_bc_add_alu(ctx->bc, &alu);
1775		if (r)
1776			return r;
1777
1778		memset(&alu, 0, sizeof(struct r600_bc_alu));
1779		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1780		alu.is_op3 = 1;
1781
1782		alu.src[0].sel = ctx->temp_reg;
1783		alu.src[0].chan = 1;
1784		alu.src[1].sel = ctx->temp_reg;
1785		alu.src[1].chan = 2;
1786
1787		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1788		alu.src[2].chan = 0;
1789		alu.src[2].value = *(uint32_t *)&one_point_five;
1790
1791		alu.dst.sel = ctx->temp_reg;
1792		alu.dst.chan = 1;
1793		alu.dst.write = 1;
1794
1795		alu.last = 1;
1796		r = r600_bc_add_alu(ctx->bc, &alu);
1797		if (r)
1798			return r;
1799
1800		src_not_temp = FALSE;
1801		src_gpr = ctx->temp_reg;
1802	}
1803
1804	if (src_not_temp) {
1805		for (i = 0; i < 4; i++) {
1806			memset(&alu, 0, sizeof(struct r600_bc_alu));
1807			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1808			r600_bc_src(&alu.src[0], &ctx->src[0], i);
1809			alu.dst.sel = ctx->temp_reg;
1810			alu.dst.chan = i;
1811			if (i == 3)
1812				alu.last = 1;
1813			alu.dst.write = 1;
1814			r = r600_bc_add_alu(ctx->bc, &alu);
1815			if (r)
1816				return r;
1817		}
1818		src_gpr = ctx->temp_reg;
1819	}
1820
1821	opcode = ctx->inst_info->r600_opcode;
1822	if (opcode == SQ_TEX_INST_SAMPLE &&
1823	    (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
1824		opcode = SQ_TEX_INST_SAMPLE_C;
1825
1826	memset(&tex, 0, sizeof(struct r600_bc_tex));
1827	tex.inst = opcode;
1828	tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1829	tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
1830	tex.src_gpr = src_gpr;
1831	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1832	tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
1833	tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
1834	tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
1835	tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
1836	tex.src_sel_x = 0;
1837	tex.src_sel_y = 1;
1838	tex.src_sel_z = 2;
1839	tex.src_sel_w = 3;
1840
1841	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1842		tex.src_sel_x = 1;
1843		tex.src_sel_y = 0;
1844		tex.src_sel_z = 3;
1845		tex.src_sel_w = 1;
1846	}
1847
1848	if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1849		tex.coord_type_x = 1;
1850		tex.coord_type_y = 1;
1851		tex.coord_type_z = 1;
1852		tex.coord_type_w = 1;
1853	}
1854
1855	if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) {
1856		tex.coord_type_z = 0;
1857		tex.src_sel_z = 1;
1858	} else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY)
1859		tex.coord_type_z = 0;
1860
1861	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
1862		tex.src_sel_w = 2;
1863
1864	r = r600_bc_add_tex(ctx->bc, &tex);
1865	if (r)
1866		return r;
1867
1868	/* add shadow ambient support  - gallium doesn't do it yet */
1869	return 0;
1870}
1871
1872static int tgsi_lrp(struct r600_shader_ctx *ctx)
1873{
1874	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1875	struct r600_bc_alu alu;
1876	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1877	unsigned i;
1878	int r;
1879
1880	/* optimize if it's just an equal balance */
1881	if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
1882		for (i = 0; i < lasti + 1; i++) {
1883			if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1884				continue;
1885
1886			memset(&alu, 0, sizeof(struct r600_bc_alu));
1887			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1888			r600_bc_src(&alu.src[0], &ctx->src[1], i);
1889			r600_bc_src(&alu.src[1], &ctx->src[2], i);
1890			alu.omod = 3;
1891			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1892			alu.dst.chan = i;
1893			if (i == lasti) {
1894				alu.last = 1;
1895			}
1896			r = r600_bc_add_alu(ctx->bc, &alu);
1897			if (r)
1898				return r;
1899		}
1900		return 0;
1901	}
1902
1903	/* 1 - src0 */
1904	for (i = 0; i < lasti + 1; i++) {
1905		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1906			continue;
1907
1908		memset(&alu, 0, sizeof(struct r600_bc_alu));
1909		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1910		alu.src[0].sel = V_SQ_ALU_SRC_1;
1911		alu.src[0].chan = 0;
1912		r600_bc_src(&alu.src[1], &ctx->src[0], i);
1913		alu.src[1].neg = 1;
1914		alu.dst.sel = ctx->temp_reg;
1915		alu.dst.chan = i;
1916		if (i == lasti) {
1917			alu.last = 1;
1918		}
1919		alu.dst.write = 1;
1920		r = r600_bc_add_alu(ctx->bc, &alu);
1921		if (r)
1922			return r;
1923	}
1924
1925	/* (1 - src0) * src2 */
1926	for (i = 0; i < lasti + 1; i++) {
1927		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1928			continue;
1929
1930		memset(&alu, 0, sizeof(struct r600_bc_alu));
1931		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1932		alu.src[0].sel = ctx->temp_reg;
1933		alu.src[0].chan = i;
1934		r600_bc_src(&alu.src[1], &ctx->src[2], i);
1935		alu.dst.sel = ctx->temp_reg;
1936		alu.dst.chan = i;
1937		if (i == lasti) {
1938			alu.last = 1;
1939		}
1940		alu.dst.write = 1;
1941		r = r600_bc_add_alu(ctx->bc, &alu);
1942		if (r)
1943			return r;
1944	}
1945
1946	/* src0 * src1 + (1 - src0) * src2 */
1947	for (i = 0; i < lasti + 1; i++) {
1948		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1949			continue;
1950
1951		memset(&alu, 0, sizeof(struct r600_bc_alu));
1952		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1953		alu.is_op3 = 1;
1954		r600_bc_src(&alu.src[0], &ctx->src[0], i);
1955		r600_bc_src(&alu.src[1], &ctx->src[1], i);
1956		alu.src[2].sel = ctx->temp_reg;
1957		alu.src[2].chan = i;
1958
1959		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1960		alu.dst.chan = i;
1961		if (i == lasti) {
1962			alu.last = 1;
1963		}
1964		r = r600_bc_add_alu(ctx->bc, &alu);
1965		if (r)
1966			return r;
1967	}
1968	return 0;
1969}
1970
1971static int tgsi_cmp(struct r600_shader_ctx *ctx)
1972{
1973	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1974	struct r600_bc_alu alu;
1975	int i, r;
1976	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1977
1978	for (i = 0; i < lasti + 1; i++) {
1979		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1980			continue;
1981
1982		memset(&alu, 0, sizeof(struct r600_bc_alu));
1983		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
1984		r600_bc_src(&alu.src[0], &ctx->src[0], i);
1985		r600_bc_src(&alu.src[1], &ctx->src[2], i);
1986		r600_bc_src(&alu.src[2], &ctx->src[1], i);
1987		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1988		alu.dst.chan = i;
1989		alu.dst.write = 1;
1990		alu.is_op3 = 1;
1991		if (i == lasti)
1992			alu.last = 1;
1993		r = r600_bc_add_alu(ctx->bc, &alu);
1994		if (r)
1995			return r;
1996	}
1997	return 0;
1998}
1999
2000static int tgsi_xpd(struct r600_shader_ctx *ctx)
2001{
2002	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2003	struct r600_bc_alu alu;
2004	uint32_t use_temp = 0;
2005	int i, r;
2006
2007	if (inst->Dst[0].Register.WriteMask != 0xf)
2008		use_temp = 1;
2009
2010	for (i = 0; i < 4; i++) {
2011		memset(&alu, 0, sizeof(struct r600_bc_alu));
2012		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2013
2014		switch (i) {
2015		case 0:
2016			r600_bc_src(&alu.src[0], &ctx->src[0], 2);
2017			break;
2018		case 1:
2019			r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2020			break;
2021		case 2:
2022			r600_bc_src(&alu.src[0], &ctx->src[0], 1);
2023			break;
2024		case 3:
2025			alu.src[0].sel = V_SQ_ALU_SRC_0;
2026			alu.src[0].chan = i;
2027		}
2028
2029		switch (i) {
2030		case 0:
2031			r600_bc_src(&alu.src[1], &ctx->src[1], 1);
2032			break;
2033		case 1:
2034			r600_bc_src(&alu.src[1], &ctx->src[1], 2);
2035			break;
2036		case 2:
2037			r600_bc_src(&alu.src[1], &ctx->src[1], 0);
2038			break;
2039		case 3:
2040			alu.src[1].sel = V_SQ_ALU_SRC_0;
2041			alu.src[1].chan = i;
2042		}
2043
2044		alu.dst.sel = ctx->temp_reg;
2045		alu.dst.chan = i;
2046		alu.dst.write = 1;
2047
2048		if (i == 3)
2049			alu.last = 1;
2050		r = r600_bc_add_alu(ctx->bc, &alu);
2051		if (r)
2052			return r;
2053	}
2054
2055	for (i = 0; i < 4; i++) {
2056		memset(&alu, 0, sizeof(struct r600_bc_alu));
2057		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2058
2059		switch (i) {
2060		case 0:
2061			r600_bc_src(&alu.src[0], &ctx->src[0], 1);
2062			break;
2063		case 1:
2064			r600_bc_src(&alu.src[0], &ctx->src[0], 2);
2065			break;
2066		case 2:
2067			r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2068			break;
2069		case 3:
2070			alu.src[0].sel = V_SQ_ALU_SRC_0;
2071			alu.src[0].chan = i;
2072		}
2073
2074		switch (i) {
2075		case 0:
2076			r600_bc_src(&alu.src[1], &ctx->src[1], 2);
2077			break;
2078		case 1:
2079			r600_bc_src(&alu.src[1], &ctx->src[1], 0);
2080			break;
2081		case 2:
2082			r600_bc_src(&alu.src[1], &ctx->src[1], 1);
2083			break;
2084		case 3:
2085			alu.src[1].sel = V_SQ_ALU_SRC_0;
2086			alu.src[1].chan = i;
2087		}
2088
2089		alu.src[2].sel = ctx->temp_reg;
2090		alu.src[2].neg = 1;
2091		alu.src[2].chan = i;
2092
2093		if (use_temp)
2094			alu.dst.sel = ctx->temp_reg;
2095		else
2096			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2097		alu.dst.chan = i;
2098		alu.dst.write = 1;
2099		alu.is_op3 = 1;
2100		if (i == 3)
2101			alu.last = 1;
2102		r = r600_bc_add_alu(ctx->bc, &alu);
2103		if (r)
2104			return r;
2105	}
2106	if (use_temp)
2107		return tgsi_helper_copy(ctx, inst);
2108	return 0;
2109}
2110
2111static int tgsi_exp(struct r600_shader_ctx *ctx)
2112{
2113	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2114	struct r600_bc_alu alu;
2115	int r;
2116
2117	/* result.x = 2^floor(src); */
2118	if (inst->Dst[0].Register.WriteMask & 1) {
2119		memset(&alu, 0, sizeof(struct r600_bc_alu));
2120
2121		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2122		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2123
2124		alu.dst.sel = ctx->temp_reg;
2125		alu.dst.chan = 0;
2126		alu.dst.write = 1;
2127		alu.last = 1;
2128		r = r600_bc_add_alu(ctx->bc, &alu);
2129		if (r)
2130			return r;
2131
2132		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2133		alu.src[0].sel = ctx->temp_reg;
2134		alu.src[0].chan = 0;
2135
2136		alu.dst.sel = ctx->temp_reg;
2137		alu.dst.chan = 0;
2138		alu.dst.write = 1;
2139		alu.last = 1;
2140		r = r600_bc_add_alu(ctx->bc, &alu);
2141		if (r)
2142			return r;
2143	}
2144
2145	/* result.y = tmp - floor(tmp); */
2146	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2147		memset(&alu, 0, sizeof(struct r600_bc_alu));
2148
2149		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2150		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2151
2152		alu.dst.sel = ctx->temp_reg;
2153//		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2154//		if (r)
2155//			return r;
2156		alu.dst.write = 1;
2157		alu.dst.chan = 1;
2158
2159		alu.last = 1;
2160
2161		r = r600_bc_add_alu(ctx->bc, &alu);
2162		if (r)
2163			return r;
2164	}
2165
2166	/* result.z = RoughApprox2ToX(tmp);*/
2167	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2168		memset(&alu, 0, sizeof(struct r600_bc_alu));
2169		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2170		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2171
2172		alu.dst.sel = ctx->temp_reg;
2173		alu.dst.write = 1;
2174		alu.dst.chan = 2;
2175
2176		alu.last = 1;
2177
2178		r = r600_bc_add_alu(ctx->bc, &alu);
2179		if (r)
2180			return r;
2181	}
2182
2183	/* result.w = 1.0;*/
2184	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2185		memset(&alu, 0, sizeof(struct r600_bc_alu));
2186
2187		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2188		alu.src[0].sel = V_SQ_ALU_SRC_1;
2189		alu.src[0].chan = 0;
2190
2191		alu.dst.sel = ctx->temp_reg;
2192		alu.dst.chan = 3;
2193		alu.dst.write = 1;
2194		alu.last = 1;
2195		r = r600_bc_add_alu(ctx->bc, &alu);
2196		if (r)
2197			return r;
2198	}
2199	return tgsi_helper_copy(ctx, inst);
2200}
2201
2202static int tgsi_log(struct r600_shader_ctx *ctx)
2203{
2204	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2205	struct r600_bc_alu alu;
2206	int r;
2207
2208	/* result.x = floor(log2(src)); */
2209	if (inst->Dst[0].Register.WriteMask & 1) {
2210		memset(&alu, 0, sizeof(struct r600_bc_alu));
2211
2212		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2213		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2214
2215		alu.dst.sel = ctx->temp_reg;
2216		alu.dst.chan = 0;
2217		alu.dst.write = 1;
2218		alu.last = 1;
2219		r = r600_bc_add_alu(ctx->bc, &alu);
2220		if (r)
2221			return r;
2222
2223		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2224		alu.src[0].sel = ctx->temp_reg;
2225		alu.src[0].chan = 0;
2226
2227		alu.dst.sel = ctx->temp_reg;
2228		alu.dst.chan = 0;
2229		alu.dst.write = 1;
2230		alu.last = 1;
2231
2232		r = r600_bc_add_alu(ctx->bc, &alu);
2233		if (r)
2234			return r;
2235	}
2236
2237	/* result.y = src.x / (2 ^ floor(log2(src.x))); */
2238	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2239		memset(&alu, 0, sizeof(struct r600_bc_alu));
2240
2241		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2242		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2243
2244		alu.dst.sel = ctx->temp_reg;
2245		alu.dst.chan = 1;
2246		alu.dst.write = 1;
2247		alu.last = 1;
2248
2249		r = r600_bc_add_alu(ctx->bc, &alu);
2250		if (r)
2251			return r;
2252
2253		memset(&alu, 0, sizeof(struct r600_bc_alu));
2254
2255		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2256		alu.src[0].sel = ctx->temp_reg;
2257		alu.src[0].chan = 1;
2258
2259		alu.dst.sel = ctx->temp_reg;
2260		alu.dst.chan = 1;
2261		alu.dst.write = 1;
2262		alu.last = 1;
2263
2264		r = r600_bc_add_alu(ctx->bc, &alu);
2265		if (r)
2266			return r;
2267
2268		memset(&alu, 0, sizeof(struct r600_bc_alu));
2269
2270		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2271		alu.src[0].sel = ctx->temp_reg;
2272		alu.src[0].chan = 1;
2273
2274		alu.dst.sel = ctx->temp_reg;
2275		alu.dst.chan = 1;
2276		alu.dst.write = 1;
2277		alu.last = 1;
2278
2279		r = r600_bc_add_alu(ctx->bc, &alu);
2280		if (r)
2281			return r;
2282
2283		memset(&alu, 0, sizeof(struct r600_bc_alu));
2284
2285		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2286		alu.src[0].sel = ctx->temp_reg;
2287		alu.src[0].chan = 1;
2288
2289		alu.dst.sel = ctx->temp_reg;
2290		alu.dst.chan = 1;
2291		alu.dst.write = 1;
2292		alu.last = 1;
2293
2294		r = r600_bc_add_alu(ctx->bc, &alu);
2295		if (r)
2296			return r;
2297
2298		memset(&alu, 0, sizeof(struct r600_bc_alu));
2299
2300		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2301
2302		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2303
2304		alu.src[1].sel = ctx->temp_reg;
2305		alu.src[1].chan = 1;
2306
2307		alu.dst.sel = ctx->temp_reg;
2308		alu.dst.chan = 1;
2309		alu.dst.write = 1;
2310		alu.last = 1;
2311
2312		r = r600_bc_add_alu(ctx->bc, &alu);
2313		if (r)
2314			return r;
2315	}
2316
2317	/* result.z = log2(src);*/
2318	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2319		memset(&alu, 0, sizeof(struct r600_bc_alu));
2320
2321		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2322		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2323
2324		alu.dst.sel = ctx->temp_reg;
2325		alu.dst.write = 1;
2326		alu.dst.chan = 2;
2327		alu.last = 1;
2328
2329		r = r600_bc_add_alu(ctx->bc, &alu);
2330		if (r)
2331			return r;
2332	}
2333
2334	/* result.w = 1.0; */
2335	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2336		memset(&alu, 0, sizeof(struct r600_bc_alu));
2337
2338		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2339		alu.src[0].sel = V_SQ_ALU_SRC_1;
2340		alu.src[0].chan = 0;
2341
2342		alu.dst.sel = ctx->temp_reg;
2343		alu.dst.chan = 3;
2344		alu.dst.write = 1;
2345		alu.last = 1;
2346
2347		r = r600_bc_add_alu(ctx->bc, &alu);
2348		if (r)
2349			return r;
2350	}
2351
2352	return tgsi_helper_copy(ctx, inst);
2353}
2354
2355static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2356{
2357	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2358	struct r600_bc_alu alu;
2359	int r;
2360
2361	memset(&alu, 0, sizeof(struct r600_bc_alu));
2362
2363	switch (inst->Instruction.Opcode) {
2364	case TGSI_OPCODE_ARL:
2365		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2366		break;
2367	case TGSI_OPCODE_ARR:
2368		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2369		break;
2370	default:
2371		assert(0);
2372		return -1;
2373	}
2374
2375	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2376	alu.last = 1;
2377	alu.dst.sel = ctx->ar_reg;
2378	alu.dst.write = 1;
2379	r = r600_bc_add_alu(ctx->bc, &alu);
2380	if (r)
2381		return r;
2382
2383	/* TODO: Note that the MOVA can be avoided if we never use AR for
2384	 * indexing non-CB registers in the current ALU clause. Similarly, we
2385	 * need to load AR from ar_reg again if we started a new clause
2386	 * between ARL and AR usage. The easy way to do that is to remove
2387	 * the MOVA here, and load it for the first AR access after ar_reg
2388	 * has been modified in each clause. */
2389	memset(&alu, 0, sizeof(struct r600_bc_alu));
2390	alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2391	alu.src[0].sel = ctx->ar_reg;
2392	alu.src[0].chan = 0;
2393	alu.last = 1;
2394	r = r600_bc_add_alu(ctx->bc, &alu);
2395	if (r)
2396		return r;
2397	return 0;
2398}
2399static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2400{
2401	/* TODO from r600c, ar values don't persist between clauses */
2402	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2403	struct r600_bc_alu alu;
2404	int r;
2405
2406	switch (inst->Instruction.Opcode) {
2407	case TGSI_OPCODE_ARL:
2408		memset(&alu, 0, sizeof(alu));
2409		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
2410		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2411		alu.dst.sel = ctx->ar_reg;
2412		alu.dst.write = 1;
2413		alu.last = 1;
2414
2415		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2416			return r;
2417
2418		memset(&alu, 0, sizeof(alu));
2419		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2420		alu.src[0].sel = ctx->ar_reg;
2421		alu.dst.sel = ctx->ar_reg;
2422		alu.dst.write = 1;
2423		alu.last = 1;
2424
2425		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2426			return r;
2427		break;
2428	case TGSI_OPCODE_ARR:
2429		memset(&alu, 0, sizeof(alu));
2430		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2431		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2432		alu.dst.sel = ctx->ar_reg;
2433		alu.dst.write = 1;
2434		alu.last = 1;
2435
2436		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2437			return r;
2438		break;
2439	default:
2440		assert(0);
2441		return -1;
2442	}
2443
2444	memset(&alu, 0, sizeof(alu));
2445	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2446	alu.src[0].sel = ctx->ar_reg;
2447	alu.last = 1;
2448
2449	r = r600_bc_add_alu(ctx->bc, &alu);
2450	if (r)
2451		return r;
2452	ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2453	return 0;
2454}
2455
2456static int tgsi_opdst(struct r600_shader_ctx *ctx)
2457{
2458	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2459	struct r600_bc_alu alu;
2460	int i, r = 0;
2461
2462	for (i = 0; i < 4; i++) {
2463		memset(&alu, 0, sizeof(struct r600_bc_alu));
2464
2465		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2466		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2467
2468		if (i == 0 || i == 3) {
2469			alu.src[0].sel = V_SQ_ALU_SRC_1;
2470		} else {
2471			r600_bc_src(&alu.src[0], &ctx->src[0], i);
2472		}
2473
2474		if (i == 0 || i == 2) {
2475			alu.src[1].sel = V_SQ_ALU_SRC_1;
2476		} else {
2477			r600_bc_src(&alu.src[1], &ctx->src[1], i);
2478		}
2479		if (i == 3)
2480			alu.last = 1;
2481		r = r600_bc_add_alu(ctx->bc, &alu);
2482		if (r)
2483			return r;
2484	}
2485	return 0;
2486}
2487
2488static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2489{
2490	struct r600_bc_alu alu;
2491	int r;
2492
2493	memset(&alu, 0, sizeof(struct r600_bc_alu));
2494	alu.inst = opcode;
2495	alu.predicate = 1;
2496
2497	alu.dst.sel = ctx->temp_reg;
2498	alu.dst.write = 1;
2499	alu.dst.chan = 0;
2500
2501	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2502	alu.src[1].sel = V_SQ_ALU_SRC_0;
2503	alu.src[1].chan = 0;
2504
2505	alu.last = 1;
2506
2507	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2508	if (r)
2509		return r;
2510	return 0;
2511}
2512
2513static int pops(struct r600_shader_ctx *ctx, int pops)
2514{
2515	int alu_pop = 3;
2516	if (ctx->bc->cf_last) {
2517		if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
2518			alu_pop = 0;
2519		else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
2520			alu_pop = 1;
2521	}
2522	alu_pop += pops;
2523	if (alu_pop == 1) {
2524		ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
2525		ctx->bc->force_add_cf = 1;
2526	} else if (alu_pop == 2) {
2527		ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
2528		ctx->bc->force_add_cf = 1;
2529	} else {
2530		r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2531		ctx->bc->cf_last->pop_count = pops;
2532		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
2533	}
2534	return 0;
2535}
2536
2537static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2538{
2539	switch(reason) {
2540	case FC_PUSH_VPM:
2541		ctx->bc->callstack[ctx->bc->call_sp].current--;
2542		break;
2543	case FC_PUSH_WQM:
2544	case FC_LOOP:
2545		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2546		break;
2547	case FC_REP:
2548		/* TOODO : for 16 vp asic should -= 2; */
2549		ctx->bc->callstack[ctx->bc->call_sp].current --;
2550		break;
2551	}
2552}
2553
2554static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2555{
2556	if (check_max_only) {
2557		int diff;
2558		switch (reason) {
2559		case FC_PUSH_VPM:
2560			diff = 1;
2561			break;
2562		case FC_PUSH_WQM:
2563			diff = 4;
2564			break;
2565		default:
2566			assert(0);
2567			diff = 0;
2568		}
2569		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2570		    ctx->bc->callstack[ctx->bc->call_sp].max) {
2571			ctx->bc->callstack[ctx->bc->call_sp].max =
2572				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2573		}
2574		return;
2575	}
2576	switch (reason) {
2577	case FC_PUSH_VPM:
2578		ctx->bc->callstack[ctx->bc->call_sp].current++;
2579		break;
2580	case FC_PUSH_WQM:
2581	case FC_LOOP:
2582		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2583		break;
2584	case FC_REP:
2585		ctx->bc->callstack[ctx->bc->call_sp].current++;
2586		break;
2587	}
2588
2589	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2590	    ctx->bc->callstack[ctx->bc->call_sp].max) {
2591		ctx->bc->callstack[ctx->bc->call_sp].max =
2592			ctx->bc->callstack[ctx->bc->call_sp].current;
2593	}
2594}
2595
2596static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2597{
2598	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2599
2600	sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2601						sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2602	sp->mid[sp->num_mid] = ctx->bc->cf_last;
2603	sp->num_mid++;
2604}
2605
2606static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2607{
2608	ctx->bc->fc_sp++;
2609	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2610	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2611}
2612
2613static void fc_poplevel(struct r600_shader_ctx *ctx)
2614{
2615	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2616	if (sp->mid) {
2617		free(sp->mid);
2618		sp->mid = NULL;
2619	}
2620	sp->num_mid = 0;
2621	sp->start = NULL;
2622	sp->type = 0;
2623	ctx->bc->fc_sp--;
2624}
2625
/*
 * Everything between the #if 0 / #endif pair below is compiled out.  It is
 * an unfinished sketch of RETURN handling and flag-driven loop exits; several
 * of the helpers are empty stubs.
 */
#if 0
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* stub */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* stub */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2673
2674static int tgsi_if(struct r600_shader_ctx *ctx)
2675{
2676	emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2677
2678	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2679
2680	fc_pushlevel(ctx, FC_IF);
2681
2682	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2683	return 0;
2684}
2685
2686static int tgsi_else(struct r600_shader_ctx *ctx)
2687{
2688	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2689	ctx->bc->cf_last->pop_count = 1;
2690
2691	fc_set_mid(ctx, ctx->bc->fc_sp);
2692	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2693	return 0;
2694}
2695
2696static int tgsi_endif(struct r600_shader_ctx *ctx)
2697{
2698	pops(ctx, 1);
2699	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2700		R600_ERR("if/endif unbalanced in shader\n");
2701		return -1;
2702	}
2703
2704	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2705		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2706		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2707	} else {
2708		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2709	}
2710	fc_poplevel(ctx);
2711
2712	callstack_decrease_current(ctx, FC_PUSH_VPM);
2713	return 0;
2714}
2715
2716static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2717{
2718	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2719
2720	fc_pushlevel(ctx, FC_LOOP);
2721
2722	/* check stack depth */
2723	callstack_check_depth(ctx, FC_LOOP, 0);
2724	return 0;
2725}
2726
2727static int tgsi_endloop(struct r600_shader_ctx *ctx)
2728{
2729	int i;
2730
2731	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
2732
2733	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2734		R600_ERR("loop/endloop in shader code are not paired.\n");
2735		return -EINVAL;
2736	}
2737
2738	/* fixup loop pointers - from r600isa
2739	   LOOP END points to CF after LOOP START,
2740	   LOOP START point to CF after LOOP END
2741	   BRK/CONT point to LOOP END CF
2742	*/
2743	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2744
2745	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2746
2747	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2748		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2749	}
2750	/* TODO add LOOPRET support */
2751	fc_poplevel(ctx);
2752	callstack_decrease_current(ctx, FC_LOOP);
2753	return 0;
2754}
2755
2756static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2757{
2758	unsigned int fscp;
2759
2760	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2761	{
2762		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2763			break;
2764	}
2765
2766	if (fscp == 0) {
2767		R600_ERR("Break not inside loop/endloop pair\n");
2768		return -EINVAL;
2769	}
2770
2771	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2772	ctx->bc->cf_last->pop_count = 1;
2773
2774	fc_set_mid(ctx, fscp);
2775
2776	pops(ctx, 1);
2777	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2778	return 0;
2779}
2780
2781static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
2782	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
2783	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2784	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2785
2786	/* FIXME:
2787	 * For state trackers other than OpenGL, we'll want to use
2788	 * _RECIP_IEEE instead.
2789	 */
2790	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
2791
2792	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
2793	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2794	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
2795	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2796	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2797	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2798	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2799	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2800	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2801	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2802	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2803	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2804	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2805	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2806	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2807	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2808	/* gap */
2809	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2810	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2811	/* gap */
2812	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2813	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2814	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2815	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2816	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2817	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2818	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2819	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2820	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2821	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2822	/* gap */
2823	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2824	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2825	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2826	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2827	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2828	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2829	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2830	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
2831	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2832	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2833	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2834	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2835	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2836	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2837	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2838	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2839	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2840	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2841	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2842	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2843	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2844	{TGSI_OPCODE_TXD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2845	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2846	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2847	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2848	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2849	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2850	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2851	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2852	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
2853	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2854	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2855	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2856	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2857	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2858	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
2859	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2860	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2861	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2862	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2863	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2864	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2865	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2866	/* gap */
2867	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2868	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2869	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2870	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2871	/* gap */
2872	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2873	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2874	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2875	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2876	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2877	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2878	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2879	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
2880	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2881	/* gap */
2882	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2883	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2884	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2885	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2886	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2887	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2888	{TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2889	{TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2890	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2891	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2892	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2893	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2894	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2895	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2896	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2897	/* gap */
2898	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2899	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2900	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2901	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2902	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2903	/* gap */
2904	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2905	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2906	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2907	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2908	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2909	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2910	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2911	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2912	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
2913	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
2914	/* gap */
2915	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2916	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2917	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2918	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2919	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2920	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2921	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2922	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2923	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2924	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2925	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2926	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2927	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2928	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2929	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2930	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2931	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2932	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2933	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2934	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2935	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2936	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2937	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2938	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2939	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2940	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2941	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2942	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2943};
2944
/*
 * TGSI opcode translation table for the "eg" code path (presumably Evergreen
 * class hardware, going by the EG_ prefix of the opcode constants -- confirm).
 *
 * The table is positional: there is one entry per opcode slot, and the slots
 * that carry a raw number instead of a TGSI_OPCODE_* name (20, 22, 23, 32,
 * 75, ...) hold exactly their zero-based position in the array.  Presumably
 * the table is indexed directly by TGSI opcode, so entries must not be
 * reordered, inserted or removed without keeping that alignment (verify
 * against the lookup code).
 *
 * Each entry lists four values, in this order:
 *   1. the TGSI opcode (or the raw opcode number for a gap slot);
 *   2. a 0/1 value -- it is 1 only for MAD, the only entry that uses an
 *      ..._OP3_... opcode constant, so presumably an "is three-operand ALU
 *      instruction" flag (TODO confirm against the struct definition);
 *   3. the hardware opcode constant handed to the handler: ALU OP2/OP3
 *      constants, SQ_TEX_INST_* constants for the texture/derivative
 *      opcodes, and CF constants for BRK/CONT.  Entries whose handler does
 *      not map to a single ALU opcode carry the ALU NOP constant here;
 *   4. the handler function for the opcode.  tgsi_unsupported marks opcodes
 *      this table does not translate.
 */
2945static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
2946	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
2947	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2948	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2949	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
2950	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
2951	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2952	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2953	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2954	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	/* DP3, DP4, DPH and DP2 all go through tgsi_dp with the DOT4 opcode. */
2955	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2956	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2957	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2958	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2959	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
	/* SLT reuses SETGT through tgsi_op2_swap (presumably a < b is emitted as b > a -- confirm). */
2960	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2961	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
	/* Only entry with the second value set to 1 and an OP3 opcode constant. */
2962	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
	/* SUB carries the ADD opcode (presumably tgsi_op2 negates the second source -- confirm). */
2963	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2964	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2965	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2966	/* gap */
2967	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2968	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2969	/* gap */
2970	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2971	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2972	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2973	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2974	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2975	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2976	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2977	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2978	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2979	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2980	/* gap */
2981	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* ABS carries the MOV opcode (presumably tgsi_op2 applies an absolute-value source modifier -- confirm). */
2982	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2983	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2984	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2985	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
	/* Derivatives are routed through the texture handler with GET_GRADIENTS opcodes. */
2986	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2987	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2988	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
2989	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2990	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2991	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2992	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2993	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2994	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2995	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2996	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2997	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
	/* SLE reuses SETGE through tgsi_op2_swap (presumably a <= b is emitted as b >= a -- confirm). */
2998	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2999	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3000	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3001	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3002	{TGSI_OPCODE_TXD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3003	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3004	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3005	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3006	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3007	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3008	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3009	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* NOTE(review): ARR shares the tgsi_eg_arl handler with ARL -- confirm the rounding behaviour is intended. */
3010	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3011	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3012	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3013	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3014	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3015	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3016	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
	/* NOTE(review): TXB uses the same SAMPLE_L opcode as TXL below -- confirm LOD bias vs. explicit LOD is handled correctly. */
3017	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3018	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3019	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3020	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3021	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	/* BRK (and CONT further down) carry control-flow (CF) opcode constants rather than ALU ones. */
3022	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3023	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3024	/* gap */
3025	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3026	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3027	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3028	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3029	/* gap */
3030	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3031	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3032	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3033	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3034	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3035	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3036	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3037	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3038	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3039	/* gap */
3040	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3041	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3042	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3043	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3044	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3045	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3046	{TGSI_OPCODE_TXF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3047	{TGSI_OPCODE_TXQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3048	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3049	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3050	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3051	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3052	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3053	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3054	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3055	/* gap */
3056	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3057	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3058	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3059	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* NOTE(review): TGSI NOP is routed to tgsi_unsupported -- confirm this is intended. */
3060	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3061	/* gap */
3062	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3063	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3064	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3065	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3066	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3067	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3068	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3069	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3070	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3071	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3072	/* gap */
3073	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* From here to the end of the table every entry (integer/unsigned ops and switch constructs) is tgsi_unsupported. */
3074	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3075	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3076	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3077	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3078	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3079	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3080	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3081	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3082	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3083	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3084	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3085	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3086	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3087	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3088	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3089	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3090	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3091	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3092	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3093	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3094	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3095	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3096	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3097	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3098	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3099	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* Final entry: TGSI_OPCODE_LAST itself also gets a (tgsi_unsupported) slot. */
3100	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3101};
3102