r600_shader.c revision 0a6f09a76a416b8672e149c520aa5bef33174223
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "pipe/p_shader_tokens.h"
24#include "tgsi/tgsi_parse.h"
25#include "tgsi/tgsi_scan.h"
26#include "tgsi/tgsi_dump.h"
27#include "util/u_format.h"
28#include "r600_pipe.h"
29#include "r600_asm.h"
30#include "r600_sq.h"
31#include "r600_formats.h"
32#include "r600_opcodes.h"
33#include "r600d.h"
34#include <stdio.h>
35#include <errno.h>
36
37static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
38{
39	struct r600_pipe_state *rstate = &shader->rstate;
40	struct r600_shader *rshader = &shader->shader;
41	unsigned spi_vs_out_id[10];
42	unsigned i, tmp;
43
44	/* clear previous register */
45	rstate->nregs = 0;
46
47	/* so far never got proper semantic id from tgsi */
48	/* FIXME better to move this in config things so they get emited
49	 * only one time per cs
50	 */
51	for (i = 0; i < 10; i++) {
52		spi_vs_out_id[i] = 0;
53	}
54	for (i = 0; i < 32; i++) {
55		tmp = i << ((i & 3) * 8);
56		spi_vs_out_id[i / 4] |= tmp;
57	}
58	for (i = 0; i < 10; i++) {
59		r600_pipe_state_add_reg(rstate,
60					R_028614_SPI_VS_OUT_ID_0 + i * 4,
61					spi_vs_out_id[i], 0xFFFFFFFF, NULL);
62	}
63
64	r600_pipe_state_add_reg(rstate,
65			R_0286C4_SPI_VS_OUT_CONFIG,
66			S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
67			0xFFFFFFFF, NULL);
68	r600_pipe_state_add_reg(rstate,
69			R_028868_SQ_PGM_RESOURCES_VS,
70			S_028868_NUM_GPRS(rshader->bc.ngpr) |
71			S_028868_STACK_SIZE(rshader->bc.nstack),
72			0xFFFFFFFF, NULL);
73	r600_pipe_state_add_reg(rstate,
74			R_0288D0_SQ_PGM_CF_OFFSET_VS,
75			0x00000000, 0xFFFFFFFF, NULL);
76	r600_pipe_state_add_reg(rstate,
77			R_028858_SQ_PGM_START_VS,
78			r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
79
80	r600_pipe_state_add_reg(rstate,
81				R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
82				0xFFFFFFFF, NULL);
83
84}
85
86int r600_find_vs_semantic_index(struct r600_shader *vs,
87				struct r600_shader *ps, int id)
88{
89	struct r600_shader_io *input = &ps->input[id];
90
91	for (int i = 0; i < vs->noutput; i++) {
92		if (input->name == vs->output[i].name &&
93			input->sid == vs->output[i].sid) {
94			return i - 1;
95		}
96	}
97	return 0;
98}
99
100static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
101{
102	struct r600_pipe_state *rstate = &shader->rstate;
103	struct r600_shader *rshader = &shader->shader;
104	unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control;
105	int pos_index = -1, face_index = -1;
106
107	rstate->nregs = 0;
108
109	for (i = 0; i < rshader->ninput; i++) {
110		if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
111			pos_index = i;
112		if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
113			face_index = i;
114	}
115
116	db_shader_control = 0;
117	for (i = 0; i < rshader->noutput; i++) {
118		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
119			db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1);
120		if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
121			db_shader_control |= S_02880C_STENCIL_REF_EXPORT_ENABLE(1);
122	}
123	if (rshader->uses_kill)
124		db_shader_control |= S_02880C_KILL_ENABLE(1);
125
126	exports_ps = 0;
127	num_cout = 0;
128	for (i = 0; i < rshader->noutput; i++) {
129		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
130			exports_ps |= 1;
131		else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
132			num_cout++;
133		}
134	}
135	exports_ps |= S_028854_EXPORT_COLORS(num_cout);
136	if (!exports_ps) {
137		/* always at least export 1 component per pixel */
138		exports_ps = 2;
139	}
140
141	spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
142				S_0286CC_PERSP_GRADIENT_ENA(1);
143	spi_input_z = 0;
144	if (pos_index != -1) {
145		spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
146					S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
147					S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
148					S_0286CC_BARYC_SAMPLE_CNTL(1));
149		spi_input_z |= 1;
150	}
151
152	spi_ps_in_control_1 = 0;
153	if (face_index != -1) {
154		spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
155			S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
156	}
157
158	r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
159	r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
160	r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
161	r600_pipe_state_add_reg(rstate,
162				R_028840_SQ_PGM_START_PS,
163				r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
164	r600_pipe_state_add_reg(rstate,
165				R_028850_SQ_PGM_RESOURCES_PS,
166				S_028868_NUM_GPRS(rshader->bc.ngpr) |
167				S_028868_STACK_SIZE(rshader->bc.nstack),
168				0xFFFFFFFF, NULL);
169	r600_pipe_state_add_reg(rstate,
170				R_028854_SQ_PGM_EXPORTS_PS,
171				exports_ps, 0xFFFFFFFF, NULL);
172	r600_pipe_state_add_reg(rstate,
173				R_0288CC_SQ_PGM_CF_OFFSET_PS,
174				0x00000000, 0xFFFFFFFF, NULL);
175
176	if (rshader->fs_write_all) {
177		r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
178					S_028808_MULTIWRITE_ENABLE(1),
179					S_028808_MULTIWRITE_ENABLE(1),
180					NULL);
181	}
182	/* only set some bits here, the other bits are set in the dsa state */
183	r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL,
184				db_shader_control,
185				S_02880C_Z_EXPORT_ENABLE(1) |
186				S_02880C_STENCIL_REF_EXPORT_ENABLE(1) |
187				S_02880C_KILL_ENABLE(1),
188				NULL);
189
190	r600_pipe_state_add_reg(rstate,
191				R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
192				0xFFFFFFFF, NULL);
193}
194
195static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
196{
197	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
198	struct r600_shader *rshader = &shader->shader;
199	void *ptr;
200
201	/* copy new shader */
202	if (shader->bo == NULL) {
203		shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
204		if (shader->bo == NULL) {
205			return -ENOMEM;
206		}
207		ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
208		memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
209		r600_bo_unmap(rctx->radeon, shader->bo);
210	}
211	/* build state */
212	switch (rshader->processor_type) {
213	case TGSI_PROCESSOR_VERTEX:
214		if (rshader->family >= CHIP_CEDAR) {
215			evergreen_pipe_shader_vs(ctx, shader);
216		} else {
217			r600_pipe_shader_vs(ctx, shader);
218		}
219		break;
220	case TGSI_PROCESSOR_FRAGMENT:
221		if (rshader->family >= CHIP_CEDAR) {
222			evergreen_pipe_shader_ps(ctx, shader);
223		} else {
224			r600_pipe_shader_ps(ctx, shader);
225		}
226		break;
227	default:
228		return -EINVAL;
229	}
230	return 0;
231}
232
233static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
234
235int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
236{
237	static int dump_shaders = -1;
238	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
239	int r;
240
241	/* Would like some magic "get_bool_option_once" routine.
242	*/
243	if (dump_shaders == -1)
244		dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
245
246	if (dump_shaders) {
247		fprintf(stderr, "--------------------------------------------------------------\n");
248		tgsi_dump(tokens, 0);
249	}
250	shader->shader.family = r600_get_family(rctx->radeon);
251	r = r600_shader_from_tgsi(tokens, &shader->shader);
252	if (r) {
253		R600_ERR("translation from TGSI failed !\n");
254		return r;
255	}
256	r = r600_bc_build(&shader->shader.bc);
257	if (r) {
258		R600_ERR("building bytecode failed !\n");
259		return r;
260	}
261	if (dump_shaders) {
262		r600_bc_dump(&shader->shader.bc);
263		fprintf(stderr, "______________________________________________________________\n");
264	}
265	return r600_pipe_shader(ctx, shader);
266}
267
268void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
269{
270	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
271
272	r600_bo_reference(rctx->radeon, &shader->bo, NULL);
273	r600_bc_clear(&shader->shader.bc);
274}
275
276/*
277 * tgsi -> r600 shader
278 */
279struct r600_shader_tgsi_instruction;
280
/*
 * One translated source operand, as produced by tgsi_src().
 */
struct r600_shader_src {
	/* register / constant selector (register index plus file offset, or a
	 * special selector such as V_SQ_ALU_SRC_LITERAL) */
	unsigned sel;
	/* source component read for each of the four destination channels */
	unsigned swizzle[4];
	/* negate modifier */
	unsigned neg;
	/* absolute-value modifier */
	unsigned abs;
	/* relative addressing mode; V_SQ_REL_RELATIVE for indirect operands */
	unsigned rel;
	/* the four dwords of the immediate when sel is a literal */
	uint32_t value[4];
};
289
290struct r600_shader_ctx {
291	struct tgsi_shader_info			info;
292	struct tgsi_parse_context		parse;
293	const struct tgsi_token			*tokens;
294	unsigned				type;
295	unsigned				file_offset[TGSI_FILE_COUNT];
296	unsigned				temp_reg;
297	unsigned				ar_reg;
298	struct r600_shader_tgsi_instruction	*inst_info;
299	struct r600_bc				*bc;
300	struct r600_shader			*shader;
301	struct r600_shader_src			src[3];
302	u32					*literals;
303	u32					nliterals;
304	u32					max_driver_temp_used;
305	/* needed for evergreen interpolation */
306	boolean                                 input_centroid;
307	boolean                                 input_linear;
308	boolean                                 input_perspective;
309	int					num_interp_gpr;
310};
311
/*
 * One entry of the per-chip TGSI opcode tables, which are indexed by TGSI
 * opcode. Field order must not change: the order is part of the layout any
 * positional initializer relies on.
 */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry describes */
	unsigned tgsi_opcode;
	/* NOTE(review): presumably non-zero for three-source hardware
	 * instructions - confirm against the tables */
	unsigned is_op3;
	/* hardware opcode used by the handler */
	unsigned r600_opcode;
	/* handler translating the current instruction; returns 0 or a
	 * negative errno value */
	int (*process)(struct r600_shader_ctx *ctx);
};
318
319static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[];
320static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
321
322static int tgsi_is_supported(struct r600_shader_ctx *ctx)
323{
324	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
325	int j;
326
327	if (i->Instruction.NumDstRegs > 1) {
328		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
329		return -EINVAL;
330	}
331	if (i->Instruction.Predicate) {
332		R600_ERR("predicate unsupported\n");
333		return -EINVAL;
334	}
335#if 0
336	if (i->Instruction.Label) {
337		R600_ERR("label unsupported\n");
338		return -EINVAL;
339	}
340#endif
341	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
342		if (i->Src[j].Register.Dimension) {
343			R600_ERR("unsupported src %d (dimension %d)\n", j,
344				 i->Src[j].Register.Dimension);
345			return -EINVAL;
346		}
347	}
348	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
349		if (i->Dst[j].Register.Dimension) {
350			R600_ERR("unsupported dst (dimension)\n");
351			return -EINVAL;
352		}
353	}
354	return 0;
355}
356
357static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
358{
359	int i, r;
360	struct r600_bc_alu alu;
361	int gpr = 0, base_chan = 0;
362	int ij_index = 0;
363
364	if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
365		ij_index = 0;
366		if (ctx->shader->input[input].centroid)
367			ij_index++;
368	} else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
369		ij_index = 0;
370		/* if we have perspective add one */
371		if (ctx->input_perspective)  {
372			ij_index++;
373			/* if we have perspective centroid */
374			if (ctx->input_centroid)
375				ij_index++;
376		}
377		if (ctx->shader->input[input].centroid)
378			ij_index++;
379	}
380
381	/* work out gpr and base_chan from index */
382	gpr = ij_index / 2;
383	base_chan = (2 * (ij_index % 2)) + 1;
384
385	for (i = 0; i < 8; i++) {
386		memset(&alu, 0, sizeof(struct r600_bc_alu));
387
388		if (i < 4)
389			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
390		else
391			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
392
393		if ((i > 1) && (i < 6)) {
394			alu.dst.sel = ctx->shader->input[input].gpr;
395			alu.dst.write = 1;
396		}
397
398		alu.dst.chan = i % 4;
399
400		alu.src[0].sel = gpr;
401		alu.src[0].chan = (base_chan - (i % 2));
402
403		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
404
405		alu.bank_swizzle_force = SQ_ALU_VEC_210;
406		if ((i % 4) == 3)
407			alu.last = 1;
408		r = r600_bc_add_alu(ctx->bc, &alu);
409		if (r)
410			return r;
411	}
412	return 0;
413}
414
415
416static int tgsi_declaration(struct r600_shader_ctx *ctx)
417{
418	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
419	unsigned i;
420	int r;
421
422	switch (d->Declaration.File) {
423	case TGSI_FILE_INPUT:
424		i = ctx->shader->ninput++;
425		ctx->shader->input[i].name = d->Semantic.Name;
426		ctx->shader->input[i].sid = d->Semantic.Index;
427		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
428		ctx->shader->input[i].centroid = d->Declaration.Centroid;
429		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
430		if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
431			/* turn input into interpolate on EG */
432			if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
433				if (ctx->shader->input[i].interpolate > 0) {
434					ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
435					evergreen_interp_alu(ctx, i);
436				}
437			}
438		}
439		break;
440	case TGSI_FILE_OUTPUT:
441		i = ctx->shader->noutput++;
442		ctx->shader->output[i].name = d->Semantic.Name;
443		ctx->shader->output[i].sid = d->Semantic.Index;
444		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
445		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
446		break;
447	case TGSI_FILE_CONSTANT:
448	case TGSI_FILE_TEMPORARY:
449	case TGSI_FILE_SAMPLER:
450	case TGSI_FILE_ADDRESS:
451		break;
452
453	case TGSI_FILE_SYSTEM_VALUE:
454		if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
455			struct r600_bc_alu alu;
456			memset(&alu, 0, sizeof(struct r600_bc_alu));
457
458			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
459			alu.src[0].sel = 0;
460			alu.src[0].chan = 3;
461
462			alu.dst.sel = 0;
463			alu.dst.chan = 3;
464			alu.dst.write = 1;
465			alu.last = 1;
466
467			if ((r = r600_bc_add_alu(ctx->bc, &alu)))
468				return r;
469			break;
470		}
471
472	default:
473		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
474		return -EINVAL;
475	}
476	return 0;
477}
478
479static int r600_get_temp(struct r600_shader_ctx *ctx)
480{
481	return ctx->temp_reg + ctx->max_driver_temp_used++;
482}
483
484/*
485 * for evergreen we need to scan the shader to find the number of GPRs we need to
486 * reserve for interpolation.
487 *
488 * we need to know if we are going to emit
489 * any centroid inputs
490 * if perspective and linear are required
491*/
492static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
493{
494	int i;
495	int num_baryc;
496
497	ctx->input_linear = FALSE;
498	ctx->input_perspective = FALSE;
499	ctx->input_centroid = FALSE;
500	ctx->num_interp_gpr = 1;
501
502	/* any centroid inputs */
503	for (i = 0; i < ctx->info.num_inputs; i++) {
504		/* skip position/face */
505		if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
506		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
507			continue;
508		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
509			ctx->input_linear = TRUE;
510		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
511			ctx->input_perspective = TRUE;
512		if (ctx->info.input_centroid[i])
513			ctx->input_centroid = TRUE;
514	}
515
516	num_baryc = 0;
517	/* ignoring sample for now */
518	if (ctx->input_perspective)
519		num_baryc++;
520	if (ctx->input_linear)
521		num_baryc++;
522	if (ctx->input_centroid)
523		num_baryc *= 2;
524
525	ctx->num_interp_gpr += (num_baryc + 1) >> 1;
526
527	/* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
528	return ctx->num_interp_gpr;
529}
530
531static void tgsi_src(struct r600_shader_ctx *ctx,
532		     const struct tgsi_full_src_register *tgsi_src,
533		     struct r600_shader_src *r600_src)
534{
535	memset(r600_src, 0, sizeof(*r600_src));
536	r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
537	r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
538	r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
539	r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
540	r600_src->neg = tgsi_src->Register.Negate;
541	r600_src->abs = tgsi_src->Register.Absolute;
542
543	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
544		int index;
545		if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
546			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
547			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
548
549			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
550			r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
551			if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
552				return;
553		}
554		index = tgsi_src->Register.Index;
555		r600_src->sel = V_SQ_ALU_SRC_LITERAL;
556		memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
557	} else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
558		/* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
559		r600_src->swizzle[0] = 3;
560		r600_src->swizzle[1] = 3;
561		r600_src->swizzle[2] = 3;
562		r600_src->swizzle[3] = 3;
563		r600_src->sel = 0;
564	} else {
565		if (tgsi_src->Register.Indirect)
566			r600_src->rel = V_SQ_REL_RELATIVE;
567		r600_src->sel = tgsi_src->Register.Index;
568		r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
569	}
570}
571
572static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
573{
574	struct r600_bc_vtx vtx;
575	unsigned int ar_reg;
576	int r;
577
578	if (offset) {
579		struct r600_bc_alu alu;
580
581		memset(&alu, 0, sizeof(alu));
582
583		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
584		alu.src[0].sel = ctx->ar_reg;
585
586		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
587		alu.src[1].value = offset;
588
589		alu.dst.sel = dst_reg;
590		alu.dst.write = 1;
591		alu.last = 1;
592
593		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
594			return r;
595
596		ar_reg = dst_reg;
597	} else {
598		ar_reg = ctx->ar_reg;
599	}
600
601	memset(&vtx, 0, sizeof(vtx));
602	vtx.fetch_type = 2;		/* VTX_FETCH_NO_INDEX_OFFSET */
603	vtx.src_gpr = ar_reg;
604	vtx.mega_fetch_count = 16;
605	vtx.dst_gpr = dst_reg;
606	vtx.dst_sel_x = 0;		/* SEL_X */
607	vtx.dst_sel_y = 1;		/* SEL_Y */
608	vtx.dst_sel_z = 2;		/* SEL_Z */
609	vtx.dst_sel_w = 3;		/* SEL_W */
610	vtx.data_format = FMT_32_32_32_32_FLOAT;
611	vtx.num_format_all = 2;		/* NUM_FORMAT_SCALED */
612	vtx.format_comp_all = 1;	/* FORMAT_COMP_SIGNED */
613	vtx.srf_mode_all = 1;		/* SRF_MODE_NO_ZERO */
614
615	if ((r = r600_bc_add_vtx(ctx->bc, &vtx)))
616		return r;
617
618	return 0;
619}
620
621static int tgsi_split_constant(struct r600_shader_ctx *ctx)
622{
623	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
624	struct r600_bc_alu alu;
625	int i, j, k, nconst, r;
626
627	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
628		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
629			nconst++;
630		}
631		tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
632	}
633	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
634		if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
635			continue;
636		}
637
638		if (ctx->src[i].rel) {
639			int treg = r600_get_temp(ctx);
640			if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
641				return r;
642
643			ctx->src[i].sel = treg;
644			ctx->src[i].rel = 0;
645			j--;
646		} else if (j > 0) {
647			int treg = r600_get_temp(ctx);
648			for (k = 0; k < 4; k++) {
649				memset(&alu, 0, sizeof(struct r600_bc_alu));
650				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
651				alu.src[0].sel = ctx->src[i].sel;
652				alu.src[0].chan = k;
653				alu.src[0].rel = ctx->src[i].rel;
654				alu.dst.sel = treg;
655				alu.dst.chan = k;
656				alu.dst.write = 1;
657				if (k == 3)
658					alu.last = 1;
659				r = r600_bc_add_alu(ctx->bc, &alu);
660				if (r)
661					return r;
662			}
663			ctx->src[i].sel = treg;
664			ctx->src[i].rel =0;
665			j--;
666		}
667	}
668	return 0;
669}
670
671/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
672static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
673{
674	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
675	struct r600_bc_alu alu;
676	int i, j, k, nliteral, r;
677
678	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
679		if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
680			nliteral++;
681		}
682	}
683	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
684		if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
685			int treg = r600_get_temp(ctx);
686			for (k = 0; k < 4; k++) {
687				memset(&alu, 0, sizeof(struct r600_bc_alu));
688				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
689				alu.src[0].sel = ctx->src[i].sel;
690				alu.src[0].chan = k;
691				alu.src[0].value = ctx->src[i].value[k];
692				alu.dst.sel = treg;
693				alu.dst.chan = k;
694				alu.dst.write = 1;
695				if (k == 3)
696					alu.last = 1;
697				r = r600_bc_add_alu(ctx->bc, &alu);
698				if (r)
699					return r;
700			}
701			ctx->src[i].sel = treg;
702			j--;
703		}
704	}
705	return 0;
706}
707
708static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
709{
710	struct tgsi_full_immediate *immediate;
711	struct tgsi_full_property *property;
712	struct r600_shader_ctx ctx;
713	struct r600_bc_output output[32];
714	unsigned output_done, noutput;
715	unsigned opcode;
716	int i, r = 0, pos0;
717
718	ctx.bc = &shader->bc;
719	ctx.shader = shader;
720	r = r600_bc_init(ctx.bc, shader->family);
721	if (r)
722		return r;
723	ctx.tokens = tokens;
724	tgsi_scan_shader(tokens, &ctx.info);
725	tgsi_parse_init(&ctx.parse, tokens);
726	ctx.type = ctx.parse.FullHeader.Processor.Processor;
727	shader->processor_type = ctx.type;
728	ctx.bc->type = shader->processor_type;
729
730	/* register allocations */
731	/* Values [0,127] correspond to GPR[0..127].
732	 * Values [128,159] correspond to constant buffer bank 0
733	 * Values [160,191] correspond to constant buffer bank 1
734	 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
735	 * Values [256,287] correspond to constant buffer bank 2 (EG)
736	 * Values [288,319] correspond to constant buffer bank 3 (EG)
737	 * Other special values are shown in the list below.
738	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
739	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
740	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
741	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
742	 * 248	SQ_ALU_SRC_0: special constant 0.0.
743	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
744	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
745	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
746	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
747	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
748	 * 254	SQ_ALU_SRC_PV: previous vector result.
749	 * 255	SQ_ALU_SRC_PS: previous scalar result.
750	 */
751	for (i = 0; i < TGSI_FILE_COUNT; i++) {
752		ctx.file_offset[i] = 0;
753	}
754	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
755		ctx.file_offset[TGSI_FILE_INPUT] = 1;
756		if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
757			r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
758		} else {
759			r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
760		}
761	}
762	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) {
763		ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
764	}
765	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
766						ctx.info.file_count[TGSI_FILE_INPUT];
767	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
768						ctx.info.file_count[TGSI_FILE_OUTPUT];
769
770	/* Outside the GPR range. This will be translated to one of the
771	 * kcache banks later. */
772	ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
773
774	ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
775	ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
776			ctx.info.file_count[TGSI_FILE_TEMPORARY];
777	ctx.temp_reg = ctx.ar_reg + 1;
778
779	ctx.nliterals = 0;
780	ctx.literals = NULL;
781	shader->fs_write_all = FALSE;
782	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
783		tgsi_parse_token(&ctx.parse);
784		switch (ctx.parse.FullToken.Token.Type) {
785		case TGSI_TOKEN_TYPE_IMMEDIATE:
786			immediate = &ctx.parse.FullToken.FullImmediate;
787			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
788			if(ctx.literals == NULL) {
789				r = -ENOMEM;
790				goto out_err;
791			}
792			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
793			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
794			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
795			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
796			ctx.nliterals++;
797			break;
798		case TGSI_TOKEN_TYPE_DECLARATION:
799			r = tgsi_declaration(&ctx);
800			if (r)
801				goto out_err;
802			break;
803		case TGSI_TOKEN_TYPE_INSTRUCTION:
804			r = tgsi_is_supported(&ctx);
805			if (r)
806				goto out_err;
807			ctx.max_driver_temp_used = 0;
808			/* reserve first tmp for everyone */
809			r600_get_temp(&ctx);
810
811			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
812			if ((r = tgsi_split_constant(&ctx)))
813				goto out_err;
814			if ((r = tgsi_split_literal_constant(&ctx)))
815				goto out_err;
816			if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
817				ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
818			else
819				ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
820			r = ctx.inst_info->process(&ctx);
821			if (r)
822				goto out_err;
823			break;
824		case TGSI_TOKEN_TYPE_PROPERTY:
825			property = &ctx.parse.FullToken.FullProperty;
826			if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
827				if (property->u[0].Data == 1)
828					shader->fs_write_all = TRUE;
829			}
830			break;
831		default:
832			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
833			r = -EINVAL;
834			goto out_err;
835		}
836	}
837	/* export output */
838	noutput = shader->noutput;
839	for (i = 0, pos0 = 0; i < noutput; i++) {
840		memset(&output[i], 0, sizeof(struct r600_bc_output));
841		output[i].gpr = shader->output[i].gpr;
842		output[i].elem_size = 3;
843		output[i].swizzle_x = 0;
844		output[i].swizzle_y = 1;
845		output[i].swizzle_z = 2;
846		output[i].swizzle_w = 3;
847		output[i].burst_count = 1;
848		output[i].barrier = 1;
849		output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
850		output[i].array_base = i - pos0;
851		output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
852		switch (ctx.type) {
853		case TGSI_PROCESSOR_VERTEX:
854			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
855				output[i].array_base = 60;
856				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
857				/* position doesn't count in array_base */
858				pos0++;
859			}
860			if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
861				output[i].array_base = 61;
862				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
863				/* position doesn't count in array_base */
864				pos0++;
865			}
866			break;
867		case TGSI_PROCESSOR_FRAGMENT:
868			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
869				output[i].array_base = shader->output[i].sid;
870				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
871			} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
872				output[i].array_base = 61;
873				output[i].swizzle_x = 2;
874				output[i].swizzle_y = 7;
875				output[i].swizzle_z = output[i].swizzle_w = 7;
876				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
877			} else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
878				output[i].array_base = 61;
879				output[i].swizzle_x = 7;
880				output[i].swizzle_y = 1;
881				output[i].swizzle_z = output[i].swizzle_w = 7;
882				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
883			} else {
884				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
885				r = -EINVAL;
886				goto out_err;
887			}
888			break;
889		default:
890			R600_ERR("unsupported processor type %d\n", ctx.type);
891			r = -EINVAL;
892			goto out_err;
893		}
894	}
895	/* add fake param output for vertex shader if no param is exported */
896	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
897		for (i = 0, pos0 = 0; i < noutput; i++) {
898			if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
899				pos0 = 1;
900				break;
901			}
902		}
903		if (!pos0) {
904			memset(&output[i], 0, sizeof(struct r600_bc_output));
905			output[i].gpr = 0;
906			output[i].elem_size = 3;
907			output[i].swizzle_x = 0;
908			output[i].swizzle_y = 1;
909			output[i].swizzle_z = 2;
910			output[i].swizzle_w = 3;
911			output[i].burst_count = 1;
912			output[i].barrier = 1;
913			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
914			output[i].array_base = 0;
915			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
916			noutput++;
917		}
918	}
919	/* add fake pixel export */
920	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
921		memset(&output[0], 0, sizeof(struct r600_bc_output));
922		output[0].gpr = 0;
923		output[0].elem_size = 3;
924		output[0].swizzle_x = 7;
925		output[0].swizzle_y = 7;
926		output[0].swizzle_z = 7;
927		output[0].swizzle_w = 7;
928		output[0].burst_count = 1;
929		output[0].barrier = 1;
930		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
931		output[0].array_base = 0;
932		output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
933		noutput++;
934	}
935	/* set export done on last export of each type */
936	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
937		if (i == (noutput - 1)) {
938			output[i].end_of_program = 1;
939		}
940		if (!(output_done & (1 << output[i].type))) {
941			output_done |= (1 << output[i].type);
942			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
943		}
944	}
945	/* add output to bytecode */
946	for (i = 0; i < noutput; i++) {
947		r = r600_bc_add_output(ctx.bc, &output[i]);
948		if (r)
949			goto out_err;
950	}
951	free(ctx.literals);
952	tgsi_parse_free(&ctx.parse);
953	return 0;
954out_err:
955	free(ctx.literals);
956	tgsi_parse_free(&ctx.parse);
957	return r;
958}
959
960static int tgsi_unsupported(struct r600_shader_ctx *ctx)
961{
962	R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
963	return -EINVAL;
964}
965
/*
 * Handler for the end-of-program opcode: emits nothing and always
 * reports success.
 */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;
	return 0;
}
970
971static void r600_bc_src(struct r600_bc_alu_src *bc_src,
972			const struct r600_shader_src *shader_src,
973			unsigned chan)
974{
975	bc_src->sel = shader_src->sel;
976	bc_src->chan = shader_src->swizzle[chan];
977	bc_src->neg = shader_src->neg;
978	bc_src->abs = shader_src->abs;
979	bc_src->rel = shader_src->rel;
980	bc_src->value = shader_src->value[bc_src->chan];
981}
982
983static void tgsi_dst(struct r600_shader_ctx *ctx,
984		     const struct tgsi_full_dst_register *tgsi_dst,
985		     unsigned swizzle,
986		     struct r600_bc_alu_dst *r600_dst)
987{
988	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
989
990	r600_dst->sel = tgsi_dst->Register.Index;
991	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
992	r600_dst->chan = swizzle;
993	r600_dst->write = 1;
994	if (tgsi_dst->Register.Indirect)
995		r600_dst->rel = V_SQ_REL_RELATIVE;
996	if (inst->Instruction.Saturate) {
997		r600_dst->clamp = 1;
998	}
999}
1000
/*
 * Return the index (0..3) of the highest channel enabled in a 4-bit
 * write mask. Bits above bit 3 are ignored; an empty mask yields 0.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan from W down to Y; X (or nothing) falls through to 0 */
	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1u << chan))
			return chan;
	}
	return 0;
}
1012
1013static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
1014{
1015	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1016	struct r600_bc_alu alu;
1017	int i, j, r;
1018	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1019
1020	for (i = 0; i < lasti + 1; i++) {
1021		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1022			continue;
1023
1024		memset(&alu, 0, sizeof(struct r600_bc_alu));
1025		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1026
1027		alu.inst = ctx->inst_info->r600_opcode;
1028		if (!swap) {
1029			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1030				r600_bc_src(&alu.src[j], &ctx->src[j], i);
1031			}
1032		} else {
1033			r600_bc_src(&alu.src[0], &ctx->src[1], i);
1034			r600_bc_src(&alu.src[1], &ctx->src[0], i);
1035		}
1036		/* handle some special cases */
1037		switch (ctx->inst_info->tgsi_opcode) {
1038		case TGSI_OPCODE_SUB:
1039			alu.src[1].neg = 1;
1040			break;
1041		case TGSI_OPCODE_ABS:
1042			alu.src[0].abs = 1;
1043			break;
1044		default:
1045			break;
1046		}
1047		if (i == lasti) {
1048			alu.last = 1;
1049		}
1050		r = r600_bc_add_alu(ctx->bc, &alu);
1051		if (r)
1052			return r;
1053	}
1054	return 0;
1055}
1056
/* Per-channel ALU translation with the sources in their TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
1061
/* Per-channel ALU translation with sources 0 and 1 exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
1066
1067/*
1068 * r600 - trunc to -PI..PI range
1069 * r700 - normalize by dividing by 2PI
1070 * see fdo bug 27901
1071 */
1072static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
1073{
1074	static float half_inv_pi = 1.0 /(3.1415926535 * 2);
1075	static float double_pi = 3.1415926535 * 2;
1076	static float neg_pi = -3.1415926535;
1077
1078	int r;
1079	struct r600_bc_alu alu;
1080
1081	memset(&alu, 0, sizeof(struct r600_bc_alu));
1082	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1083	alu.is_op3 = 1;
1084
1085	alu.dst.chan = 0;
1086	alu.dst.sel = ctx->temp_reg;
1087	alu.dst.write = 1;
1088
1089	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1090
1091	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1092	alu.src[1].chan = 0;
1093	alu.src[1].value = *(uint32_t *)&half_inv_pi;
1094	alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1095	alu.src[2].chan = 0;
1096	alu.last = 1;
1097	r = r600_bc_add_alu(ctx->bc, &alu);
1098	if (r)
1099		return r;
1100
1101	memset(&alu, 0, sizeof(struct r600_bc_alu));
1102	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1103
1104	alu.dst.chan = 0;
1105	alu.dst.sel = ctx->temp_reg;
1106	alu.dst.write = 1;
1107
1108	alu.src[0].sel = ctx->temp_reg;
1109	alu.src[0].chan = 0;
1110	alu.last = 1;
1111	r = r600_bc_add_alu(ctx->bc, &alu);
1112	if (r)
1113		return r;
1114
1115	memset(&alu, 0, sizeof(struct r600_bc_alu));
1116	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1117	alu.is_op3 = 1;
1118
1119	alu.dst.chan = 0;
1120	alu.dst.sel = ctx->temp_reg;
1121	alu.dst.write = 1;
1122
1123	alu.src[0].sel = ctx->temp_reg;
1124	alu.src[0].chan = 0;
1125
1126	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1127	alu.src[1].chan = 0;
1128	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1129	alu.src[2].chan = 0;
1130
1131	if (ctx->bc->chiprev == CHIPREV_R600) {
1132		alu.src[1].value = *(uint32_t *)&double_pi;
1133		alu.src[2].value = *(uint32_t *)&neg_pi;
1134	} else {
1135		alu.src[1].sel = V_SQ_ALU_SRC_1;
1136		alu.src[2].sel = V_SQ_ALU_SRC_0_5;
1137		alu.src[2].neg = 1;
1138	}
1139
1140	alu.last = 1;
1141	r = r600_bc_add_alu(ctx->bc, &alu);
1142	if (r)
1143		return r;
1144	return 0;
1145}
1146
1147static int tgsi_trig(struct r600_shader_ctx *ctx)
1148{
1149	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1150	struct r600_bc_alu alu;
1151	int i, r;
1152	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1153
1154	r = tgsi_setup_trig(ctx);
1155	if (r)
1156		return r;
1157
1158	memset(&alu, 0, sizeof(struct r600_bc_alu));
1159	alu.inst = ctx->inst_info->r600_opcode;
1160	alu.dst.chan = 0;
1161	alu.dst.sel = ctx->temp_reg;
1162	alu.dst.write = 1;
1163
1164	alu.src[0].sel = ctx->temp_reg;
1165	alu.src[0].chan = 0;
1166	alu.last = 1;
1167	r = r600_bc_add_alu(ctx->bc, &alu);
1168	if (r)
1169		return r;
1170
1171	/* replicate result */
1172	for (i = 0; i < lasti + 1; i++) {
1173		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1174			continue;
1175
1176		memset(&alu, 0, sizeof(struct r600_bc_alu));
1177		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1178
1179		alu.src[0].sel = ctx->temp_reg;
1180		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1181		if (i == lasti)
1182			alu.last = 1;
1183		r = r600_bc_add_alu(ctx->bc, &alu);
1184		if (r)
1185			return r;
1186	}
1187	return 0;
1188}
1189
1190static int tgsi_scs(struct r600_shader_ctx *ctx)
1191{
1192	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1193	struct r600_bc_alu alu;
1194	int r;
1195
1196	/* We'll only need the trig stuff if we are going to write to the
1197	 * X or Y components of the destination vector.
1198	 */
1199	if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1200		r = tgsi_setup_trig(ctx);
1201		if (r)
1202			return r;
1203	}
1204
1205	/* dst.x = COS */
1206	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1207		memset(&alu, 0, sizeof(struct r600_bc_alu));
1208		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1209		tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1210
1211		alu.src[0].sel = ctx->temp_reg;
1212		alu.src[0].chan = 0;
1213		alu.last = 1;
1214		r = r600_bc_add_alu(ctx->bc, &alu);
1215		if (r)
1216			return r;
1217	}
1218
1219	/* dst.y = SIN */
1220	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1221		memset(&alu, 0, sizeof(struct r600_bc_alu));
1222		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1223		tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1224
1225		alu.src[0].sel = ctx->temp_reg;
1226		alu.src[0].chan = 0;
1227		alu.last = 1;
1228		r = r600_bc_add_alu(ctx->bc, &alu);
1229		if (r)
1230			return r;
1231	}
1232
1233	/* dst.z = 0.0; */
1234	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1235		memset(&alu, 0, sizeof(struct r600_bc_alu));
1236
1237		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1238
1239		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1240
1241		alu.src[0].sel = V_SQ_ALU_SRC_0;
1242		alu.src[0].chan = 0;
1243
1244		alu.last = 1;
1245
1246		r = r600_bc_add_alu(ctx->bc, &alu);
1247		if (r)
1248			return r;
1249	}
1250
1251	/* dst.w = 1.0; */
1252	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1253		memset(&alu, 0, sizeof(struct r600_bc_alu));
1254
1255		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1256
1257		tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1258
1259		alu.src[0].sel = V_SQ_ALU_SRC_1;
1260		alu.src[0].chan = 0;
1261
1262		alu.last = 1;
1263
1264		r = r600_bc_add_alu(ctx->bc, &alu);
1265		if (r)
1266			return r;
1267	}
1268
1269	return 0;
1270}
1271
1272static int tgsi_kill(struct r600_shader_ctx *ctx)
1273{
1274	struct r600_bc_alu alu;
1275	int i, r;
1276
1277	for (i = 0; i < 4; i++) {
1278		memset(&alu, 0, sizeof(struct r600_bc_alu));
1279		alu.inst = ctx->inst_info->r600_opcode;
1280
1281		alu.dst.chan = i;
1282
1283		alu.src[0].sel = V_SQ_ALU_SRC_0;
1284
1285		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1286			alu.src[1].sel = V_SQ_ALU_SRC_1;
1287			alu.src[1].neg = 1;
1288		} else {
1289			r600_bc_src(&alu.src[1], &ctx->src[0], i);
1290		}
1291		if (i == 3) {
1292			alu.last = 1;
1293		}
1294		r = r600_bc_add_alu(ctx->bc, &alu);
1295		if (r)
1296			return r;
1297	}
1298
1299	/* kill must be last in ALU */
1300	ctx->bc->force_add_cf = 1;
1301	ctx->shader->uses_kill = TRUE;
1302	return 0;
1303}
1304
1305static int tgsi_lit(struct r600_shader_ctx *ctx)
1306{
1307	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1308	struct r600_bc_alu alu;
1309	int r;
1310
1311	/* dst.x, <- 1.0  */
1312	memset(&alu, 0, sizeof(struct r600_bc_alu));
1313	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1314	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
1315	alu.src[0].chan = 0;
1316	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1317	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1318	r = r600_bc_add_alu(ctx->bc, &alu);
1319	if (r)
1320		return r;
1321
1322	/* dst.y = max(src.x, 0.0) */
1323	memset(&alu, 0, sizeof(struct r600_bc_alu));
1324	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1325	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1326	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1327	alu.src[1].chan = 0;
1328	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1329	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1330	r = r600_bc_add_alu(ctx->bc, &alu);
1331	if (r)
1332		return r;
1333
1334	/* dst.w, <- 1.0  */
1335	memset(&alu, 0, sizeof(struct r600_bc_alu));
1336	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1337	alu.src[0].sel  = V_SQ_ALU_SRC_1;
1338	alu.src[0].chan = 0;
1339	tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1340	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1341	alu.last = 1;
1342	r = r600_bc_add_alu(ctx->bc, &alu);
1343	if (r)
1344		return r;
1345
1346	if (inst->Dst[0].Register.WriteMask & (1 << 2))
1347	{
1348		int chan;
1349		int sel;
1350
1351		/* dst.z = log(src.y) */
1352		memset(&alu, 0, sizeof(struct r600_bc_alu));
1353		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1354		r600_bc_src(&alu.src[0], &ctx->src[0], 1);
1355		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1356		alu.last = 1;
1357		r = r600_bc_add_alu(ctx->bc, &alu);
1358		if (r)
1359			return r;
1360
1361		chan = alu.dst.chan;
1362		sel = alu.dst.sel;
1363
1364		/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1365		memset(&alu, 0, sizeof(struct r600_bc_alu));
1366		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1367		r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1368		alu.src[1].sel  = sel;
1369		alu.src[1].chan = chan;
1370
1371		r600_bc_src(&alu.src[2], &ctx->src[0], 0);
1372		alu.dst.sel = ctx->temp_reg;
1373		alu.dst.chan = 0;
1374		alu.dst.write = 1;
1375		alu.is_op3 = 1;
1376		alu.last = 1;
1377		r = r600_bc_add_alu(ctx->bc, &alu);
1378		if (r)
1379			return r;
1380
1381		/* dst.z = exp(tmp.x) */
1382		memset(&alu, 0, sizeof(struct r600_bc_alu));
1383		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1384		alu.src[0].sel = ctx->temp_reg;
1385		alu.src[0].chan = 0;
1386		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1387		alu.last = 1;
1388		r = r600_bc_add_alu(ctx->bc, &alu);
1389		if (r)
1390			return r;
1391	}
1392	return 0;
1393}
1394
1395static int tgsi_rsq(struct r600_shader_ctx *ctx)
1396{
1397	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1398	struct r600_bc_alu alu;
1399	int i, r;
1400
1401	memset(&alu, 0, sizeof(struct r600_bc_alu));
1402
1403	/* FIXME:
1404	 * For state trackers other than OpenGL, we'll want to use
1405	 * _RECIPSQRT_IEEE instead.
1406	 */
1407	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1408
1409	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1410		r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1411		alu.src[i].abs = 1;
1412	}
1413	alu.dst.sel = ctx->temp_reg;
1414	alu.dst.write = 1;
1415	alu.last = 1;
1416	r = r600_bc_add_alu(ctx->bc, &alu);
1417	if (r)
1418		return r;
1419	/* replicate result */
1420	return tgsi_helper_tempx_replicate(ctx);
1421}
1422
1423static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1424{
1425	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1426	struct r600_bc_alu alu;
1427	int i, r;
1428
1429	for (i = 0; i < 4; i++) {
1430		memset(&alu, 0, sizeof(struct r600_bc_alu));
1431		alu.src[0].sel = ctx->temp_reg;
1432		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1433		alu.dst.chan = i;
1434		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1435		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1436		if (i == 3)
1437			alu.last = 1;
1438		r = r600_bc_add_alu(ctx->bc, &alu);
1439		if (r)
1440			return r;
1441	}
1442	return 0;
1443}
1444
1445static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1446{
1447	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1448	struct r600_bc_alu alu;
1449	int i, r;
1450
1451	memset(&alu, 0, sizeof(struct r600_bc_alu));
1452	alu.inst = ctx->inst_info->r600_opcode;
1453	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1454		r600_bc_src(&alu.src[i], &ctx->src[i], 0);
1455	}
1456	alu.dst.sel = ctx->temp_reg;
1457	alu.dst.write = 1;
1458	alu.last = 1;
1459	r = r600_bc_add_alu(ctx->bc, &alu);
1460	if (r)
1461		return r;
1462	/* replicate result */
1463	return tgsi_helper_tempx_replicate(ctx);
1464}
1465
1466static int tgsi_pow(struct r600_shader_ctx *ctx)
1467{
1468	struct r600_bc_alu alu;
1469	int r;
1470
1471	/* LOG2(a) */
1472	memset(&alu, 0, sizeof(struct r600_bc_alu));
1473	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1474	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
1475	alu.dst.sel = ctx->temp_reg;
1476	alu.dst.write = 1;
1477	alu.last = 1;
1478	r = r600_bc_add_alu(ctx->bc, &alu);
1479	if (r)
1480		return r;
1481	/* b * LOG2(a) */
1482	memset(&alu, 0, sizeof(struct r600_bc_alu));
1483	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1484	r600_bc_src(&alu.src[0], &ctx->src[1], 0);
1485	alu.src[1].sel = ctx->temp_reg;
1486	alu.dst.sel = ctx->temp_reg;
1487	alu.dst.write = 1;
1488	alu.last = 1;
1489	r = r600_bc_add_alu(ctx->bc, &alu);
1490	if (r)
1491		return r;
1492	/* POW(a,b) = EXP2(b * LOG2(a))*/
1493	memset(&alu, 0, sizeof(struct r600_bc_alu));
1494	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1495	alu.src[0].sel = ctx->temp_reg;
1496	alu.dst.sel = ctx->temp_reg;
1497	alu.dst.write = 1;
1498	alu.last = 1;
1499	r = r600_bc_add_alu(ctx->bc, &alu);
1500	if (r)
1501		return r;
1502	return tgsi_helper_tempx_replicate(ctx);
1503}
1504
1505static int tgsi_ssg(struct r600_shader_ctx *ctx)
1506{
1507	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1508	struct r600_bc_alu alu;
1509	int i, r;
1510
1511	/* tmp = (src > 0 ? 1 : src) */
1512	for (i = 0; i < 4; i++) {
1513		memset(&alu, 0, sizeof(struct r600_bc_alu));
1514		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1515		alu.is_op3 = 1;
1516
1517		alu.dst.sel = ctx->temp_reg;
1518		alu.dst.chan = i;
1519
1520		r600_bc_src(&alu.src[0], &ctx->src[0], i);
1521		alu.src[1].sel = V_SQ_ALU_SRC_1;
1522		r600_bc_src(&alu.src[2], &ctx->src[0], i);
1523
1524		if (i == 3)
1525			alu.last = 1;
1526		r = r600_bc_add_alu(ctx->bc, &alu);
1527		if (r)
1528			return r;
1529	}
1530
1531	/* dst = (-tmp > 0 ? -1 : tmp) */
1532	for (i = 0; i < 4; i++) {
1533		memset(&alu, 0, sizeof(struct r600_bc_alu));
1534		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1535		alu.is_op3 = 1;
1536		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1537
1538		alu.src[0].sel = ctx->temp_reg;
1539		alu.src[0].chan = i;
1540		alu.src[0].neg = 1;
1541
1542		alu.src[1].sel = V_SQ_ALU_SRC_1;
1543		alu.src[1].neg = 1;
1544
1545		alu.src[2].sel = ctx->temp_reg;
1546		alu.src[2].chan = i;
1547
1548		if (i == 3)
1549			alu.last = 1;
1550		r = r600_bc_add_alu(ctx->bc, &alu);
1551		if (r)
1552			return r;
1553	}
1554	return 0;
1555}
1556
1557static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1558{
1559	struct r600_bc_alu alu;
1560	int i, r;
1561
1562	for (i = 0; i < 4; i++) {
1563		memset(&alu, 0, sizeof(struct r600_bc_alu));
1564		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1565			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1566			alu.dst.chan = i;
1567		} else {
1568			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1569			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1570			alu.src[0].sel = ctx->temp_reg;
1571			alu.src[0].chan = i;
1572		}
1573		if (i == 3) {
1574			alu.last = 1;
1575		}
1576		r = r600_bc_add_alu(ctx->bc, &alu);
1577		if (r)
1578			return r;
1579	}
1580	return 0;
1581}
1582
1583static int tgsi_op3(struct r600_shader_ctx *ctx)
1584{
1585	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1586	struct r600_bc_alu alu;
1587	int i, j, r;
1588	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1589
1590	for (i = 0; i < lasti + 1; i++) {
1591		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1592			continue;
1593
1594		memset(&alu, 0, sizeof(struct r600_bc_alu));
1595		alu.inst = ctx->inst_info->r600_opcode;
1596		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1597			r600_bc_src(&alu.src[j], &ctx->src[j], i);
1598		}
1599
1600		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1601		alu.dst.chan = i;
1602		alu.dst.write = 1;
1603		alu.is_op3 = 1;
1604		if (i == lasti) {
1605			alu.last = 1;
1606		}
1607		r = r600_bc_add_alu(ctx->bc, &alu);
1608		if (r)
1609			return r;
1610	}
1611	return 0;
1612}
1613
1614static int tgsi_dp(struct r600_shader_ctx *ctx)
1615{
1616	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1617	struct r600_bc_alu alu;
1618	int i, j, r;
1619
1620	for (i = 0; i < 4; i++) {
1621		memset(&alu, 0, sizeof(struct r600_bc_alu));
1622		alu.inst = ctx->inst_info->r600_opcode;
1623		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1624			r600_bc_src(&alu.src[j], &ctx->src[j], i);
1625		}
1626
1627		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1628		alu.dst.chan = i;
1629		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1630		/* handle some special cases */
1631		switch (ctx->inst_info->tgsi_opcode) {
1632		case TGSI_OPCODE_DP2:
1633			if (i > 1) {
1634				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1635				alu.src[0].chan = alu.src[1].chan = 0;
1636			}
1637			break;
1638		case TGSI_OPCODE_DP3:
1639			if (i > 2) {
1640				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1641				alu.src[0].chan = alu.src[1].chan = 0;
1642			}
1643			break;
1644		case TGSI_OPCODE_DPH:
1645			if (i == 3) {
1646				alu.src[0].sel = V_SQ_ALU_SRC_1;
1647				alu.src[0].chan = 0;
1648				alu.src[0].neg = 0;
1649			}
1650			break;
1651		default:
1652			break;
1653		}
1654		if (i == 3) {
1655			alu.last = 1;
1656		}
1657		r = r600_bc_add_alu(ctx->bc, &alu);
1658		if (r)
1659			return r;
1660	}
1661	return 0;
1662}
1663
1664static int tgsi_tex(struct r600_shader_ctx *ctx)
1665{
1666	static float one_point_five = 1.5f;
1667	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1668	struct r600_bc_tex tex;
1669	struct r600_bc_alu alu;
1670	unsigned src_gpr;
1671	int r, i;
1672	int opcode;
1673	boolean src_not_temp =
1674		inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&
1675		inst->Src[0].Register.File != TGSI_FILE_INPUT;
1676
1677	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1678
1679	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1680		/* Add perspective divide */
1681		memset(&alu, 0, sizeof(struct r600_bc_alu));
1682		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1683		r600_bc_src(&alu.src[0], &ctx->src[0], 3);
1684
1685		alu.dst.sel = ctx->temp_reg;
1686		alu.dst.chan = 3;
1687		alu.last = 1;
1688		alu.dst.write = 1;
1689		r = r600_bc_add_alu(ctx->bc, &alu);
1690		if (r)
1691			return r;
1692
1693		for (i = 0; i < 3; i++) {
1694			memset(&alu, 0, sizeof(struct r600_bc_alu));
1695			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1696			alu.src[0].sel = ctx->temp_reg;
1697			alu.src[0].chan = 3;
1698			r600_bc_src(&alu.src[1], &ctx->src[0], i);
1699			alu.dst.sel = ctx->temp_reg;
1700			alu.dst.chan = i;
1701			alu.dst.write = 1;
1702			r = r600_bc_add_alu(ctx->bc, &alu);
1703			if (r)
1704				return r;
1705		}
1706		memset(&alu, 0, sizeof(struct r600_bc_alu));
1707		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1708		alu.src[0].sel = V_SQ_ALU_SRC_1;
1709		alu.src[0].chan = 0;
1710		alu.dst.sel = ctx->temp_reg;
1711		alu.dst.chan = 3;
1712		alu.last = 1;
1713		alu.dst.write = 1;
1714		r = r600_bc_add_alu(ctx->bc, &alu);
1715		if (r)
1716			return r;
1717		src_not_temp = FALSE;
1718		src_gpr = ctx->temp_reg;
1719	}
1720
1721	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1722		static const unsigned src0_swizzle[] = {2, 2, 0, 1};
1723		static const unsigned src1_swizzle[] = {1, 0, 2, 2};
1724
1725		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1726		for (i = 0; i < 4; i++) {
1727			memset(&alu, 0, sizeof(struct r600_bc_alu));
1728			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1729			r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
1730			r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
1731			alu.dst.sel = ctx->temp_reg;
1732			alu.dst.chan = i;
1733			if (i == 3)
1734				alu.last = 1;
1735			alu.dst.write = 1;
1736			r = r600_bc_add_alu(ctx->bc, &alu);
1737			if (r)
1738				return r;
1739		}
1740
1741		/* tmp1.z = RCP_e(|tmp1.z|) */
1742		memset(&alu, 0, sizeof(struct r600_bc_alu));
1743		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1744		alu.src[0].sel = ctx->temp_reg;
1745		alu.src[0].chan = 2;
1746		alu.src[0].abs = 1;
1747		alu.dst.sel = ctx->temp_reg;
1748		alu.dst.chan = 2;
1749		alu.dst.write = 1;
1750		alu.last = 1;
1751		r = r600_bc_add_alu(ctx->bc, &alu);
1752		if (r)
1753			return r;
1754
1755		/* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
1756		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
1757		 * muladd has no writemask, have to use another temp
1758		 */
1759		memset(&alu, 0, sizeof(struct r600_bc_alu));
1760		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1761		alu.is_op3 = 1;
1762
1763		alu.src[0].sel = ctx->temp_reg;
1764		alu.src[0].chan = 0;
1765		alu.src[1].sel = ctx->temp_reg;
1766		alu.src[1].chan = 2;
1767
1768		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1769		alu.src[2].chan = 0;
1770		alu.src[2].value = *(uint32_t *)&one_point_five;
1771
1772		alu.dst.sel = ctx->temp_reg;
1773		alu.dst.chan = 0;
1774		alu.dst.write = 1;
1775
1776		r = r600_bc_add_alu(ctx->bc, &alu);
1777		if (r)
1778			return r;
1779
1780		memset(&alu, 0, sizeof(struct r600_bc_alu));
1781		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1782		alu.is_op3 = 1;
1783
1784		alu.src[0].sel = ctx->temp_reg;
1785		alu.src[0].chan = 1;
1786		alu.src[1].sel = ctx->temp_reg;
1787		alu.src[1].chan = 2;
1788
1789		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1790		alu.src[2].chan = 0;
1791		alu.src[2].value = *(uint32_t *)&one_point_five;
1792
1793		alu.dst.sel = ctx->temp_reg;
1794		alu.dst.chan = 1;
1795		alu.dst.write = 1;
1796
1797		alu.last = 1;
1798		r = r600_bc_add_alu(ctx->bc, &alu);
1799		if (r)
1800			return r;
1801
1802		src_not_temp = FALSE;
1803		src_gpr = ctx->temp_reg;
1804	}
1805
1806	if (src_not_temp) {
1807		for (i = 0; i < 4; i++) {
1808			memset(&alu, 0, sizeof(struct r600_bc_alu));
1809			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1810			r600_bc_src(&alu.src[0], &ctx->src[0], i);
1811			alu.dst.sel = ctx->temp_reg;
1812			alu.dst.chan = i;
1813			if (i == 3)
1814				alu.last = 1;
1815			alu.dst.write = 1;
1816			r = r600_bc_add_alu(ctx->bc, &alu);
1817			if (r)
1818				return r;
1819		}
1820		src_gpr = ctx->temp_reg;
1821	}
1822
1823	opcode = ctx->inst_info->r600_opcode;
1824	if (opcode == SQ_TEX_INST_SAMPLE &&
1825	    (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
1826		opcode = SQ_TEX_INST_SAMPLE_C;
1827
1828	memset(&tex, 0, sizeof(struct r600_bc_tex));
1829	tex.inst = opcode;
1830	tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1831	tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
1832	tex.src_gpr = src_gpr;
1833	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1834	tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
1835	tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
1836	tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
1837	tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
1838	tex.src_sel_x = 0;
1839	tex.src_sel_y = 1;
1840	tex.src_sel_z = 2;
1841	tex.src_sel_w = 3;
1842
1843	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1844		tex.src_sel_x = 1;
1845		tex.src_sel_y = 0;
1846		tex.src_sel_z = 3;
1847		tex.src_sel_w = 1;
1848	}
1849
1850	if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1851		tex.coord_type_x = 1;
1852		tex.coord_type_y = 1;
1853		tex.coord_type_z = 1;
1854		tex.coord_type_w = 1;
1855	}
1856
1857	if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) {
1858		tex.coord_type_z = 0;
1859		tex.src_sel_z = 1;
1860	} else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY)
1861		tex.coord_type_z = 0;
1862
1863	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
1864		tex.src_sel_w = 2;
1865
1866	r = r600_bc_add_tex(ctx->bc, &tex);
1867	if (r)
1868		return r;
1869
1870	/* add shadow ambient support  - gallium doesn't do it yet */
1871	return 0;
1872}
1873
1874static int tgsi_lrp(struct r600_shader_ctx *ctx)
1875{
1876	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1877	struct r600_bc_alu alu;
1878	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1879	unsigned i;
1880	int r;
1881
1882	/* optimize if it's just an equal balance */
1883	if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
1884		for (i = 0; i < lasti + 1; i++) {
1885			if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1886				continue;
1887
1888			memset(&alu, 0, sizeof(struct r600_bc_alu));
1889			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1890			r600_bc_src(&alu.src[0], &ctx->src[1], i);
1891			r600_bc_src(&alu.src[1], &ctx->src[2], i);
1892			alu.omod = 3;
1893			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1894			alu.dst.chan = i;
1895			if (i == lasti) {
1896				alu.last = 1;
1897			}
1898			r = r600_bc_add_alu(ctx->bc, &alu);
1899			if (r)
1900				return r;
1901		}
1902		return 0;
1903	}
1904
1905	/* 1 - src0 */
1906	for (i = 0; i < lasti + 1; i++) {
1907		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1908			continue;
1909
1910		memset(&alu, 0, sizeof(struct r600_bc_alu));
1911		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1912		alu.src[0].sel = V_SQ_ALU_SRC_1;
1913		alu.src[0].chan = 0;
1914		r600_bc_src(&alu.src[1], &ctx->src[0], i);
1915		alu.src[1].neg = 1;
1916		alu.dst.sel = ctx->temp_reg;
1917		alu.dst.chan = i;
1918		if (i == lasti) {
1919			alu.last = 1;
1920		}
1921		alu.dst.write = 1;
1922		r = r600_bc_add_alu(ctx->bc, &alu);
1923		if (r)
1924			return r;
1925	}
1926
1927	/* (1 - src0) * src2 */
1928	for (i = 0; i < lasti + 1; i++) {
1929		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1930			continue;
1931
1932		memset(&alu, 0, sizeof(struct r600_bc_alu));
1933		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1934		alu.src[0].sel = ctx->temp_reg;
1935		alu.src[0].chan = i;
1936		r600_bc_src(&alu.src[1], &ctx->src[2], i);
1937		alu.dst.sel = ctx->temp_reg;
1938		alu.dst.chan = i;
1939		if (i == lasti) {
1940			alu.last = 1;
1941		}
1942		alu.dst.write = 1;
1943		r = r600_bc_add_alu(ctx->bc, &alu);
1944		if (r)
1945			return r;
1946	}
1947
1948	/* src0 * src1 + (1 - src0) * src2 */
1949	for (i = 0; i < lasti + 1; i++) {
1950		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1951			continue;
1952
1953		memset(&alu, 0, sizeof(struct r600_bc_alu));
1954		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1955		alu.is_op3 = 1;
1956		r600_bc_src(&alu.src[0], &ctx->src[0], i);
1957		r600_bc_src(&alu.src[1], &ctx->src[1], i);
1958		alu.src[2].sel = ctx->temp_reg;
1959		alu.src[2].chan = i;
1960
1961		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1962		alu.dst.chan = i;
1963		if (i == lasti) {
1964			alu.last = 1;
1965		}
1966		r = r600_bc_add_alu(ctx->bc, &alu);
1967		if (r)
1968			return r;
1969	}
1970	return 0;
1971}
1972
1973static int tgsi_cmp(struct r600_shader_ctx *ctx)
1974{
1975	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1976	struct r600_bc_alu alu;
1977	int i, r;
1978	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1979
1980	for (i = 0; i < lasti + 1; i++) {
1981		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1982			continue;
1983
1984		memset(&alu, 0, sizeof(struct r600_bc_alu));
1985		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
1986		r600_bc_src(&alu.src[0], &ctx->src[0], i);
1987		r600_bc_src(&alu.src[1], &ctx->src[2], i);
1988		r600_bc_src(&alu.src[2], &ctx->src[1], i);
1989		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1990		alu.dst.chan = i;
1991		alu.dst.write = 1;
1992		alu.is_op3 = 1;
1993		if (i == lasti)
1994			alu.last = 1;
1995		r = r600_bc_add_alu(ctx->bc, &alu);
1996		if (r)
1997			return r;
1998	}
1999	return 0;
2000}
2001
2002static int tgsi_xpd(struct r600_shader_ctx *ctx)
2003{
2004	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2005	static const unsigned int src0_swizzle[] = {2, 0, 1};
2006	static const unsigned int src1_swizzle[] = {1, 2, 0};
2007	struct r600_bc_alu alu;
2008	uint32_t use_temp = 0;
2009	int i, r;
2010
2011	if (inst->Dst[0].Register.WriteMask != 0xf)
2012		use_temp = 1;
2013
2014	for (i = 0; i < 4; i++) {
2015		memset(&alu, 0, sizeof(struct r600_bc_alu));
2016		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2017		if (i < 3) {
2018			r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
2019			r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
2020		} else {
2021			alu.src[0].sel = V_SQ_ALU_SRC_0;
2022			alu.src[0].chan = i;
2023			alu.src[1].sel = V_SQ_ALU_SRC_0;
2024			alu.src[1].chan = i;
2025		}
2026
2027		alu.dst.sel = ctx->temp_reg;
2028		alu.dst.chan = i;
2029		alu.dst.write = 1;
2030
2031		if (i == 3)
2032			alu.last = 1;
2033		r = r600_bc_add_alu(ctx->bc, &alu);
2034		if (r)
2035			return r;
2036	}
2037
2038	for (i = 0; i < 4; i++) {
2039		memset(&alu, 0, sizeof(struct r600_bc_alu));
2040		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2041
2042		if (i < 3) {
2043			r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
2044			r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
2045		} else {
2046			alu.src[0].sel = V_SQ_ALU_SRC_0;
2047			alu.src[0].chan = i;
2048			alu.src[1].sel = V_SQ_ALU_SRC_0;
2049			alu.src[1].chan = i;
2050		}
2051
2052		alu.src[2].sel = ctx->temp_reg;
2053		alu.src[2].neg = 1;
2054		alu.src[2].chan = i;
2055
2056		if (use_temp)
2057			alu.dst.sel = ctx->temp_reg;
2058		else
2059			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2060		alu.dst.chan = i;
2061		alu.dst.write = 1;
2062		alu.is_op3 = 1;
2063		if (i == 3)
2064			alu.last = 1;
2065		r = r600_bc_add_alu(ctx->bc, &alu);
2066		if (r)
2067			return r;
2068	}
2069	if (use_temp)
2070		return tgsi_helper_copy(ctx, inst);
2071	return 0;
2072}
2073
2074static int tgsi_exp(struct r600_shader_ctx *ctx)
2075{
2076	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2077	struct r600_bc_alu alu;
2078	int r;
2079
2080	/* result.x = 2^floor(src); */
2081	if (inst->Dst[0].Register.WriteMask & 1) {
2082		memset(&alu, 0, sizeof(struct r600_bc_alu));
2083
2084		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2085		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2086
2087		alu.dst.sel = ctx->temp_reg;
2088		alu.dst.chan = 0;
2089		alu.dst.write = 1;
2090		alu.last = 1;
2091		r = r600_bc_add_alu(ctx->bc, &alu);
2092		if (r)
2093			return r;
2094
2095		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2096		alu.src[0].sel = ctx->temp_reg;
2097		alu.src[0].chan = 0;
2098
2099		alu.dst.sel = ctx->temp_reg;
2100		alu.dst.chan = 0;
2101		alu.dst.write = 1;
2102		alu.last = 1;
2103		r = r600_bc_add_alu(ctx->bc, &alu);
2104		if (r)
2105			return r;
2106	}
2107
2108	/* result.y = tmp - floor(tmp); */
2109	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2110		memset(&alu, 0, sizeof(struct r600_bc_alu));
2111
2112		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2113		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2114
2115		alu.dst.sel = ctx->temp_reg;
2116//		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2117//		if (r)
2118//			return r;
2119		alu.dst.write = 1;
2120		alu.dst.chan = 1;
2121
2122		alu.last = 1;
2123
2124		r = r600_bc_add_alu(ctx->bc, &alu);
2125		if (r)
2126			return r;
2127	}
2128
2129	/* result.z = RoughApprox2ToX(tmp);*/
2130	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2131		memset(&alu, 0, sizeof(struct r600_bc_alu));
2132		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2133		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2134
2135		alu.dst.sel = ctx->temp_reg;
2136		alu.dst.write = 1;
2137		alu.dst.chan = 2;
2138
2139		alu.last = 1;
2140
2141		r = r600_bc_add_alu(ctx->bc, &alu);
2142		if (r)
2143			return r;
2144	}
2145
2146	/* result.w = 1.0;*/
2147	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2148		memset(&alu, 0, sizeof(struct r600_bc_alu));
2149
2150		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2151		alu.src[0].sel = V_SQ_ALU_SRC_1;
2152		alu.src[0].chan = 0;
2153
2154		alu.dst.sel = ctx->temp_reg;
2155		alu.dst.chan = 3;
2156		alu.dst.write = 1;
2157		alu.last = 1;
2158		r = r600_bc_add_alu(ctx->bc, &alu);
2159		if (r)
2160			return r;
2161	}
2162	return tgsi_helper_copy(ctx, inst);
2163}
2164
2165static int tgsi_log(struct r600_shader_ctx *ctx)
2166{
2167	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2168	struct r600_bc_alu alu;
2169	int r;
2170
2171	/* result.x = floor(log2(src)); */
2172	if (inst->Dst[0].Register.WriteMask & 1) {
2173		memset(&alu, 0, sizeof(struct r600_bc_alu));
2174
2175		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2176		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2177
2178		alu.dst.sel = ctx->temp_reg;
2179		alu.dst.chan = 0;
2180		alu.dst.write = 1;
2181		alu.last = 1;
2182		r = r600_bc_add_alu(ctx->bc, &alu);
2183		if (r)
2184			return r;
2185
2186		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2187		alu.src[0].sel = ctx->temp_reg;
2188		alu.src[0].chan = 0;
2189
2190		alu.dst.sel = ctx->temp_reg;
2191		alu.dst.chan = 0;
2192		alu.dst.write = 1;
2193		alu.last = 1;
2194
2195		r = r600_bc_add_alu(ctx->bc, &alu);
2196		if (r)
2197			return r;
2198	}
2199
2200	/* result.y = src.x / (2 ^ floor(log2(src.x))); */
2201	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2202		memset(&alu, 0, sizeof(struct r600_bc_alu));
2203
2204		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2205		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2206
2207		alu.dst.sel = ctx->temp_reg;
2208		alu.dst.chan = 1;
2209		alu.dst.write = 1;
2210		alu.last = 1;
2211
2212		r = r600_bc_add_alu(ctx->bc, &alu);
2213		if (r)
2214			return r;
2215
2216		memset(&alu, 0, sizeof(struct r600_bc_alu));
2217
2218		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2219		alu.src[0].sel = ctx->temp_reg;
2220		alu.src[0].chan = 1;
2221
2222		alu.dst.sel = ctx->temp_reg;
2223		alu.dst.chan = 1;
2224		alu.dst.write = 1;
2225		alu.last = 1;
2226
2227		r = r600_bc_add_alu(ctx->bc, &alu);
2228		if (r)
2229			return r;
2230
2231		memset(&alu, 0, sizeof(struct r600_bc_alu));
2232
2233		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2234		alu.src[0].sel = ctx->temp_reg;
2235		alu.src[0].chan = 1;
2236
2237		alu.dst.sel = ctx->temp_reg;
2238		alu.dst.chan = 1;
2239		alu.dst.write = 1;
2240		alu.last = 1;
2241
2242		r = r600_bc_add_alu(ctx->bc, &alu);
2243		if (r)
2244			return r;
2245
2246		memset(&alu, 0, sizeof(struct r600_bc_alu));
2247
2248		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2249		alu.src[0].sel = ctx->temp_reg;
2250		alu.src[0].chan = 1;
2251
2252		alu.dst.sel = ctx->temp_reg;
2253		alu.dst.chan = 1;
2254		alu.dst.write = 1;
2255		alu.last = 1;
2256
2257		r = r600_bc_add_alu(ctx->bc, &alu);
2258		if (r)
2259			return r;
2260
2261		memset(&alu, 0, sizeof(struct r600_bc_alu));
2262
2263		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2264
2265		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2266
2267		alu.src[1].sel = ctx->temp_reg;
2268		alu.src[1].chan = 1;
2269
2270		alu.dst.sel = ctx->temp_reg;
2271		alu.dst.chan = 1;
2272		alu.dst.write = 1;
2273		alu.last = 1;
2274
2275		r = r600_bc_add_alu(ctx->bc, &alu);
2276		if (r)
2277			return r;
2278	}
2279
2280	/* result.z = log2(src);*/
2281	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2282		memset(&alu, 0, sizeof(struct r600_bc_alu));
2283
2284		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2285		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2286
2287		alu.dst.sel = ctx->temp_reg;
2288		alu.dst.write = 1;
2289		alu.dst.chan = 2;
2290		alu.last = 1;
2291
2292		r = r600_bc_add_alu(ctx->bc, &alu);
2293		if (r)
2294			return r;
2295	}
2296
2297	/* result.w = 1.0; */
2298	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2299		memset(&alu, 0, sizeof(struct r600_bc_alu));
2300
2301		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2302		alu.src[0].sel = V_SQ_ALU_SRC_1;
2303		alu.src[0].chan = 0;
2304
2305		alu.dst.sel = ctx->temp_reg;
2306		alu.dst.chan = 3;
2307		alu.dst.write = 1;
2308		alu.last = 1;
2309
2310		r = r600_bc_add_alu(ctx->bc, &alu);
2311		if (r)
2312			return r;
2313	}
2314
2315	return tgsi_helper_copy(ctx, inst);
2316}
2317
2318static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2319{
2320	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2321	struct r600_bc_alu alu;
2322	int r;
2323
2324	memset(&alu, 0, sizeof(struct r600_bc_alu));
2325
2326	switch (inst->Instruction.Opcode) {
2327	case TGSI_OPCODE_ARL:
2328		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2329		break;
2330	case TGSI_OPCODE_ARR:
2331		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2332		break;
2333	default:
2334		assert(0);
2335		return -1;
2336	}
2337
2338	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2339	alu.last = 1;
2340	alu.dst.sel = ctx->ar_reg;
2341	alu.dst.write = 1;
2342	r = r600_bc_add_alu(ctx->bc, &alu);
2343	if (r)
2344		return r;
2345
2346	/* TODO: Note that the MOVA can be avoided if we never use AR for
2347	 * indexing non-CB registers in the current ALU clause. Similarly, we
2348	 * need to load AR from ar_reg again if we started a new clause
2349	 * between ARL and AR usage. The easy way to do that is to remove
2350	 * the MOVA here, and load it for the first AR access after ar_reg
2351	 * has been modified in each clause. */
2352	memset(&alu, 0, sizeof(struct r600_bc_alu));
2353	alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2354	alu.src[0].sel = ctx->ar_reg;
2355	alu.src[0].chan = 0;
2356	alu.last = 1;
2357	r = r600_bc_add_alu(ctx->bc, &alu);
2358	if (r)
2359		return r;
2360	return 0;
2361}
2362static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2363{
2364	/* TODO from r600c, ar values don't persist between clauses */
2365	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2366	struct r600_bc_alu alu;
2367	int r;
2368
2369	switch (inst->Instruction.Opcode) {
2370	case TGSI_OPCODE_ARL:
2371		memset(&alu, 0, sizeof(alu));
2372		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
2373		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2374		alu.dst.sel = ctx->ar_reg;
2375		alu.dst.write = 1;
2376		alu.last = 1;
2377
2378		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2379			return r;
2380
2381		memset(&alu, 0, sizeof(alu));
2382		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2383		alu.src[0].sel = ctx->ar_reg;
2384		alu.dst.sel = ctx->ar_reg;
2385		alu.dst.write = 1;
2386		alu.last = 1;
2387
2388		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2389			return r;
2390		break;
2391	case TGSI_OPCODE_ARR:
2392		memset(&alu, 0, sizeof(alu));
2393		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2394		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2395		alu.dst.sel = ctx->ar_reg;
2396		alu.dst.write = 1;
2397		alu.last = 1;
2398
2399		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
2400			return r;
2401		break;
2402	default:
2403		assert(0);
2404		return -1;
2405	}
2406
2407	memset(&alu, 0, sizeof(alu));
2408	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2409	alu.src[0].sel = ctx->ar_reg;
2410	alu.last = 1;
2411
2412	r = r600_bc_add_alu(ctx->bc, &alu);
2413	if (r)
2414		return r;
2415	ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2416	return 0;
2417}
2418
2419static int tgsi_opdst(struct r600_shader_ctx *ctx)
2420{
2421	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2422	struct r600_bc_alu alu;
2423	int i, r = 0;
2424
2425	for (i = 0; i < 4; i++) {
2426		memset(&alu, 0, sizeof(struct r600_bc_alu));
2427
2428		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2429		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2430
2431		if (i == 0 || i == 3) {
2432			alu.src[0].sel = V_SQ_ALU_SRC_1;
2433		} else {
2434			r600_bc_src(&alu.src[0], &ctx->src[0], i);
2435		}
2436
2437		if (i == 0 || i == 2) {
2438			alu.src[1].sel = V_SQ_ALU_SRC_1;
2439		} else {
2440			r600_bc_src(&alu.src[1], &ctx->src[1], i);
2441		}
2442		if (i == 3)
2443			alu.last = 1;
2444		r = r600_bc_add_alu(ctx->bc, &alu);
2445		if (r)
2446			return r;
2447	}
2448	return 0;
2449}
2450
2451static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2452{
2453	struct r600_bc_alu alu;
2454	int r;
2455
2456	memset(&alu, 0, sizeof(struct r600_bc_alu));
2457	alu.inst = opcode;
2458	alu.predicate = 1;
2459
2460	alu.dst.sel = ctx->temp_reg;
2461	alu.dst.write = 1;
2462	alu.dst.chan = 0;
2463
2464	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
2465	alu.src[1].sel = V_SQ_ALU_SRC_0;
2466	alu.src[1].chan = 0;
2467
2468	alu.last = 1;
2469
2470	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2471	if (r)
2472		return r;
2473	return 0;
2474}
2475
2476static int pops(struct r600_shader_ctx *ctx, int pops)
2477{
2478	int alu_pop = 3;
2479	if (ctx->bc->cf_last) {
2480		if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
2481			alu_pop = 0;
2482		else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
2483			alu_pop = 1;
2484	}
2485	alu_pop += pops;
2486	if (alu_pop == 1) {
2487		ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
2488		ctx->bc->force_add_cf = 1;
2489	} else if (alu_pop == 2) {
2490		ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
2491		ctx->bc->force_add_cf = 1;
2492	} else {
2493		r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2494		ctx->bc->cf_last->pop_count = pops;
2495		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
2496	}
2497	return 0;
2498}
2499
2500static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2501{
2502	switch(reason) {
2503	case FC_PUSH_VPM:
2504		ctx->bc->callstack[ctx->bc->call_sp].current--;
2505		break;
2506	case FC_PUSH_WQM:
2507	case FC_LOOP:
2508		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2509		break;
2510	case FC_REP:
2511		/* TOODO : for 16 vp asic should -= 2; */
2512		ctx->bc->callstack[ctx->bc->call_sp].current --;
2513		break;
2514	}
2515}
2516
2517static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2518{
2519	if (check_max_only) {
2520		int diff;
2521		switch (reason) {
2522		case FC_PUSH_VPM:
2523			diff = 1;
2524			break;
2525		case FC_PUSH_WQM:
2526			diff = 4;
2527			break;
2528		default:
2529			assert(0);
2530			diff = 0;
2531		}
2532		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2533		    ctx->bc->callstack[ctx->bc->call_sp].max) {
2534			ctx->bc->callstack[ctx->bc->call_sp].max =
2535				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2536		}
2537		return;
2538	}
2539	switch (reason) {
2540	case FC_PUSH_VPM:
2541		ctx->bc->callstack[ctx->bc->call_sp].current++;
2542		break;
2543	case FC_PUSH_WQM:
2544	case FC_LOOP:
2545		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2546		break;
2547	case FC_REP:
2548		ctx->bc->callstack[ctx->bc->call_sp].current++;
2549		break;
2550	}
2551
2552	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2553	    ctx->bc->callstack[ctx->bc->call_sp].max) {
2554		ctx->bc->callstack[ctx->bc->call_sp].max =
2555			ctx->bc->callstack[ctx->bc->call_sp].current;
2556	}
2557}
2558
2559static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2560{
2561	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2562
2563	sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2564						sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2565	sp->mid[sp->num_mid] = ctx->bc->cf_last;
2566	sp->num_mid++;
2567}
2568
2569static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2570{
2571	ctx->bc->fc_sp++;
2572	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2573	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2574}
2575
2576static void fc_poplevel(struct r600_shader_ctx *ctx)
2577{
2578	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2579	if (sp->mid) {
2580		free(sp->mid);
2581		sp->mid = NULL;
2582	}
2583	sp->num_mid = 0;
2584	sp->start = NULL;
2585	sp->type = 0;
2586	ctx->bc->fc_sp--;
2587}
2588
#if 0
/*
 * Everything up to the matching #endif is compiled out. These are
 * unfinished sketches for RETURN handling and flag-driven loop exits.
 */

/* Append a CF RETURN instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/* Append a CF JUMP with the given pop count; the target is not set yet. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Placeholder: emits nothing. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Placeholder: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/* Test the flag, jump, clear the flag, pop ifidx + 1 levels and return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/* Test the flag, then emit the current opcode's CF instruction and
 * register it as a "mid" of the loop at fc_sp before popping once. */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2636
2637static int tgsi_if(struct r600_shader_ctx *ctx)
2638{
2639	emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2640
2641	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2642
2643	fc_pushlevel(ctx, FC_IF);
2644
2645	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2646	return 0;
2647}
2648
2649static int tgsi_else(struct r600_shader_ctx *ctx)
2650{
2651	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2652	ctx->bc->cf_last->pop_count = 1;
2653
2654	fc_set_mid(ctx, ctx->bc->fc_sp);
2655	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2656	return 0;
2657}
2658
2659static int tgsi_endif(struct r600_shader_ctx *ctx)
2660{
2661	pops(ctx, 1);
2662	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2663		R600_ERR("if/endif unbalanced in shader\n");
2664		return -1;
2665	}
2666
2667	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2668		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2669		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2670	} else {
2671		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2672	}
2673	fc_poplevel(ctx);
2674
2675	callstack_decrease_current(ctx, FC_PUSH_VPM);
2676	return 0;
2677}
2678
2679static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2680{
2681	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2682
2683	fc_pushlevel(ctx, FC_LOOP);
2684
2685	/* check stack depth */
2686	callstack_check_depth(ctx, FC_LOOP, 0);
2687	return 0;
2688}
2689
2690static int tgsi_endloop(struct r600_shader_ctx *ctx)
2691{
2692	int i;
2693
2694	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
2695
2696	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2697		R600_ERR("loop/endloop in shader code are not paired.\n");
2698		return -EINVAL;
2699	}
2700
2701	/* fixup loop pointers - from r600isa
2702	   LOOP END points to CF after LOOP START,
2703	   LOOP START point to CF after LOOP END
2704	   BRK/CONT point to LOOP END CF
2705	*/
2706	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2707
2708	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2709
2710	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2711		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2712	}
2713	/* TODO add LOOPRET support */
2714	fc_poplevel(ctx);
2715	callstack_decrease_current(ctx, FC_LOOP);
2716	return 0;
2717}
2718
2719static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2720{
2721	unsigned int fscp;
2722
2723	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2724	{
2725		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2726			break;
2727	}
2728
2729	if (fscp == 0) {
2730		R600_ERR("Break not inside loop/endloop pair\n");
2731		return -EINVAL;
2732	}
2733
2734	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2735	ctx->bc->cf_last->pop_count = 1;
2736
2737	fc_set_mid(ctx, fscp);
2738
2739	pops(ctx, 1);
2740	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2741	return 0;
2742}
2743
2744static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
2745	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
2746	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2747	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2748
2749	/* FIXME:
2750	 * For state trackers other than OpenGL, we'll want to use
2751	 * _RECIP_IEEE instead.
2752	 */
2753	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
2754
2755	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
2756	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2757	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
2758	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2759	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2760	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2761	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2762	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2763	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2764	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2765	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2766	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2767	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2768	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2769	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2770	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2771	/* gap */
2772	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2773	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2774	/* gap */
2775	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2776	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2777	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2778	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2779	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2780	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2781	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2782	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2783	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2784	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2785	/* gap */
2786	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2787	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2788	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2789	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2790	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2791	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2792	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2793	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
2794	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2795	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2796	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2797	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2798	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2799	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2800	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2801	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2802	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2803	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2804	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2805	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2806	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2807	{TGSI_OPCODE_TXD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2808	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2809	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2810	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2811	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2812	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2813	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2814	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2815	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
2816	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2817	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2818	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2819	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2820	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2821	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
2822	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2823	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2824	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2825	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2826	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2827	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2828	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2829	/* gap */
2830	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2831	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2832	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2833	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2834	/* gap */
2835	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2836	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2837	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2838	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2839	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2840	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2841	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2842	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
2843	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2844	/* gap */
2845	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2846	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2847	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2848	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2849	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2850	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2851	{TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2852	{TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2853	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2854	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2855	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2856	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2857	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2858	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2859	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2860	/* gap */
2861	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2862	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2863	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2864	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2865	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2866	/* gap */
2867	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2868	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2869	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2870	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2871	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2872	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2873	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2874	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2875	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
2876	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
2877	/* gap */
2878	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2879	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2880	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2881	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2882	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2883	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2884	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2885	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2886	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2887	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2888	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2889	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2890	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2891	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2892	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2893	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2894	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2895	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2896	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2897	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2898	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2899	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2900	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2901	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2902	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2903	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2904	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2905	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2906};
2907
/*
 * TGSI-to-hardware translation table built from the EG_-prefixed
 * (presumably Evergreen) opcode constants.
 *
 * Each initializer holds four values, in order:
 *   1. the TGSI opcode, or a bare number where no TGSI_OPCODE_* name is
 *      used for that value (the rows under a "gap" comment);
 *   2. a flag that is 0 on every row except MAD, the only row whose
 *      hardware opcode comes from the OP3 set; presumably an
 *      "is three-operand" marker, TODO confirm against the struct
 *      definition;
 *   3. the hardware instruction constant (ALU, texture or control-flow)
 *      associated with the row;
 *   4. the handler function associated with the row.
 *
 * The numeric placeholders appear in ascending order between the named
 * rows and are all routed to tgsi_unsupported, presumably so that the
 * array can be indexed directly by TGSI opcode value. The lookup code is
 * outside this excerpt, so verify before reordering or removing rows.
 *
 * Every row handled by tgsi_unsupported carries the NOP hardware opcode
 * as a filler value.
 */
2908static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
2909	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
2910	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2911	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2912	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
2913	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
2914	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2915	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2916	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2917	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2918	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2919	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2920	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2921	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2922	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2923	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2924	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2925	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},  /* only row with flag 1 and an OP3 opcode */
2926	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},  /* same hw opcode as ADD; presumably tgsi_op2 negates an operand, TODO confirm */
2927	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2928	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2929	/* gap */
2930	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2931	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2932	/* gap */
2933	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2934	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2935	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2936	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2937	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2938	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2939	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2940	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2941	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2942	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2943	/* gap */
2944	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2945	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},  /* same hw opcode as MOV; presumably tgsi_op2 applies the abs modifier, TODO confirm */
2946	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2947	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2948	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2949	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2950	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2951	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
2952	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2953	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2954	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2955	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2956	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2957	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2958	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2959	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2960	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2961	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2962	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2963	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2964	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2965	{TGSI_OPCODE_TXD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2966	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2967	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2968	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2969	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2970	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2971	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2972	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2973	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
2974	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2975	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2976	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2977	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2978	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2979	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
2980	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},  /* NOTE(review): same hw opcode as the TXL row; confirm this is intended for the bias variant */
2981	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2982	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2983	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2984	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2985	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2986	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2987	/* gap */
2988	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2989	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2990	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2991	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2992	/* gap */
2993	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2994	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2995	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2996	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2997	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2998	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2999	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3000	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
3001	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3002	/* gap */
3003	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3004	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3005	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3006	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3007	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3008	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3009	{TGSI_OPCODE_TXF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3010	{TGSI_OPCODE_TXQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3011	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3012	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3013	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3014	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3015	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3016	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3017	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3018	/* gap */
3019	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3020	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3021	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3022	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3023	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},  /* NOTE(review): TGSI NOP is routed to the unsupported handler */
3024	/* gap */
3025	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3026	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3027	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3028	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3029	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3030	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3031	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3032	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3033	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3034	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3035	/* gap */
3036	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3037	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3038	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3039	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3040	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3041	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3042	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3043	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3044	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3045	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3046	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3047	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3048	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3049	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3050	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3051	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3052	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3053	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3054	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3055	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3056	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3057	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3058	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3059	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3060	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3061	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3062	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3063	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3064};
3065