r600_shader.c revision dffad730df17983cfaef0808555a8c26cad0aa15
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "pipe/p_shader_tokens.h"
24#include "tgsi/tgsi_parse.h"
25#include "tgsi/tgsi_scan.h"
26#include "tgsi/tgsi_dump.h"
27#include "util/u_format.h"
28#include "r600_pipe.h"
29#include "r600_asm.h"
30#include "r600_sq.h"
31#include "r600_opcodes.h"
32#include "r600d.h"
33#include <stdio.h>
34#include <errno.h>
35
36static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
37{
38	struct r600_pipe_state *rstate = &shader->rstate;
39	struct r600_shader *rshader = &shader->shader;
40	unsigned spi_vs_out_id[10];
41	unsigned i, tmp;
42
43	/* clear previous register */
44	rstate->nregs = 0;
45
46	/* so far never got proper semantic id from tgsi */
47	/* FIXME better to move this in config things so they get emited
48	 * only one time per cs
49	 */
50	for (i = 0; i < 10; i++) {
51		spi_vs_out_id[i] = 0;
52	}
53	for (i = 0; i < 32; i++) {
54		tmp = i << ((i & 3) * 8);
55		spi_vs_out_id[i / 4] |= tmp;
56	}
57	for (i = 0; i < 10; i++) {
58		r600_pipe_state_add_reg(rstate,
59					R_028614_SPI_VS_OUT_ID_0 + i * 4,
60					spi_vs_out_id[i], 0xFFFFFFFF, NULL);
61	}
62
63	r600_pipe_state_add_reg(rstate,
64			R_0286C4_SPI_VS_OUT_CONFIG,
65			S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
66			0xFFFFFFFF, NULL);
67	r600_pipe_state_add_reg(rstate,
68			R_028868_SQ_PGM_RESOURCES_VS,
69			S_028868_NUM_GPRS(rshader->bc.ngpr) |
70			S_028868_STACK_SIZE(rshader->bc.nstack),
71			0xFFFFFFFF, NULL);
72	r600_pipe_state_add_reg(rstate,
73			R_0288D0_SQ_PGM_CF_OFFSET_VS,
74			0x00000000, 0xFFFFFFFF, NULL);
75	r600_pipe_state_add_reg(rstate,
76			R_028858_SQ_PGM_START_VS,
77			r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
78
79	r600_pipe_state_add_reg(rstate,
80				R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
81				0xFFFFFFFF, NULL);
82
83}
84
85int r600_find_vs_semantic_index(struct r600_shader *vs,
86				struct r600_shader *ps, int id)
87{
88	struct r600_shader_io *input = &ps->input[id];
89
90	for (int i = 0; i < vs->noutput; i++) {
91		if (input->name == vs->output[i].name &&
92			input->sid == vs->output[i].sid) {
93			return i - 1;
94		}
95	}
96	return 0;
97}
98
99static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
100{
101	struct r600_pipe_state *rstate = &shader->rstate;
102	struct r600_shader *rshader = &shader->shader;
103	unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
104	int pos_index = -1, face_index = -1;
105
106	rstate->nregs = 0;
107
108	for (i = 0; i < rshader->ninput; i++) {
109		if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
110			pos_index = i;
111		if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
112			face_index = i;
113	}
114
115	for (i = 0; i < rshader->noutput; i++) {
116		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
117			r600_pipe_state_add_reg(rstate,
118						R_02880C_DB_SHADER_CONTROL,
119						S_02880C_Z_EXPORT_ENABLE(1),
120						S_02880C_Z_EXPORT_ENABLE(1), NULL);
121		if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
122			r600_pipe_state_add_reg(rstate,
123						R_02880C_DB_SHADER_CONTROL,
124						S_02880C_STENCIL_REF_EXPORT_ENABLE(1),
125						S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL);
126	}
127
128	exports_ps = 0;
129	num_cout = 0;
130	for (i = 0; i < rshader->noutput; i++) {
131		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
132			exports_ps |= 1;
133		else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
134			num_cout++;
135		}
136	}
137	exports_ps |= S_028854_EXPORT_COLORS(num_cout);
138	if (!exports_ps) {
139		/* always at least export 1 component per pixel */
140		exports_ps = 2;
141	}
142
143	spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
144				S_0286CC_PERSP_GRADIENT_ENA(1);
145	spi_input_z = 0;
146	if (pos_index != -1) {
147		spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
148					S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
149					S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
150					S_0286CC_BARYC_SAMPLE_CNTL(1));
151		spi_input_z |= 1;
152	}
153
154	spi_ps_in_control_1 = 0;
155	if (face_index != -1) {
156		spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
157			S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
158	}
159
160	r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
161	r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
162	r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
163	r600_pipe_state_add_reg(rstate,
164				R_028840_SQ_PGM_START_PS,
165				r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
166	r600_pipe_state_add_reg(rstate,
167				R_028850_SQ_PGM_RESOURCES_PS,
168				S_028868_NUM_GPRS(rshader->bc.ngpr) |
169				S_028868_STACK_SIZE(rshader->bc.nstack),
170				0xFFFFFFFF, NULL);
171	r600_pipe_state_add_reg(rstate,
172				R_028854_SQ_PGM_EXPORTS_PS,
173				exports_ps, 0xFFFFFFFF, NULL);
174	r600_pipe_state_add_reg(rstate,
175				R_0288CC_SQ_PGM_CF_OFFSET_PS,
176				0x00000000, 0xFFFFFFFF, NULL);
177
178	if (rshader->uses_kill) {
179		/* only set some bits here, the other bits are set in the dsa state */
180		r600_pipe_state_add_reg(rstate,
181					R_02880C_DB_SHADER_CONTROL,
182					S_02880C_KILL_ENABLE(1),
183					S_02880C_KILL_ENABLE(1), NULL);
184	}
185	r600_pipe_state_add_reg(rstate,
186				R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
187				0xFFFFFFFF, NULL);
188}
189
190int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
191{
192	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
193	struct r600_shader *rshader = &shader->shader;
194	void *ptr;
195
196	/* copy new shader */
197	if (shader->bo == NULL) {
198		shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
199		if (shader->bo == NULL) {
200			return -ENOMEM;
201		}
202		ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
203		memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
204		r600_bo_unmap(rctx->radeon, shader->bo);
205	}
206	/* build state */
207	switch (rshader->processor_type) {
208	case TGSI_PROCESSOR_VERTEX:
209		if (rshader->family >= CHIP_CEDAR) {
210			evergreen_pipe_shader_vs(ctx, shader);
211		} else {
212			r600_pipe_shader_vs(ctx, shader);
213		}
214		break;
215	case TGSI_PROCESSOR_FRAGMENT:
216		if (rshader->family >= CHIP_CEDAR) {
217			evergreen_pipe_shader_ps(ctx, shader);
218		} else {
219			r600_pipe_shader_ps(ctx, shader);
220		}
221		break;
222	default:
223		return -EINVAL;
224	}
225	return 0;
226}
227
228int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
229int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
230{
231	static int dump_shaders = -1;
232	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
233	int r;
234
235        /* Would like some magic "get_bool_option_once" routine.
236         */
237        if (dump_shaders == -1)
238                dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
239
240	if (dump_shaders) {
241		fprintf(stderr, "--------------------------------------------------------------\n");
242		tgsi_dump(tokens, 0);
243	}
244	shader->shader.family = r600_get_family(rctx->radeon);
245	r = r600_shader_from_tgsi(tokens, &shader->shader);
246	if (r) {
247		R600_ERR("translation from TGSI failed !\n");
248		return r;
249	}
250	r = r600_bc_build(&shader->shader.bc);
251	if (r) {
252		R600_ERR("building bytecode failed !\n");
253		return r;
254	}
255	if (dump_shaders) {
256		r600_bc_dump(&shader->shader.bc);
257		fprintf(stderr, "______________________________________________________________\n");
258	}
259	return r600_pipe_shader(ctx, shader);
260}
261
262void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
263{
264	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
265
266	r600_bo_reference(rctx->radeon, &shader->bo, NULL);
267	r600_bc_clear(&shader->shader.bc);
268}
269
270/*
271 * tgsi -> r600 shader
272 */
273struct r600_shader_tgsi_instruction;
274
275struct r600_shader_ctx {
276	struct tgsi_shader_info			info;
277	struct tgsi_parse_context		parse;
278	const struct tgsi_token			*tokens;
279	unsigned				type;
280	unsigned				file_offset[TGSI_FILE_COUNT];
281	unsigned				temp_reg;
282	struct r600_shader_tgsi_instruction	*inst_info;
283	struct r600_bc				*bc;
284	struct r600_shader			*shader;
285	u32					value[4];
286	u32					*literals;
287	u32					nliterals;
288	u32					max_driver_temp_used;
289	/* needed for evergreen interpolation */
290	boolean                                 input_centroid;
291	boolean                                 input_linear;
292	boolean                                 input_perspective;
293	int					num_interp_gpr;
294};
295
/* One entry of the per-opcode translation tables, indexed by TGSI opcode. */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry describes */
	unsigned	tgsi_opcode;
	unsigned	is_op3;
	/* hardware ALU opcode copied into the emitted instruction */
	unsigned	r600_opcode;
	/* translation callback; returns 0 on success, non-zero on error */
	int (*process)(struct r600_shader_ctx *ctx);
};
302
303static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[];
304static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
305
306static int tgsi_is_supported(struct r600_shader_ctx *ctx)
307{
308	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
309	int j;
310
311	if (i->Instruction.NumDstRegs > 1) {
312		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
313		return -EINVAL;
314	}
315	if (i->Instruction.Predicate) {
316		R600_ERR("predicate unsupported\n");
317		return -EINVAL;
318	}
319#if 0
320	if (i->Instruction.Label) {
321		R600_ERR("label unsupported\n");
322		return -EINVAL;
323	}
324#endif
325	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
326		if (i->Src[j].Register.Dimension) {
327			R600_ERR("unsupported src %d (dimension %d)\n", j,
328				 i->Src[j].Register.Dimension);
329			return -EINVAL;
330		}
331	}
332	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
333		if (i->Dst[j].Register.Dimension) {
334			R600_ERR("unsupported dst (dimension)\n");
335			return -EINVAL;
336		}
337	}
338	return 0;
339}
340
341static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
342{
343	int i, r;
344	struct r600_bc_alu alu;
345	int gpr = 0, base_chan = 0;
346	int ij_index = 0;
347
348	if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
349		ij_index = 0;
350		if (ctx->shader->input[input].centroid)
351			ij_index++;
352	} else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
353		ij_index = 0;
354		/* if we have perspective add one */
355		if (ctx->input_perspective)  {
356			ij_index++;
357			/* if we have perspective centroid */
358			if (ctx->input_centroid)
359				ij_index++;
360		}
361		if (ctx->shader->input[input].centroid)
362			ij_index++;
363	}
364
365	/* work out gpr and base_chan from index */
366	gpr = ij_index / 2;
367	base_chan = (2 * (ij_index % 2)) + 1;
368
369	for (i = 0; i < 8; i++) {
370		memset(&alu, 0, sizeof(struct r600_bc_alu));
371
372		if (i < 4)
373			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
374		else
375			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY;
376
377		if ((i > 1) && (i < 6)) {
378			alu.dst.sel = ctx->shader->input[input].gpr;
379			alu.dst.write = 1;
380		}
381
382		alu.dst.chan = i % 4;
383
384		alu.src[0].sel = gpr;
385		alu.src[0].chan = (base_chan - (i % 2));
386
387		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
388
389		alu.bank_swizzle_force = SQ_ALU_VEC_210;
390		if ((i % 4) == 3)
391			alu.last = 1;
392		r = r600_bc_add_alu(ctx->bc, &alu);
393		if (r)
394			return r;
395	}
396	return 0;
397}
398
399
400static int tgsi_declaration(struct r600_shader_ctx *ctx)
401{
402	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
403	unsigned i;
404
405	switch (d->Declaration.File) {
406	case TGSI_FILE_INPUT:
407		i = ctx->shader->ninput++;
408		ctx->shader->input[i].name = d->Semantic.Name;
409		ctx->shader->input[i].sid = d->Semantic.Index;
410		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
411		ctx->shader->input[i].centroid = d->Declaration.Centroid;
412		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
413		if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
414			/* turn input into interpolate on EG */
415			if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
416				if (ctx->shader->input[i].interpolate > 0) {
417					ctx->shader->input[i].lds_pos = ctx->shader->nlds++;
418					evergreen_interp_alu(ctx, i);
419				}
420			}
421		}
422		break;
423	case TGSI_FILE_OUTPUT:
424		i = ctx->shader->noutput++;
425		ctx->shader->output[i].name = d->Semantic.Name;
426		ctx->shader->output[i].sid = d->Semantic.Index;
427		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
428		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
429		break;
430	case TGSI_FILE_CONSTANT:
431	case TGSI_FILE_TEMPORARY:
432	case TGSI_FILE_SAMPLER:
433	case TGSI_FILE_ADDRESS:
434		break;
435	default:
436		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
437		return -EINVAL;
438	}
439	return 0;
440}
441
442static int r600_get_temp(struct r600_shader_ctx *ctx)
443{
444	return ctx->temp_reg + ctx->max_driver_temp_used++;
445}
446
447/*
448 * for evergreen we need to scan the shader to find the number of GPRs we need to
449 * reserve for interpolation.
450 *
451 * we need to know if we are going to emit
452 * any centroid inputs
453 * if perspective and linear are required
454*/
455static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
456{
457	int i;
458	int num_baryc;
459
460	ctx->input_linear = FALSE;
461	ctx->input_perspective = FALSE;
462	ctx->input_centroid = FALSE;
463	ctx->num_interp_gpr = 1;
464
465	/* any centroid inputs */
466	for (i = 0; i < ctx->info.num_inputs; i++) {
467		/* skip position/face */
468		if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
469		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
470			continue;
471		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
472			ctx->input_linear = TRUE;
473		if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
474			ctx->input_perspective = TRUE;
475		if (ctx->info.input_centroid[i])
476			ctx->input_centroid = TRUE;
477	}
478
479	num_baryc = 0;
480	/* ignoring sample for now */
481	if (ctx->input_perspective)
482		num_baryc++;
483	if (ctx->input_linear)
484		num_baryc++;
485	if (ctx->input_centroid)
486		num_baryc *= 2;
487
488	ctx->num_interp_gpr += (num_baryc + 1) >> 1;
489
490	/* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */
491	return ctx->num_interp_gpr;
492}
493
494int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
495{
496	struct tgsi_full_immediate *immediate;
497	struct r600_shader_ctx ctx;
498	struct r600_bc_output output[32];
499	unsigned output_done, noutput;
500	unsigned opcode;
501	int i, r = 0, pos0;
502
503	ctx.bc = &shader->bc;
504	ctx.shader = shader;
505	r = r600_bc_init(ctx.bc, shader->family);
506	if (r)
507		return r;
508	ctx.tokens = tokens;
509	tgsi_scan_shader(tokens, &ctx.info);
510	tgsi_parse_init(&ctx.parse, tokens);
511	ctx.type = ctx.parse.FullHeader.Processor.Processor;
512	shader->processor_type = ctx.type;
513	ctx.bc->type = shader->processor_type;
514
515	/* register allocations */
516	/* Values [0,127] correspond to GPR[0..127].
517	 * Values [128,159] correspond to constant buffer bank 0
518	 * Values [160,191] correspond to constant buffer bank 1
519	 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
520	 * Values [256,287] correspond to constant buffer bank 2 (EG)
521	 * Values [288,319] correspond to constant buffer bank 3 (EG)
522	 * Other special values are shown in the list below.
523	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
524	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
525	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
526	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
527	 * 248	SQ_ALU_SRC_0: special constant 0.0.
528	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
529	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
530	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
531	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
532	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
533	 * 254	SQ_ALU_SRC_PV: previous vector result.
534	 * 255	SQ_ALU_SRC_PS: previous scalar result.
535	 */
536	for (i = 0; i < TGSI_FILE_COUNT; i++) {
537		ctx.file_offset[i] = 0;
538	}
539	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
540		ctx.file_offset[TGSI_FILE_INPUT] = 1;
541		if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
542			r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
543		} else {
544			r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
545		}
546	}
547	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) {
548		ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
549	}
550	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
551						ctx.info.file_count[TGSI_FILE_INPUT];
552	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
553						ctx.info.file_count[TGSI_FILE_OUTPUT];
554
555	/* Outside the GPR range. This will be translated to one of the
556	 * kcache banks later. */
557	ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
558
559	ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
560	ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
561			ctx.info.file_count[TGSI_FILE_TEMPORARY];
562
563	ctx.nliterals = 0;
564	ctx.literals = NULL;
565
566	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
567		tgsi_parse_token(&ctx.parse);
568		switch (ctx.parse.FullToken.Token.Type) {
569		case TGSI_TOKEN_TYPE_IMMEDIATE:
570			immediate = &ctx.parse.FullToken.FullImmediate;
571			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
572			if(ctx.literals == NULL) {
573				r = -ENOMEM;
574				goto out_err;
575			}
576			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
577			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
578			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
579			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
580			ctx.nliterals++;
581			break;
582		case TGSI_TOKEN_TYPE_DECLARATION:
583			r = tgsi_declaration(&ctx);
584			if (r)
585				goto out_err;
586			break;
587		case TGSI_TOKEN_TYPE_INSTRUCTION:
588			r = tgsi_is_supported(&ctx);
589			if (r)
590				goto out_err;
591			ctx.max_driver_temp_used = 0;
592			/* reserve first tmp for everyone */
593			r600_get_temp(&ctx);
594			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
595			if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
596				ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
597			else
598				ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
599			r = ctx.inst_info->process(&ctx);
600			if (r)
601				goto out_err;
602			r = r600_bc_add_literal(ctx.bc, ctx.value);
603			if (r)
604				goto out_err;
605			break;
606		case TGSI_TOKEN_TYPE_PROPERTY:
607			break;
608		default:
609			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
610			r = -EINVAL;
611			goto out_err;
612		}
613	}
614	/* export output */
615	noutput = shader->noutput;
616	for (i = 0, pos0 = 0; i < noutput; i++) {
617		memset(&output[i], 0, sizeof(struct r600_bc_output));
618		output[i].gpr = shader->output[i].gpr;
619		output[i].elem_size = 3;
620		output[i].swizzle_x = 0;
621		output[i].swizzle_y = 1;
622		output[i].swizzle_z = 2;
623		output[i].swizzle_w = 3;
624		output[i].barrier = 1;
625		output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
626		output[i].array_base = i - pos0;
627		output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
628		switch (ctx.type) {
629		case TGSI_PROCESSOR_VERTEX:
630			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
631				output[i].array_base = 60;
632				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
633				/* position doesn't count in array_base */
634				pos0++;
635			}
636			if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
637				output[i].array_base = 61;
638				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
639				/* position doesn't count in array_base */
640				pos0++;
641			}
642			break;
643		case TGSI_PROCESSOR_FRAGMENT:
644			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
645				output[i].array_base = shader->output[i].sid;
646				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
647			} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
648				output[i].array_base = 61;
649				output[i].swizzle_x = 2;
650				output[i].swizzle_y = 7;
651				output[i].swizzle_z = output[i].swizzle_w = 7;
652				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
653			} else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
654				output[i].array_base = 61;
655				output[i].swizzle_x = 7;
656				output[i].swizzle_y = 1;
657				output[i].swizzle_z = output[i].swizzle_w = 7;
658				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
659			} else {
660				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
661				r = -EINVAL;
662				goto out_err;
663			}
664			break;
665		default:
666			R600_ERR("unsupported processor type %d\n", ctx.type);
667			r = -EINVAL;
668			goto out_err;
669		}
670	}
671	/* add fake param output for vertex shader if no param is exported */
672	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
673		for (i = 0, pos0 = 0; i < noutput; i++) {
674			if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
675				pos0 = 1;
676				break;
677			}
678		}
679		if (!pos0) {
680			memset(&output[i], 0, sizeof(struct r600_bc_output));
681			output[i].gpr = 0;
682			output[i].elem_size = 3;
683			output[i].swizzle_x = 0;
684			output[i].swizzle_y = 1;
685			output[i].swizzle_z = 2;
686			output[i].swizzle_w = 3;
687			output[i].barrier = 1;
688			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
689			output[i].array_base = 0;
690			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
691			noutput++;
692		}
693	}
694	/* add fake pixel export */
695	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
696		memset(&output[0], 0, sizeof(struct r600_bc_output));
697		output[0].gpr = 0;
698		output[0].elem_size = 3;
699		output[0].swizzle_x = 7;
700		output[0].swizzle_y = 7;
701		output[0].swizzle_z = 7;
702		output[0].swizzle_w = 7;
703		output[0].barrier = 1;
704		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
705		output[0].array_base = 0;
706		output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
707		noutput++;
708	}
709	/* set export done on last export of each type */
710	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
711		if (i == (noutput - 1)) {
712			output[i].end_of_program = 1;
713		}
714		if (!(output_done & (1 << output[i].type))) {
715			output_done |= (1 << output[i].type);
716			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
717		}
718	}
719	/* add output to bytecode */
720	for (i = 0; i < noutput; i++) {
721		r = r600_bc_add_output(ctx.bc, &output[i]);
722		if (r)
723			goto out_err;
724	}
725	free(ctx.literals);
726	tgsi_parse_free(&ctx.parse);
727	return 0;
728out_err:
729	free(ctx.literals);
730	tgsi_parse_free(&ctx.parse);
731	return r;
732}
733
734static int tgsi_unsupported(struct r600_shader_ctx *ctx)
735{
736	R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
737	return -EINVAL;
738}
739
/*
 * Translation callback that emits nothing and always succeeds.
 */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* the context is not needed */
	return 0;
}
744
745static int tgsi_src(struct r600_shader_ctx *ctx,
746			const struct tgsi_full_src_register *tgsi_src,
747			struct r600_bc_alu_src *r600_src)
748{
749	int index;
750	memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
751	r600_src->sel = tgsi_src->Register.Index;
752	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
753		r600_src->sel = 0;
754		index = tgsi_src->Register.Index;
755		ctx->value[0] = ctx->literals[index * 4 + 0];
756		ctx->value[1] = ctx->literals[index * 4 + 1];
757		ctx->value[2] = ctx->literals[index * 4 + 2];
758		ctx->value[3] = ctx->literals[index * 4 + 3];
759	}
760	if (tgsi_src->Register.Indirect)
761		r600_src->rel = V_SQ_REL_RELATIVE;
762	r600_src->neg = tgsi_src->Register.Negate;
763	r600_src->abs = tgsi_src->Register.Absolute;
764	r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
765	return 0;
766}
767
768static int tgsi_dst(struct r600_shader_ctx *ctx,
769			const struct tgsi_full_dst_register *tgsi_dst,
770			unsigned swizzle,
771			struct r600_bc_alu_dst *r600_dst)
772{
773	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
774
775	r600_dst->sel = tgsi_dst->Register.Index;
776	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
777	r600_dst->chan = swizzle;
778	r600_dst->write = 1;
779	if (tgsi_dst->Register.Indirect)
780		r600_dst->rel = V_SQ_REL_RELATIVE;
781	if (inst->Instruction.Saturate) {
782		r600_dst->clamp = 1;
783	}
784	return 0;
785}
786
787static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
788{
789	switch (swizzle) {
790	case 0:
791		return tgsi_src->Register.SwizzleX;
792	case 1:
793		return tgsi_src->Register.SwizzleY;
794	case 2:
795		return tgsi_src->Register.SwizzleZ;
796	case 3:
797		return tgsi_src->Register.SwizzleW;
798	default:
799		return 0;
800	}
801}
802
803static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
804{
805	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
806	struct r600_bc_alu alu;
807	int i, j, k, nconst, r;
808
809	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
810		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
811			nconst++;
812		}
813		r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
814		if (r) {
815			return r;
816		}
817	}
818	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
819		if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
820			int treg = r600_get_temp(ctx);
821			for (k = 0; k < 4; k++) {
822				memset(&alu, 0, sizeof(struct r600_bc_alu));
823				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
824				alu.src[0].sel = r600_src[i].sel;
825				alu.src[0].chan = k;
826				alu.src[0].rel = r600_src[i].rel;
827				alu.dst.sel = treg;
828				alu.dst.chan = k;
829				alu.dst.write = 1;
830				if (k == 3)
831					alu.last = 1;
832				r = r600_bc_add_alu(ctx->bc, &alu);
833				if (r)
834					return r;
835			}
836			r600_src[i].sel = treg;
837			r600_src[i].rel =0;
838			j--;
839		}
840	}
841	return 0;
842}
843
844/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
845static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
846{
847	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
848	struct r600_bc_alu alu;
849	int i, j, k, nliteral, r;
850
851	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
852		if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
853			nliteral++;
854		}
855	}
856	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
857		if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
858			int treg = r600_get_temp(ctx);
859			for (k = 0; k < 4; k++) {
860				memset(&alu, 0, sizeof(struct r600_bc_alu));
861				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
862				alu.src[0].sel = r600_src[i].sel;
863				alu.src[0].chan = k;
864				alu.dst.sel = treg;
865				alu.dst.chan = k;
866				alu.dst.write = 1;
867				if (k == 3)
868					alu.last = 1;
869				r = r600_bc_add_alu(ctx->bc, &alu);
870				if (r)
871					return r;
872			}
873			r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]);
874			if (r)
875				return r;
876			r600_src[i].sel = treg;
877			j--;
878		}
879	}
880	return 0;
881}
882
/*
 * Return the highest channel (0..3) enabled in a four-bit write mask.
 * Bits above bit 3 are ignored; an empty mask yields 0.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* scan downwards so the first hit is the highest enabled channel */
	for (chan = 3; chan > 0; chan--) {
		if ((writemask >> chan) & 1u)
			return chan;
	}
	return 0;
}
894
895static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
896{
897	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
898	struct r600_bc_alu_src r600_src[3];
899	struct r600_bc_alu alu;
900	int i, j, r;
901	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
902
903	r = tgsi_split_constant(ctx, r600_src);
904	if (r)
905		return r;
906	r = tgsi_split_literal_constant(ctx, r600_src);
907	if (r)
908		return r;
909	for (i = 0; i < lasti + 1; i++) {
910		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
911			continue;
912
913		memset(&alu, 0, sizeof(struct r600_bc_alu));
914		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
915		if (r)
916			return r;
917
918		alu.inst = ctx->inst_info->r600_opcode;
919		if (!swap) {
920			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
921				alu.src[j] = r600_src[j];
922				alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
923			}
924		} else {
925			alu.src[0] = r600_src[1];
926			alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
927
928			alu.src[1] = r600_src[0];
929			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
930		}
931		/* handle some special cases */
932		switch (ctx->inst_info->tgsi_opcode) {
933		case TGSI_OPCODE_SUB:
934			alu.src[1].neg = 1;
935			break;
936		case TGSI_OPCODE_ABS:
937			alu.src[0].abs = 1;
938			break;
939		default:
940			break;
941		}
942		if (i == lasti) {
943			alu.last = 1;
944		}
945		r = r600_bc_add_alu(ctx->bc, &alu);
946		if (r)
947			return r;
948	}
949	return 0;
950}
951
/* Per-channel ALU translation with the sources in TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 0;

	return tgsi_op2_s(ctx, swap_sources);
}
956
/* Per-channel ALU translation with the first two sources exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_sources = 1;

	return tgsi_op2_s(ctx, swap_sources);
}
961
962/*
963 * r600 - trunc to -PI..PI range
964 * r700 - normalize by dividing by 2PI
965 * see fdo bug 27901
966 */
967static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
968			   struct r600_bc_alu_src r600_src[3])
969{
970	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
971	int r, src0_chan;
972	uint32_t lit_vals[4];
973	struct r600_bc_alu alu;
974
975	memset(lit_vals, 0, 4*4);
976	r = tgsi_split_constant(ctx, r600_src);
977	if (r)
978		return r;
979	r = tgsi_split_literal_constant(ctx, r600_src);
980	if (r)
981		return r;
982
983	src0_chan = tgsi_chan(&inst->Src[0], 0);
984
985	/* We are going to feed two literals to the MAD below,
986	 * which means that if the first operand is a literal as well,
987	 * we need to copy its value manually.
988	 */
989	if (r600_src[0].sel == V_SQ_ALU_SRC_LITERAL) {
990		unsigned index = inst->Src[0].Register.Index;
991
992		lit_vals[2] = ctx->literals[index * 4 + src0_chan];
993		src0_chan = 2;
994	}
995
996	lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
997	lit_vals[1] = fui(0.5f);
998
999	memset(&alu, 0, sizeof(struct r600_bc_alu));
1000	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1001	alu.is_op3 = 1;
1002
1003	alu.dst.chan = 0;
1004	alu.dst.sel = ctx->temp_reg;
1005	alu.dst.write = 1;
1006
1007	alu.src[0] = r600_src[0];
1008	alu.src[0].chan = src0_chan;
1009
1010	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1011	alu.src[1].chan = 0;
1012	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1013	alu.src[2].chan = 1;
1014	alu.last = 1;
1015	r = r600_bc_add_alu(ctx->bc, &alu);
1016	if (r)
1017		return r;
1018	r = r600_bc_add_literal(ctx->bc, lit_vals);
1019	if (r)
1020		return r;
1021
1022	memset(&alu, 0, sizeof(struct r600_bc_alu));
1023	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1024
1025	alu.dst.chan = 0;
1026	alu.dst.sel = ctx->temp_reg;
1027	alu.dst.write = 1;
1028
1029	alu.src[0].sel = ctx->temp_reg;
1030	alu.src[0].chan = 0;
1031	alu.last = 1;
1032	r = r600_bc_add_alu(ctx->bc, &alu);
1033	if (r)
1034		return r;
1035
1036	if (ctx->bc->chiprev == CHIPREV_R600) {
1037		lit_vals[0] = fui(3.1415926535897f * 2.0f);
1038		lit_vals[1] = fui(-3.1415926535897f);
1039	} else {
1040		lit_vals[0] = fui(1.0f);
1041		lit_vals[1] = fui(-0.5f);
1042	}
1043
1044	memset(&alu, 0, sizeof(struct r600_bc_alu));
1045	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1046	alu.is_op3 = 1;
1047
1048	alu.dst.chan = 0;
1049	alu.dst.sel = ctx->temp_reg;
1050	alu.dst.write = 1;
1051
1052	alu.src[0].sel = ctx->temp_reg;
1053	alu.src[0].chan = 0;
1054
1055	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
1056	alu.src[1].chan = 0;
1057	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1058	alu.src[2].chan = 1;
1059	alu.last = 1;
1060	r = r600_bc_add_alu(ctx->bc, &alu);
1061	if (r)
1062		return r;
1063	r = r600_bc_add_literal(ctx->bc, lit_vals);
1064	if (r)
1065		return r;
1066	return 0;
1067}
1068
1069static int tgsi_trig(struct r600_shader_ctx *ctx)
1070{
1071	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1072	struct r600_bc_alu_src r600_src[3];
1073	struct r600_bc_alu alu;
1074	int i, r;
1075	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1076
1077	r = tgsi_setup_trig(ctx, r600_src);
1078	if (r)
1079		return r;
1080
1081	memset(&alu, 0, sizeof(struct r600_bc_alu));
1082	alu.inst = ctx->inst_info->r600_opcode;
1083	alu.dst.chan = 0;
1084	alu.dst.sel = ctx->temp_reg;
1085	alu.dst.write = 1;
1086
1087	alu.src[0].sel = ctx->temp_reg;
1088	alu.src[0].chan = 0;
1089	alu.last = 1;
1090	r = r600_bc_add_alu(ctx->bc, &alu);
1091	if (r)
1092		return r;
1093
1094	/* replicate result */
1095	for (i = 0; i < lasti + 1; i++) {
1096		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1097			continue;
1098
1099		memset(&alu, 0, sizeof(struct r600_bc_alu));
1100		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1101
1102		alu.src[0].sel = ctx->temp_reg;
1103		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1104		if (r)
1105			return r;
1106		if (i == lasti)
1107			alu.last = 1;
1108		r = r600_bc_add_alu(ctx->bc, &alu);
1109		if (r)
1110			return r;
1111	}
1112	return 0;
1113}
1114
1115static int tgsi_scs(struct r600_shader_ctx *ctx)
1116{
1117	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1118	struct r600_bc_alu_src r600_src[3];
1119	struct r600_bc_alu alu;
1120	int r;
1121
1122	/* We'll only need the trig stuff if we are going to write to the
1123	 * X or Y components of the destination vector.
1124	 */
1125	if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
1126		r = tgsi_setup_trig(ctx, r600_src);
1127		if (r)
1128			return r;
1129	}
1130
1131	/* dst.x = COS */
1132	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
1133		memset(&alu, 0, sizeof(struct r600_bc_alu));
1134		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
1135		r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1136		if (r)
1137			return r;
1138
1139		alu.src[0].sel = ctx->temp_reg;
1140		alu.src[0].chan = 0;
1141		alu.last = 1;
1142		r = r600_bc_add_alu(ctx->bc, &alu);
1143		if (r)
1144			return r;
1145	}
1146
1147	/* dst.y = SIN */
1148	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
1149		memset(&alu, 0, sizeof(struct r600_bc_alu));
1150		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
1151		r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1152		if (r)
1153			return r;
1154
1155		alu.src[0].sel = ctx->temp_reg;
1156		alu.src[0].chan = 0;
1157		alu.last = 1;
1158		r = r600_bc_add_alu(ctx->bc, &alu);
1159		if (r)
1160			return r;
1161	}
1162
1163	/* dst.z = 0.0; */
1164	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
1165		memset(&alu, 0, sizeof(struct r600_bc_alu));
1166
1167		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1168
1169		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1170		if (r)
1171			return r;
1172
1173		alu.src[0].sel = V_SQ_ALU_SRC_0;
1174		alu.src[0].chan = 0;
1175
1176		alu.last = 1;
1177
1178		r = r600_bc_add_alu(ctx->bc, &alu);
1179		if (r)
1180			return r;
1181
1182		r = r600_bc_add_literal(ctx->bc, ctx->value);
1183		if (r)
1184			return r;
1185	}
1186
1187	/* dst.w = 1.0; */
1188	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
1189		memset(&alu, 0, sizeof(struct r600_bc_alu));
1190
1191		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1192
1193		r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1194		if (r)
1195			return r;
1196
1197		alu.src[0].sel = V_SQ_ALU_SRC_1;
1198		alu.src[0].chan = 0;
1199
1200		alu.last = 1;
1201
1202		r = r600_bc_add_alu(ctx->bc, &alu);
1203		if (r)
1204			return r;
1205
1206		r = r600_bc_add_literal(ctx->bc, ctx->value);
1207		if (r)
1208			return r;
1209	}
1210
1211	return 0;
1212}
1213
1214static int tgsi_kill(struct r600_shader_ctx *ctx)
1215{
1216	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1217	struct r600_bc_alu alu;
1218	int i, r;
1219
1220	for (i = 0; i < 4; i++) {
1221		memset(&alu, 0, sizeof(struct r600_bc_alu));
1222		alu.inst = ctx->inst_info->r600_opcode;
1223
1224		alu.dst.chan = i;
1225
1226		alu.src[0].sel = V_SQ_ALU_SRC_0;
1227
1228		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
1229			alu.src[1].sel = V_SQ_ALU_SRC_1;
1230			alu.src[1].neg = 1;
1231		} else {
1232			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1233			if (r)
1234				return r;
1235			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1236		}
1237		if (i == 3) {
1238			alu.last = 1;
1239		}
1240		r = r600_bc_add_alu(ctx->bc, &alu);
1241		if (r)
1242			return r;
1243	}
1244	r = r600_bc_add_literal(ctx->bc, ctx->value);
1245	if (r)
1246		return r;
1247
1248	/* kill must be last in ALU */
1249	ctx->bc->force_add_cf = 1;
1250	ctx->shader->uses_kill = TRUE;
1251	return 0;
1252}
1253
1254static int tgsi_lit(struct r600_shader_ctx *ctx)
1255{
1256	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1257	struct r600_bc_alu alu;
1258	struct r600_bc_alu_src r600_src[3];
1259	int r;
1260
1261	r = tgsi_split_constant(ctx, r600_src);
1262	if (r)
1263		return r;
1264	r = tgsi_split_literal_constant(ctx, r600_src);
1265	if (r)
1266		return r;
1267
1268	/* dst.x, <- 1.0  */
1269	memset(&alu, 0, sizeof(struct r600_bc_alu));
1270	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1271	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
1272	alu.src[0].chan = 0;
1273	r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1274	if (r)
1275		return r;
1276	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1277	r = r600_bc_add_alu(ctx->bc, &alu);
1278	if (r)
1279		return r;
1280
1281	/* dst.y = max(src.x, 0.0) */
1282	memset(&alu, 0, sizeof(struct r600_bc_alu));
1283	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
1284	alu.src[0] = r600_src[0];
1285	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1286	alu.src[1].chan = 0;
1287	r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1288	if (r)
1289		return r;
1290	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1291	r = r600_bc_add_alu(ctx->bc, &alu);
1292	if (r)
1293		return r;
1294
1295	/* dst.w, <- 1.0  */
1296	memset(&alu, 0, sizeof(struct r600_bc_alu));
1297	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1298	alu.src[0].sel  = V_SQ_ALU_SRC_1;
1299	alu.src[0].chan = 0;
1300	r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1301	if (r)
1302		return r;
1303	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1304	alu.last = 1;
1305	r = r600_bc_add_alu(ctx->bc, &alu);
1306	if (r)
1307		return r;
1308
1309	r = r600_bc_add_literal(ctx->bc, ctx->value);
1310	if (r)
1311		return r;
1312
1313	if (inst->Dst[0].Register.WriteMask & (1 << 2))
1314	{
1315		int chan;
1316		int sel;
1317
1318		/* dst.z = log(src.y) */
1319		memset(&alu, 0, sizeof(struct r600_bc_alu));
1320		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1321		alu.src[0] = r600_src[0];
1322		alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1323		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1324		if (r)
1325			return r;
1326		alu.last = 1;
1327		r = r600_bc_add_alu(ctx->bc, &alu);
1328		if (r)
1329			return r;
1330
1331		r = r600_bc_add_literal(ctx->bc, ctx->value);
1332		if (r)
1333			return r;
1334
1335		chan = alu.dst.chan;
1336		sel = alu.dst.sel;
1337
1338		/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1339		memset(&alu, 0, sizeof(struct r600_bc_alu));
1340		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1341		alu.src[0] = r600_src[0];
1342		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1343		alu.src[1].sel  = sel;
1344		alu.src[1].chan = chan;
1345
1346		alu.src[2] = r600_src[0];
1347		alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
1348		alu.dst.sel = ctx->temp_reg;
1349		alu.dst.chan = 0;
1350		alu.dst.write = 1;
1351		alu.is_op3 = 1;
1352		alu.last = 1;
1353		r = r600_bc_add_alu(ctx->bc, &alu);
1354		if (r)
1355			return r;
1356
1357		r = r600_bc_add_literal(ctx->bc, ctx->value);
1358		if (r)
1359			return r;
1360		/* dst.z = exp(tmp.x) */
1361		memset(&alu, 0, sizeof(struct r600_bc_alu));
1362		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1363		alu.src[0].sel = ctx->temp_reg;
1364		alu.src[0].chan = 0;
1365		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1366		if (r)
1367			return r;
1368		alu.last = 1;
1369		r = r600_bc_add_alu(ctx->bc, &alu);
1370		if (r)
1371			return r;
1372	}
1373	return 0;
1374}
1375
1376static int tgsi_rsq(struct r600_shader_ctx *ctx)
1377{
1378	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1379	struct r600_bc_alu alu;
1380	int i, r;
1381
1382	memset(&alu, 0, sizeof(struct r600_bc_alu));
1383
1384	/* FIXME:
1385	 * For state trackers other than OpenGL, we'll want to use
1386	 * _RECIPSQRT_IEEE instead.
1387	 */
1388	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
1389
1390	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1391		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1392		if (r)
1393			return r;
1394		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1395		alu.src[i].abs = 1;
1396	}
1397	alu.dst.sel = ctx->temp_reg;
1398	alu.dst.write = 1;
1399	alu.last = 1;
1400	r = r600_bc_add_alu(ctx->bc, &alu);
1401	if (r)
1402		return r;
1403	r = r600_bc_add_literal(ctx->bc, ctx->value);
1404	if (r)
1405		return r;
1406	/* replicate result */
1407	return tgsi_helper_tempx_replicate(ctx);
1408}
1409
1410static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1411{
1412	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1413	struct r600_bc_alu alu;
1414	int i, r;
1415
1416	for (i = 0; i < 4; i++) {
1417		memset(&alu, 0, sizeof(struct r600_bc_alu));
1418		alu.src[0].sel = ctx->temp_reg;
1419		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1420		alu.dst.chan = i;
1421		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1422		if (r)
1423			return r;
1424		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1425		if (i == 3)
1426			alu.last = 1;
1427		r = r600_bc_add_alu(ctx->bc, &alu);
1428		if (r)
1429			return r;
1430	}
1431	return 0;
1432}
1433
1434static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1435{
1436	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1437	struct r600_bc_alu alu;
1438	int i, r;
1439
1440	memset(&alu, 0, sizeof(struct r600_bc_alu));
1441	alu.inst = ctx->inst_info->r600_opcode;
1442	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1443		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1444		if (r)
1445			return r;
1446		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1447	}
1448	alu.dst.sel = ctx->temp_reg;
1449	alu.dst.write = 1;
1450	alu.last = 1;
1451	r = r600_bc_add_alu(ctx->bc, &alu);
1452	if (r)
1453		return r;
1454	r = r600_bc_add_literal(ctx->bc, ctx->value);
1455	if (r)
1456		return r;
1457	/* replicate result */
1458	return tgsi_helper_tempx_replicate(ctx);
1459}
1460
1461static int tgsi_pow(struct r600_shader_ctx *ctx)
1462{
1463	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1464	struct r600_bc_alu alu;
1465	int r;
1466
1467	/* LOG2(a) */
1468	memset(&alu, 0, sizeof(struct r600_bc_alu));
1469	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1470	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1471	if (r)
1472		return r;
1473	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1474	alu.dst.sel = ctx->temp_reg;
1475	alu.dst.write = 1;
1476	alu.last = 1;
1477	r = r600_bc_add_alu(ctx->bc, &alu);
1478	if (r)
1479		return r;
1480	r = r600_bc_add_literal(ctx->bc,ctx->value);
1481	if (r)
1482		return r;
1483	/* b * LOG2(a) */
1484	memset(&alu, 0, sizeof(struct r600_bc_alu));
1485	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1486	r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1487	if (r)
1488		return r;
1489	alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1490	alu.src[1].sel = ctx->temp_reg;
1491	alu.dst.sel = ctx->temp_reg;
1492	alu.dst.write = 1;
1493	alu.last = 1;
1494	r = r600_bc_add_alu(ctx->bc, &alu);
1495	if (r)
1496		return r;
1497	r = r600_bc_add_literal(ctx->bc,ctx->value);
1498	if (r)
1499		return r;
1500	/* POW(a,b) = EXP2(b * LOG2(a))*/
1501	memset(&alu, 0, sizeof(struct r600_bc_alu));
1502	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1503	alu.src[0].sel = ctx->temp_reg;
1504	alu.dst.sel = ctx->temp_reg;
1505	alu.dst.write = 1;
1506	alu.last = 1;
1507	r = r600_bc_add_alu(ctx->bc, &alu);
1508	if (r)
1509		return r;
1510	r = r600_bc_add_literal(ctx->bc,ctx->value);
1511	if (r)
1512		return r;
1513	return tgsi_helper_tempx_replicate(ctx);
1514}
1515
1516static int tgsi_ssg(struct r600_shader_ctx *ctx)
1517{
1518	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1519	struct r600_bc_alu alu;
1520	struct r600_bc_alu_src r600_src[3];
1521	int i, r;
1522
1523	r = tgsi_split_constant(ctx, r600_src);
1524	if (r)
1525		return r;
1526	r = tgsi_split_literal_constant(ctx, r600_src);
1527	if (r)
1528		return r;
1529
1530	/* tmp = (src > 0 ? 1 : src) */
1531	for (i = 0; i < 4; i++) {
1532		memset(&alu, 0, sizeof(struct r600_bc_alu));
1533		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1534		alu.is_op3 = 1;
1535
1536		alu.dst.sel = ctx->temp_reg;
1537		alu.dst.chan = i;
1538
1539		alu.src[0] = r600_src[0];
1540		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1541
1542		alu.src[1].sel = V_SQ_ALU_SRC_1;
1543
1544		alu.src[2] = r600_src[0];
1545		alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1546		if (i == 3)
1547			alu.last = 1;
1548		r = r600_bc_add_alu(ctx->bc, &alu);
1549		if (r)
1550			return r;
1551	}
1552	r = r600_bc_add_literal(ctx->bc, ctx->value);
1553	if (r)
1554		return r;
1555
1556	/* dst = (-tmp > 0 ? -1 : tmp) */
1557	for (i = 0; i < 4; i++) {
1558		memset(&alu, 0, sizeof(struct r600_bc_alu));
1559		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1560		alu.is_op3 = 1;
1561		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1562		if (r)
1563			return r;
1564
1565		alu.src[0].sel = ctx->temp_reg;
1566		alu.src[0].chan = i;
1567		alu.src[0].neg = 1;
1568
1569		alu.src[1].sel = V_SQ_ALU_SRC_1;
1570		alu.src[1].neg = 1;
1571
1572		alu.src[2].sel = ctx->temp_reg;
1573		alu.src[2].chan = i;
1574
1575		if (i == 3)
1576			alu.last = 1;
1577		r = r600_bc_add_alu(ctx->bc, &alu);
1578		if (r)
1579			return r;
1580	}
1581	return 0;
1582}
1583
1584static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1585{
1586	struct r600_bc_alu alu;
1587	int i, r;
1588
1589	r = r600_bc_add_literal(ctx->bc, ctx->value);
1590	if (r)
1591		return r;
1592	for (i = 0; i < 4; i++) {
1593		memset(&alu, 0, sizeof(struct r600_bc_alu));
1594		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1595			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1596			alu.dst.chan = i;
1597		} else {
1598			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1599			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1600			if (r)
1601				return r;
1602			alu.src[0].sel = ctx->temp_reg;
1603			alu.src[0].chan = i;
1604		}
1605		if (i == 3) {
1606			alu.last = 1;
1607		}
1608		r = r600_bc_add_alu(ctx->bc, &alu);
1609		if (r)
1610			return r;
1611	}
1612	return 0;
1613}
1614
1615static int tgsi_op3(struct r600_shader_ctx *ctx)
1616{
1617	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1618	struct r600_bc_alu_src r600_src[3];
1619	struct r600_bc_alu alu;
1620	int i, j, r;
1621	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1622
1623	r = tgsi_split_constant(ctx, r600_src);
1624	if (r)
1625		return r;
1626	r = tgsi_split_literal_constant(ctx, r600_src);
1627	if (r)
1628		return r;
1629	for (i = 0; i < lasti + 1; i++) {
1630		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1631			continue;
1632
1633		memset(&alu, 0, sizeof(struct r600_bc_alu));
1634		alu.inst = ctx->inst_info->r600_opcode;
1635		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1636			alu.src[j] = r600_src[j];
1637			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1638		}
1639
1640		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1641		if (r)
1642			return r;
1643
1644		alu.dst.chan = i;
1645		alu.dst.write = 1;
1646		alu.is_op3 = 1;
1647		if (i == lasti) {
1648			alu.last = 1;
1649		}
1650		r = r600_bc_add_alu(ctx->bc, &alu);
1651		if (r)
1652			return r;
1653	}
1654	return 0;
1655}
1656
1657static int tgsi_dp(struct r600_shader_ctx *ctx)
1658{
1659	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1660	struct r600_bc_alu_src r600_src[3];
1661	struct r600_bc_alu alu;
1662	int i, j, r;
1663
1664	r = tgsi_split_constant(ctx, r600_src);
1665	if (r)
1666		return r;
1667	r = tgsi_split_literal_constant(ctx, r600_src);
1668	if (r)
1669		return r;
1670	for (i = 0; i < 4; i++) {
1671		memset(&alu, 0, sizeof(struct r600_bc_alu));
1672		alu.inst = ctx->inst_info->r600_opcode;
1673		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1674			alu.src[j] = r600_src[j];
1675			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1676		}
1677
1678		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1679		if (r)
1680			return r;
1681
1682		alu.dst.chan = i;
1683		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1684		/* handle some special cases */
1685		switch (ctx->inst_info->tgsi_opcode) {
1686		case TGSI_OPCODE_DP2:
1687			if (i > 1) {
1688				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1689				alu.src[0].chan = alu.src[1].chan = 0;
1690			}
1691			break;
1692		case TGSI_OPCODE_DP3:
1693			if (i > 2) {
1694				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1695				alu.src[0].chan = alu.src[1].chan = 0;
1696			}
1697			break;
1698		case TGSI_OPCODE_DPH:
1699			if (i == 3) {
1700				alu.src[0].sel = V_SQ_ALU_SRC_1;
1701				alu.src[0].chan = 0;
1702				alu.src[0].neg = 0;
1703			}
1704			break;
1705		default:
1706			break;
1707		}
1708		if (i == 3) {
1709			alu.last = 1;
1710		}
1711		r = r600_bc_add_alu(ctx->bc, &alu);
1712		if (r)
1713			return r;
1714	}
1715	return 0;
1716}
1717
1718static int tgsi_tex(struct r600_shader_ctx *ctx)
1719{
1720	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1721	struct r600_bc_tex tex;
1722	struct r600_bc_alu alu;
1723	unsigned src_gpr;
1724	int r, i;
1725	int opcode;
1726	boolean src_not_temp =
1727		inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&
1728		inst->Src[0].Register.File != TGSI_FILE_INPUT;
1729	uint32_t lit_vals[4];
1730
1731	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1732
1733	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1734		/* Add perspective divide */
1735		memset(&alu, 0, sizeof(struct r600_bc_alu));
1736		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1737		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1738		if (r)
1739			return r;
1740
1741		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1742		alu.dst.sel = ctx->temp_reg;
1743		alu.dst.chan = 3;
1744		alu.last = 1;
1745		alu.dst.write = 1;
1746		r = r600_bc_add_alu(ctx->bc, &alu);
1747		if (r)
1748			return r;
1749
1750		for (i = 0; i < 3; i++) {
1751			memset(&alu, 0, sizeof(struct r600_bc_alu));
1752			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1753			alu.src[0].sel = ctx->temp_reg;
1754			alu.src[0].chan = 3;
1755			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1756			if (r)
1757				return r;
1758			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1759			alu.dst.sel = ctx->temp_reg;
1760			alu.dst.chan = i;
1761			alu.dst.write = 1;
1762			r = r600_bc_add_alu(ctx->bc, &alu);
1763			if (r)
1764				return r;
1765		}
1766		memset(&alu, 0, sizeof(struct r600_bc_alu));
1767		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1768		alu.src[0].sel = V_SQ_ALU_SRC_1;
1769		alu.src[0].chan = 0;
1770		alu.dst.sel = ctx->temp_reg;
1771		alu.dst.chan = 3;
1772		alu.last = 1;
1773		alu.dst.write = 1;
1774		r = r600_bc_add_alu(ctx->bc, &alu);
1775		if (r)
1776			return r;
1777		src_not_temp = FALSE;
1778		src_gpr = ctx->temp_reg;
1779	}
1780
1781	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1782		int src_chan, src2_chan;
1783
1784		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1785		for (i = 0; i < 4; i++) {
1786			memset(&alu, 0, sizeof(struct r600_bc_alu));
1787			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1788			switch (i) {
1789			case 0:
1790				src_chan = 2;
1791				src2_chan = 1;
1792				break;
1793			case 1:
1794				src_chan = 2;
1795				src2_chan = 0;
1796				break;
1797			case 2:
1798				src_chan = 0;
1799				src2_chan = 2;
1800				break;
1801			case 3:
1802				src_chan = 1;
1803				src2_chan = 2;
1804				break;
1805			default:
1806				assert(0);
1807				src_chan = 0;
1808				src2_chan = 0;
1809				break;
1810			}
1811			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1812			if (r)
1813				return r;
1814			alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
1815			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1816			if (r)
1817				return r;
1818			alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
1819			alu.dst.sel = ctx->temp_reg;
1820			alu.dst.chan = i;
1821			if (i == 3)
1822				alu.last = 1;
1823			alu.dst.write = 1;
1824			r = r600_bc_add_alu(ctx->bc, &alu);
1825			if (r)
1826				return r;
1827		}
1828
1829		/* tmp1.z = RCP_e(|tmp1.z|) */
1830		memset(&alu, 0, sizeof(struct r600_bc_alu));
1831		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1832		alu.src[0].sel = ctx->temp_reg;
1833		alu.src[0].chan = 2;
1834		alu.src[0].abs = 1;
1835		alu.dst.sel = ctx->temp_reg;
1836		alu.dst.chan = 2;
1837		alu.dst.write = 1;
1838		alu.last = 1;
1839		r = r600_bc_add_alu(ctx->bc, &alu);
1840		if (r)
1841			return r;
1842
1843		/* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
1844		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
1845		 * muladd has no writemask, have to use another temp
1846		 */
1847		memset(&alu, 0, sizeof(struct r600_bc_alu));
1848		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1849		alu.is_op3 = 1;
1850
1851		alu.src[0].sel = ctx->temp_reg;
1852		alu.src[0].chan = 0;
1853		alu.src[1].sel = ctx->temp_reg;
1854		alu.src[1].chan = 2;
1855
1856		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1857		alu.src[2].chan = 0;
1858
1859		alu.dst.sel = ctx->temp_reg;
1860		alu.dst.chan = 0;
1861		alu.dst.write = 1;
1862
1863		r = r600_bc_add_alu(ctx->bc, &alu);
1864		if (r)
1865			return r;
1866
1867		memset(&alu, 0, sizeof(struct r600_bc_alu));
1868		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1869		alu.is_op3 = 1;
1870
1871		alu.src[0].sel = ctx->temp_reg;
1872		alu.src[0].chan = 1;
1873		alu.src[1].sel = ctx->temp_reg;
1874		alu.src[1].chan = 2;
1875
1876		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1877		alu.src[2].chan = 0;
1878
1879		alu.dst.sel = ctx->temp_reg;
1880		alu.dst.chan = 1;
1881		alu.dst.write = 1;
1882
1883		alu.last = 1;
1884		r = r600_bc_add_alu(ctx->bc, &alu);
1885		if (r)
1886			return r;
1887
1888		lit_vals[0] = fui(1.5f);
1889
1890		r = r600_bc_add_literal(ctx->bc, lit_vals);
1891		if (r)
1892			return r;
1893		src_not_temp = FALSE;
1894		src_gpr = ctx->temp_reg;
1895	}
1896
1897	if (src_not_temp) {
1898		for (i = 0; i < 4; i++) {
1899			memset(&alu, 0, sizeof(struct r600_bc_alu));
1900			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1901			alu.src[0].sel = src_gpr;
1902			alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1903			alu.dst.sel = ctx->temp_reg;
1904			alu.dst.chan = i;
1905			if (i == 3)
1906				alu.last = 1;
1907			alu.dst.write = 1;
1908			r = r600_bc_add_alu(ctx->bc, &alu);
1909			if (r)
1910				return r;
1911		}
1912		src_gpr = ctx->temp_reg;
1913	}
1914
1915	opcode = ctx->inst_info->r600_opcode;
1916	if (opcode == SQ_TEX_INST_SAMPLE &&
1917	    (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
1918		opcode = SQ_TEX_INST_SAMPLE_C;
1919
1920	memset(&tex, 0, sizeof(struct r600_bc_tex));
1921	tex.inst = opcode;
1922	tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1923	tex.resource_id = tex.sampler_id;
1924	tex.src_gpr = src_gpr;
1925	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1926	tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
1927	tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
1928	tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
1929	tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
1930	tex.src_sel_x = 0;
1931	tex.src_sel_y = 1;
1932	tex.src_sel_z = 2;
1933	tex.src_sel_w = 3;
1934
1935	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1936		tex.src_sel_x = 1;
1937		tex.src_sel_y = 0;
1938		tex.src_sel_z = 3;
1939		tex.src_sel_w = 1;
1940	}
1941
1942	if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1943		tex.coord_type_x = 1;
1944		tex.coord_type_y = 1;
1945		tex.coord_type_z = 1;
1946		tex.coord_type_w = 1;
1947	}
1948
1949	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
1950		tex.src_sel_w = 2;
1951
1952	r = r600_bc_add_tex(ctx->bc, &tex);
1953	if (r)
1954		return r;
1955
1956	/* add shadow ambient support  - gallium doesn't do it yet */
1957	return 0;
1958}
1959
1960static int tgsi_lrp(struct r600_shader_ctx *ctx)
1961{
1962	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1963	struct r600_bc_alu_src r600_src[3];
1964	struct r600_bc_alu alu;
1965	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
1966	unsigned i;
1967	int r;
1968
1969	r = tgsi_split_constant(ctx, r600_src);
1970	if (r)
1971		return r;
1972	r = tgsi_split_literal_constant(ctx, r600_src);
1973	if (r)
1974		return r;
1975	/* 1 - src0 */
1976	for (i = 0; i < lasti + 1; i++) {
1977		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
1978			continue;
1979
1980		memset(&alu, 0, sizeof(struct r600_bc_alu));
1981		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1982		alu.src[0].sel = V_SQ_ALU_SRC_1;
1983		alu.src[0].chan = 0;
1984		alu.src[1] = r600_src[0];
1985		alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1986		alu.src[1].neg = 1;
1987		alu.dst.sel = ctx->temp_reg;
1988		alu.dst.chan = i;
1989		if (i == lasti) {
1990			alu.last = 1;
1991		}
1992		alu.dst.write = 1;
1993		r = r600_bc_add_alu(ctx->bc, &alu);
1994		if (r)
1995			return r;
1996	}
1997	r = r600_bc_add_literal(ctx->bc, ctx->value);
1998	if (r)
1999		return r;
2000
2001	/* (1 - src0) * src2 */
2002	for (i = 0; i < lasti + 1; i++) {
2003		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2004			continue;
2005
2006		memset(&alu, 0, sizeof(struct r600_bc_alu));
2007		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2008		alu.src[0].sel = ctx->temp_reg;
2009		alu.src[0].chan = i;
2010		alu.src[1] = r600_src[2];
2011		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
2012		alu.dst.sel = ctx->temp_reg;
2013		alu.dst.chan = i;
2014		if (i == lasti) {
2015			alu.last = 1;
2016		}
2017		alu.dst.write = 1;
2018		r = r600_bc_add_alu(ctx->bc, &alu);
2019		if (r)
2020			return r;
2021	}
2022	r = r600_bc_add_literal(ctx->bc, ctx->value);
2023	if (r)
2024		return r;
2025
2026	/* src0 * src1 + (1 - src0) * src2 */
2027	for (i = 0; i < lasti + 1; i++) {
2028		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2029			continue;
2030
2031		memset(&alu, 0, sizeof(struct r600_bc_alu));
2032		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2033		alu.is_op3 = 1;
2034		alu.src[0] = r600_src[0];
2035		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2036		alu.src[1] = r600_src[1];
2037		alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2038		alu.src[2].sel = ctx->temp_reg;
2039		alu.src[2].chan = i;
2040
2041		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2042		if (r)
2043			return r;
2044
2045		alu.dst.chan = i;
2046		if (i == lasti) {
2047			alu.last = 1;
2048		}
2049		r = r600_bc_add_alu(ctx->bc, &alu);
2050		if (r)
2051			return r;
2052	}
2053	return 0;
2054}
2055
2056static int tgsi_cmp(struct r600_shader_ctx *ctx)
2057{
2058	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2059	struct r600_bc_alu_src r600_src[3];
2060	struct r600_bc_alu alu;
2061	int i, r;
2062	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
2063
2064	r = tgsi_split_constant(ctx, r600_src);
2065	if (r)
2066		return r;
2067	r = tgsi_split_literal_constant(ctx, r600_src);
2068	if (r)
2069		return r;
2070
2071	for (i = 0; i < lasti + 1; i++) {
2072		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
2073			continue;
2074
2075		memset(&alu, 0, sizeof(struct r600_bc_alu));
2076		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
2077		alu.src[0] = r600_src[0];
2078		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2079
2080		alu.src[1] = r600_src[2];
2081		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
2082
2083		alu.src[2] = r600_src[1];
2084		alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
2085
2086		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2087		if (r)
2088			return r;
2089
2090		alu.dst.chan = i;
2091		alu.dst.write = 1;
2092		alu.is_op3 = 1;
2093		if (i == lasti)
2094			alu.last = 1;
2095		r = r600_bc_add_alu(ctx->bc, &alu);
2096		if (r)
2097			return r;
2098	}
2099	return 0;
2100}
2101
2102static int tgsi_xpd(struct r600_shader_ctx *ctx)
2103{
2104	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2105	struct r600_bc_alu_src r600_src[3];
2106	struct r600_bc_alu alu;
2107	uint32_t use_temp = 0;
2108	int i, r;
2109
2110	if (inst->Dst[0].Register.WriteMask != 0xf)
2111		use_temp = 1;
2112
2113	r = tgsi_split_constant(ctx, r600_src);
2114	if (r)
2115		return r;
2116	r = tgsi_split_literal_constant(ctx, r600_src);
2117	if (r)
2118		return r;
2119
2120	for (i = 0; i < 4; i++) {
2121		memset(&alu, 0, sizeof(struct r600_bc_alu));
2122		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2123
2124		alu.src[0] = r600_src[0];
2125		switch (i) {
2126		case 0:
2127			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2128			break;
2129		case 1:
2130			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2131			break;
2132		case 2:
2133			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2134			break;
2135		case 3:
2136			alu.src[0].sel = V_SQ_ALU_SRC_0;
2137			alu.src[0].chan = i;
2138		}
2139
2140		alu.src[1] = r600_src[1];
2141		switch (i) {
2142		case 0:
2143			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2144			break;
2145		case 1:
2146			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2147			break;
2148		case 2:
2149			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2150			break;
2151		case 3:
2152			alu.src[1].sel = V_SQ_ALU_SRC_0;
2153			alu.src[1].chan = i;
2154		}
2155
2156		alu.dst.sel = ctx->temp_reg;
2157		alu.dst.chan = i;
2158		alu.dst.write = 1;
2159
2160		if (i == 3)
2161			alu.last = 1;
2162		r = r600_bc_add_alu(ctx->bc, &alu);
2163		if (r)
2164			return r;
2165
2166		r = r600_bc_add_literal(ctx->bc, ctx->value);
2167		if (r)
2168			return r;
2169	}
2170
2171	for (i = 0; i < 4; i++) {
2172		memset(&alu, 0, sizeof(struct r600_bc_alu));
2173		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
2174
2175		alu.src[0] = r600_src[0];
2176		switch (i) {
2177		case 0:
2178			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
2179			break;
2180		case 1:
2181			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
2182			break;
2183		case 2:
2184			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2185			break;
2186		case 3:
2187			alu.src[0].sel = V_SQ_ALU_SRC_0;
2188			alu.src[0].chan = i;
2189		}
2190
2191		alu.src[1] = r600_src[1];
2192		switch (i) {
2193		case 0:
2194			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
2195			break;
2196		case 1:
2197			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
2198			break;
2199		case 2:
2200			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
2201			break;
2202		case 3:
2203			alu.src[1].sel = V_SQ_ALU_SRC_0;
2204			alu.src[1].chan = i;
2205		}
2206
2207		alu.src[2].sel = ctx->temp_reg;
2208		alu.src[2].neg = 1;
2209		alu.src[2].chan = i;
2210
2211		if (use_temp)
2212			alu.dst.sel = ctx->temp_reg;
2213		else {
2214			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2215			if (r)
2216				return r;
2217		}
2218		alu.dst.chan = i;
2219		alu.dst.write = 1;
2220		alu.is_op3 = 1;
2221		if (i == 3)
2222			alu.last = 1;
2223		r = r600_bc_add_alu(ctx->bc, &alu);
2224		if (r)
2225			return r;
2226
2227		r = r600_bc_add_literal(ctx->bc, ctx->value);
2228		if (r)
2229			return r;
2230	}
2231	if (use_temp)
2232		return tgsi_helper_copy(ctx, inst);
2233	return 0;
2234}
2235
2236static int tgsi_exp(struct r600_shader_ctx *ctx)
2237{
2238	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2239	struct r600_bc_alu_src r600_src[3] = { { 0 } };
2240	struct r600_bc_alu alu;
2241	int r;
2242
2243	/* result.x = 2^floor(src); */
2244	if (inst->Dst[0].Register.WriteMask & 1) {
2245		memset(&alu, 0, sizeof(struct r600_bc_alu));
2246
2247		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2248		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2249		if (r)
2250			return r;
2251
2252		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2253
2254		alu.dst.sel = ctx->temp_reg;
2255		alu.dst.chan = 0;
2256		alu.dst.write = 1;
2257		alu.last = 1;
2258		r = r600_bc_add_alu(ctx->bc, &alu);
2259		if (r)
2260			return r;
2261
2262		r = r600_bc_add_literal(ctx->bc, ctx->value);
2263		if (r)
2264			return r;
2265
2266		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2267		alu.src[0].sel = ctx->temp_reg;
2268		alu.src[0].chan = 0;
2269
2270		alu.dst.sel = ctx->temp_reg;
2271		alu.dst.chan = 0;
2272		alu.dst.write = 1;
2273		alu.last = 1;
2274		r = r600_bc_add_alu(ctx->bc, &alu);
2275		if (r)
2276			return r;
2277
2278		r = r600_bc_add_literal(ctx->bc, ctx->value);
2279		if (r)
2280			return r;
2281	}
2282
2283	/* result.y = tmp - floor(tmp); */
2284	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2285		memset(&alu, 0, sizeof(struct r600_bc_alu));
2286
2287		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
2288		alu.src[0] = r600_src[0];
2289		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2290		if (r)
2291			return r;
2292		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2293
2294		alu.dst.sel = ctx->temp_reg;
2295//		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2296//		if (r)
2297//			return r;
2298		alu.dst.write = 1;
2299		alu.dst.chan = 1;
2300
2301		alu.last = 1;
2302
2303		r = r600_bc_add_alu(ctx->bc, &alu);
2304		if (r)
2305			return r;
2306		r = r600_bc_add_literal(ctx->bc, ctx->value);
2307		if (r)
2308			return r;
2309	}
2310
2311	/* result.z = RoughApprox2ToX(tmp);*/
2312	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
2313		memset(&alu, 0, sizeof(struct r600_bc_alu));
2314		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2315		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2316		if (r)
2317			return r;
2318		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2319
2320		alu.dst.sel = ctx->temp_reg;
2321		alu.dst.write = 1;
2322		alu.dst.chan = 2;
2323
2324		alu.last = 1;
2325
2326		r = r600_bc_add_alu(ctx->bc, &alu);
2327		if (r)
2328			return r;
2329		r = r600_bc_add_literal(ctx->bc, ctx->value);
2330		if (r)
2331			return r;
2332	}
2333
2334	/* result.w = 1.0;*/
2335	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2336		memset(&alu, 0, sizeof(struct r600_bc_alu));
2337
2338		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2339		alu.src[0].sel = V_SQ_ALU_SRC_1;
2340		alu.src[0].chan = 0;
2341
2342		alu.dst.sel = ctx->temp_reg;
2343		alu.dst.chan = 3;
2344		alu.dst.write = 1;
2345		alu.last = 1;
2346		r = r600_bc_add_alu(ctx->bc, &alu);
2347		if (r)
2348			return r;
2349		r = r600_bc_add_literal(ctx->bc, ctx->value);
2350		if (r)
2351			return r;
2352	}
2353	return tgsi_helper_copy(ctx, inst);
2354}
2355
2356static int tgsi_log(struct r600_shader_ctx *ctx)
2357{
2358	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2359	struct r600_bc_alu alu;
2360	int r;
2361
2362	/* result.x = floor(log2(src)); */
2363	if (inst->Dst[0].Register.WriteMask & 1) {
2364		memset(&alu, 0, sizeof(struct r600_bc_alu));
2365
2366		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2367		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2368		if (r)
2369			return r;
2370
2371		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2372
2373		alu.dst.sel = ctx->temp_reg;
2374		alu.dst.chan = 0;
2375		alu.dst.write = 1;
2376		alu.last = 1;
2377		r = r600_bc_add_alu(ctx->bc, &alu);
2378		if (r)
2379			return r;
2380
2381		r = r600_bc_add_literal(ctx->bc, ctx->value);
2382		if (r)
2383			return r;
2384
2385		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2386		alu.src[0].sel = ctx->temp_reg;
2387		alu.src[0].chan = 0;
2388
2389		alu.dst.sel = ctx->temp_reg;
2390		alu.dst.chan = 0;
2391		alu.dst.write = 1;
2392		alu.last = 1;
2393
2394		r = r600_bc_add_alu(ctx->bc, &alu);
2395		if (r)
2396			return r;
2397
2398		r = r600_bc_add_literal(ctx->bc, ctx->value);
2399		if (r)
2400			return r;
2401	}
2402
2403	/* result.y = src.x / (2 ^ floor(log2(src.x))); */
2404	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2405		memset(&alu, 0, sizeof(struct r600_bc_alu));
2406
2407		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2408		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2409		if (r)
2410			return r;
2411
2412		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2413
2414		alu.dst.sel = ctx->temp_reg;
2415		alu.dst.chan = 1;
2416		alu.dst.write = 1;
2417		alu.last = 1;
2418
2419		r = r600_bc_add_alu(ctx->bc, &alu);
2420		if (r)
2421			return r;
2422
2423		r = r600_bc_add_literal(ctx->bc, ctx->value);
2424		if (r)
2425			return r;
2426
2427		memset(&alu, 0, sizeof(struct r600_bc_alu));
2428
2429		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2430		alu.src[0].sel = ctx->temp_reg;
2431		alu.src[0].chan = 1;
2432
2433		alu.dst.sel = ctx->temp_reg;
2434		alu.dst.chan = 1;
2435		alu.dst.write = 1;
2436		alu.last = 1;
2437
2438		r = r600_bc_add_alu(ctx->bc, &alu);
2439		if (r)
2440			return r;
2441
2442		r = r600_bc_add_literal(ctx->bc, ctx->value);
2443		if (r)
2444			return r;
2445
2446		memset(&alu, 0, sizeof(struct r600_bc_alu));
2447
2448		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
2449		alu.src[0].sel = ctx->temp_reg;
2450		alu.src[0].chan = 1;
2451
2452		alu.dst.sel = ctx->temp_reg;
2453		alu.dst.chan = 1;
2454		alu.dst.write = 1;
2455		alu.last = 1;
2456
2457		r = r600_bc_add_alu(ctx->bc, &alu);
2458		if (r)
2459			return r;
2460
2461		r = r600_bc_add_literal(ctx->bc, ctx->value);
2462		if (r)
2463			return r;
2464
2465		memset(&alu, 0, sizeof(struct r600_bc_alu));
2466
2467		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
2468		alu.src[0].sel = ctx->temp_reg;
2469		alu.src[0].chan = 1;
2470
2471		alu.dst.sel = ctx->temp_reg;
2472		alu.dst.chan = 1;
2473		alu.dst.write = 1;
2474		alu.last = 1;
2475
2476		r = r600_bc_add_alu(ctx->bc, &alu);
2477		if (r)
2478			return r;
2479
2480		r = r600_bc_add_literal(ctx->bc, ctx->value);
2481		if (r)
2482			return r;
2483
2484		memset(&alu, 0, sizeof(struct r600_bc_alu));
2485
2486		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2487
2488		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2489		if (r)
2490			return r;
2491
2492		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2493
2494		alu.src[1].sel = ctx->temp_reg;
2495		alu.src[1].chan = 1;
2496
2497		alu.dst.sel = ctx->temp_reg;
2498		alu.dst.chan = 1;
2499		alu.dst.write = 1;
2500		alu.last = 1;
2501
2502		r = r600_bc_add_alu(ctx->bc, &alu);
2503		if (r)
2504			return r;
2505
2506		r = r600_bc_add_literal(ctx->bc, ctx->value);
2507		if (r)
2508			return r;
2509	}
2510
2511	/* result.z = log2(src);*/
2512	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2513		memset(&alu, 0, sizeof(struct r600_bc_alu));
2514
2515		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2516		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2517		if (r)
2518			return r;
2519
2520		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2521
2522		alu.dst.sel = ctx->temp_reg;
2523		alu.dst.write = 1;
2524		alu.dst.chan = 2;
2525		alu.last = 1;
2526
2527		r = r600_bc_add_alu(ctx->bc, &alu);
2528		if (r)
2529			return r;
2530
2531		r = r600_bc_add_literal(ctx->bc, ctx->value);
2532		if (r)
2533			return r;
2534	}
2535
2536	/* result.w = 1.0; */
2537	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2538		memset(&alu, 0, sizeof(struct r600_bc_alu));
2539
2540		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2541		alu.src[0].sel = V_SQ_ALU_SRC_1;
2542		alu.src[0].chan = 0;
2543
2544		alu.dst.sel = ctx->temp_reg;
2545		alu.dst.chan = 3;
2546		alu.dst.write = 1;
2547		alu.last = 1;
2548
2549		r = r600_bc_add_alu(ctx->bc, &alu);
2550		if (r)
2551			return r;
2552
2553		r = r600_bc_add_literal(ctx->bc, ctx->value);
2554		if (r)
2555			return r;
2556	}
2557
2558	return tgsi_helper_copy(ctx, inst);
2559}
2560
2561static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
2562{
2563	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2564	struct r600_bc_alu alu;
2565	int r;
2566	memset(&alu, 0, sizeof(struct r600_bc_alu));
2567
2568	switch (inst->Instruction.Opcode) {
2569	case TGSI_OPCODE_ARL:
2570		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR;
2571		break;
2572	case TGSI_OPCODE_ARR:
2573		alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
2574		break;
2575	default:
2576		assert(0);
2577		return -1;
2578	}
2579
2580	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2581	if (r)
2582		return r;
2583	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2584	alu.last = 1;
2585	alu.dst.chan = 0;
2586	alu.dst.sel = ctx->temp_reg;
2587	alu.dst.write = 1;
2588	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2589	if (r)
2590		return r;
2591	memset(&alu, 0, sizeof(struct r600_bc_alu));
2592	alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
2593	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2594	if (r)
2595		return r;
2596	alu.src[0].sel = ctx->temp_reg;
2597	alu.src[0].chan = 0;
2598	alu.last = 1;
2599	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2600	if (r)
2601		return r;
2602	return 0;
2603}
2604static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
2605{
2606	/* TODO from r600c, ar values don't persist between clauses */
2607	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2608	struct r600_bc_alu alu;
2609	int r;
2610	memset(&alu, 0, sizeof(struct r600_bc_alu));
2611
2612	switch (inst->Instruction.Opcode) {
2613	case TGSI_OPCODE_ARL:
2614		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
2615		break;
2616	case TGSI_OPCODE_ARR:
2617		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA;
2618		break;
2619	default:
2620		assert(0);
2621		return -1;
2622	}
2623
2624
2625	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2626	if (r)
2627		return r;
2628	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2629
2630	alu.last = 1;
2631
2632	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2633	if (r)
2634		return r;
2635	ctx->bc->cf_last->r6xx_uses_waterfall = 1;
2636	return 0;
2637}
2638
2639static int tgsi_opdst(struct r600_shader_ctx *ctx)
2640{
2641	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2642	struct r600_bc_alu alu;
2643	int i, r = 0;
2644
2645	for (i = 0; i < 4; i++) {
2646		memset(&alu, 0, sizeof(struct r600_bc_alu));
2647
2648		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2649		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2650		if (r)
2651			return r;
2652
2653		if (i == 0 || i == 3) {
2654			alu.src[0].sel = V_SQ_ALU_SRC_1;
2655		} else {
2656			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2657			if (r)
2658				return r;
2659			alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2660		}
2661
2662	        if (i == 0 || i == 2) {
2663			alu.src[1].sel = V_SQ_ALU_SRC_1;
2664		} else {
2665			r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
2666			if (r)
2667				return r;
2668			alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2669		}
2670		if (i == 3)
2671			alu.last = 1;
2672		r = r600_bc_add_alu(ctx->bc, &alu);
2673		if (r)
2674			return r;
2675	}
2676	return 0;
2677}
2678
2679static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2680{
2681	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2682	struct r600_bc_alu alu;
2683	int r;
2684
2685	memset(&alu, 0, sizeof(struct r600_bc_alu));
2686	alu.inst = opcode;
2687	alu.predicate = 1;
2688
2689	alu.dst.sel = ctx->temp_reg;
2690	alu.dst.write = 1;
2691	alu.dst.chan = 0;
2692
2693	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2694	if (r)
2695		return r;
2696	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2697	alu.src[1].sel = V_SQ_ALU_SRC_0;
2698	alu.src[1].chan = 0;
2699
2700	alu.last = 1;
2701
2702	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2703	if (r)
2704		return r;
2705	return 0;
2706}
2707
2708static int pops(struct r600_shader_ctx *ctx, int pops)
2709{
2710	int alu_pop = 3;
2711	if (ctx->bc->cf_last) {
2712		if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
2713			alu_pop = 0;
2714		else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
2715			alu_pop = 1;
2716	}
2717	alu_pop += pops;
2718	if (alu_pop == 1) {
2719		ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
2720		ctx->bc->force_add_cf = 1;
2721	} else if (alu_pop == 2) {
2722		ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
2723		ctx->bc->force_add_cf = 1;
2724	} else {
2725		r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2726		ctx->bc->cf_last->pop_count = pops;
2727		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
2728	}
2729	return 0;
2730}
2731
2732static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2733{
2734	switch(reason) {
2735	case FC_PUSH_VPM:
2736		ctx->bc->callstack[ctx->bc->call_sp].current--;
2737		break;
2738	case FC_PUSH_WQM:
2739	case FC_LOOP:
2740		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2741		break;
2742	case FC_REP:
2743		/* TOODO : for 16 vp asic should -= 2; */
2744		ctx->bc->callstack[ctx->bc->call_sp].current --;
2745		break;
2746	}
2747}
2748
2749static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2750{
2751	if (check_max_only) {
2752		int diff;
2753		switch (reason) {
2754		case FC_PUSH_VPM:
2755			diff = 1;
2756			break;
2757		case FC_PUSH_WQM:
2758			diff = 4;
2759			break;
2760		default:
2761			assert(0);
2762			diff = 0;
2763		}
2764		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2765		    ctx->bc->callstack[ctx->bc->call_sp].max) {
2766			ctx->bc->callstack[ctx->bc->call_sp].max =
2767				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2768		}
2769		return;
2770	}
2771	switch (reason) {
2772	case FC_PUSH_VPM:
2773		ctx->bc->callstack[ctx->bc->call_sp].current++;
2774		break;
2775	case FC_PUSH_WQM:
2776	case FC_LOOP:
2777		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2778		break;
2779	case FC_REP:
2780		ctx->bc->callstack[ctx->bc->call_sp].current++;
2781		break;
2782	}
2783
2784	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2785	    ctx->bc->callstack[ctx->bc->call_sp].max) {
2786		ctx->bc->callstack[ctx->bc->call_sp].max =
2787			ctx->bc->callstack[ctx->bc->call_sp].current;
2788	}
2789}
2790
2791static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2792{
2793	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2794
2795	sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2796						sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2797	sp->mid[sp->num_mid] = ctx->bc->cf_last;
2798	sp->num_mid++;
2799}
2800
2801static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2802{
2803	ctx->bc->fc_sp++;
2804	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2805	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2806}
2807
2808static void fc_poplevel(struct r600_shader_ctx *ctx)
2809{
2810	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2811	if (sp->mid) {
2812		free(sp->mid);
2813		sp->mid = NULL;
2814	}
2815	sp->num_mid = 0;
2816	sp->start = NULL;
2817	sp->type = 0;
2818	ctx->bc->fc_sp--;
2819}
2820
/*
 * Everything between this #if 0 and the matching #endif is compiled out.
 * It contains helpers for RETURN / JUMP emission and flag-based loop
 * exit; several of them are empty stubs or carry TODOs.
 */
#if 0
/* Emit a CF RETURN instruction. Always returns 0. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/*
 * Emit a CF JUMP with the given pop count. The "offset" argument is not
 * used yet (see TODO). Always returns 0.
 */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	ctx->bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: emits nothing and returns 0. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

/*
 * Calls, in order: emit_testflag, emit_jump_to_offset(1, 4),
 * emit_setret_in_loop_flag(V_SQ_ALU_SRC_0), pops(ifidx + 1), emit_return.
 */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * After emit_testflag, emit the CF opcode of the current instruction with
 * pop_count 1, record it in the mid list of level fc_sp, then pop once.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2868
2869static int tgsi_if(struct r600_shader_ctx *ctx)
2870{
2871	emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2872
2873	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2874
2875	fc_pushlevel(ctx, FC_IF);
2876
2877	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2878	return 0;
2879}
2880
2881static int tgsi_else(struct r600_shader_ctx *ctx)
2882{
2883	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2884	ctx->bc->cf_last->pop_count = 1;
2885
2886	fc_set_mid(ctx, ctx->bc->fc_sp);
2887	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2888	return 0;
2889}
2890
2891static int tgsi_endif(struct r600_shader_ctx *ctx)
2892{
2893	pops(ctx, 1);
2894	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2895		R600_ERR("if/endif unbalanced in shader\n");
2896		return -1;
2897	}
2898
2899	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2900		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2901		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2902	} else {
2903		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2904	}
2905	fc_poplevel(ctx);
2906
2907	callstack_decrease_current(ctx, FC_PUSH_VPM);
2908	return 0;
2909}
2910
2911static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2912{
2913	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2914
2915	fc_pushlevel(ctx, FC_LOOP);
2916
2917	/* check stack depth */
2918	callstack_check_depth(ctx, FC_LOOP, 0);
2919	return 0;
2920}
2921
2922static int tgsi_endloop(struct r600_shader_ctx *ctx)
2923{
2924	int i;
2925
2926	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
2927
2928	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2929		R600_ERR("loop/endloop in shader code are not paired.\n");
2930		return -EINVAL;
2931	}
2932
2933	/* fixup loop pointers - from r600isa
2934	   LOOP END points to CF after LOOP START,
2935	   LOOP START point to CF after LOOP END
2936	   BRK/CONT point to LOOP END CF
2937	*/
2938	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2939
2940	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2941
2942	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2943		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2944	}
2945	/* TODO add LOOPRET support */
2946	fc_poplevel(ctx);
2947	callstack_decrease_current(ctx, FC_LOOP);
2948	return 0;
2949}
2950
2951static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2952{
2953	unsigned int fscp;
2954
2955	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2956	{
2957		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2958			break;
2959	}
2960
2961	if (fscp == 0) {
2962		R600_ERR("Break not inside loop/endloop pair\n");
2963		return -EINVAL;
2964	}
2965
2966	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2967	ctx->bc->cf_last->pop_count = 1;
2968
2969	fc_set_mid(ctx, fscp);
2970
2971	pops(ctx, 1);
2972	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2973	return 0;
2974}
2975
2976static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
2977	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
2978	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2979	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2980
2981	/* FIXME:
2982	 * For state trackers other than OpenGL, we'll want to use
2983	 * _RECIP_IEEE instead.
2984	 */
2985	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},
2986
2987	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
2988	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2989	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
2990	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2991	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2992	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2993	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2994	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2995	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2996	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2997	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2998	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2999	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
3000	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3001	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3002	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3003	/* gap */
3004	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3005	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3006	/* gap */
3007	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3008	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3009	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3010	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3011	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3012	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3013	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3014	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3015	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3016	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3017	/* gap */
3018	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3019	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3020	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3021	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3022	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
3023	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3024	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3025	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3026	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3027	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3028	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3029	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3030	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3031	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3032	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3033	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3034	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3035	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3036	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3037	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3038	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3039	{TGSI_OPCODE_TXD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3040	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3041	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3042	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3043	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3044	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3045	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3046	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3047	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
3048	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3049	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3050	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3051	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3052	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3053	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3054	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3055	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3056	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3057	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3058	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3059	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3060	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3061	/* gap */
3062	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3063	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3064	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3065	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3066	/* gap */
3067	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3068	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3069	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3070	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3071	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3072	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3073	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3074	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
3075	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3076	/* gap */
3077	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3078	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3079	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3080	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3081	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3082	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3083	{TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3084	{TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3085	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3086	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3087	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3088	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3089	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3090	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3091	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3092	/* gap */
3093	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3094	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3095	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3096	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3097	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3098	/* gap */
3099	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3100	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3101	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3102	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3103	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3104	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3105	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3106	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3107	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3108	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3109	/* gap */
3110	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3111	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3112	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3113	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3114	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3115	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3116	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3117	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3118	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3119	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3120	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3121	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3122	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3123	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3124	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3125	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3126	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3127	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3128	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3129	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3130	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3131	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3132	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3133	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3134	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3135	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3136	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3137	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3138};
3139
/*
 * TGSI instruction table built from the EG_-prefixed hardware opcode
 * constants (presumably the Evergreen-class counterpart of the table that
 * precedes it -- confirm against the code that selects between the tables).
 *
 * Each row holds four values, in order:
 *   1. the TGSI opcode (a bare number where no TGSI_OPCODE_* name is used);
 *   2. a flag that is 1 only on the MAD row, which is also the only row
 *      using an OP3 hardware opcode (presumably an "is OP3" marker --
 *      confirm against struct r600_shader_tgsi_instruction);
 *   3. the hardware opcode constant (ALU, CF or texture);
 *   4. the handler function for that opcode.
 *
 * The numeric placeholder rows are in ascending order and fill the slots
 * between named opcodes, so the table is presumably indexed directly by
 * TGSI opcode number.  NOTE(review): if so, rows must never be reordered,
 * inserted or removed without keeping row position == opcode value --
 * confirm against the lookup site.
 *
 * Every row whose handler is tgsi_unsupported carries the NOP opcode
 * constant.
 */
3140static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
3141	{TGSI_OPCODE_ARL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3142	{TGSI_OPCODE_MOV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3143	{TGSI_OPCODE_LIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
3144	{TGSI_OPCODE_RCP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
3145	{TGSI_OPCODE_RSQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
3146	{TGSI_OPCODE_EXP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
3147	{TGSI_OPCODE_LOG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3148	{TGSI_OPCODE_MUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
3149	{TGSI_OPCODE_ADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	/* DP3 and DP4 (like DPH and DP2 further down) share DOT4 and tgsi_dp. */
3150	{TGSI_OPCODE_DP3,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3151	{TGSI_OPCODE_DP4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3152	{TGSI_OPCODE_DST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
3153	{TGSI_OPCODE_MIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
3154	{TGSI_OPCODE_MAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
	/*
	 * SLT reuses SETGT (and SLE further down reuses SETGE) through
	 * tgsi_op2_swap -- presumably the handler swaps the two source
	 * operands; confirm against tgsi_op2_swap.
	 */
3155	{TGSI_OPCODE_SLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
3156	{TGSI_OPCODE_SGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
	/* Only row with the second field set to 1 and an OP3 opcode. */
3157	{TGSI_OPCODE_MAD,	1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
	/*
	 * SUB shares the ADD opcode with the ADD row -- presumably tgsi_op2
	 * negates the second operand for SUB; confirm against tgsi_op2.
	 */
3158	{TGSI_OPCODE_SUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
3159	{TGSI_OPCODE_LRP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
3160	{TGSI_OPCODE_CND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3161	/* gap: placeholder row for an opcode number listed without a name */
3162	{20,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3163	{TGSI_OPCODE_DP2A,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3164	/* gap: placeholder rows for opcode numbers listed without a name */
3165	{22,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3166	{23,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3167	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
3168	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3169	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
3170	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3171	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
3172	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
3173	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
3174	{TGSI_OPCODE_XPD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
3175	/* gap: placeholder row for an opcode number listed without a name */
3176	{32,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/*
	 * ABS shares the MOV opcode with the MOV row -- presumably tgsi_op2
	 * applies an absolute-value source modifier for ABS; confirm against
	 * tgsi_op2.
	 */
3177	{TGSI_OPCODE_ABS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
3178	{TGSI_OPCODE_RCC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3179	{TGSI_OPCODE_DPH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3180	{TGSI_OPCODE_COS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
	/*
	 * Rows handled by tgsi_tex (DDX, DDY, TEX, TXP, TXB, TXL) use
	 * SQ_TEX_INST_* constants, which carry no EG_ prefix.
	 */
3181	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
3182	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
3183	{TGSI_OPCODE_KILP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
3184	{TGSI_OPCODE_PK2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3185	{TGSI_OPCODE_PK2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3186	{TGSI_OPCODE_PK4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3187	{TGSI_OPCODE_PK4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3188	{TGSI_OPCODE_RFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3189	{TGSI_OPCODE_SEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
3190	{TGSI_OPCODE_SFL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3191	{TGSI_OPCODE_SGT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
3192	{TGSI_OPCODE_SIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
3193	{TGSI_OPCODE_SLE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
3194	{TGSI_OPCODE_SNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
3195	{TGSI_OPCODE_STR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* TEX and TXP both map to SAMPLE; TXB and TXL below both map to SAMPLE_L. */
3196	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3197	{TGSI_OPCODE_TXD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3198	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
3199	{TGSI_OPCODE_UP2H,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3200	{TGSI_OPCODE_UP2US,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3201	{TGSI_OPCODE_UP4B,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3202	{TGSI_OPCODE_UP4UB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3203	{TGSI_OPCODE_X2D,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3204	{TGSI_OPCODE_ARA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* ARR uses the same handler as the ARL row at the top of the table. */
3205	{TGSI_OPCODE_ARR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
3206	{TGSI_OPCODE_BRA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3207	{TGSI_OPCODE_CAL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3208	{TGSI_OPCODE_RET,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3209	{TGSI_OPCODE_SSG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
3210	{TGSI_OPCODE_CMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
3211	{TGSI_OPCODE_SCS,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
3212	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
3213	{TGSI_OPCODE_NRM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3214	{TGSI_OPCODE_DIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3215	{TGSI_OPCODE_DP2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
3216	{TGSI_OPCODE_TXL,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	/* BRK (and CONT further down) carry a CF opcode rather than an ALU one. */
3217	{TGSI_OPCODE_BRK,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
3218	{TGSI_OPCODE_IF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
3219	/* gap: placeholder rows for opcode numbers listed without a name */
3220	{75,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3221	{76,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3222	{TGSI_OPCODE_ELSE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
3223	{TGSI_OPCODE_ENDIF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
3224	/* gap: placeholder rows for opcode numbers listed without a name */
3225	{79,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3226	{80,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3227	{TGSI_OPCODE_PUSHA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3228	{TGSI_OPCODE_POPA,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3229	{TGSI_OPCODE_CEIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3230	{TGSI_OPCODE_I2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3231	{TGSI_OPCODE_NOT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3232	{TGSI_OPCODE_TRUNC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
3233	{TGSI_OPCODE_SHL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3234	/* gap: placeholder row for an opcode number listed without a name */
3235	{88,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3236	{TGSI_OPCODE_AND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3237	{TGSI_OPCODE_OR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3238	{TGSI_OPCODE_MOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3239	{TGSI_OPCODE_XOR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3240	{TGSI_OPCODE_SAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3241	{TGSI_OPCODE_TXF,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3242	{TGSI_OPCODE_TXQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3243	{TGSI_OPCODE_CONT,	0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
3244	{TGSI_OPCODE_EMIT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3245	{TGSI_OPCODE_ENDPRIM,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3246	{TGSI_OPCODE_BGNLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
3247	{TGSI_OPCODE_BGNSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3248	{TGSI_OPCODE_ENDLOOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
3249	{TGSI_OPCODE_ENDSUB,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3250	/* gap: placeholder rows for opcode numbers listed without a name */
3251	{103,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3252	{104,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3253	{105,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3254	{106,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* NOTE(review): TGSI NOP is routed to tgsi_unsupported here -- confirm this is intended. */
3255	{TGSI_OPCODE_NOP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3256	/* gap: placeholder rows for opcode numbers listed without a name */
3257	{108,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3258	{109,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3259	{110,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3260	{111,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3261	{TGSI_OPCODE_NRM4,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3262	{TGSI_OPCODE_CALLNZ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3263	{TGSI_OPCODE_IFC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3264	{TGSI_OPCODE_BREAKC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3265	{TGSI_OPCODE_KIL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
3266	{TGSI_OPCODE_END,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
3267	/* gap: placeholder row for an opcode number listed without a name */
3268	{118,			0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* Every remaining row (F2I through LAST) is routed to tgsi_unsupported. */
3269	{TGSI_OPCODE_F2I,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3270	{TGSI_OPCODE_IDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3271	{TGSI_OPCODE_IMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3272	{TGSI_OPCODE_IMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3273	{TGSI_OPCODE_INEG,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3274	{TGSI_OPCODE_ISGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3275	{TGSI_OPCODE_ISHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3276	{TGSI_OPCODE_ISLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3277	{TGSI_OPCODE_F2U,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3278	{TGSI_OPCODE_U2F,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3279	{TGSI_OPCODE_UADD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3280	{TGSI_OPCODE_UDIV,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3281	{TGSI_OPCODE_UMAD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3282	{TGSI_OPCODE_UMAX,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3283	{TGSI_OPCODE_UMIN,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3284	{TGSI_OPCODE_UMOD,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3285	{TGSI_OPCODE_UMUL,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3286	{TGSI_OPCODE_USEQ,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3287	{TGSI_OPCODE_USGE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3288	{TGSI_OPCODE_USHR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3289	{TGSI_OPCODE_USLT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3290	{TGSI_OPCODE_USNE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3291	{TGSI_OPCODE_SWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3292	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3293	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3294	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3295	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
3296};
3297