r600_shader.c revision 5d66a8606d68caf0fb4754c144c5fb7d87fbf7df
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "pipe/p_shader_tokens.h"
24#include "tgsi/tgsi_parse.h"
25#include "tgsi/tgsi_scan.h"
26#include "tgsi/tgsi_dump.h"
27#include "util/u_format.h"
28#include "r600_screen.h"
29#include "r600_context.h"
30#include "r600_shader.h"
31#include "r600_asm.h"
32#include "r600_sq.h"
33#include "r600d.h"
34#include <stdio.h>
35#include <errno.h>
36
37
38struct r600_shader_tgsi_instruction;
39
40struct r600_shader_ctx {
41	struct tgsi_shader_info			info;
42	struct tgsi_parse_context		parse;
43	const struct tgsi_token			*tokens;
44	unsigned				type;
45	unsigned				file_offset[TGSI_FILE_COUNT];
46	unsigned				temp_reg;
47	struct r600_shader_tgsi_instruction	*inst_info;
48	struct r600_bc				*bc;
49	struct r600_shader			*shader;
50	u32					value[4];
51	u32					*literals;
52	u32					nliterals;
53	u32                                     max_driver_temp_used;
54};
55
/* Opcode table entry: how one TGSI opcode is translated. */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry stands for (used in error reports) */
	unsigned	tgsi_opcode;
	/* NOTE(review): presumably flags three-operand encodings; not read in the visible code */
	unsigned	is_op3;
	/* hardware ALU opcode the handlers emit for this TGSI opcode */
	unsigned	r600_opcode;
	/* handler that emits the bytecode; returns 0 or a negative errno value */
	int (*process)(struct r600_shader_ctx *ctx);
};
62
63static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
64static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
65
66static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader)
67{
68	struct r600_context *rctx = r600_context(ctx);
69	const struct util_format_description *desc;
70	enum pipe_format resource_format[160];
71	unsigned i, nresources = 0;
72	struct r600_bc *bc = &shader->bc;
73	struct r600_bc_cf *cf;
74	struct r600_bc_vtx *vtx;
75
76	if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
77		return 0;
78	for (i = 0; i < rctx->vertex_elements->count; i++) {
79		resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
80	}
81	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
82		switch (cf->inst) {
83		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
84		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
85			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
86				desc = util_format_description(resource_format[vtx->buffer_id]);
87				if (desc == NULL) {
88					R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
89					return -EINVAL;
90				}
91				vtx->dst_sel_x = desc->swizzle[0];
92				vtx->dst_sel_y = desc->swizzle[1];
93				vtx->dst_sel_z = desc->swizzle[2];
94				vtx->dst_sel_w = desc->swizzle[3];
95			}
96			break;
97		default:
98			break;
99		}
100	}
101	return r600_bc_build(&shader->bc);
102}
103
104int r600_pipe_shader_create(struct pipe_context *ctx,
105			struct r600_context_state *rpshader,
106			const struct tgsi_token *tokens)
107{
108	struct r600_screen *rscreen = r600_screen(ctx->screen);
109	int r;
110
111//fprintf(stderr, "--------------------------------------------------------------\n");
112//tgsi_dump(tokens, 0);
113	if (rpshader == NULL)
114		return -ENOMEM;
115	rpshader->shader.family = radeon_get_family(rscreen->rw);
116	r = r600_shader_from_tgsi(tokens, &rpshader->shader);
117	if (r) {
118		R600_ERR("translation from TGSI failed !\n");
119		return r;
120	}
121	r = r600_bc_build(&rpshader->shader.bc);
122	if (r) {
123		R600_ERR("building bytecode failed !\n");
124		return r;
125	}
126//fprintf(stderr, "______________________________________________________________\n");
127	return 0;
128}
129
130static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader)
131{
132	struct r600_screen *rscreen = r600_screen(ctx->screen);
133	struct r600_shader *rshader = &rpshader->shader;
134	struct radeon_state *state;
135	unsigned i, tmp;
136
137	rpshader->rstate = radeon_state_decref(rpshader->rstate);
138	state = radeon_state_shader(rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_VS);
139	if (state == NULL)
140		return -ENOMEM;
141	for (i = 0; i < 10; i++) {
142		state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0;
143	}
144	/* so far never got proper semantic id from tgsi */
145	for (i = 0; i < 32; i++) {
146		tmp = i << ((i & 3) * 8);
147		state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp;
148	}
149	state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2);
150	state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr) |
151		S_028868_STACK_SIZE(rshader->bc.nstack);
152	rpshader->rstate = state;
153	rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
154	rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo);
155	rpshader->rstate->nbo = 2;
156	rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
157	rpshader->rstate->placement[2] = RADEON_GEM_DOMAIN_GTT;
158	return radeon_state_pm4(state);
159}
160
161static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader)
162{
163	const struct pipe_rasterizer_state *rasterizer;
164	struct r600_screen *rscreen = r600_screen(ctx->screen);
165	struct r600_shader *rshader = &rpshader->shader;
166	struct r600_context *rctx = r600_context(ctx);
167	struct radeon_state *state;
168	unsigned i, tmp, exports_ps, num_cout;
169	boolean have_pos = FALSE;
170
171	rasterizer = &rctx->rasterizer->state.rasterizer;
172	rpshader->rstate = radeon_state_decref(rpshader->rstate);
173	state = radeon_state_shader(rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_PS);
174	if (state == NULL)
175		return -ENOMEM;
176	for (i = 0; i < rshader->ninput; i++) {
177		tmp = S_028644_SEMANTIC(i);
178		tmp |= S_028644_SEL_CENTROID(1);
179		if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
180			have_pos = TRUE;
181		if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
182			rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) {
183			tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
184		}
185		if (rasterizer->sprite_coord_enable & (1 << i)) {
186			tmp |= S_028644_PT_SPRITE_TEX(1);
187		}
188		state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp;
189	}
190
191	exports_ps = 0;
192	num_cout = 0;
193	for (i = 0; i < rshader->noutput; i++) {
194		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
195			exports_ps |= 1;
196		else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
197			exports_ps |= (1 << (num_cout+1));
198			num_cout++;
199		}
200	}
201	if (!exports_ps) {
202		/* always at least export 1 component per pixel */
203		exports_ps = 2;
204	}
205	state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) |
206							S_0286CC_PERSP_GRADIENT_ENA(1);
207	if (have_pos) {
208		state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] |=  S_0286CC_POSITION_ENA(1);
209		                                                       S_0286CC_BARYC_SAMPLE_CNTL(1);
210	}
211	state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000;
212	state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr) |
213		S_028868_STACK_SIZE(rshader->bc.nstack);
214	state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps;
215	rpshader->rstate = state;
216	rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
217	rpshader->rstate->nbo = 1;
218	rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
219	return radeon_state_pm4(state);
220}
221
222static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader)
223{
224	struct r600_screen *rscreen = r600_screen(ctx->screen);
225	struct r600_context *rctx = r600_context(ctx);
226	struct r600_shader *rshader = &rpshader->shader;
227	int r;
228
229	/* copy new shader */
230	radeon_bo_decref(rscreen->rw, rpshader->bo);
231	rpshader->bo = NULL;
232	rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4,
233				4096, NULL);
234	if (rpshader->bo == NULL) {
235		return -ENOMEM;
236	}
237	radeon_bo_map(rscreen->rw, rpshader->bo);
238	memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4);
239	radeon_bo_unmap(rscreen->rw, rpshader->bo);
240	/* build state */
241	rshader->flat_shade = rctx->flat_shade;
242	switch (rshader->processor_type) {
243	case TGSI_PROCESSOR_VERTEX:
244		r = r600_pipe_shader_vs(ctx, rpshader);
245		break;
246	case TGSI_PROCESSOR_FRAGMENT:
247		r = r600_pipe_shader_ps(ctx, rpshader);
248		break;
249	default:
250		r = -EINVAL;
251		break;
252	}
253	return r;
254}
255
256int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader)
257{
258	struct r600_context *rctx = r600_context(ctx);
259	int r;
260
261	if (rpshader == NULL)
262		return -EINVAL;
263	/* there should be enough input */
264	if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) {
265		R600_ERR("%d resources provided, expecting %d\n",
266			rctx->vertex_elements->count, rpshader->shader.bc.nresource);
267		return -EINVAL;
268	}
269	r = r600_shader_update(ctx, &rpshader->shader);
270	if (r)
271		return r;
272	return r600_pipe_shader(ctx, rpshader);
273}
274
275static int tgsi_is_supported(struct r600_shader_ctx *ctx)
276{
277	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
278	int j;
279
280	if (i->Instruction.NumDstRegs > 1) {
281		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
282		return -EINVAL;
283	}
284	if (i->Instruction.Predicate) {
285		R600_ERR("predicate unsupported\n");
286		return -EINVAL;
287	}
288#if 0
289	if (i->Instruction.Label) {
290		R600_ERR("label unsupported\n");
291		return -EINVAL;
292	}
293#endif
294	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
295		if (i->Src[j].Register.Dimension ||
296			i->Src[j].Register.Absolute) {
297			R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j,
298				 i->Src[j].Register.Dimension,
299				 i->Src[j].Register.Absolute);
300			return -EINVAL;
301		}
302	}
303	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
304		if (i->Dst[j].Register.Dimension) {
305			R600_ERR("unsupported dst (dimension)\n");
306			return -EINVAL;
307		}
308	}
309	return 0;
310}
311
312static int tgsi_declaration(struct r600_shader_ctx *ctx)
313{
314	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
315	struct r600_bc_vtx vtx;
316	unsigned i;
317	int r;
318
319	switch (d->Declaration.File) {
320	case TGSI_FILE_INPUT:
321		i = ctx->shader->ninput++;
322		ctx->shader->input[i].name = d->Semantic.Name;
323		ctx->shader->input[i].sid = d->Semantic.Index;
324		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
325		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
326		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
327			/* turn input into fetch */
328			memset(&vtx, 0, sizeof(struct r600_bc_vtx));
329			vtx.inst = 0;
330			vtx.fetch_type = 0;
331			vtx.buffer_id = i;
332			/* register containing the index into the buffer */
333			vtx.src_gpr = 0;
334			vtx.src_sel_x = 0;
335			vtx.mega_fetch_count = 0x1F;
336			vtx.dst_gpr = ctx->shader->input[i].gpr;
337			vtx.dst_sel_x = 0;
338			vtx.dst_sel_y = 1;
339			vtx.dst_sel_z = 2;
340			vtx.dst_sel_w = 3;
341			r = r600_bc_add_vtx(ctx->bc, &vtx);
342			if (r)
343				return r;
344		}
345		break;
346	case TGSI_FILE_OUTPUT:
347		i = ctx->shader->noutput++;
348		ctx->shader->output[i].name = d->Semantic.Name;
349		ctx->shader->output[i].sid = d->Semantic.Index;
350		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
351		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
352		break;
353	case TGSI_FILE_CONSTANT:
354	case TGSI_FILE_TEMPORARY:
355	case TGSI_FILE_SAMPLER:
356	case TGSI_FILE_ADDRESS:
357		break;
358	default:
359		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
360		return -EINVAL;
361	}
362	return 0;
363}
364
365static int r600_get_temp(struct r600_shader_ctx *ctx)
366{
367	return ctx->temp_reg + ctx->max_driver_temp_used++;
368}
369
370int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
371{
372	struct tgsi_full_immediate *immediate;
373	struct r600_shader_ctx ctx;
374	struct r600_bc_output output[32];
375	unsigned output_done, noutput;
376	unsigned opcode;
377	int i, r = 0, pos0;
378
379	ctx.bc = &shader->bc;
380	ctx.shader = shader;
381	r = r600_bc_init(ctx.bc, shader->family);
382	if (r)
383		return r;
384	ctx.tokens = tokens;
385	tgsi_scan_shader(tokens, &ctx.info);
386	tgsi_parse_init(&ctx.parse, tokens);
387	ctx.type = ctx.parse.FullHeader.Processor.Processor;
388	shader->processor_type = ctx.type;
389
390	/* register allocations */
391	/* Values [0,127] correspond to GPR[0..127].
392	 * Values [128,159] correspond to constant buffer bank 0
393	 * Values [160,191] correspond to constant buffer bank 1
394	 * Values [256,511] correspond to cfile constants c[0..255].
395	 * Other special values are shown in the list below.
396	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
397	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
398	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
399	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
400	 * 248	SQ_ALU_SRC_0: special constant 0.0.
401	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
402	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
403	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
404	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
405	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
406	 * 254	SQ_ALU_SRC_PV: previous vector result.
407	 * 255	SQ_ALU_SRC_PS: previous scalar result.
408	 */
409	for (i = 0; i < TGSI_FILE_COUNT; i++) {
410		ctx.file_offset[i] = 0;
411	}
412	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
413		ctx.file_offset[TGSI_FILE_INPUT] = 1;
414	}
415	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
416						ctx.info.file_count[TGSI_FILE_INPUT];
417	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
418						ctx.info.file_count[TGSI_FILE_OUTPUT];
419	ctx.file_offset[TGSI_FILE_CONSTANT] = 256;
420	ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
421	ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
422			ctx.info.file_count[TGSI_FILE_TEMPORARY];
423
424	ctx.nliterals = 0;
425	ctx.literals = NULL;
426
427	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
428		tgsi_parse_token(&ctx.parse);
429		switch (ctx.parse.FullToken.Token.Type) {
430		case TGSI_TOKEN_TYPE_IMMEDIATE:
431			immediate = &ctx.parse.FullToken.FullImmediate;
432			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
433			if(ctx.literals == NULL) {
434				r = -ENOMEM;
435				goto out_err;
436			}
437			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
438			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
439			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
440			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
441			ctx.nliterals++;
442			break;
443		case TGSI_TOKEN_TYPE_DECLARATION:
444			r = tgsi_declaration(&ctx);
445			if (r)
446				goto out_err;
447			break;
448		case TGSI_TOKEN_TYPE_INSTRUCTION:
449			r = tgsi_is_supported(&ctx);
450			if (r)
451				goto out_err;
452			ctx.max_driver_temp_used = 0;
453			/* reserve first tmp for everyone */
454			r600_get_temp(&ctx);
455			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
456			ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
457			r = ctx.inst_info->process(&ctx);
458			if (r)
459				goto out_err;
460			r = r600_bc_add_literal(ctx.bc, ctx.value);
461			if (r)
462				goto out_err;
463			break;
464		default:
465			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
466			r = -EINVAL;
467			goto out_err;
468		}
469	}
470	/* export output */
471	noutput = shader->noutput;
472	for (i = 0, pos0 = 0; i < noutput; i++) {
473		memset(&output[i], 0, sizeof(struct r600_bc_output));
474		output[i].gpr = shader->output[i].gpr;
475		output[i].elem_size = 3;
476		output[i].swizzle_x = 0;
477		output[i].swizzle_y = 1;
478		output[i].swizzle_z = 2;
479		output[i].swizzle_w = 3;
480		output[i].barrier = 1;
481		output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
482		output[i].array_base = i - pos0;
483		output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
484		switch (ctx.type) {
485		case TGSI_PROCESSOR_VERTEX:
486			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
487				output[i].array_base = 60;
488				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
489				/* position doesn't count in array_base */
490				pos0++;
491			}
492			if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
493				output[i].array_base = 61;
494				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
495				/* position doesn't count in array_base */
496				pos0++;
497			}
498			break;
499		case TGSI_PROCESSOR_FRAGMENT:
500			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
501				output[i].array_base = shader->output[i].sid;
502				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
503			} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
504				output[i].array_base = 61;
505				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
506			} else {
507				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
508				r = -EINVAL;
509				goto out_err;
510			}
511			break;
512		default:
513			R600_ERR("unsupported processor type %d\n", ctx.type);
514			r = -EINVAL;
515			goto out_err;
516		}
517	}
518	/* add fake param output for vertex shader if no param is exported */
519	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
520		for (i = 0, pos0 = 0; i < noutput; i++) {
521			if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
522				pos0 = 1;
523				break;
524			}
525		}
526		if (!pos0) {
527			memset(&output[i], 0, sizeof(struct r600_bc_output));
528			output[i].gpr = 0;
529			output[i].elem_size = 3;
530			output[i].swizzle_x = 0;
531			output[i].swizzle_y = 1;
532			output[i].swizzle_z = 2;
533			output[i].swizzle_w = 3;
534			output[i].barrier = 1;
535			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
536			output[i].array_base = 0;
537			output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
538			noutput++;
539		}
540	}
541	/* add fake pixel export */
542	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
543		memset(&output[0], 0, sizeof(struct r600_bc_output));
544		output[0].gpr = 0;
545		output[0].elem_size = 3;
546		output[0].swizzle_x = 7;
547		output[0].swizzle_y = 7;
548		output[0].swizzle_z = 7;
549		output[0].swizzle_w = 7;
550		output[0].barrier = 1;
551		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
552		output[0].array_base = 0;
553		output[0].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
554		noutput++;
555	}
556	/* set export done on last export of each type */
557	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
558		if (i == (noutput - 1)) {
559			output[i].end_of_program = 1;
560		}
561		if (!(output_done & (1 << output[i].type))) {
562			output_done |= (1 << output[i].type);
563			output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE;
564		}
565	}
566	/* add output to bytecode */
567	for (i = 0; i < noutput; i++) {
568		r = r600_bc_add_output(ctx.bc, &output[i]);
569		if (r)
570			goto out_err;
571	}
572	free(ctx.literals);
573	tgsi_parse_free(&ctx.parse);
574	return 0;
575out_err:
576	free(ctx.literals);
577	tgsi_parse_free(&ctx.parse);
578	return r;
579}
580
581static int tgsi_unsupported(struct r600_shader_ctx *ctx)
582{
583	R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
584	return -EINVAL;
585}
586
/* Handler that emits nothing and always succeeds. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;	/* no state is needed */
	return 0;
}
591
592static int tgsi_src(struct r600_shader_ctx *ctx,
593			const struct tgsi_full_src_register *tgsi_src,
594			struct r600_bc_alu_src *r600_src)
595{
596	int index;
597	memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
598	r600_src->sel = tgsi_src->Register.Index;
599	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
600		r600_src->sel = 0;
601		index = tgsi_src->Register.Index;
602		ctx->value[0] = ctx->literals[index * 4 + 0];
603		ctx->value[1] = ctx->literals[index * 4 + 1];
604		ctx->value[2] = ctx->literals[index * 4 + 2];
605		ctx->value[3] = ctx->literals[index * 4 + 3];
606	}
607	if (tgsi_src->Register.Indirect)
608		r600_src->rel = V_SQ_REL_RELATIVE;
609	r600_src->neg = tgsi_src->Register.Negate;
610	r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
611	return 0;
612}
613
614static int tgsi_dst(struct r600_shader_ctx *ctx,
615			const struct tgsi_full_dst_register *tgsi_dst,
616			unsigned swizzle,
617			struct r600_bc_alu_dst *r600_dst)
618{
619	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
620
621	r600_dst->sel = tgsi_dst->Register.Index;
622	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
623	r600_dst->chan = swizzle;
624	r600_dst->write = 1;
625	if (tgsi_dst->Register.Indirect)
626		r600_dst->rel = V_SQ_REL_RELATIVE;
627	if (inst->Instruction.Saturate) {
628		r600_dst->clamp = 1;
629	}
630	return 0;
631}
632
633static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
634{
635	switch (swizzle) {
636	case 0:
637		return tgsi_src->Register.SwizzleX;
638	case 1:
639		return tgsi_src->Register.SwizzleY;
640	case 2:
641		return tgsi_src->Register.SwizzleZ;
642	case 3:
643		return tgsi_src->Register.SwizzleW;
644	default:
645		return 0;
646	}
647}
648
649static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
650{
651	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
652	struct r600_bc_alu alu;
653	int i, j, k, nconst, r;
654
655	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
656		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
657			nconst++;
658		}
659		r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
660		if (r) {
661			return r;
662		}
663	}
664	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
665		if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) {
666			int treg = r600_get_temp(ctx);
667			for (k = 0; k < 4; k++) {
668				memset(&alu, 0, sizeof(struct r600_bc_alu));
669				alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
670				alu.src[0].sel = r600_src[j].sel;
671				alu.src[0].chan = k;
672				alu.dst.sel = treg;
673				alu.dst.chan = k;
674				alu.dst.write = 1;
675				if (k == 3)
676					alu.last = 1;
677				r = r600_bc_add_alu(ctx->bc, &alu);
678				if (r)
679					return r;
680			}
681			r600_src[j].sel = treg;
682			j--;
683		}
684	}
685	return 0;
686}
687
688/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
689static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
690{
691	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
692	struct r600_bc_alu alu;
693	int i, j, k, nliteral, r;
694
695	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
696		if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
697			nliteral++;
698		}
699	}
700	for (i = 0, j = 0; i < inst->Instruction.NumSrcRegs; i++) {
701		if (inst->Src[j].Register.File == TGSI_FILE_IMMEDIATE) {
702			int treg = r600_get_temp(ctx);
703			for (k = 0; k < 4; k++) {
704				memset(&alu, 0, sizeof(struct r600_bc_alu));
705				alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
706				alu.src[0].sel = r600_src[j].sel;
707				alu.src[0].chan = k;
708				alu.dst.sel = treg;
709				alu.dst.chan = k;
710				alu.dst.write = 1;
711				if (k == 3)
712					alu.last = 1;
713				r = r600_bc_add_alu(ctx->bc, &alu);
714				if (r)
715					return r;
716			}
717			r = r600_bc_add_literal(ctx->bc, ctx->value);
718			if (r)
719				return r;
720			r600_src[j].sel = treg;
721			j++;
722		}
723	}
724	return 0;
725}
726
727static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
728{
729	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
730	struct r600_bc_alu_src r600_src[3];
731	struct r600_bc_alu alu;
732	int i, j, r;
733	int lasti = 0;
734
735	for (i = 0; i < 4; i++) {
736		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
737			lasti = i;
738		}
739	}
740
741	r = tgsi_split_constant(ctx, r600_src);
742	if (r)
743		return r;
744	for (i = 0; i < lasti + 1; i++) {
745		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
746			continue;
747
748		memset(&alu, 0, sizeof(struct r600_bc_alu));
749		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
750		if (r)
751			return r;
752
753		alu.inst = ctx->inst_info->r600_opcode;
754		if (!swap) {
755			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
756				alu.src[j] = r600_src[j];
757				alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
758			}
759		} else {
760			alu.src[0] = r600_src[1];
761			alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
762
763			alu.src[1] = r600_src[0];
764			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
765		}
766		/* handle some special cases */
767		switch (ctx->inst_info->tgsi_opcode) {
768		case TGSI_OPCODE_SUB:
769			alu.src[1].neg = 1;
770			break;
771		case TGSI_OPCODE_ABS:
772			alu.src[0].abs = 1;
773			break;
774		default:
775			break;
776		}
777		if (i == lasti) {
778			alu.last = 1;
779		}
780		r = r600_bc_add_alu(ctx->bc, &alu);
781		if (r)
782			return r;
783	}
784	return 0;
785}
786
/* Component-wise ALU instruction with its operands in TGSI order. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	const int swap_operands = 0;

	return tgsi_op2_s(ctx, swap_operands);
}
791
/* Component-wise ALU instruction with its first two operands exchanged. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	const int swap_operands = 1;

	return tgsi_op2_s(ctx, swap_operands);
}
796
797/*
798 * r600 - trunc to -PI..PI range
799 * r700 - normalize by dividing by 2PI
800 * see fdo bug 27901
801 */
802static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
803			   struct r600_bc_alu_src r600_src[3])
804{
805	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
806	int r;
807	uint32_t lit_vals[4];
808	struct r600_bc_alu alu;
809
810	memset(lit_vals, 0, 4*4);
811	r = tgsi_split_constant(ctx, r600_src);
812	if (r)
813		return r;
814
815	r = tgsi_split_literal_constant(ctx, r600_src);
816	if (r)
817		return r;
818
819	lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
820	lit_vals[1] = fui(0.5f);
821
822	memset(&alu, 0, sizeof(struct r600_bc_alu));
823	alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
824	alu.is_op3 = 1;
825
826	alu.dst.chan = 0;
827	alu.dst.sel = ctx->temp_reg;
828	alu.dst.write = 1;
829
830	alu.src[0] = r600_src[0];
831	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
832
833	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
834	alu.src[1].chan = 0;
835	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
836	alu.src[2].chan = 1;
837	alu.last = 1;
838	r = r600_bc_add_alu(ctx->bc, &alu);
839	if (r)
840		return r;
841	r = r600_bc_add_literal(ctx->bc, lit_vals);
842	if (r)
843		return r;
844
845	memset(&alu, 0, sizeof(struct r600_bc_alu));
846	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT;
847
848	alu.dst.chan = 0;
849	alu.dst.sel = ctx->temp_reg;
850	alu.dst.write = 1;
851
852	alu.src[0].sel = ctx->temp_reg;
853	alu.src[0].chan = 0;
854	alu.last = 1;
855	r = r600_bc_add_alu(ctx->bc, &alu);
856	if (r)
857		return r;
858
859	if (ctx->bc->chiprev == 0) {
860		lit_vals[0] = fui(3.1415926535897f * 2.0f);
861		lit_vals[1] = fui(-3.1415926535897f);
862	} else {
863		lit_vals[0] = fui(1.0f);
864		lit_vals[1] = fui(-0.5f);
865	}
866
867	memset(&alu, 0, sizeof(struct r600_bc_alu));
868	alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
869	alu.is_op3 = 1;
870
871	alu.dst.chan = 0;
872	alu.dst.sel = ctx->temp_reg;
873	alu.dst.write = 1;
874
875	alu.src[0].sel = ctx->temp_reg;
876	alu.src[0].chan = 0;
877
878	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
879	alu.src[1].chan = 0;
880	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
881	alu.src[2].chan = 1;
882	alu.last = 1;
883	r = r600_bc_add_alu(ctx->bc, &alu);
884	if (r)
885		return r;
886	r = r600_bc_add_literal(ctx->bc, lit_vals);
887	if (r)
888		return r;
889	return 0;
890}
891
892static int tgsi_trig(struct r600_shader_ctx *ctx)
893{
894	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
895	struct r600_bc_alu_src r600_src[3];
896	struct r600_bc_alu alu;
897	int i, r;
898	int lasti = 0;
899
900	r = tgsi_setup_trig(ctx, r600_src);
901	if (r)
902		return r;
903
904	memset(&alu, 0, sizeof(struct r600_bc_alu));
905	alu.inst = ctx->inst_info->r600_opcode;
906	alu.dst.chan = 0;
907	alu.dst.sel = ctx->temp_reg;
908	alu.dst.write = 1;
909
910	alu.src[0].sel = ctx->temp_reg;
911	alu.src[0].chan = 0;
912	alu.last = 1;
913	r = r600_bc_add_alu(ctx->bc, &alu);
914	if (r)
915		return r;
916
917	/* replicate result */
918	for (i = 0; i < 4; i++) {
919		if (inst->Dst[0].Register.WriteMask & (1 << i))
920			lasti = i;
921	}
922	for (i = 0; i < lasti + 1; i++) {
923		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
924			continue;
925
926		memset(&alu, 0, sizeof(struct r600_bc_alu));
927		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
928
929		alu.src[0].sel = ctx->temp_reg;
930		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
931		if (r)
932			return r;
933		if (i == lasti)
934			alu.last = 1;
935		r = r600_bc_add_alu(ctx->bc, &alu);
936		if (r)
937			return r;
938	}
939	return 0;
940}
941
942static int tgsi_scs(struct r600_shader_ctx *ctx)
943{
944	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
945	struct r600_bc_alu_src r600_src[3];
946	struct r600_bc_alu alu;
947	int r;
948
949	r = tgsi_setup_trig(ctx, r600_src);
950	if (r)
951		return r;
952
953
954	/* dst.x = COS */
955	memset(&alu, 0, sizeof(struct r600_bc_alu));
956	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS;
957	r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
958	if (r)
959		return r;
960
961	alu.src[0].sel = ctx->temp_reg;
962	alu.src[0].chan = 0;
963	alu.last = 1;
964	r = r600_bc_add_alu(ctx->bc, &alu);
965	if (r)
966		return r;
967
968	/* dst.y = SIN */
969	memset(&alu, 0, sizeof(struct r600_bc_alu));
970	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN;
971	r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
972	if (r)
973		return r;
974
975	alu.src[0].sel = ctx->temp_reg;
976	alu.src[0].chan = 0;
977	alu.last = 1;
978	r = r600_bc_add_alu(ctx->bc, &alu);
979	if (r)
980		return r;
981	return 0;
982}
983
984static int tgsi_kill(struct r600_shader_ctx *ctx)
985{
986	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
987	struct r600_bc_alu alu;
988	int i, r;
989
990	for (i = 0; i < 4; i++) {
991		memset(&alu, 0, sizeof(struct r600_bc_alu));
992		alu.inst = ctx->inst_info->r600_opcode;
993
994		alu.dst.chan = i;
995
996		alu.src[0].sel = V_SQ_ALU_SRC_0;
997
998		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
999			alu.src[1].sel = V_SQ_ALU_SRC_1;
1000			alu.src[1].neg = 1;
1001		} else {
1002			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1003			if (r)
1004				return r;
1005			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1006		}
1007		if (i == 3) {
1008			alu.last = 1;
1009		}
1010		r = r600_bc_add_alu(ctx->bc, &alu);
1011		if (r)
1012			return r;
1013	}
1014	r = r600_bc_add_literal(ctx->bc, ctx->value);
1015	if (r)
1016		return r;
1017
1018	/* kill must be last in ALU */
1019	ctx->bc->force_add_cf = 1;
1020	ctx->shader->uses_kill = TRUE;
1021	return 0;
1022}
1023
1024static int tgsi_lit(struct r600_shader_ctx *ctx)
1025{
1026	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1027	struct r600_bc_alu alu;
1028	struct r600_bc_alu_src r600_src[3];
1029	int r;
1030
1031	r = tgsi_split_constant(ctx, r600_src);
1032	if (r)
1033		return r;
1034	r = tgsi_split_literal_constant(ctx, r600_src);
1035	if (r)
1036		return r;
1037
1038	/* dst.x, <- 1.0  */
1039	memset(&alu, 0, sizeof(struct r600_bc_alu));
1040	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1041	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
1042	alu.src[0].chan = 0;
1043	r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
1044	if (r)
1045		return r;
1046	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
1047	r = r600_bc_add_alu(ctx->bc, &alu);
1048	if (r)
1049		return r;
1050
1051	/* dst.y = max(src.x, 0.0) */
1052	memset(&alu, 0, sizeof(struct r600_bc_alu));
1053	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX;
1054	alu.src[0] = r600_src[0];
1055	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
1056	alu.src[1].chan = 0;
1057	r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
1058	if (r)
1059		return r;
1060	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1061	r = r600_bc_add_alu(ctx->bc, &alu);
1062	if (r)
1063		return r;
1064
1065	/* dst.w, <- 1.0  */
1066	memset(&alu, 0, sizeof(struct r600_bc_alu));
1067	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1068	alu.src[0].sel  = V_SQ_ALU_SRC_1;
1069	alu.src[0].chan = 0;
1070	r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1071	if (r)
1072		return r;
1073	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1074	alu.last = 1;
1075	r = r600_bc_add_alu(ctx->bc, &alu);
1076	if (r)
1077		return r;
1078
1079	r = r600_bc_add_literal(ctx->bc, ctx->value);
1080	if (r)
1081		return r;
1082
1083	if (inst->Dst[0].Register.WriteMask & (1 << 2))
1084	{
1085		int chan;
1086		int sel;
1087
1088		/* dst.z = log(src.y) */
1089		memset(&alu, 0, sizeof(struct r600_bc_alu));
1090		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED;
1091		alu.src[0] = r600_src[0];
1092		alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1093		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1094		if (r)
1095			return r;
1096		alu.last = 1;
1097		r = r600_bc_add_alu(ctx->bc, &alu);
1098		if (r)
1099			return r;
1100
1101		r = r600_bc_add_literal(ctx->bc, ctx->value);
1102		if (r)
1103			return r;
1104
1105		chan = alu.dst.chan;
1106		sel = alu.dst.sel;
1107
1108		/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
1109		memset(&alu, 0, sizeof(struct r600_bc_alu));
1110		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
1111		alu.src[0] = r600_src[0];
1112		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1113		alu.src[1].sel  = sel;
1114		alu.src[1].chan = chan;
1115
1116		alu.src[2] = r600_src[0];
1117		alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
1118		alu.dst.sel = ctx->temp_reg;
1119		alu.dst.chan = 0;
1120		alu.dst.write = 1;
1121		alu.is_op3 = 1;
1122		alu.last = 1;
1123		r = r600_bc_add_alu(ctx->bc, &alu);
1124		if (r)
1125			return r;
1126
1127		r = r600_bc_add_literal(ctx->bc, ctx->value);
1128		if (r)
1129			return r;
1130		/* dst.z = exp(tmp.x) */
1131		memset(&alu, 0, sizeof(struct r600_bc_alu));
1132		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
1133		alu.src[0].sel = ctx->temp_reg;
1134		alu.src[0].chan = 0;
1135		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1136		if (r)
1137			return r;
1138		alu.last = 1;
1139		r = r600_bc_add_alu(ctx->bc, &alu);
1140		if (r)
1141			return r;
1142	}
1143	return 0;
1144}
1145
1146static int tgsi_trans(struct r600_shader_ctx *ctx)
1147{
1148	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1149	struct r600_bc_alu alu;
1150	int i, j, r;
1151
1152	for (i = 0; i < 4; i++) {
1153		memset(&alu, 0, sizeof(struct r600_bc_alu));
1154		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
1155			alu.inst = ctx->inst_info->r600_opcode;
1156			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1157				r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]);
1158				if (r)
1159					return r;
1160				alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1161			}
1162			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1163			if (r)
1164				return r;
1165			alu.last = 1;
1166			r = r600_bc_add_alu(ctx->bc, &alu);
1167			if (r)
1168				return r;
1169		}
1170	}
1171	return 0;
1172}
1173
1174static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1175{
1176	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1177	struct r600_bc_alu alu;
1178	int i, r;
1179
1180	for (i = 0; i < 4; i++) {
1181		memset(&alu, 0, sizeof(struct r600_bc_alu));
1182		alu.src[0].sel = ctx->temp_reg;
1183		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1184		alu.dst.chan = i;
1185		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1186		if (r)
1187			return r;
1188		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1189		if (i == 3)
1190			alu.last = 1;
1191		r = r600_bc_add_alu(ctx->bc, &alu);
1192		if (r)
1193			return r;
1194	}
1195	return 0;
1196}
1197
1198static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1199{
1200	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1201	struct r600_bc_alu alu;
1202	int i, r;
1203
1204	memset(&alu, 0, sizeof(struct r600_bc_alu));
1205	alu.inst = ctx->inst_info->r600_opcode;
1206	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1207		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1208		if (r)
1209			return r;
1210		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1211	}
1212	alu.dst.sel = ctx->temp_reg;
1213	alu.dst.write = 1;
1214	alu.last = 1;
1215	r = r600_bc_add_alu(ctx->bc, &alu);
1216	if (r)
1217		return r;
1218	r = r600_bc_add_literal(ctx->bc, ctx->value);
1219	if (r)
1220		return r;
1221	/* replicate result */
1222	return tgsi_helper_tempx_replicate(ctx);
1223}
1224
1225static int tgsi_pow(struct r600_shader_ctx *ctx)
1226{
1227	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1228	struct r600_bc_alu alu;
1229	int r;
1230
1231	/* LOG2(a) */
1232	memset(&alu, 0, sizeof(struct r600_bc_alu));
1233	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE;
1234	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1235	if (r)
1236		return r;
1237	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1238	alu.dst.sel = ctx->temp_reg;
1239	alu.dst.write = 1;
1240	alu.last = 1;
1241	r = r600_bc_add_alu(ctx->bc, &alu);
1242	if (r)
1243		return r;
1244	r = r600_bc_add_literal(ctx->bc,ctx->value);
1245	if (r)
1246		return r;
1247	/* b * LOG2(a) */
1248	memset(&alu, 0, sizeof(struct r600_bc_alu));
1249	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE;
1250	r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1251	if (r)
1252		return r;
1253	alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1254	alu.src[1].sel = ctx->temp_reg;
1255	alu.dst.sel = ctx->temp_reg;
1256	alu.dst.write = 1;
1257	alu.last = 1;
1258	r = r600_bc_add_alu(ctx->bc, &alu);
1259	if (r)
1260		return r;
1261	r = r600_bc_add_literal(ctx->bc,ctx->value);
1262	if (r)
1263		return r;
1264	/* POW(a,b) = EXP2(b * LOG2(a))*/
1265	memset(&alu, 0, sizeof(struct r600_bc_alu));
1266	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
1267	alu.src[0].sel = ctx->temp_reg;
1268	alu.dst.sel = ctx->temp_reg;
1269	alu.dst.write = 1;
1270	alu.last = 1;
1271	r = r600_bc_add_alu(ctx->bc, &alu);
1272	if (r)
1273		return r;
1274	r = r600_bc_add_literal(ctx->bc,ctx->value);
1275	if (r)
1276		return r;
1277	return tgsi_helper_tempx_replicate(ctx);
1278}
1279
1280static int tgsi_ssg(struct r600_shader_ctx *ctx)
1281{
1282	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1283	struct r600_bc_alu alu;
1284	struct r600_bc_alu_src r600_src[3];
1285	int i, r;
1286
1287	r = tgsi_split_constant(ctx, r600_src);
1288	if (r)
1289		return r;
1290
1291	/* tmp = (src > 0 ? 1 : src) */
1292	for (i = 0; i < 4; i++) {
1293		memset(&alu, 0, sizeof(struct r600_bc_alu));
1294		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
1295		alu.is_op3 = 1;
1296
1297		alu.dst.sel = ctx->temp_reg;
1298		alu.dst.chan = i;
1299
1300		alu.src[0] = r600_src[0];
1301		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1302
1303		alu.src[1].sel = V_SQ_ALU_SRC_1;
1304
1305		alu.src[2] = r600_src[0];
1306		alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1307		if (i == 3)
1308			alu.last = 1;
1309		r = r600_bc_add_alu(ctx->bc, &alu);
1310		if (r)
1311			return r;
1312	}
1313	r = r600_bc_add_literal(ctx->bc, ctx->value);
1314	if (r)
1315		return r;
1316
1317	/* dst = (-tmp > 0 ? -1 : tmp) */
1318	for (i = 0; i < 4; i++) {
1319		memset(&alu, 0, sizeof(struct r600_bc_alu));
1320		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
1321		alu.is_op3 = 1;
1322		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1323		if (r)
1324			return r;
1325
1326		alu.src[0].sel = ctx->temp_reg;
1327		alu.src[0].chan = i;
1328		alu.src[0].neg = 1;
1329
1330		alu.src[1].sel = V_SQ_ALU_SRC_1;
1331		alu.src[1].neg = 1;
1332
1333		alu.src[2].sel = ctx->temp_reg;
1334		alu.src[2].chan = i;
1335
1336		if (i == 3)
1337			alu.last = 1;
1338		r = r600_bc_add_alu(ctx->bc, &alu);
1339		if (r)
1340			return r;
1341	}
1342	return 0;
1343}
1344
1345static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1346{
1347	struct r600_bc_alu alu;
1348	int i, r;
1349
1350	r = r600_bc_add_literal(ctx->bc, ctx->value);
1351	if (r)
1352		return r;
1353	for (i = 0; i < 4; i++) {
1354		memset(&alu, 0, sizeof(struct r600_bc_alu));
1355		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1356			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
1357			alu.dst.chan = i;
1358		} else {
1359			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1360			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1361			if (r)
1362				return r;
1363			alu.src[0].sel = ctx->temp_reg;
1364			alu.src[0].chan = i;
1365		}
1366		if (i == 3) {
1367			alu.last = 1;
1368		}
1369		r = r600_bc_add_alu(ctx->bc, &alu);
1370		if (r)
1371			return r;
1372	}
1373	return 0;
1374}
1375
1376static int tgsi_op3(struct r600_shader_ctx *ctx)
1377{
1378	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1379	struct r600_bc_alu_src r600_src[3];
1380	struct r600_bc_alu alu;
1381	int i, j, r;
1382
1383	r = tgsi_split_constant(ctx, r600_src);
1384	if (r)
1385		return r;
1386	/* do it in 2 step as op3 doesn't support writemask */
1387	for (i = 0; i < 4; i++) {
1388		memset(&alu, 0, sizeof(struct r600_bc_alu));
1389		alu.inst = ctx->inst_info->r600_opcode;
1390		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1391			alu.src[j] = r600_src[j];
1392			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1393		}
1394		alu.dst.sel = ctx->temp_reg;
1395		alu.dst.chan = i;
1396		alu.dst.write = 1;
1397		alu.is_op3 = 1;
1398		if (i == 3) {
1399			alu.last = 1;
1400		}
1401		r = r600_bc_add_alu(ctx->bc, &alu);
1402		if (r)
1403			return r;
1404	}
1405	return tgsi_helper_copy(ctx, inst);
1406}
1407
1408static int tgsi_dp(struct r600_shader_ctx *ctx)
1409{
1410	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1411	struct r600_bc_alu_src r600_src[3];
1412	struct r600_bc_alu alu;
1413	int i, j, r;
1414
1415	r = tgsi_split_constant(ctx, r600_src);
1416	if (r)
1417		return r;
1418	for (i = 0; i < 4; i++) {
1419		memset(&alu, 0, sizeof(struct r600_bc_alu));
1420		alu.inst = ctx->inst_info->r600_opcode;
1421		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1422			alu.src[j] = r600_src[j];
1423			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1424		}
1425		alu.dst.sel = ctx->temp_reg;
1426		alu.dst.chan = i;
1427		alu.dst.write = 1;
1428		/* handle some special cases */
1429		switch (ctx->inst_info->tgsi_opcode) {
1430		case TGSI_OPCODE_DP2:
1431			if (i > 1) {
1432				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1433				alu.src[0].chan = alu.src[1].chan = 0;
1434			}
1435			break;
1436		case TGSI_OPCODE_DP3:
1437			if (i > 2) {
1438				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1439				alu.src[0].chan = alu.src[1].chan = 0;
1440			}
1441			break;
1442		case TGSI_OPCODE_DPH:
1443			if (i == 3) {
1444				alu.src[0].sel = V_SQ_ALU_SRC_1;
1445				alu.src[0].chan = 0;
1446				alu.src[0].neg = 0;
1447			}
1448			break;
1449		default:
1450			break;
1451		}
1452		if (i == 3) {
1453			alu.last = 1;
1454		}
1455		r = r600_bc_add_alu(ctx->bc, &alu);
1456		if (r)
1457			return r;
1458	}
1459	return tgsi_helper_copy(ctx, inst);
1460}
1461
1462static int tgsi_tex(struct r600_shader_ctx *ctx)
1463{
1464	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1465	struct r600_bc_tex tex;
1466	struct r600_bc_alu alu;
1467	unsigned src_gpr;
1468	int r, i;
1469
1470	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1471
1472	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1473		/* Add perspective divide */
1474		memset(&alu, 0, sizeof(struct r600_bc_alu));
1475		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
1476		alu.src[0].sel = src_gpr;
1477		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1478		alu.dst.sel = ctx->temp_reg;
1479		alu.dst.chan = 3;
1480		alu.last = 1;
1481		alu.dst.write = 1;
1482		r = r600_bc_add_alu(ctx->bc, &alu);
1483		if (r)
1484			return r;
1485
1486		for (i = 0; i < 3; i++) {
1487			memset(&alu, 0, sizeof(struct r600_bc_alu));
1488			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1489			alu.src[0].sel = ctx->temp_reg;
1490			alu.src[0].chan = 3;
1491			alu.src[1].sel = src_gpr;
1492			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1493			alu.dst.sel = ctx->temp_reg;
1494			alu.dst.chan = i;
1495			alu.dst.write = 1;
1496			r = r600_bc_add_alu(ctx->bc, &alu);
1497			if (r)
1498				return r;
1499		}
1500		memset(&alu, 0, sizeof(struct r600_bc_alu));
1501		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1502		alu.src[0].sel = V_SQ_ALU_SRC_1;
1503		alu.src[0].chan = 0;
1504		alu.dst.sel = ctx->temp_reg;
1505		alu.dst.chan = 3;
1506		alu.last = 1;
1507		alu.dst.write = 1;
1508		r = r600_bc_add_alu(ctx->bc, &alu);
1509		if (r)
1510			return r;
1511		src_gpr = ctx->temp_reg;
1512	} else if (inst->Src[0].Register.File != TGSI_FILE_TEMPORARY) {
1513		for (i = 0; i < 4; i++) {
1514			memset(&alu, 0, sizeof(struct r600_bc_alu));
1515			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1516			alu.src[0].sel = src_gpr;
1517			alu.src[0].chan = i;
1518			alu.dst.sel = ctx->temp_reg;
1519			alu.dst.chan = i;
1520			if (i == 3)
1521				alu.last = 1;
1522			alu.dst.write = 1;
1523			r = r600_bc_add_alu(ctx->bc, &alu);
1524			if (r)
1525				return r;
1526		}
1527		src_gpr = ctx->temp_reg;
1528	}
1529
1530	memset(&tex, 0, sizeof(struct r600_bc_tex));
1531	tex.inst = ctx->inst_info->r600_opcode;
1532	tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1533	tex.sampler_id = tex.resource_id;
1534	tex.src_gpr = src_gpr;
1535	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1536	tex.dst_sel_x = 0;
1537	tex.dst_sel_y = 1;
1538	tex.dst_sel_z = 2;
1539	tex.dst_sel_w = 3;
1540	tex.src_sel_x = 0;
1541	tex.src_sel_y = 1;
1542	tex.src_sel_z = 2;
1543	tex.src_sel_w = 3;
1544
1545	if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1546		tex.coord_type_x = 1;
1547		tex.coord_type_y = 1;
1548		tex.coord_type_z = 1;
1549		tex.coord_type_w = 1;
1550	}
1551	return r600_bc_add_tex(ctx->bc, &tex);
1552}
1553
1554static int tgsi_lrp(struct r600_shader_ctx *ctx)
1555{
1556	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1557	struct r600_bc_alu_src r600_src[3];
1558	struct r600_bc_alu alu;
1559	unsigned i;
1560	int r;
1561
1562	r = tgsi_split_constant(ctx, r600_src);
1563	if (r)
1564		return r;
1565	/* 1 - src0 */
1566	for (i = 0; i < 4; i++) {
1567		memset(&alu, 0, sizeof(struct r600_bc_alu));
1568		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD;
1569		alu.src[0].sel = V_SQ_ALU_SRC_1;
1570		alu.src[0].chan = 0;
1571		alu.src[1] = r600_src[0];
1572		alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1573		alu.src[1].neg = 1;
1574		alu.dst.sel = ctx->temp_reg;
1575		alu.dst.chan = i;
1576		if (i == 3) {
1577			alu.last = 1;
1578		}
1579		alu.dst.write = 1;
1580		r = r600_bc_add_alu(ctx->bc, &alu);
1581		if (r)
1582			return r;
1583	}
1584	r = r600_bc_add_literal(ctx->bc, ctx->value);
1585	if (r)
1586		return r;
1587
1588	/* (1 - src0) * src2 */
1589	for (i = 0; i < 4; i++) {
1590		memset(&alu, 0, sizeof(struct r600_bc_alu));
1591		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1592		alu.src[0].sel = ctx->temp_reg;
1593		alu.src[0].chan = i;
1594		alu.src[1] = r600_src[2];
1595		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1596		alu.dst.sel = ctx->temp_reg;
1597		alu.dst.chan = i;
1598		if (i == 3) {
1599			alu.last = 1;
1600		}
1601		alu.dst.write = 1;
1602		r = r600_bc_add_alu(ctx->bc, &alu);
1603		if (r)
1604			return r;
1605	}
1606	r = r600_bc_add_literal(ctx->bc, ctx->value);
1607	if (r)
1608		return r;
1609
1610	/* src0 * src1 + (1 - src0) * src2 */
1611	for (i = 0; i < 4; i++) {
1612		memset(&alu, 0, sizeof(struct r600_bc_alu));
1613		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
1614		alu.is_op3 = 1;
1615		alu.src[0] = r600_src[0];
1616		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1617		alu.src[1] = r600_src[1];
1618		alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
1619		alu.src[2].sel = ctx->temp_reg;
1620		alu.src[2].chan = i;
1621		alu.dst.sel = ctx->temp_reg;
1622		alu.dst.chan = i;
1623		if (i == 3) {
1624			alu.last = 1;
1625		}
1626		r = r600_bc_add_alu(ctx->bc, &alu);
1627		if (r)
1628			return r;
1629	}
1630	return tgsi_helper_copy(ctx, inst);
1631}
1632
1633static int tgsi_cmp(struct r600_shader_ctx *ctx)
1634{
1635	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1636	struct r600_bc_alu_src r600_src[3];
1637	struct r600_bc_alu alu;
1638	int use_temp = 0;
1639	int i, r;
1640
1641	r = tgsi_split_constant(ctx, r600_src);
1642	if (r)
1643		return r;
1644
1645	if (inst->Dst[0].Register.WriteMask != 0xf)
1646		use_temp = 1;
1647
1648	for (i = 0; i < 4; i++) {
1649		memset(&alu, 0, sizeof(struct r600_bc_alu));
1650		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE;
1651		alu.src[0] = r600_src[0];
1652		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1653
1654		alu.src[1] = r600_src[2];
1655		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1656
1657		alu.src[2] = r600_src[1];
1658		alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
1659
1660		if (use_temp)
1661			alu.dst.sel = ctx->temp_reg;
1662		else {
1663			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1664			if (r)
1665				return r;
1666		}
1667		alu.dst.chan = i;
1668		alu.dst.write = 1;
1669		alu.is_op3 = 1;
1670		if (i == 3)
1671			alu.last = 1;
1672		r = r600_bc_add_alu(ctx->bc, &alu);
1673		if (r)
1674			return r;
1675	}
1676	if (use_temp)
1677		return tgsi_helper_copy(ctx, inst);
1678	return 0;
1679}
1680
1681static int tgsi_xpd(struct r600_shader_ctx *ctx)
1682{
1683	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1684	struct r600_bc_alu_src r600_src[3];
1685	struct r600_bc_alu alu;
1686	uint32_t use_temp = 0;
1687	int i, r;
1688
1689	if (inst->Dst[0].Register.WriteMask != 0xf)
1690		use_temp = 1;
1691
1692	r = tgsi_split_constant(ctx, r600_src);
1693	if (r)
1694		return r;
1695
1696	for (i = 0; i < 4; i++) {
1697		memset(&alu, 0, sizeof(struct r600_bc_alu));
1698		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1699
1700		alu.src[0] = r600_src[0];
1701		switch (i) {
1702		case 0:
1703			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
1704			break;
1705		case 1:
1706			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1707			break;
1708		case 2:
1709			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1710			break;
1711		case 3:
1712			alu.src[0].sel = V_SQ_ALU_SRC_0;
1713			alu.src[0].chan = i;
1714		}
1715
1716		alu.src[1] = r600_src[1];
1717		switch (i) {
1718		case 0:
1719			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
1720			break;
1721		case 1:
1722			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
1723			break;
1724		case 2:
1725			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
1726			break;
1727		case 3:
1728			alu.src[1].sel = V_SQ_ALU_SRC_0;
1729			alu.src[1].chan = i;
1730		}
1731
1732		alu.dst.sel = ctx->temp_reg;
1733		alu.dst.chan = i;
1734		alu.dst.write = 1;
1735
1736		if (i == 3)
1737			alu.last = 1;
1738		r = r600_bc_add_alu(ctx->bc, &alu);
1739		if (r)
1740			return r;
1741	}
1742
1743	for (i = 0; i < 4; i++) {
1744		memset(&alu, 0, sizeof(struct r600_bc_alu));
1745		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
1746
1747		alu.src[0] = r600_src[0];
1748		switch (i) {
1749		case 0:
1750			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1751			break;
1752		case 1:
1753			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
1754			break;
1755		case 2:
1756			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1757			break;
1758		case 3:
1759			alu.src[0].sel = V_SQ_ALU_SRC_0;
1760			alu.src[0].chan = i;
1761		}
1762
1763		alu.src[1] = r600_src[1];
1764		switch (i) {
1765		case 0:
1766			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
1767			break;
1768		case 1:
1769			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
1770			break;
1771		case 2:
1772			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
1773			break;
1774		case 3:
1775			alu.src[1].sel = V_SQ_ALU_SRC_0;
1776			alu.src[1].chan = i;
1777		}
1778
1779		alu.src[2].sel = ctx->temp_reg;
1780		alu.src[2].neg = 1;
1781		alu.src[2].chan = i;
1782
1783		if (use_temp)
1784			alu.dst.sel = ctx->temp_reg;
1785		else {
1786			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1787			if (r)
1788				return r;
1789		}
1790		alu.dst.chan = i;
1791		alu.dst.write = 1;
1792		alu.is_op3 = 1;
1793		if (i == 3)
1794			alu.last = 1;
1795		r = r600_bc_add_alu(ctx->bc, &alu);
1796		if (r)
1797			return r;
1798	}
1799	if (use_temp)
1800		return tgsi_helper_copy(ctx, inst);
1801	return 0;
1802}
1803
1804static int tgsi_exp(struct r600_shader_ctx *ctx)
1805{
1806	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1807	struct r600_bc_alu_src r600_src[3];
1808	struct r600_bc_alu alu;
1809	int r;
1810
1811	/* result.x = 2^floor(src); */
1812	if (inst->Dst[0].Register.WriteMask & 1) {
1813		memset(&alu, 0, sizeof(struct r600_bc_alu));
1814
1815		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
1816		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1817		if (r)
1818			return r;
1819
1820		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1821
1822		alu.dst.sel = ctx->temp_reg;
1823		alu.dst.chan = 0;
1824		alu.dst.write = 1;
1825		alu.last = 1;
1826		r = r600_bc_add_alu(ctx->bc, &alu);
1827		if (r)
1828			return r;
1829
1830		r = r600_bc_add_literal(ctx->bc, ctx->value);
1831		if (r)
1832			return r;
1833
1834		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
1835		alu.src[0].sel = ctx->temp_reg;
1836		alu.src[0].chan = 0;
1837
1838		alu.dst.sel = ctx->temp_reg;
1839		alu.dst.chan = 0;
1840		alu.dst.write = 1;
1841		alu.last = 1;
1842		r = r600_bc_add_alu(ctx->bc, &alu);
1843		if (r)
1844			return r;
1845
1846		r = r600_bc_add_literal(ctx->bc, ctx->value);
1847		if (r)
1848			return r;
1849	}
1850
1851	/* result.y = tmp - floor(tmp); */
1852	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
1853		memset(&alu, 0, sizeof(struct r600_bc_alu));
1854
1855		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT;
1856		alu.src[0] = r600_src[0];
1857		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1858		if (r)
1859			return r;
1860		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1861
1862		alu.dst.sel = ctx->temp_reg;
1863//		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1864//		if (r)
1865//			return r;
1866		alu.dst.write = 1;
1867		alu.dst.chan = 1;
1868
1869		alu.last = 1;
1870
1871		r = r600_bc_add_alu(ctx->bc, &alu);
1872		if (r)
1873			return r;
1874		r = r600_bc_add_literal(ctx->bc, ctx->value);
1875		if (r)
1876			return r;
1877	}
1878
1879	/* result.z = RoughApprox2ToX(tmp);*/
1880	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
1881		memset(&alu, 0, sizeof(struct r600_bc_alu));
1882		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
1883		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1884		if (r)
1885			return r;
1886		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1887
1888		alu.dst.sel = ctx->temp_reg;
1889		alu.dst.write = 1;
1890		alu.dst.chan = 2;
1891
1892		alu.last = 1;
1893
1894		r = r600_bc_add_alu(ctx->bc, &alu);
1895		if (r)
1896			return r;
1897		r = r600_bc_add_literal(ctx->bc, ctx->value);
1898		if (r)
1899			return r;
1900	}
1901
1902	/* result.w = 1.0;*/
1903	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
1904		memset(&alu, 0, sizeof(struct r600_bc_alu));
1905
1906		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1907		alu.src[0].sel = V_SQ_ALU_SRC_1;
1908		alu.src[0].chan = 0;
1909
1910		alu.dst.sel = ctx->temp_reg;
1911		alu.dst.chan = 3;
1912		alu.dst.write = 1;
1913		alu.last = 1;
1914		r = r600_bc_add_alu(ctx->bc, &alu);
1915		if (r)
1916			return r;
1917		r = r600_bc_add_literal(ctx->bc, ctx->value);
1918		if (r)
1919			return r;
1920	}
1921	return tgsi_helper_copy(ctx, inst);
1922}
1923
1924static int tgsi_arl(struct r600_shader_ctx *ctx)
1925{
1926	/* TODO from r600c, ar values don't persist between clauses */
1927	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1928	struct r600_bc_alu alu;
1929	int r;
1930	memset(&alu, 0, sizeof(struct r600_bc_alu));
1931
1932	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
1933
1934	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1935	if (r)
1936		return r;
1937	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1938
1939	alu.last = 1;
1940
1941	r = r600_bc_add_alu_type(ctx->bc, &alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU);
1942	if (r)
1943		return r;
1944	return 0;
1945}
1946
1947static int tgsi_opdst(struct r600_shader_ctx *ctx)
1948{
1949	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1950	struct r600_bc_alu alu;
1951	int i, r = 0;
1952
1953	for (i = 0; i < 4; i++) {
1954		memset(&alu, 0, sizeof(struct r600_bc_alu));
1955
1956		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1957		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1958		if (r)
1959			return r;
1960
1961	        if (i == 0 || i == 3) {
1962			alu.src[0].sel = V_SQ_ALU_SRC_1;
1963		} else {
1964			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1965			if (r)
1966				return r;
1967			alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1968		}
1969
1970	        if (i == 0 || i == 2) {
1971			alu.src[1].sel = V_SQ_ALU_SRC_1;
1972		} else {
1973			r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
1974			if (r)
1975				return r;
1976			alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
1977		}
1978		if (i == 3)
1979			alu.last = 1;
1980		r = r600_bc_add_alu(ctx->bc, &alu);
1981		if (r)
1982			return r;
1983	}
1984	return 0;
1985}
1986
1987static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
1988{
1989	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1990	struct r600_bc_alu alu;
1991	int r;
1992
1993	memset(&alu, 0, sizeof(struct r600_bc_alu));
1994	alu.inst = opcode;
1995	alu.predicate = 1;
1996
1997	alu.dst.sel = ctx->temp_reg;
1998	alu.dst.write = 1;
1999	alu.dst.chan = 0;
2000
2001	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2002	if (r)
2003		return r;
2004	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2005	alu.src[1].sel = V_SQ_ALU_SRC_0;
2006	alu.src[1].chan = 0;
2007
2008	alu.last = 1;
2009
2010	r = r600_bc_add_alu_type(ctx->bc, &alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE);
2011	if (r)
2012		return r;
2013	return 0;
2014}
2015
2016static int pops(struct r600_shader_ctx *ctx, int pops)
2017{
2018	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_POP);
2019	ctx->bc->cf_last->pop_count = pops;
2020	return 0;
2021}
2022
2023static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2024{
2025	switch(reason) {
2026	case FC_PUSH_VPM:
2027		ctx->bc->callstack[ctx->bc->call_sp].current--;
2028		break;
2029	case FC_PUSH_WQM:
2030	case FC_LOOP:
2031		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2032		break;
2033	case FC_REP:
2034		/* TOODO : for 16 vp asic should -= 2; */
2035		ctx->bc->callstack[ctx->bc->call_sp].current --;
2036		break;
2037	}
2038}
2039
2040static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2041{
2042	if (check_max_only) {
2043		int diff;
2044		switch (reason) {
2045		case FC_PUSH_VPM:
2046			diff = 1;
2047			break;
2048		case FC_PUSH_WQM:
2049			diff = 4;
2050			break;
2051		}
2052		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2053		    ctx->bc->callstack[ctx->bc->call_sp].max) {
2054			ctx->bc->callstack[ctx->bc->call_sp].max =
2055				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2056		}
2057		return;
2058	}
2059	switch (reason) {
2060	case FC_PUSH_VPM:
2061		ctx->bc->callstack[ctx->bc->call_sp].current++;
2062		break;
2063	case FC_PUSH_WQM:
2064	case FC_LOOP:
2065		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2066		break;
2067	case FC_REP:
2068		ctx->bc->callstack[ctx->bc->call_sp].current++;
2069		break;
2070	}
2071
2072	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2073	    ctx->bc->callstack[ctx->bc->call_sp].max) {
2074		ctx->bc->callstack[ctx->bc->call_sp].max =
2075			ctx->bc->callstack[ctx->bc->call_sp].current;
2076	}
2077}
2078
2079static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2080{
2081	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2082
2083	sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2084						sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2085	sp->mid[sp->num_mid] = ctx->bc->cf_last;
2086	sp->num_mid++;
2087}
2088
2089static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2090{
2091	ctx->bc->fc_sp++;
2092	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2093	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2094}
2095
2096static void fc_poplevel(struct r600_shader_ctx *ctx)
2097{
2098	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2099	if (sp->mid) {
2100		free(sp->mid);
2101		sp->mid = NULL;
2102	}
2103	sp->num_mid = 0;
2104	sp->start = NULL;
2105	sp->type = 0;
2106	ctx->bc->fc_sp--;
2107}
2108
#if 0
/*
 * Disabled helpers for return / break-on-flag handling.  Several of them
 * are empty stubs; the whole section is compiled out.
 */

/* Append a RETURN control-flow instruction. */
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
	return 0;
}

/* Append a JUMP with the given pop count; `offset` is not used yet. */
static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{
	struct r600_bc *bc = ctx->bc;

	r600_bc_add_cfinst(bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
	bc->cf_last->pop_count = pops;
	/* TODO work out offset */
	return 0;
}

/* Stub: emits nothing. */
static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

/* Stub: emits nothing. */
static void emit_testflag(struct r600_shader_ctx *ctx)
{
}

/* Test the flag, then jump, set the flag, pop ifidx + 1 levels and return. */
static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

/*
 * Test the flag, emit the current opcode as a CF instruction with a pop
 * count of 1, record it as a mid point of level `fc_sp`, then pop once.
 */
static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	struct r600_bc *bc = ctx->bc;

	emit_testflag(ctx);

	r600_bc_add_cfinst(bc, ctx->inst_info->r600_opcode);
	bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif
2156
2157static int tgsi_if(struct r600_shader_ctx *ctx)
2158{
2159	emit_logic_pred(ctx, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE);
2160
2161	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
2162
2163	fc_pushlevel(ctx, FC_IF);
2164
2165	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2166	return 0;
2167}
2168
2169static int tgsi_else(struct r600_shader_ctx *ctx)
2170{
2171	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_ELSE);
2172	ctx->bc->cf_last->pop_count = 1;
2173
2174	fc_set_mid(ctx, ctx->bc->fc_sp);
2175	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2176	return 0;
2177}
2178
2179static int tgsi_endif(struct r600_shader_ctx *ctx)
2180{
2181	pops(ctx, 1);
2182	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2183		R600_ERR("if/endif unbalanced in shader\n");
2184		return -1;
2185	}
2186
2187	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2188		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2189		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2190	} else {
2191		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2192	}
2193	fc_poplevel(ctx);
2194
2195	callstack_decrease_current(ctx, FC_PUSH_VPM);
2196	return 0;
2197}
2198
2199static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2200{
2201	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL);
2202
2203	fc_pushlevel(ctx, FC_LOOP);
2204
2205	/* check stack depth */
2206	callstack_check_depth(ctx, FC_LOOP, 0);
2207	return 0;
2208}
2209
2210static int tgsi_endloop(struct r600_shader_ctx *ctx)
2211{
2212	int i;
2213
2214	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END);
2215
2216	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2217		R600_ERR("loop/endloop in shader code are not paired.\n");
2218		return -EINVAL;
2219	}
2220
2221	/* fixup loop pointers - from r600isa
2222	   LOOP END points to CF after LOOP START,
2223	   LOOP START point to CF after LOOP END
2224	   BRK/CONT point to LOOP END CF
2225	*/
2226	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2227
2228	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2229
2230	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2231		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2232	}
2233	/* TODO add LOOPRET support */
2234	fc_poplevel(ctx);
2235	callstack_decrease_current(ctx, FC_LOOP);
2236	return 0;
2237}
2238
2239static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2240{
2241	unsigned int fscp;
2242
2243	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2244	{
2245		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2246			break;
2247	}
2248
2249	if (fscp == 0) {
2250		R600_ERR("Break not inside loop/endloop pair\n");
2251		return -EINVAL;
2252	}
2253
2254	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2255	ctx->bc->cf_last->pop_count = 1;
2256
2257	fc_set_mid(ctx, fscp);
2258
2259	pops(ctx, 1);
2260	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2261	return 0;
2262}
2263
2264static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
2265	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl},
2266	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2267	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2268	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
2269	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
2270	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2271	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2272	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2273	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2274	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2275	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2276	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2277	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2278	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2279	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2280	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2281	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2282	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2283	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2284	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2285	/* gap */
2286	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2287	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2288	/* gap */
2289	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2290	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2291	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2292	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2293	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2294	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2295	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2296	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2297	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2298	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2299	/* gap */
2300	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2301	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2302	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2303	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2304	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2305	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2306	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2307	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
2308	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2309	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2310	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2311	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2312	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2313	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2314	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2315	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2316	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2317	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2318	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2319	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2320	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2321	{TGSI_OPCODE_TXD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2322	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2323	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2324	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2325	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2326	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2327	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2328	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2329	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2330	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2331	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2332	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2333	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2334	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2335	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
2336	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2337	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2338	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2339	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2340	{TGSI_OPCODE_TXL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2341	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2342	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2343	/* gap */
2344	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2345	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2346	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2347	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2348	/* gap */
2349	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2350	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2351	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2352	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2353	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2354	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2355	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2356	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
2357	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2358	/* gap */
2359	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2360	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2361	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2362	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2363	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2364	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2365	{TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2366	{TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2367	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2368	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2369	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2370	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2371	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2372	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2373	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2374	/* gap */
2375	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2376	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2377	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2378	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2379	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2380	/* gap */
2381	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2382	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2383	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2384	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2385	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2386	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2387	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2388	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2389	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
2390	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
2391	/* gap */
2392	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2393	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2394	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2395	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2396	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2397	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2398	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2399	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2400	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2401	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2402	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2403	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2404	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2405	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2406	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2407	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2408	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2409	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2410	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2411	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2412	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2413	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2414	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2415	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2416	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2417	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2418	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2419	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2420};
2421