r600_shader.c revision 0e6a02d29915db2ca460206656ab517ddaf0b455
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "pipe/p_shader_tokens.h"
24#include "tgsi/tgsi_parse.h"
25#include "tgsi/tgsi_scan.h"
26#include "tgsi/tgsi_dump.h"
27#include "util/u_format.h"
28#include "r600_screen.h"
29#include "r600_context.h"
30#include "r600_shader.h"
31#include "r600_asm.h"
32#include "r600_sq.h"
33#include "r600d.h"
34#include <stdio.h>
35#include <errno.h>
36
37
38struct r600_shader_tgsi_instruction;
39
40struct r600_shader_ctx {
41	struct tgsi_shader_info			info;
42	struct tgsi_parse_context		parse;
43	const struct tgsi_token			*tokens;
44	unsigned				type;
45	unsigned				file_offset[TGSI_FILE_COUNT];
46	unsigned				temp_reg;
47	struct r600_shader_tgsi_instruction	*inst_info;
48	struct r600_bc				*bc;
49	struct r600_shader			*shader;
50	u32					value[4];
51};
52
53struct r600_shader_tgsi_instruction {
54	unsigned	tgsi_opcode;
55	unsigned	is_op3;
56	unsigned	r600_opcode;
57	int (*process)(struct r600_shader_ctx *ctx);
58};
59
60static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
61static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
62
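/*
 * Patch a vertex shader against the currently bound vertex elements: every
 * vertex-fetch instruction gets the destination swizzle of the format bound
 * to its buffer, and the bytecode is rebuilt afterwards.
 */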
63static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader)
64{
65	struct r600_context *rctx = r600_context(ctx);
66	const struct util_format_description *desc;
67	enum pipe_format resource_format[160];
68	unsigned i, nresources = 0;
69	struct r600_bc *bc = &shader->bc;
70	struct r600_bc_cf *cf;
71	struct r600_bc_vtx *vtx;
72
73	if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
74		return 0;
75	for (i = 0; i < rctx->vertex_elements->count; i++) {
76		resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
77	}
78	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
79		switch (cf->inst) {
80		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
81		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
82			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
83				desc = util_format_description(resource_format[vtx->buffer_id]);
84				if (desc == NULL) {
85					R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
86					return -EINVAL;
87				}
88				vtx->dst_sel_x = desc->swizzle[0];
89				vtx->dst_sel_y = desc->swizzle[1];
90				vtx->dst_sel_z = desc->swizzle[2];
91				vtx->dst_sel_w = desc->swizzle[3];
92			}
93			break;
94		default:
95			break;
96		}
97	}
98	return r600_bc_build(&shader->bc);
99}
100
101int r600_pipe_shader_create(struct pipe_context *ctx,
102			struct r600_context_state *rpshader,
103			const struct tgsi_token *tokens)
104{
105	struct r600_screen *rscreen = r600_screen(ctx->screen);
106	int r;
107
108//fprintf(stderr, "--------------------------------------------------------------\n");
109//tgsi_dump(tokens, 0);
110	if (rpshader == NULL)
111		return -ENOMEM;
112	rpshader->shader.family = radeon_get_family(rscreen->rw);
113	r = r600_shader_from_tgsi(tokens, &rpshader->shader);
114	if (r) {
115		R600_ERR("translation from TGSI failed!\n");
116		return r;
117	}
118	r = r600_bc_build(&rpshader->shader.bc);
119	if (r) {
120		R600_ERR("building bytecode failed!\n");
121		return r;
122	}
123//fprintf(stderr, "______________________________________________________________\n");
124	return 0;
125}
126
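/*
 * Build the R600_VS_SHADER radeon state: the SPI_VS_OUT_ID semantic map,
 * the export count in SPI_VS_OUT_CONFIG, the GPR count in
 * SQ_PGM_RESOURCES_VS, and the bytecode buffer referenced from both BO
 * slots of the state.
 */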
127static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader)
128{
129	struct r600_screen *rscreen = r600_screen(ctx->screen);
130	struct r600_shader *rshader = &rpshader->shader;
131	struct radeon_state *state;
132	unsigned i, tmp;
133
134	rpshader->rstate = radeon_state_decref(rpshader->rstate);
135	state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER);
136	if (state == NULL)
137		return -ENOMEM;
138	for (i = 0; i < 10; i++) {
139		state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0;
140	}
141	/* so far we never get a proper semantic id from TGSI, so write sequential ids instead */
142	for (i = 0; i < 32; i++) {
143		tmp = i << ((i & 3) * 8);
144		state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp;
145	}
146	state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2);
147	state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
148	rpshader->rstate = state;
149	rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
150	rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo);
151	rpshader->rstate->nbo = 2;
152	rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
153	rpshader->rstate->placement[2] = RADEON_GEM_DOMAIN_GTT;
154	return radeon_state_pm4(state);
155}
156
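/*
 * Build the R600_PS_SHADER radeon state: one SPI_PS_INPUT_CNTL per input
 * (flat shading for color inputs, point-sprite coordinates where enabled),
 * the interpolation control, the GPR count, and the export mask in which
 * bit 0 stands for a position (depth) export and the higher bits for the
 * color outputs.
 */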
157static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader)
158{
159	const struct pipe_rasterizer_state *rasterizer;
160	struct r600_screen *rscreen = r600_screen(ctx->screen);
161	struct r600_shader *rshader = &rpshader->shader;
162	struct r600_context *rctx = r600_context(ctx);
163	struct radeon_state *state;
164	unsigned i, tmp, exports_ps, num_cout;
165
166	rasterizer = &rctx->rasterizer->state.rasterizer;
167	rpshader->rstate = radeon_state_decref(rpshader->rstate);
168	state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER);
169	if (state == NULL)
170		return -ENOMEM;
171	for (i = 0; i < rshader->ninput; i++) {
172		tmp = S_028644_SEMANTIC(i);
173		tmp |= S_028644_SEL_CENTROID(1);
174		if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
175			rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) {
176			tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
177		}
178		if (rasterizer->sprite_coord_enable & (1 << i)) {
179			tmp |= S_028644_PT_SPRITE_TEX(1);
180		}
181		state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp;
182	}
183
184	exports_ps = 0;
185	num_cout = 0;
186	for (i = 0; i < rshader->noutput; i++) {
187		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
188			exports_ps |= 1;
189		else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
190			exports_ps |= (1 << (num_cout+1));
191			num_cout++;
192		}
193	}
194	if (!exports_ps) {
195		/* always export at least one component per pixel */
196		exports_ps = 2;
197	}
198	state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) |
199							S_0286CC_PERSP_GRADIENT_ENA(1);
200	state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000;
201	state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
202	state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps;
203	rpshader->rstate = state;
204	rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
205	rpshader->rstate->nbo = 1;
206	rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
207	return radeon_state_pm4(state);
208}
209
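/*
 * Upload the assembled bytecode into a freshly allocated GTT buffer and
 * build the hardware state matching the shader stage.
 */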
210static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader)
211{
212	struct r600_screen *rscreen = r600_screen(ctx->screen);
213	struct r600_context *rctx = r600_context(ctx);
214	struct r600_shader *rshader = &rpshader->shader;
215	int r;
216
217	/* copy new shader */
218	radeon_bo_decref(rscreen->rw, rpshader->bo);
219	rpshader->bo = NULL;
220	rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4,
221				4096, NULL);
222	if (rpshader->bo == NULL) {
223		return -ENOMEM;
224	}
225	radeon_bo_map(rscreen->rw, rpshader->bo);
226	memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4);
227	radeon_bo_unmap(rscreen->rw, rpshader->bo);
228	/* build state */
229	rshader->flat_shade = rctx->flat_shade;
230	switch (rshader->processor_type) {
231	case TGSI_PROCESSOR_VERTEX:
232		r = r600_pipe_shader_vs(ctx, rpshader);
233		break;
234	case TGSI_PROCESSOR_FRAGMENT:
235		r = r600_pipe_shader_ps(ctx, rpshader);
236		break;
237	default:
238		r = -EINVAL;
239		break;
240	}
241	return r;
242}
243
244int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader)
245{
246	struct r600_context *rctx = r600_context(ctx);
247	int r;
248
249	if (rpshader == NULL)
250		return -EINVAL;
251	/* the bound vertex elements must provide at least as many resources as the shader fetches */
252	if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) {
253		R600_ERR("%d resources provided, expecting %d\n",
254			rctx->vertex_elements->count, rpshader->shader.bc.nresource);
255		return -EINVAL;
256	}
257	r = r600_shader_update(ctx, &rpshader->shader);
258	if (r)
259		return r;
260	return r600_pipe_shader(ctx, rpshader);
261}
262
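/*
 * Reject TGSI constructs the translator does not handle yet: more than one
 * destination, predication, labels, and indirect/dimension/absolute
 * operand addressing.
 */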
263static int tgsi_is_supported(struct r600_shader_ctx *ctx)
264{
265	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
266	int j;
267
268	if (i->Instruction.NumDstRegs > 1) {
269		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
270		return -EINVAL;
271	}
272	if (i->Instruction.Predicate) {
273		R600_ERR("predicate unsupported\n");
274		return -EINVAL;
275	}
276	if (i->Instruction.Label) {
277		R600_ERR("label unsupported\n");
278		return -EINVAL;
279	}
280	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
281		if (i->Src[j].Register.Indirect ||
282			i->Src[j].Register.Dimension ||
283			i->Src[j].Register.Absolute) {
284			R600_ERR("unsupported src (indirect|dimension|absolute)\n");
285			return -EINVAL;
286		}
287	}
288	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
289		if (i->Dst[j].Register.Indirect || i->Dst[j].Register.Dimension) {
290			R600_ERR("unsupported dst (indirect|dimension)\n");
291			return -EINVAL;
292		}
293	}
294	return 0;
295}
296
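/*
 * Record input/output declarations in the r600_shader. For vertex shader
 * inputs the declaration is turned into a vertex fetch: buffer i is read
 * using the index held in GPR0.x and lands in the GPR assigned to input i.
 */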
297static int tgsi_declaration(struct r600_shader_ctx *ctx)
298{
299	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
300	struct r600_bc_vtx vtx;
301	unsigned i;
302	int r;
303
304	switch (d->Declaration.File) {
305	case TGSI_FILE_INPUT:
306		i = ctx->shader->ninput++;
307		ctx->shader->input[i].name = d->Semantic.Name;
308		ctx->shader->input[i].sid = d->Semantic.Index;
309		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
310		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
311		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
312			/* turn input into fetch */
313			memset(&vtx, 0, sizeof(struct r600_bc_vtx));
314			vtx.inst = 0;
315			vtx.fetch_type = 0;
316			vtx.buffer_id = i;
317			/* register containing the index into the buffer */
318			vtx.src_gpr = 0;
319			vtx.src_sel_x = 0;
320			vtx.mega_fetch_count = 0x1F;
321			vtx.dst_gpr = ctx->shader->input[i].gpr;
322			vtx.dst_sel_x = 0;
323			vtx.dst_sel_y = 1;
324			vtx.dst_sel_z = 2;
325			vtx.dst_sel_w = 3;
326			r = r600_bc_add_vtx(ctx->bc, &vtx);
327			if (r)
328				return r;
329		}
330		break;
331	case TGSI_FILE_OUTPUT:
332		i = ctx->shader->noutput++;
333		ctx->shader->output[i].name = d->Semantic.Name;
334		ctx->shader->output[i].sid = d->Semantic.Index;
335		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
336		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
337		break;
338	case TGSI_FILE_CONSTANT:
339	case TGSI_FILE_TEMPORARY:
340	case TGSI_FILE_SAMPLER:
341		break;
342	default:
343		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
344		return -EINVAL;
345	}
346	return 0;
347}
348
349int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
350{
351	struct tgsi_full_immediate *immediate;
352	struct r600_shader_ctx ctx;
353	struct r600_bc_output output[32];
354	unsigned output_done, noutput;
355	unsigned opcode;
356	int i, r = 0, pos0;
357
358	ctx.bc = &shader->bc;
359	ctx.shader = shader;
360	r = r600_bc_init(ctx.bc, shader->family);
361	if (r)
362		return r;
363	ctx.tokens = tokens;
364	tgsi_scan_shader(tokens, &ctx.info);
365	tgsi_parse_init(&ctx.parse, tokens);
366	ctx.type = ctx.parse.FullHeader.Processor.Processor;
367	shader->processor_type = ctx.type;
368
369	/* register allocations */
370	/* Values [0,127] correspond to GPR[0..127].
371	 * Values [128,159] correspond to constant buffer bank 0
372	 * Values [160,191] correspond to constant buffer bank 1
373	 * Values [256,511] correspond to cfile constants c[0..255].
374	 * Other special values are shown in the list below.
375	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
376	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
377	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
378	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
379	 * 248	SQ_ALU_SRC_0: special constant 0.0.
380	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
381	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
382	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
383	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
384	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
385	 * 254	SQ_ALU_SRC_PV: previous vector result.
386	 * 255	SQ_ALU_SRC_PS: previous scalar result.
387	 */
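	/* As an illustration, with the offsets chosen below a TGSI register
	 * ends up as the following ALU source select:
	 *   IN[i]    -> GPR file_offset[TGSI_FILE_INPUT] + i
	 *   OUT[i]   -> GPR file_offset[TGSI_FILE_OUTPUT] + i
	 *   TEMP[i]  -> GPR file_offset[TGSI_FILE_TEMPORARY] + i
	 *   CONST[i] -> 256 + i (cfile)
	 *   IMM[i]   -> 253 (SQ_ALU_SRC_LITERAL, value emitted separately)
	 */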
388	for (i = 0; i < TGSI_FILE_COUNT; i++) {
389		ctx.file_offset[i] = 0;
390	}
391	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
392		ctx.file_offset[TGSI_FILE_INPUT] = 1;
393	}
394	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
395						ctx.info.file_count[TGSI_FILE_INPUT];
396	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
397						ctx.info.file_count[TGSI_FILE_OUTPUT];
398	ctx.file_offset[TGSI_FILE_CONSTANT] = 256;
399	ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
400	ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
401			ctx.info.file_count[TGSI_FILE_TEMPORARY];
402
403	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
404		tgsi_parse_token(&ctx.parse);
405		switch (ctx.parse.FullToken.Token.Type) {
406		case TGSI_TOKEN_TYPE_IMMEDIATE:
407			immediate = &ctx.parse.FullToken.FullImmediate;
408			ctx.value[0] = immediate->u[0].Uint;
409			ctx.value[1] = immediate->u[1].Uint;
410			ctx.value[2] = immediate->u[2].Uint;
411			ctx.value[3] = immediate->u[3].Uint;
412			break;
413		case TGSI_TOKEN_TYPE_DECLARATION:
414			r = tgsi_declaration(&ctx);
415			if (r)
416				goto out_err;
417			break;
418		case TGSI_TOKEN_TYPE_INSTRUCTION:
419			r = tgsi_is_supported(&ctx);
420			if (r)
421				goto out_err;
422			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
423			ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
424			r = ctx.inst_info->process(&ctx);
425			if (r)
426				goto out_err;
427			r = r600_bc_add_literal(ctx.bc, ctx.value);
428			if (r)
429				goto out_err;
430			break;
431		default:
432			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
433			r = -EINVAL;
434			goto out_err;
435		}
436	}
437	/* export output */
438	noutput = shader->noutput;
439	for (i = 0, pos0 = 0; i < noutput; i++) {
440		memset(&output[i], 0, sizeof(struct r600_bc_output));
441		output[i].gpr = shader->output[i].gpr;
442		output[i].elem_size = 3;
443		output[i].swizzle_x = 0;
444		output[i].swizzle_y = 1;
445		output[i].swizzle_z = 2;
446		output[i].swizzle_w = 3;
447		output[i].barrier = 1;
448		output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
449		output[i].array_base = i - pos0;
450		output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
451		switch (ctx.type) {
452		case TGSI_PROCESSOR_VERTEX:
453			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
454				output[i].array_base = 60;
455				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
456				/* position doesn't count in array_base */
457				pos0++;
458			}
459			if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
460				output[i].array_base = 61;
461				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
462				/* position doesn't count in array_base */
463				pos0++;
464			}
465			break;
466		case TGSI_PROCESSOR_FRAGMENT:
467			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
468				output[i].array_base = shader->output[i].sid;
469				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
470			} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
471				output[i].array_base = 61;
472				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
473			} else {
474				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
475				r = -EINVAL;
476				goto out_err;
477			}
478			break;
479		default:
480			R600_ERR("unsupported processor type %d\n", ctx.type);
481			r = -EINVAL;
482			goto out_err;
483		}
484	}
485	/* add fake param output for vertex shader if no param is exported */
486	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
487		for (i = 0, pos0 = 0; i < noutput; i++) {
488			if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
489				pos0 = 1;
490				break;
491			}
492		}
493		if (!pos0) {
494			memset(&output[i], 0, sizeof(struct r600_bc_output));
495			output[i].gpr = 0;
496			output[i].elem_size = 3;
497			output[i].swizzle_x = 0;
498			output[i].swizzle_y = 1;
499			output[i].swizzle_z = 2;
500			output[i].swizzle_w = 3;
501			output[i].barrier = 1;
502			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
503			output[i].array_base = 0;
504			output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
505			noutput++;
506		}
507	}
508	/* add fake pixel export */
509	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
510		memset(&output[0], 0, sizeof(struct r600_bc_output));
511		output[0].gpr = 0;
512		output[0].elem_size = 3;
513		output[0].swizzle_x = 7;
514		output[0].swizzle_y = 7;
515		output[0].swizzle_z = 7;
516		output[0].swizzle_w = 7;
517		output[0].barrier = 1;
518		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
519		output[0].array_base = 0;
520		output[0].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
521		noutput++;
522	}
523	/* set export done on last export of each type */
524	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
525		if (i == (noutput - 1)) {
526			output[i].end_of_program = 1;
527		}
528		if (!(output_done & (1 << output[i].type))) {
529			output_done |= (1 << output[i].type);
530			output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE;
531		}
532	}
533	/* add output to bytecode */
534	for (i = 0; i < noutput; i++) {
535		r = r600_bc_add_output(ctx.bc, &output[i]);
536		if (r)
537			goto out_err;
538	}
539	tgsi_parse_free(&ctx.parse);
540	return 0;
541out_err:
542	tgsi_parse_free(&ctx.parse);
543	return r;
544}
545
546static int tgsi_unsupported(struct r600_shader_ctx *ctx)
547{
548	R600_ERR("unsupported tgsi opcode %d\n", ctx->inst_info->tgsi_opcode);
549	return -EINVAL;
550}
551
552static int tgsi_end(struct r600_shader_ctx *ctx)
553{
554	return 0;
555}
556
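/*
 * Translate a TGSI source operand into an r600 ALU source: the register
 * index is biased by the per-file offset (immediates collapse to index 0,
 * which the TGSI_FILE_IMMEDIATE offset turns into SQ_ALU_SRC_LITERAL), and
 * the negate flag is carried over. Swizzles are applied by the callers.
 */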
557static int tgsi_src(struct r600_shader_ctx *ctx,
558			const struct tgsi_full_src_register *tgsi_src,
559			struct r600_bc_alu_src *r600_src)
560{
561	memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
562	r600_src->sel = tgsi_src->Register.Index;
563	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
564		r600_src->sel = 0;
565	}
566	r600_src->neg = tgsi_src->Register.Negate;
567	r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
568	return 0;
569}
570
571static int tgsi_dst(struct r600_shader_ctx *ctx,
572			const struct tgsi_full_dst_register *tgsi_dst,
573			unsigned swizzle,
574			struct r600_bc_alu_dst *r600_dst)
575{
576	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
577
578	r600_dst->sel = tgsi_dst->Register.Index;
579	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
580	r600_dst->chan = swizzle;
581	r600_dst->write = 1;
582	if (inst->Instruction.Saturate) {
583		r600_dst->clamp = 1;
584	}
585	return 0;
586}
587
588static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
589{
590	switch (swizzle) {
591	case 0:
592		return tgsi_src->Register.SwizzleX;
593	case 1:
594		return tgsi_src->Register.SwizzleY;
595	case 2:
596		return tgsi_src->Register.SwizzleZ;
597	case 3:
598		return tgsi_src->Register.SwizzleW;
599	default:
600		return 0;
601	}
602}
603
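/*
 * Stage extra constant-file operands in temporary GPRs via MOVs,
 * presumably so that a single ALU instruction never reads more than one
 * operand straight from the constant file.
 */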
604static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
605{
606	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
607	struct r600_bc_alu alu;
608	int i, j, k, nconst, r;
609
610	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
611		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
612			nconst++;
613		}
614		r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
615		if (r) {
616			return r;
617		}
618	}
619	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
620		if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) {
621			for (k = 0; k < 4; k++) {
622				memset(&alu, 0, sizeof(struct r600_bc_alu));
623				alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
624				alu.src[0].sel = r600_src[0].sel;
625				alu.src[0].chan = k;
626				alu.dst.sel = ctx->temp_reg + j;
627				alu.dst.chan = k;
628				alu.dst.write = 1;
629				if (k == 3)
630					alu.last = 1;
631				r = r600_bc_add_alu(ctx->bc, &alu);
632				if (r)
633					return r;
634			}
635			r600_src[0].sel = ctx->temp_reg + j;
636			j--;
637		}
638	}
639	return 0;
640}
641
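/*
 * Generic translation for simple ALU instructions: one ALU op per channel,
 * a NOP for channels masked out by the write mask, with SUB handled as ADD
 * with src1 negated and ABS as MOV with the abs bit set.
 */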
642static int tgsi_op2(struct r600_shader_ctx *ctx)
643{
644	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
645	struct r600_bc_alu_src r600_src[3];
646	struct r600_bc_alu alu;
647	int i, j, r;
648
649	r = tgsi_split_constant(ctx, r600_src);
650	if (r)
651		return r;
652	for (i = 0; i < 4; i++) {
653		memset(&alu, 0, sizeof(struct r600_bc_alu));
654		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
655			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
656			alu.dst.chan = i;
657		} else {
658			alu.inst = ctx->inst_info->r600_opcode;
659			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
660				alu.src[j] = r600_src[j];
661				alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
662			}
663			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
664			if (r)
665				return r;
666		}
667		/* handle some special cases */
668		switch (ctx->inst_info->tgsi_opcode) {
669		case TGSI_OPCODE_SUB:
670			alu.src[1].neg = 1;
671			break;
672		case TGSI_OPCODE_ABS:
673			alu.src[0].abs = 1;
674			break;
675		default:
676			break;
677		}
678		if (i == 3) {
679			alu.last = 1;
680		}
681		r = r600_bc_add_alu(ctx->bc, &alu);
682		if (r)
683			return r;
684	}
685	return 0;
686}
687
688/*
689 * r600 - trunc to -PI..PI range
690 * r700 - normalize by dividing by 2PI
691 * see fdo bug 27901
692 */
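/*
 * The sequence below leaves in temp.x:
 *   t = fract(src * 1/(2*PI) + 0.5)        t in [0, 1)
 *   r600:  angle = t * 2*PI - PI           range [-PI, PI)
 *   r700+: angle = t * 1.0 - 0.5           range [-0.5, 0.5), i.e. already
 *                                          divided by 2*PI
 * which is then fed to the SIN/COS instruction and the result replicated
 * to all written channels.
 */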
693static int tgsi_trig(struct r600_shader_ctx *ctx)
694{
695	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
696	struct r600_bc_alu_src r600_src[3];
697	struct r600_bc_alu alu;
698	int i, r;
699	uint32_t lit_vals[4];
700
701	memset(lit_vals, 0, 4*4);
702	r = tgsi_split_constant(ctx, r600_src);
703	if (r)
704		return r;
705	lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
706	lit_vals[1] = fui(0.5f);
707
708	memset(&alu, 0, sizeof(struct r600_bc_alu));
709	alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
710	alu.is_op3 = 1;
711
712	alu.dst.chan = 0;
713	alu.dst.sel = ctx->temp_reg;
714	alu.dst.write = 1;
715
716	alu.src[0] = r600_src[0];
717	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
718
719	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
720	alu.src[1].chan = 0;
721	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
722	alu.src[2].chan = 1;
723	alu.last = 1;
724	r = r600_bc_add_alu(ctx->bc, &alu);
725	if (r)
726		return r;
727	r = r600_bc_add_literal(ctx->bc, lit_vals);
728	if (r)
729		return r;
730
731	memset(&alu, 0, sizeof(struct r600_bc_alu));
732	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT;
733
734	alu.dst.chan = 0;
735	alu.dst.sel = ctx->temp_reg;
736	alu.dst.write = 1;
737
738	alu.src[0].sel = ctx->temp_reg;
739	alu.src[0].chan = 0;
740	alu.last = 1;
741	r = r600_bc_add_alu(ctx->bc, &alu);
742	if (r)
743		return r;
744
745	if (ctx->bc->chiprev == 0) {
746		lit_vals[0] = fui(3.1415926535897f * 2.0f);
747		lit_vals[1] = fui(-3.1415926535897f);
748	} else {
749		lit_vals[0] = fui(1.0f);
750		lit_vals[1] = fui(-0.5f);
751	}
752
753	memset(&alu, 0, sizeof(struct r600_bc_alu));
754	alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
755	alu.is_op3 = 1;
756
757	alu.dst.chan = 0;
758	alu.dst.sel = ctx->temp_reg;
759	alu.dst.write = 1;
760
761	alu.src[0].sel = ctx->temp_reg;
762	alu.src[0].chan = 0;
763
764	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
765	alu.src[1].chan = 0;
766	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
767	alu.src[2].chan = 1;
768	alu.last = 1;
769	r = r600_bc_add_alu(ctx->bc, &alu);
770	if (r)
771		return r;
772	r = r600_bc_add_literal(ctx->bc, lit_vals);
773	if (r)
774		return r;
775
776	memset(&alu, 0, sizeof(struct r600_bc_alu));
777	alu.inst = ctx->inst_info->r600_opcode;
778	alu.dst.chan = 0;
779	alu.dst.sel = ctx->temp_reg;
780	alu.dst.write = 1;
781
782	alu.src[0].sel = ctx->temp_reg;
783	alu.src[0].chan = 0;
784	alu.last = 1;
785	r = r600_bc_add_alu(ctx->bc, &alu);
786	if (r)
787		return r;
788
789	/* replicate result */
790	for (i = 0; i < 4; i++) {
791		memset(&alu, 0, sizeof(struct r600_bc_alu));
792		alu.src[0].sel = ctx->temp_reg;
793		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
794		alu.dst.chan = i;
795		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
796		if (r)
797			return r;
798		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
799		if (i == 3)
800			alu.last = 1;
801		r = r600_bc_add_alu(ctx->bc, &alu);
802		if (r)
803			return r;
804	}
805	return 0;
806}
807
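/*
 * KIL: emit a KILLGT per channel with 0.0 as the first operand, so the
 * pixel is killed whenever 0 > src, i.e. the source component is negative.
 */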
808static int tgsi_kill(struct r600_shader_ctx *ctx)
809{
810	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
811	struct r600_bc_alu alu;
812	int i, r;
813
814	for (i = 0; i < 4; i++) {
815		memset(&alu, 0, sizeof(struct r600_bc_alu));
816		alu.inst = ctx->inst_info->r600_opcode;
817		alu.dst.chan = i;
818		alu.src[0].sel = V_SQ_ALU_SRC_0;
819		r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
820		if (r)
821			return r;
822		alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
823		if (i == 3) {
824			alu.last = 1;
825		}
826		r = r600_bc_add_alu(ctx->bc, &alu);
827		if (r)
828			return r;
829	}
830	return 0;
831}
832
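/*
 * SLT/SLE: the hardware only has "greater" comparisons, so swap the two
 * operands and use SETGT/SETGE.
 */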
833static int tgsi_slt(struct r600_shader_ctx *ctx)
834{
835	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
836	struct r600_bc_alu_src r600_src[3];
837	struct r600_bc_alu alu;
838	int i, r;
839
840	r = tgsi_split_constant(ctx, r600_src);
841	if (r)
842		return r;
843	for (i = 0; i < 4; i++) {
844		memset(&alu, 0, sizeof(struct r600_bc_alu));
845		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
846			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
847			alu.dst.chan = i;
848		} else {
849			alu.inst = ctx->inst_info->r600_opcode;
850			alu.src[1] = r600_src[0];
851			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
852			alu.src[0] = r600_src[1];
853			alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
854			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
855			if (r)
856				return r;
857		}
858		if (i == 3) {
859			alu.last = 1;
860		}
861		r = r600_bc_add_alu(ctx->bc, &alu);
862		if (r)
863			return r;
864	}
865	return 0;
866}
867
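/*
 * LIT expansion: dst.x = 1.0, dst.y = max(src.x, 0.0), dst.w = 1.0, and
 * when dst.z is written it is computed with LOG_CLAMPED, MUL_LIT and
 * EXP_IEEE.
 */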
868static int tgsi_lit(struct r600_shader_ctx *ctx)
869{
870	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
871	struct r600_bc_alu alu;
872	int r;
873
874	/* dst.x <- 1.0 */
875	memset(&alu, 0, sizeof(struct r600_bc_alu));
876	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
877	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
878	alu.src[0].chan = 0;
879	r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
880	if (r)
881		return r;
882	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
883	r = r600_bc_add_alu(ctx->bc, &alu);
884	if (r)
885		return r;
886
887	/* dst.y = max(src.x, 0.0) */
888	memset(&alu, 0, sizeof(struct r600_bc_alu));
889	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX;
890	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
891	if (r)
892		return r;
893	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
894	alu.src[1].chan = tgsi_chan(&inst->Src[0], 0);
895	r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
896	if (r)
897		return r;
898	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
899	r = r600_bc_add_alu(ctx->bc, &alu);
900	if (r)
901		return r;
902
903	/* dst.z = NOP - fill Z slot */
904	memset(&alu, 0, sizeof(struct r600_bc_alu));
905	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
906	alu.dst.chan = 2;
907	r = r600_bc_add_alu(ctx->bc, &alu);
908	if (r)
909		return r;
910
911	/* dst.w <- 1.0 */
912	memset(&alu, 0, sizeof(struct r600_bc_alu));
913	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
914	alu.src[0].sel  = V_SQ_ALU_SRC_1;
915	alu.src[0].chan = 0;
916	r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
917	if (r)
918		return r;
919	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
920	alu.last = 1;
921	r = r600_bc_add_alu(ctx->bc, &alu);
922	if (r)
923		return r;
924
925	if (inst->Dst[0].Register.WriteMask & (1 << 2))
926	{
927		int chan;
928		int sel;
929
930		/* dst.z = log(src.y) */
931		memset(&alu, 0, sizeof(struct r600_bc_alu));
932		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED;
933		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
934		if (r)
935			return r;
936		alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
937		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
938		if (r)
939			return r;
940		alu.last = 1;
941		r = r600_bc_add_alu(ctx->bc, &alu);
942		if (r)
943			return r;
944
945		chan = alu.dst.chan;
946		sel = alu.dst.sel;
947
948		/* tmp.x = MUL_LIT(src.w, dst.z, src.x) */
949		memset(&alu, 0, sizeof(struct r600_bc_alu));
950		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
951		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
952		if (r)
953			return r;
954		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
955		alu.src[1].sel  = sel;
956		alu.src[1].chan = chan;
957		r = tgsi_src(ctx, &inst->Src[0], &alu.src[2]);
958		if (r)
959			return r;
960		alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
961		alu.dst.sel = ctx->temp_reg;
962		alu.dst.chan = 0;
963		alu.dst.write = 1;
964		alu.is_op3 = 1;
965		alu.last = 1;
966		r = r600_bc_add_alu(ctx->bc, &alu);
967		if (r)
968			return r;
969
970		/* dst.z = exp(tmp.x) */
971		memset(&alu, 0, sizeof(struct r600_bc_alu));
972		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
973		alu.src[0].sel = ctx->temp_reg;
974		alu.src[0].chan = 0;
975		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
976		if (r)
977			return r;
978		alu.last = 1;
979		r = r600_bc_add_alu(ctx->bc, &alu);
980		if (r)
981			return r;
982	}
983	return 0;
984}
985
986static int tgsi_trans(struct r600_shader_ctx *ctx)
987{
988	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
989	struct r600_bc_alu alu;
990	int i, j, r;
991
992	for (i = 0; i < 4; i++) {
993		memset(&alu, 0, sizeof(struct r600_bc_alu));
994		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
995			alu.inst = ctx->inst_info->r600_opcode;
996			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
997				r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]);
998				if (r)
999					return r;
1000				alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1001			}
1002			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1003			if (r)
1004				return r;
1005			alu.last = 1;
1006			r = r600_bc_add_alu(ctx->bc, &alu);
1007			if (r)
1008				return r;
1009		}
1010	}
1011	return 0;
1012}
1013
1014static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1015{
1016	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1017	struct r600_bc_alu alu;
1018	int i, r;
1019
1020	for (i = 0; i < 4; i++) {
1021		memset(&alu, 0, sizeof(struct r600_bc_alu));
1022		alu.src[0].sel = ctx->temp_reg;
1023		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1024		alu.dst.chan = i;
1025		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1026		if (r)
1027			return r;
1028		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1029		if (i == 3)
1030			alu.last = 1;
1031		r = r600_bc_add_alu(ctx->bc, &alu);
1032		if (r)
1033			return r;
1034	}
1035	return 0;
1036}
1037
1038static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1039{
1040	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1041	struct r600_bc_alu alu;
1042	int i, r;
1043
1044	memset(&alu, 0, sizeof(struct r600_bc_alu));
1045	alu.inst = ctx->inst_info->r600_opcode;
1046	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1047		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1048		if (r)
1049			return r;
1050		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1051	}
1052	alu.dst.sel = ctx->temp_reg;
1053	alu.dst.write = 1;
1054	alu.last = 1;
1055	r = r600_bc_add_alu(ctx->bc, &alu);
1056	if (r)
1057		return r;
1058	/* replicate result */
1059	return tgsi_helper_tempx_replicate(ctx);
1060}
1061
1062static int tgsi_pow(struct r600_shader_ctx *ctx)
1063{
1064	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1065	struct r600_bc_alu alu;
1066	int r;
1067
1068	/* LOG2(a) */
1069	memset(&alu, 0, sizeof(struct r600_bc_alu));
1070	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE;
1071	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1072	if (r)
1073		return r;
1074	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1075	alu.dst.sel = ctx->temp_reg;
1076	alu.dst.write = 1;
1077	alu.last = 1;
1078	r = r600_bc_add_alu(ctx->bc, &alu);
1079	if (r)
1080		return r;
1081	/* b * LOG2(a) */
1082	memset(&alu, 0, sizeof(struct r600_bc_alu));
1083	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE;
1084	r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1085	if (r)
1086		return r;
1087	alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1088	alu.src[1].sel = ctx->temp_reg;
1089	alu.dst.sel = ctx->temp_reg;
1090	alu.dst.write = 1;
1091	alu.last = 1;
1092	r = r600_bc_add_alu(ctx->bc, &alu);
1093	if (r)
1094		return r;
1095	/* POW(a,b) = EXP2(b * LOG2(a)) */
1096	memset(&alu, 0, sizeof(struct r600_bc_alu));
1097	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
1098	alu.src[0].sel = ctx->temp_reg;
1099	alu.dst.sel = ctx->temp_reg;
1100	alu.dst.write = 1;
1101	alu.last = 1;
1102	r = r600_bc_add_alu(ctx->bc, &alu);
1103	if (r)
1104		return r;
1105	return tgsi_helper_tempx_replicate(ctx);
1106}
1107
1108static int tgsi_ssg(struct r600_shader_ctx *ctx)
1109{
1110	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1111	struct r600_bc_alu alu;
1112	struct r600_bc_alu_src r600_src[3];
1113	int i, r;
1114
1115	r = tgsi_split_constant(ctx, r600_src);
1116	if (r)
1117		return r;
1118
1119	/* tmp = (src > 0 ? 1 : src) */
1120	for (i = 0; i < 4; i++) {
1121		memset(&alu, 0, sizeof(struct r600_bc_alu));
1122		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
1123		alu.is_op3 = 1;
1124		alu.dst.sel = ctx->temp_reg;
1125		alu.dst.write = 1;
1126
1127		alu.src[0] = r600_src[0];
1128		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1129
1130		alu.src[1].sel = V_SQ_ALU_SRC_1;
1131
1132		alu.src[2] = r600_src[0];
1133		alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1134		if (i == 3)
1135			alu.last = 1;
1136		r = r600_bc_add_alu(ctx->bc, &alu);
1137		if (r)
1138			return r;
1139	}
1140
1141	/* dst = (-tmp > 0 ? -1 : tmp) */
1142	for (i = 0; i < 4; i++) {
1143		memset(&alu, 0, sizeof(struct r600_bc_alu));
1144		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
1145		alu.is_op3 = 1;
1146		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1147		if (r)
1148			return r;
1149
1150		alu.src[0].sel = ctx->temp_reg;
1151		alu.src[0].neg = 1;
1152
1153		alu.src[1].sel = V_SQ_ALU_SRC_1;
1154		alu.src[1].neg = 1;
1155
1156		alu.src[2].sel = ctx->temp_reg;
1157
1158		alu.dst.write = 1;
1159		if (i == 3)
1160			alu.last = 1;
1161		r = r600_bc_add_alu(ctx->bc, &alu);
1162		if (r)
1163			return r;
1164	}
1165	return 0;
1166}
1167
1168static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1169{
1170	struct r600_bc_alu alu;
1171	int i, r;
1172
1173	r = r600_bc_add_literal(ctx->bc, ctx->value);
1174	if (r)
1175		return r;
1176	for (i = 0; i < 4; i++) {
1177		memset(&alu, 0, sizeof(struct r600_bc_alu));
1178		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1179			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
1180			alu.dst.chan = i;
1181		} else {
1182			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1183			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1184			if (r)
1185				return r;
1186			alu.src[0].sel = ctx->temp_reg;
1187			alu.src[0].chan = i;
1188		}
1189		if (i == 3) {
1190			alu.last = 1;
1191		}
1192		r = r600_bc_add_alu(ctx->bc, &alu);
1193		if (r)
1194			return r;
1195	}
1196	return 0;
1197}
1198
1199static int tgsi_op3(struct r600_shader_ctx *ctx)
1200{
1201	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1202	struct r600_bc_alu_src r600_src[3];
1203	struct r600_bc_alu alu;
1204	int i, j, r;
1205
1206	r = tgsi_split_constant(ctx, r600_src);
1207	if (r)
1208		return r;
1209	/* do it in 2 steps as op3 doesn't support a writemask */
1210	for (i = 0; i < 4; i++) {
1211		memset(&alu, 0, sizeof(struct r600_bc_alu));
1212		alu.inst = ctx->inst_info->r600_opcode;
1213		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1214			alu.src[j] = r600_src[j];
1215			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1216		}
1217		alu.dst.sel = ctx->temp_reg;
1218		alu.dst.chan = i;
1219		alu.dst.write = 1;
1220		alu.is_op3 = 1;
1221		if (i == 3) {
1222			alu.last = 1;
1223		}
1224		r = r600_bc_add_alu(ctx->bc, &alu);
1225		if (r)
1226			return r;
1227	}
1228	return tgsi_helper_copy(ctx, inst);
1229}
1230
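/*
 * Dot products all map to DOT4; for DP2/DP3 the channels that must not
 * contribute are fed constant zero on both operands.
 */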
1231static int tgsi_dp(struct r600_shader_ctx *ctx)
1232{
1233	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1234	struct r600_bc_alu_src r600_src[3];
1235	struct r600_bc_alu alu;
1236	int i, j, r;
1237
1238	r = tgsi_split_constant(ctx, r600_src);
1239	if (r)
1240		return r;
1241	for (i = 0; i < 4; i++) {
1242		memset(&alu, 0, sizeof(struct r600_bc_alu));
1243		alu.inst = ctx->inst_info->r600_opcode;
1244		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1245			alu.src[j] = r600_src[j];
1246			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1247		}
1248		alu.dst.sel = ctx->temp_reg;
1249		alu.dst.chan = i;
1250		alu.dst.write = 1;
1251		/* handle some special cases */
1252		switch (ctx->inst_info->tgsi_opcode) {
1253		case TGSI_OPCODE_DP2:
1254			if (i > 1) {
1255				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1256				alu.src[0].chan = alu.src[1].chan = 0;
1257			}
1258			break;
1259		case TGSI_OPCODE_DP3:
1260			if (i > 2) {
1261				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1262				alu.src[0].chan = alu.src[1].chan = 0;
1263			}
1264			break;
1265		default:
1266			break;
1267		}
1268		if (i == 3) {
1269			alu.last = 1;
1270		}
1271		r = r600_bc_add_alu(ctx->bc, &alu);
1272		if (r)
1273			return r;
1274	}
1275	return tgsi_helper_copy(ctx, inst);
1276}
1277
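/*
 * Texture sampling. For TXP the coordinates are first divided by w
 * (RECIP_IEEE then three MULs into a temp, with w forced to 1.0); source
 * coordinates that do not already live in a temporary are copied into one.
 * The resource and sampler ids come from the sampler operand, and
 * coordinates are marked normalized for every target except
 * TGSI_TEXTURE_RECT.
 */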
1278static int tgsi_tex(struct r600_shader_ctx *ctx)
1279{
1280	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1281	struct r600_bc_tex tex;
1282	struct r600_bc_alu alu;
1283	unsigned src_gpr;
1284	int r, i;
1285
1286	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1287
1288	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1289		/* Add perspective divide */
1290		memset(&alu, 0, sizeof(struct r600_bc_alu));
1291		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
1292		alu.src[0].sel = src_gpr;
1293		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1294		alu.dst.sel = ctx->temp_reg;
1295		alu.dst.chan = 3;
1296		alu.last = 1;
1297		alu.dst.write = 1;
1298		r = r600_bc_add_alu(ctx->bc, &alu);
1299		if (r)
1300			return r;
1301
1302		for (i = 0; i < 3; i++) {
1303			memset(&alu, 0, sizeof(struct r600_bc_alu));
1304			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1305			alu.src[0].sel = ctx->temp_reg;
1306			alu.src[0].chan = 3;
1307			alu.src[1].sel = src_gpr;
1308			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1309			alu.dst.sel = ctx->temp_reg;
1310			alu.dst.chan = i;
1311			alu.dst.write = 1;
1312			r = r600_bc_add_alu(ctx->bc, &alu);
1313			if (r)
1314				return r;
1315		}
1316		memset(&alu, 0, sizeof(struct r600_bc_alu));
1317		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1318		alu.src[0].sel = V_SQ_ALU_SRC_1;
1319		alu.src[0].chan = 0;
1320		alu.dst.sel = ctx->temp_reg;
1321		alu.dst.chan = 3;
1322		alu.last = 1;
1323		alu.dst.write = 1;
1324		r = r600_bc_add_alu(ctx->bc, &alu);
1325		if (r)
1326			return r;
1327		src_gpr = ctx->temp_reg;
1328	} else if (inst->Src[0].Register.File != TGSI_FILE_TEMPORARY) {
1329		for (i = 0; i < 4; i++) {
1330			memset(&alu, 0, sizeof(struct r600_bc_alu));
1331			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
1332			alu.src[0].sel = src_gpr;
1333			alu.src[0].chan = i;
1334			alu.dst.sel = ctx->temp_reg;
1335			alu.dst.chan = i;
1336			if (i == 3)
1337				alu.last = 1;
1338			alu.dst.write = 1;
1339			r = r600_bc_add_alu(ctx->bc, &alu);
1340			if (r)
1341				return r;
1342		}
1343		src_gpr = ctx->temp_reg;
1344	}
1345
1346	memset(&tex, 0, sizeof(struct r600_bc_tex));
1347	tex.inst = ctx->inst_info->r600_opcode;
1348	tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1349	tex.sampler_id = tex.resource_id;
1350	tex.src_gpr = src_gpr;
1351	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1352	tex.dst_sel_x = 0;
1353	tex.dst_sel_y = 1;
1354	tex.dst_sel_z = 2;
1355	tex.dst_sel_w = 3;
1356	tex.src_sel_x = 0;
1357	tex.src_sel_y = 1;
1358	tex.src_sel_z = 2;
1359	tex.src_sel_w = 3;
1360
1361	if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1362		tex.coord_type_x = 1;
1363		tex.coord_type_y = 1;
1364		tex.coord_type_z = 1;
1365		tex.coord_type_w = 1;
1366	}
1367	return r600_bc_add_tex(ctx->bc, &tex);
1368}
1369
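/*
 * LRP: dst = src0 * src1 + (1 - src0) * src2, built in three passes over a
 * temporary (1 - src0, then * src2, then MULADD with src0 * src1) before
 * the result is copied out with the write mask applied.
 */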
1370static int tgsi_lrp(struct r600_shader_ctx *ctx)
1371{
1372	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1373	struct r600_bc_alu_src r600_src[3];
1374	struct r600_bc_alu alu;
1375	unsigned i;
1376	int r;
1377
1378	r = tgsi_split_constant(ctx, r600_src);
1379	if (r)
1380		return r;
1381	/* 1 - src0 */
1382	for (i = 0; i < 4; i++) {
1383		memset(&alu, 0, sizeof(struct r600_bc_alu));
1384		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD;
1385		alu.src[0].sel = V_SQ_ALU_SRC_1;
1386		alu.src[0].chan = 0;
1387		alu.src[1] = r600_src[0];
1388		alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1389		alu.src[1].neg = 1;
1390		alu.dst.sel = ctx->temp_reg;
1391		alu.dst.chan = i;
1392		if (i == 3) {
1393			alu.last = 1;
1394		}
1395		alu.dst.write = 1;
1396		r = r600_bc_add_alu(ctx->bc, &alu);
1397		if (r)
1398			return r;
1399	}
1400	r = r600_bc_add_literal(ctx->bc, ctx->value);
1401	if (r)
1402		return r;
1403
1404	/* (1 - src0) * src2 */
1405	for (i = 0; i < 4; i++) {
1406		memset(&alu, 0, sizeof(struct r600_bc_alu));
1407		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1408		alu.src[0].sel = ctx->temp_reg;
1409		alu.src[0].chan = i;
1410		alu.src[1] = r600_src[2];
1411		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1412		alu.dst.sel = ctx->temp_reg;
1413		alu.dst.chan = i;
1414		if (i == 3) {
1415			alu.last = 1;
1416		}
1417		alu.dst.write = 1;
1418		r = r600_bc_add_alu(ctx->bc, &alu);
1419		if (r)
1420			return r;
1421	}
1422	r = r600_bc_add_literal(ctx->bc, ctx->value);
1423	if (r)
1424		return r;
1425
1426	/* src0 * src1 + (1 - src0) * src2 */
1427	for (i = 0; i < 4; i++) {
1428		memset(&alu, 0, sizeof(struct r600_bc_alu));
1429		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
1430		alu.is_op3 = 1;
1431		alu.src[0] = r600_src[0];
1432		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1433		alu.src[1] = r600_src[1];
1434		alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
1435		alu.src[2].sel = ctx->temp_reg;
1436		alu.src[2].chan = i;
1437		alu.dst.sel = ctx->temp_reg;
1438		alu.dst.chan = i;
1439		if (i == 3) {
1440			alu.last = 1;
1441		}
1442		r = r600_bc_add_alu(ctx->bc, &alu);
1443		if (r)
1444			return r;
1445	}
1446	return tgsi_helper_copy(ctx, inst);
1447}
1448
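/*
 * CMP (dst = src0 < 0.0 ? src1 : src2) maps to CNDGE with the last two
 * operands swapped: dst = src0 >= 0.0 ? src2 : src1.
 */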
1449static int tgsi_cmp(struct r600_shader_ctx *ctx)
1450{
1451	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1452	struct r600_bc_alu_src r600_src[3];
1453	struct r600_bc_alu alu;
1454	int use_temp = 0;
1455	int i, r;
1456
1457	r = tgsi_split_constant(ctx, r600_src);
1458	if (r)
1459		return r;
1460
1461	if (inst->Dst[0].Register.WriteMask != 0xf)
1462		use_temp = 1;
1463
1464	for (i = 0; i < 4; i++) {
1465		memset(&alu, 0, sizeof(struct r600_bc_alu));
1466		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE;
1467		alu.src[0] = r600_src[0];
1468		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1469
1470		alu.src[1] = r600_src[2];
1471		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1472
1473		alu.src[2] = r600_src[1];
1474		alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
1475
1476		if (use_temp)
1477			alu.dst.sel = ctx->temp_reg;
1478		else {
1479			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1480			if (r)
1481				return r;
1482		}
1483		alu.dst.chan = i;
1484		alu.dst.write = 1;
1485		alu.is_op3 = 1;
1486		if (i == 3)
1487			alu.last = 1;
1488		r = r600_bc_add_alu(ctx->bc, &alu);
1489		if (r)
1490			return r;
1491	}
1492	if (use_temp)
1493		return tgsi_helper_copy(ctx, inst);
1494	return 0;
1495}
1496
1497static int tgsi_xpd(struct r600_shader_ctx *ctx)
1498{
1499	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1500	struct r600_bc_alu_src r600_src[3];
1501	struct r600_bc_alu alu;
1502	uint32_t use_temp = 0;
1503	int i, r;
1504
1505	if (inst->Dst[0].Register.WriteMask != 0xf)
1506		use_temp = 1;
1507
1508	r = tgsi_split_constant(ctx, r600_src);
1509	if (r)
1510		return r;
1511
1512	for (i = 0; i < 4; i++) {
1513		memset(&alu, 0, sizeof(struct r600_bc_alu));
1514		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
1515
1516		alu.src[0] = r600_src[0];
1517		switch (i) {
1518		case 0:
1519			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
1520			break;
1521		case 1:
1522			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1523			break;
1524		case 2:
1525			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1526			break;
1527		case 3:
1528			alu.src[0].sel = V_SQ_ALU_SRC_0;
1529			alu.src[0].chan = i;
1530		}
1531
1532		alu.src[1] = r600_src[1];
1533		switch (i) {
1534		case 0:
1535			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
1536			break;
1537		case 1:
1538			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
1539			break;
1540		case 2:
1541			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
1542			break;
1543		case 3:
1544			alu.src[1].sel = V_SQ_ALU_SRC_0;
1545			alu.src[1].chan = i;
1546		}
1547
1548		alu.dst.sel = ctx->temp_reg;
1549		alu.dst.chan = i;
1550		alu.dst.write = 1;
1551
1552		if (i == 3)
1553			alu.last = 1;
1554		r = r600_bc_add_alu(ctx->bc, &alu);
1555		if (r)
1556			return r;
1557	}
1558
1559	for (i = 0; i < 4; i++) {
1560		memset(&alu, 0, sizeof(struct r600_bc_alu));
1561		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
1562
1563		alu.src[0] = r600_src[0];
1564		switch (i) {
1565		case 0:
1566			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1567			break;
1568		case 1:
1569			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
1570			break;
1571		case 2:
1572			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1573			break;
1574		case 3:
1575			alu.src[0].sel = V_SQ_ALU_SRC_0;
1576			alu.src[0].chan = i;
1577		}
1578
1579		alu.src[1] = r600_src[1];
1580		switch (i) {
1581		case 0:
1582			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
1583			break;
1584		case 1:
1585			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
1586			break;
1587		case 2:
1588			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
1589			break;
1590		case 3:
1591			alu.src[1].sel = V_SQ_ALU_SRC_0;
1592			alu.src[1].chan = i;
1593		}
1594
1595		alu.src[2].sel = ctx->temp_reg;
1596		alu.src[2].neg = 1;
1597		alu.src[2].chan = i;
1598
1599		if (use_temp)
1600			alu.dst.sel = ctx->temp_reg;
1601		else {
1602			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1603			if (r)
1604				return r;
1605		}
1606		alu.dst.chan = i;
1607		alu.dst.write = 1;
1608		alu.is_op3 = 1;
1609		if (i == 3)
1610			alu.last = 1;
1611		r = r600_bc_add_alu(ctx->bc, &alu);
1612		if (r)
1613			return r;
1614	}
1615	if (use_temp)
1616		return tgsi_helper_copy(ctx, inst);
1617	return 0;
1618}
1619
1620
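/*
 * Translation table indexed by TGSI opcode: each entry carries the opcode,
 * an is_op3 flag, the r600 instruction it maps to (NOP when the handler
 * does not use it) and the handler that emits the bytecode. The numeric
 * entries keep the gaps of the TGSI opcode space so that indexing by
 * opcode stays valid.
 */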
1621static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
1622	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1623	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
1624	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
1625	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
1626	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
1627	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1628	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1629	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
1630	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
1631	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1632	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1633	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1634	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
1635	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
1636	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_slt},
1637	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
1638	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
1639	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
1640	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
1641	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1642	/* gap */
1643	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1644	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1645	/* gap */
1646	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1647	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1648	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
1649	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1650	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
1651	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1652	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
1653	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
1654	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
1655	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
1656	/* gap */
1657	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1658	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
1659	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1660	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1661	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
1662	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
1663	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
1664	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},  /* predicated kill */
1665	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1666	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1667	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1668	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1669	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1670	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
1671	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1672	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
1673	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
1674	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_slt},
1675	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
1676	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1677	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
1678	{TGSI_OPCODE_TXD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1679	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
1680	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1681	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1682	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1683	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1684	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1685	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1686	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1687	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1688	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1689	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1690	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
1691	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
1692	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1693	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
1694	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1695	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1696	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1697	{TGSI_OPCODE_TXL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1698	{TGSI_OPCODE_BRK,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1699	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1700	/* gap */
1701	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1702	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1703	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1704	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1705	/* gap */
1706	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1707	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1708	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1709	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1710	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1711	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1712	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1713	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
1714	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1715	/* gap */
1716	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1717	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1718	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1719	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1720	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1721	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1722	{TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1723	{TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1724	{TGSI_OPCODE_CONT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1725	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1726	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1727	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1728	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1729	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1730	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1731	/* gap */
1732	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1733	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1734	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1735	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1736	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1737	/* gap */
1738	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1739	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1740	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1741	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1742	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1743	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1744	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1745	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1746	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
1747	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
1748	/* gap */
1749	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1750	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1751	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1752	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1753	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1754	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1755	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1756	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1757	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1758	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1759	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1760	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1761	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1762	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1763	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1764	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1765	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1766	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1767	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1768	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1769	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1770	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1771	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1772	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1773	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1774	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1775	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1776	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1777};
1778