r600_shader.c revision 094d66f45992830929d620782c70836b4b9b4a37
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "pipe/p_shader_tokens.h"
24#include "tgsi/tgsi_parse.h"
25#include "tgsi/tgsi_scan.h"
26#include "tgsi/tgsi_dump.h"
27#include "util/u_format.h"
28#include "r600_screen.h"
29#include "r600_context.h"
30#include "r600_shader.h"
31#include "r600_asm.h"
32#include "r600_sq.h"
33#include "r600d.h"
34#include <stdio.h>
35#include <errno.h>
36
37
38struct r600_shader_tgsi_instruction;
39
40struct r600_shader_ctx {
41	struct tgsi_shader_info			info;
42	struct tgsi_parse_context		parse;
43	const struct tgsi_token			*tokens;
44	unsigned				type;
45	unsigned				file_offset[TGSI_FILE_COUNT];
46	unsigned				temp_reg;
47	struct r600_shader_tgsi_instruction	*inst_info;
48	struct r600_bc				*bc;
49	struct r600_shader			*shader;
50	u32					value[4];
51};
52
/*
 * One entry of the opcode dispatch table: which r600 opcode a TGSI opcode
 * maps to and which handler emits the bytecode for it.
 */
struct r600_shader_tgsi_instruction {
	/* TGSI opcode this entry describes */
	unsigned tgsi_opcode;
	/* non-zero when the r600 opcode is a three-operand (OP3) one */
	unsigned is_op3;
	/* r600 ALU/TEX opcode handed to the emit helpers */
	unsigned r600_opcode;
	/* handler translating the current instruction; 0 or negative errno */
	int (*process)(struct r600_shader_ctx *ctx);
};
59
60static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
61static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
62
63static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader)
64{
65	struct r600_context *rctx = r600_context(ctx);
66	const struct util_format_description *desc;
67	enum pipe_format resource_format[160];
68	unsigned i, nresources = 0;
69	struct r600_bc *bc = &shader->bc;
70	struct r600_bc_cf *cf;
71	struct r600_bc_vtx *vtx;
72
73	if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
74		return 0;
75	for (i = 0; i < rctx->vertex_elements->count; i++) {
76		resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
77	}
78	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
79		switch (cf->inst) {
80		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
81		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
82			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
83				desc = util_format_description(resource_format[vtx->buffer_id]);
84				if (desc == NULL) {
85					R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
86					return -EINVAL;
87				}
88				vtx->dst_sel_x = desc->swizzle[0];
89				vtx->dst_sel_y = desc->swizzle[1];
90				vtx->dst_sel_z = desc->swizzle[2];
91				vtx->dst_sel_w = desc->swizzle[3];
92			}
93			break;
94		default:
95			break;
96		}
97	}
98	return r600_bc_build(&shader->bc);
99}
100
101int r600_pipe_shader_create(struct pipe_context *ctx,
102			struct r600_context_state *rpshader,
103			const struct tgsi_token *tokens)
104{
105	struct r600_screen *rscreen = r600_screen(ctx->screen);
106	int r;
107
108fprintf(stderr, "--------------------------------------------------------------\n");
109tgsi_dump(tokens, 0);
110	if (rpshader == NULL)
111		return -ENOMEM;
112	rpshader->shader.family = radeon_get_family(rscreen->rw);
113	r = r600_shader_from_tgsi(tokens, &rpshader->shader);
114	if (r) {
115		R600_ERR("translation from TGSI failed !\n");
116		return r;
117	}
118	r = r600_bc_build(&rpshader->shader.bc);
119	if (r) {
120		R600_ERR("building bytecode failed !\n");
121		return r;
122	}
123fprintf(stderr, "______________________________________________________________\n");
124	return 0;
125}
126
127static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader)
128{
129	struct r600_screen *rscreen = r600_screen(ctx->screen);
130	struct r600_shader *rshader = &rpshader->shader;
131	struct radeon_state *state;
132	unsigned i, j, tmp;
133
134	rpshader->rstate = radeon_state_decref(rpshader->rstate);
135	state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER);
136	if (state == NULL)
137		return -ENOMEM;
138	for (i = 0; i < 10; i++) {
139		state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0;
140	}
141	/* so far never got proper semantic id from tgsi */
142	for (i = 0; i < 32; i++) {
143		tmp = i << ((i & 3) * 8);
144		state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp;
145	}
146	state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2);
147	state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
148	rpshader->rstate = state;
149	rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
150	rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo);
151	rpshader->rstate->nbo = 2;
152	rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
153	return radeon_state_pm4(state);
154}
155
156static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader)
157{
158	struct r600_screen *rscreen = r600_screen(ctx->screen);
159	struct r600_shader *rshader = &rpshader->shader;
160	struct radeon_state *state;
161	unsigned i, tmp;
162
163	rpshader->rstate = radeon_state_decref(rpshader->rstate);
164	state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER);
165	if (state == NULL)
166		return -ENOMEM;
167	for (i = 0; i < rshader->ninput; i++) {
168		tmp = S_028644_SEMANTIC(i);
169		tmp |= S_028644_SEL_CENTROID(1);
170		if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
171			rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) {
172			tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
173		}
174		state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp;
175	}
176	state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) |
177							S_0286CC_PERSP_GRADIENT_ENA(1);
178	state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000;
179	state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
180	state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = 0x00000002;
181	rpshader->rstate = state;
182	rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
183	rpshader->rstate->nbo = 1;
184	rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
185	return radeon_state_pm4(state);
186}
187
188static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader)
189{
190	struct r600_screen *rscreen = r600_screen(ctx->screen);
191	struct r600_context *rctx = r600_context(ctx);
192	struct r600_shader *rshader = &rpshader->shader;
193	int r;
194
195	/* copy new shader */
196	radeon_bo_decref(rscreen->rw, rpshader->bo);
197	rpshader->bo = NULL;
198	rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4,
199				4096, NULL);
200	if (rpshader->bo == NULL) {
201		return -ENOMEM;
202	}
203	radeon_bo_map(rscreen->rw, rpshader->bo);
204	memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4);
205	radeon_bo_unmap(rscreen->rw, rpshader->bo);
206	/* build state */
207	rshader->flat_shade = rctx->flat_shade;
208	switch (rshader->processor_type) {
209	case TGSI_PROCESSOR_VERTEX:
210		r = r600_pipe_shader_vs(ctx, rpshader);
211		break;
212	case TGSI_PROCESSOR_FRAGMENT:
213		r = r600_pipe_shader_ps(ctx, rpshader);
214		break;
215	default:
216		r = -EINVAL;
217		break;
218	}
219	return r;
220}
221
222int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader)
223{
224	struct r600_context *rctx = r600_context(ctx);
225	int r;
226
227	if (rpshader == NULL)
228		return -EINVAL;
229	/* there should be enough input */
230	if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) {
231		R600_ERR("%d resources provided, expecting %d\n",
232			rctx->vertex_elements->count, rpshader->shader.bc.nresource);
233		return -EINVAL;
234	}
235	r = r600_shader_update(ctx, &rpshader->shader);
236	if (r)
237		return r;
238	return r600_pipe_shader(ctx, rpshader);
239}
240
241static int tgsi_is_supported(struct r600_shader_ctx *ctx)
242{
243	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
244	int j;
245
246	if (i->Instruction.NumDstRegs > 1) {
247		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
248		return -EINVAL;
249	}
250	if (i->Instruction.Predicate) {
251		R600_ERR("predicate unsupported\n");
252		return -EINVAL;
253	}
254	if (i->Instruction.Label) {
255		R600_ERR("label unsupported\n");
256		return -EINVAL;
257	}
258	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
259		if (i->Src[j].Register.Indirect ||
260			i->Src[j].Register.Dimension ||
261			i->Src[j].Register.Absolute) {
262			R600_ERR("unsupported src (indirect|dimension|absolute)\n");
263			return -EINVAL;
264		}
265	}
266	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
267		if (i->Dst[j].Register.Indirect || i->Dst[j].Register.Dimension) {
268			R600_ERR("unsupported dst (indirect|dimension)\n");
269			return -EINVAL;
270		}
271	}
272	return 0;
273}
274
275static int tgsi_declaration(struct r600_shader_ctx *ctx)
276{
277	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
278	struct r600_bc_vtx vtx;
279	unsigned i;
280	int r;
281
282	switch (d->Declaration.File) {
283	case TGSI_FILE_INPUT:
284		i = ctx->shader->ninput++;
285		ctx->shader->input[i].name = d->Semantic.Name;
286		ctx->shader->input[i].sid = d->Semantic.Index;
287		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
288		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
289		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
290			/* turn input into fetch */
291			memset(&vtx, 0, sizeof(struct r600_bc_vtx));
292			vtx.inst = 0;
293			vtx.fetch_type = 0;
294			vtx.buffer_id = i;
295			/* register containing the index into the buffer */
296			vtx.src_gpr = 0;
297			vtx.src_sel_x = 0;
298			vtx.mega_fetch_count = 0x1F;
299			vtx.dst_gpr = ctx->shader->input[i].gpr;
300			vtx.dst_sel_x = 0;
301			vtx.dst_sel_y = 1;
302			vtx.dst_sel_z = 2;
303			vtx.dst_sel_w = 3;
304			r = r600_bc_add_vtx(ctx->bc, &vtx);
305			if (r)
306				return r;
307		}
308		break;
309	case TGSI_FILE_OUTPUT:
310		i = ctx->shader->noutput++;
311		ctx->shader->output[i].name = d->Semantic.Name;
312		ctx->shader->output[i].sid = d->Semantic.Index;
313		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
314		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
315		break;
316	case TGSI_FILE_CONSTANT:
317	case TGSI_FILE_TEMPORARY:
318	case TGSI_FILE_SAMPLER:
319		break;
320	default:
321		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
322		return -EINVAL;
323	}
324	return 0;
325}
326
327int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
328{
329	struct tgsi_full_immediate *immediate;
330	struct r600_shader_ctx ctx;
331	struct r600_bc_output output;
332	unsigned opcode;
333	int i, r = 0, pos0;
334
335	ctx.bc = &shader->bc;
336	ctx.shader = shader;
337	r = r600_bc_init(ctx.bc, shader->family);
338	if (r)
339		return r;
340	ctx.tokens = tokens;
341	tgsi_scan_shader(tokens, &ctx.info);
342	tgsi_parse_init(&ctx.parse, tokens);
343	ctx.type = ctx.parse.FullHeader.Processor.Processor;
344	shader->processor_type = ctx.type;
345
346	/* register allocations */
347	/* Values [0,127] correspond to GPR[0..127].
348	 * Values [256,511] correspond to cfile constants c[0..255].
349	 * Other special values are shown in the list below.
350	 * 248	SQ_ALU_SRC_0: special constant 0.0.
351	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
352	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
353	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
354	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
355	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
356	 * 254	SQ_ALU_SRC_PV: previous vector result.
357	 * 255	SQ_ALU_SRC_PS: previous scalar result.
358	 */
359	for (i = 0; i < TGSI_FILE_COUNT; i++) {
360		ctx.file_offset[i] = 0;
361	}
362	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
363		ctx.file_offset[TGSI_FILE_INPUT] = 1;
364	}
365	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
366						ctx.info.file_count[TGSI_FILE_INPUT];
367	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
368						ctx.info.file_count[TGSI_FILE_OUTPUT];
369	ctx.file_offset[TGSI_FILE_CONSTANT] = 256;
370	ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
371	ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
372			ctx.info.file_count[TGSI_FILE_TEMPORARY];
373
374	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
375		tgsi_parse_token(&ctx.parse);
376		switch (ctx.parse.FullToken.Token.Type) {
377		case TGSI_TOKEN_TYPE_IMMEDIATE:
378			immediate = &ctx.parse.FullToken.FullImmediate;
379			ctx.value[0] = immediate->u[0].Uint;
380			ctx.value[1] = immediate->u[1].Uint;
381			ctx.value[2] = immediate->u[2].Uint;
382			ctx.value[3] = immediate->u[3].Uint;
383			break;
384		case TGSI_TOKEN_TYPE_DECLARATION:
385			r = tgsi_declaration(&ctx);
386			if (r)
387				goto out_err;
388			break;
389		case TGSI_TOKEN_TYPE_INSTRUCTION:
390			r = tgsi_is_supported(&ctx);
391			if (r)
392				goto out_err;
393			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
394			ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
395			r = ctx.inst_info->process(&ctx);
396			if (r)
397				goto out_err;
398			r = r600_bc_add_literal(ctx.bc, ctx.value);
399			if (r)
400				goto out_err;
401			break;
402		default:
403			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
404			r = -EINVAL;
405			goto out_err;
406		}
407	}
408	/* export output */
409	for (i = 0, pos0 = 0; i < shader->noutput; i++) {
410		memset(&output, 0, sizeof(struct r600_bc_output));
411		output.gpr = shader->output[i].gpr;
412		output.elem_size = 3;
413		output.swizzle_x = 0;
414		output.swizzle_y = 1;
415		output.swizzle_z = 2;
416		output.swizzle_w = 3;
417		output.barrier = 1;
418		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
419		output.array_base = i - pos0;
420		output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE;
421		switch (ctx.type == TGSI_PROCESSOR_VERTEX) {
422		case TGSI_PROCESSOR_VERTEX:
423			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
424				output.array_base = 60;
425				output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
426				/* position doesn't count in array_base */
427				pos0 = 1;
428			}
429			break;
430		case TGSI_PROCESSOR_FRAGMENT:
431			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
432				output.array_base = 0;
433				output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
434			} else {
435				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
436				r = -EINVAL;
437				goto out_err;
438			}
439			break;
440		default:
441			R600_ERR("unsupported processor type %d\n", ctx.type);
442			r = -EINVAL;
443			goto out_err;
444		}
445		if (i == (shader->noutput - 1)) {
446			output.end_of_program = 1;
447		}
448		r = r600_bc_add_output(ctx.bc, &output);
449		if (r)
450			goto out_err;
451	}
452	tgsi_parse_free(&ctx.parse);
453	return 0;
454out_err:
455	tgsi_parse_free(&ctx.parse);
456	return r;
457}
458
459static int tgsi_unsupported(struct r600_shader_ctx *ctx)
460{
461	R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
462	return -EINVAL;
463}
464
/*
 * Instruction handler that emits nothing; it ignores its context and
 * always reports success.
 */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	(void)ctx;
	return 0;
}
469
470static int tgsi_src(struct r600_shader_ctx *ctx,
471			const struct tgsi_full_src_register *tgsi_src,
472			unsigned swizzle,
473			struct r600_bc_alu_src *r600_src)
474{
475	r600_src->sel = tgsi_src->Register.Index;
476	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
477		r600_src->sel = 0;
478	}
479	r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
480	switch (swizzle) {
481	case 0:
482		r600_src->chan = tgsi_src->Register.SwizzleX;
483		break;
484	case 1:
485		r600_src->chan = tgsi_src->Register.SwizzleY;
486		break;
487	case 2:
488		r600_src->chan = tgsi_src->Register.SwizzleZ;
489		break;
490	case 3:
491		r600_src->chan = tgsi_src->Register.SwizzleW;
492		break;
493	default:
494		return -EINVAL;
495	}
496	return 0;
497}
498
499static int tgsi_dst(struct r600_shader_ctx *ctx,
500			const struct tgsi_full_dst_register *tgsi_dst,
501			unsigned swizzle,
502			struct r600_bc_alu_dst *r600_dst)
503{
504	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
505
506	r600_dst->sel = tgsi_dst->Register.Index;
507	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
508	r600_dst->chan = swizzle;
509	r600_dst->write = 1;
510	if (inst->Instruction.Saturate) {
511		r600_dst->clamp = 1;
512	}
513	return 0;
514}
515
516static int tgsi_op2(struct r600_shader_ctx *ctx)
517{
518	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
519	struct r600_bc_alu alu;
520	int i, j, r;
521
522	for (i = 0; i < 4; i++) {
523		memset(&alu, 0, sizeof(struct r600_bc_alu));
524		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
525			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
526			alu.dst.chan = i;
527		} else {
528			alu.inst = ctx->inst_info->r600_opcode;
529			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
530				r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]);
531				if (r)
532					return r;
533			}
534			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
535			if (r)
536				return r;
537		}
538		/* handle some special cases */
539		switch (ctx->inst_info->tgsi_opcode) {
540		case TGSI_OPCODE_SUB:
541			alu.src[1].neg = 1;
542			break;
543		case TGSI_OPCODE_ABS:
544			alu.src[0].abs = 1;
545			break;
546		default:
547			break;
548		}
549		if (i == 3) {
550			alu.last = 1;
551		}
552		r = r600_bc_add_alu(ctx->bc, &alu);
553		if (r)
554			return r;
555	}
556	return 0;
557}
558
559static int tgsi_kill(struct r600_shader_ctx *ctx)
560{
561	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
562	struct r600_bc_alu alu;
563	int i, r;
564
565	for (i = 0; i < 4; i++) {
566		memset(&alu, 0, sizeof(struct r600_bc_alu));
567		alu.inst = ctx->inst_info->r600_opcode;
568		alu.dst.chan = i;
569		alu.src[0].sel = 248;
570		r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]);
571		if (r)
572			return r;
573		if (i == 3) {
574			alu.last = 1;
575		}
576		r = r600_bc_add_alu(ctx->bc, &alu);
577		if (r)
578			return r;
579	}
580	return 0;
581}
582
583static int tgsi_slt(struct r600_shader_ctx *ctx)
584{
585	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
586	struct r600_bc_alu alu;
587	int i, r;
588
589	for (i = 0; i < 4; i++) {
590		memset(&alu, 0, sizeof(struct r600_bc_alu));
591		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
592			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
593			alu.dst.chan = i;
594		} else {
595			alu.inst = ctx->inst_info->r600_opcode;
596			r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]);
597			if (r)
598				return r;
599			r = tgsi_src(ctx, &inst->Src[1], i, &alu.src[0]);
600			if (r)
601				return r;
602			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
603			if (r)
604				return r;
605		}
606		if (i == 3) {
607			alu.last = 1;
608		}
609		r = r600_bc_add_alu(ctx->bc, &alu);
610		if (r)
611			return r;
612	}
613	return 0;
614}
615
616static int tgsi_lit(struct r600_shader_ctx *ctx)
617{
618	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
619	struct r600_bc_alu alu;
620	int r;
621
622	if (inst->Dst[0].Register.WriteMask & (1 << 0))
623	{
624		/* dst.x, <- 1.0  */
625		memset(&alu, 0, sizeof(struct r600_bc_alu));
626		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
627		alu.src[0].sel  = 249; /*1.0*/
628		alu.src[0].chan = 0;
629		r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
630		if (r)
631			return r;
632		if ((inst->Dst[0].Register.WriteMask & 0xe) == 0)
633			alu.last = 1;
634		r = r600_bc_add_alu(ctx->bc, &alu);
635		if (r)
636			return r;
637	}
638
639
640	if (inst->Dst[0].Register.WriteMask & (1 << 1))
641	{
642		/* dst.y = max(src.x, 0.0) */
643		memset(&alu, 0, sizeof(struct r600_bc_alu));
644		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX;
645		r = tgsi_src(ctx, &inst->Src[0], 0, &alu.src[0]);
646		if (r)
647			return r;
648		alu.src[1].sel  = 248; /*0.0*/
649		alu.src[1].chan = 0;
650		r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
651		if (r)
652			return r;
653		if ((inst->Dst[0].Register.WriteMask & 0xa) == 0)
654			alu.last = 1;
655		r = r600_bc_add_alu(ctx->bc, &alu);
656		if (r)
657			return r;
658	}
659
660	if (inst->Dst[0].Register.WriteMask & (1 << 3))
661	{
662		/* dst.w, <- 1.0  */
663		memset(&alu, 0, sizeof(struct r600_bc_alu));
664		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
665		alu.src[0].sel  = 249;
666		alu.src[0].chan = 0;
667		r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
668		if (r)
669			return r;
670		if ((inst->Dst[0].Register.WriteMask & 0x4) == 0)
671			alu.last = 1;
672		r = r600_bc_add_alu(ctx->bc, &alu);
673		if (r)
674			return r;
675	}
676
677	if (inst->Dst[0].Register.WriteMask & (1 << 2))
678	{
679		int chan;
680		int sel;
681
682		/* dst.z = log(src.y) */
683		memset(&alu, 0, sizeof(struct r600_bc_alu));
684		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED;
685		r = tgsi_src(ctx, &inst->Src[0], 1, &alu.src[0]);
686		if (r)
687			return r;
688		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
689		if (r)
690			return r;
691		alu.last = 1;
692		r = r600_bc_add_alu(ctx->bc, &alu);
693		if (r)
694			return r;
695
696		chan = alu.dst.chan;
697		sel = alu.dst.sel;
698
699		/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
700		memset(&alu, 0, sizeof(struct r600_bc_alu));
701		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
702		r = tgsi_src(ctx, &inst->Src[0], 3, &alu.src[0]);
703		if (r)
704		return r;
705		alu.src[1].sel  = sel;
706		alu.src[1].chan = chan;
707		r = tgsi_src(ctx, &inst->Src[0], 0, &alu.src[2]);
708		if (r)
709			return r;
710		alu.dst.sel = ctx->temp_reg;
711		alu.dst.chan = 0;
712		alu.dst.write = 1;
713		alu.is_op3 = 1;
714		alu.last = 1;
715		r = r600_bc_add_alu(ctx->bc, &alu);
716		if (r)
717			return r;
718
719		/* dst.z = exp(tmp.x) */
720		memset(&alu, 0, sizeof(struct r600_bc_alu));
721		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
722		alu.src[0].sel = ctx->temp_reg;
723		alu.src[0].chan = 0;
724		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
725		if (r)
726			return r;
727		alu.last = 1;
728		r = r600_bc_add_alu(ctx->bc, &alu);
729		if (r)
730			return r;
731	}
732	return 0;
733}
734
735static int tgsi_trans(struct r600_shader_ctx *ctx)
736{
737	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
738	struct r600_bc_alu alu;
739	int i, j, r;
740
741	for (i = 0; i < 4; i++) {
742		memset(&alu, 0, sizeof(struct r600_bc_alu));
743		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
744			alu.inst = ctx->inst_info->r600_opcode;
745			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
746				r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]);
747				if (r)
748					return r;
749			}
750			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
751			if (r)
752				return r;
753			alu.last = 1;
754			r = r600_bc_add_alu(ctx->bc, &alu);
755			if (r)
756				return r;
757		}
758	}
759	return 0;
760}
761
762static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
763{
764	struct r600_bc_alu alu;
765	int i, r;
766
767	r = r600_bc_add_literal(ctx->bc, ctx->value);
768	if (r)
769		return r;
770	for (i = 0; i < 4; i++) {
771		memset(&alu, 0, sizeof(struct r600_bc_alu));
772		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
773			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
774			alu.dst.chan = i;
775		} else {
776			alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
777			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
778			if (r)
779				return r;
780			alu.src[0].sel = ctx->temp_reg;
781			alu.src[0].chan = i;
782		}
783		if (i == 3) {
784			alu.last = 1;
785		}
786		r = r600_bc_add_alu(ctx->bc, &alu);
787		if (r)
788			return r;
789	}
790	return 0;
791}
792
793static int tgsi_op3(struct r600_shader_ctx *ctx)
794{
795	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
796	struct r600_bc_alu alu;
797	int i, j, r;
798
799	/* do it in 2 step as op3 doesn't support writemask */
800	for (i = 0; i < 4; i++) {
801		memset(&alu, 0, sizeof(struct r600_bc_alu));
802		alu.inst = ctx->inst_info->r600_opcode;
803		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
804			r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]);
805			if (r)
806				return r;
807		}
808		alu.dst.sel = ctx->temp_reg;
809		alu.dst.chan = i;
810		alu.dst.write = 1;
811		alu.is_op3 = 1;
812		if (i == 3) {
813			alu.last = 1;
814		}
815		r = r600_bc_add_alu(ctx->bc, &alu);
816		if (r)
817			return r;
818	}
819	return tgsi_helper_copy(ctx, inst);
820}
821
822static int tgsi_dp(struct r600_shader_ctx *ctx)
823{
824	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
825	struct r600_bc_alu alu;
826	int i, j, r;
827
828	for (i = 0; i < 4; i++) {
829		memset(&alu, 0, sizeof(struct r600_bc_alu));
830		alu.inst = ctx->inst_info->r600_opcode;
831		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
832			r = tgsi_src(ctx, &inst->Src[j], i, &alu.src[j]);
833			if (r)
834				return r;
835		}
836		alu.dst.sel = ctx->temp_reg;
837		alu.dst.chan = i;
838		alu.dst.write = 1;
839		/* handle some special cases */
840		switch (ctx->inst_info->tgsi_opcode) {
841		case TGSI_OPCODE_DP2:
842			if (i > 1) {
843				alu.src[0].sel = alu.src[1].sel = 248;
844				alu.src[0].chan = alu.src[1].chan = 0;
845			}
846			break;
847		case TGSI_OPCODE_DP3:
848			if (i > 2) {
849				alu.src[0].sel = alu.src[1].sel = 248;
850				alu.src[0].chan = alu.src[1].chan = 0;
851			}
852			break;
853		default:
854			break;
855		}
856		if (i == 3) {
857			alu.last = 1;
858		}
859		r = r600_bc_add_alu(ctx->bc, &alu);
860		if (r)
861			return r;
862	}
863	return tgsi_helper_copy(ctx, inst);
864}
865
866static int tgsi_tex(struct r600_shader_ctx *ctx)
867{
868	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
869	struct r600_bc_tex tex;
870	struct r600_bc_alu alu;
871	unsigned src_gpr;
872	int r;
873
874	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
875
876	/* Add perspective divide */
877	if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_TXP) {
878		memset(&alu, 0, sizeof(struct r600_bc_alu));
879		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
880		alu.src[0].sel = src_gpr;
881		alu.src[0].chan = 3;
882		alu.dst.sel = ctx->temp_reg;
883		alu.dst.chan = 3;
884		alu.last = 1;
885		alu.dst.write = 1;
886		r = r600_bc_add_alu(ctx->bc, &alu);
887		if (r)
888			return r;
889
890		memset(&alu, 0, sizeof(struct r600_bc_alu));
891		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
892		alu.src[0].sel = ctx->temp_reg;
893		alu.src[0].chan = 3;
894		alu.src[1].sel = src_gpr;
895		alu.src[1].chan = 0;
896		alu.dst.sel = ctx->temp_reg;
897		alu.dst.chan = 0;
898		alu.dst.write = 1;
899		r = r600_bc_add_alu(ctx->bc, &alu);
900		if (r)
901			return r;
902		memset(&alu, 0, sizeof(struct r600_bc_alu));
903		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
904		alu.src[0].sel = ctx->temp_reg;
905		alu.src[0].chan = 3;
906		alu.src[1].sel = src_gpr;
907		alu.src[1].chan = 1;
908		alu.dst.sel = ctx->temp_reg;
909		alu.dst.chan = 1;
910		alu.dst.write = 1;
911		r = r600_bc_add_alu(ctx->bc, &alu);
912		if (r)
913			return r;
914		memset(&alu, 0, sizeof(struct r600_bc_alu));
915		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
916		alu.src[0].sel = ctx->temp_reg;
917		alu.src[0].chan = 3;
918		alu.src[1].sel = src_gpr;
919		alu.src[1].chan = 2;
920		alu.dst.sel = ctx->temp_reg;
921		alu.dst.chan = 2;
922		alu.dst.write = 1;
923		r = r600_bc_add_alu(ctx->bc, &alu);
924		if (r)
925			return r;
926		memset(&alu, 0, sizeof(struct r600_bc_alu));
927		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
928		alu.src[0].sel = 249;
929		alu.src[0].chan = 0;
930		alu.dst.sel = ctx->temp_reg;
931		alu.dst.chan = 3;
932		alu.last = 1;
933		alu.dst.write = 1;
934		r = r600_bc_add_alu(ctx->bc, &alu);
935		if (r)
936			return r;
937		src_gpr = ctx->temp_reg;
938	}
939
940	/* TODO use temp if src_gpr is not a temporary reg (File != TEMPORARY) */
941	memset(&tex, 0, sizeof(struct r600_bc_tex));
942	tex.inst = ctx->inst_info->r600_opcode;
943	tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
944	tex.sampler_id = tex.resource_id;
945	tex.src_gpr = src_gpr;
946	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
947	tex.dst_sel_x = 0;
948	tex.dst_sel_y = 1;
949	tex.dst_sel_z = 2;
950	tex.dst_sel_w = 3;
951	tex.src_sel_x = 0;
952	tex.src_sel_y = 1;
953	tex.src_sel_z = 2;
954	tex.src_sel_w = 3;
955	tex.coord_type_x = 1;
956	tex.coord_type_y = 1;
957	tex.coord_type_z = 1;
958	tex.coord_type_w = 1;
959	return r600_bc_add_tex(ctx->bc, &tex);
960}
961
962static int tgsi_lrp(struct r600_shader_ctx *ctx)
963{
964	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
965	struct r600_bc_alu alu;
966	unsigned i;
967	int r;
968
969	/* 1 - src0 */
970	for (i = 0; i < 4; i++) {
971		memset(&alu, 0, sizeof(struct r600_bc_alu));
972		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD;
973		alu.src[0].sel = 249;
974		alu.src[0].chan = 0;
975		r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[1]);
976		if (r)
977			return r;
978		alu.src[1].neg = 1;
979		alu.dst.sel = ctx->temp_reg;
980		alu.dst.chan = i;
981		if (i == 3) {
982			alu.last = 1;
983		}
984		alu.dst.write = 1;
985		r = r600_bc_add_alu(ctx->bc, &alu);
986		if (r)
987			return r;
988	}
989	r = r600_bc_add_literal(ctx->bc, ctx->value);
990	if (r)
991		return r;
992
993	/* (1 - src0) * src2 */
994	for (i = 0; i < 4; i++) {
995		memset(&alu, 0, sizeof(struct r600_bc_alu));
996		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
997		alu.src[0].sel = ctx->temp_reg;
998		alu.src[0].chan = i;
999		r = tgsi_src(ctx, &inst->Src[2], i, &alu.src[1]);
1000		if (r)
1001			return r;
1002		alu.dst.sel = ctx->temp_reg;
1003		alu.dst.chan = i;
1004		if (i == 3) {
1005			alu.last = 1;
1006		}
1007		alu.dst.write = 1;
1008		r = r600_bc_add_alu(ctx->bc, &alu);
1009		if (r)
1010			return r;
1011	}
1012	r = r600_bc_add_literal(ctx->bc, ctx->value);
1013	if (r)
1014		return r;
1015
1016	/* src0 * src1 + (1 - src0) * src2 */
1017	for (i = 0; i < 4; i++) {
1018		memset(&alu, 0, sizeof(struct r600_bc_alu));
1019		alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
1020		alu.is_op3 = 1;
1021		r = tgsi_src(ctx, &inst->Src[0], i, &alu.src[0]);
1022		if (r)
1023			return r;
1024		r = tgsi_src(ctx, &inst->Src[1], i, &alu.src[1]);
1025		if (r)
1026			return r;
1027		alu.src[2].sel = ctx->temp_reg;
1028		alu.src[2].chan = i;
1029		alu.dst.sel = ctx->temp_reg;
1030		alu.dst.chan = i;
1031		if (i == 3) {
1032			alu.last = 1;
1033		}
1034		r = r600_bc_add_alu(ctx->bc, &alu);
1035		if (r)
1036			return r;
1037	}
1038	return tgsi_helper_copy(ctx, inst);
1039}
1040
/*
 * Translation table from TGSI opcodes to r600 instructions.
 *
 * Each entry gives, in order: the TGSI opcode, a 0/1 flag (set only for
 * MAD, the one entry that names an OP3 instruction), the hardware
 * instruction value, and the function that emits code for the opcode.
 * NOTE(review): field meanings are inferred from the initializers below;
 * confirm against the struct r600_shader_tgsi_instruction definition.
 *
 * Opcode numbers without a TGSI name (marked "gap") still get a numeric
 * placeholder entry.  Presumably this keeps the entry position equal to
 * the opcode value so the table can be indexed directly by opcode; verify
 * against the code that looks entries up before reordering or removing
 * anything.
 *
 * Opcodes that are not implemented are routed to tgsi_unsupported with a
 * NOP instruction value.  Several opcodes share a hardware instruction
 * with another one (SUB uses ADD, ABS uses MOV, SLT uses SETGT, DP2/DP3/
 * DP4 all use DOT4); the difference is presumably applied by the handler
 * function - TODO confirm.  TEX and TXP pass the raw value 0x10 to
 * tgsi_tex; NOTE(review): looks like a texture instruction opcode rather
 * than an ALU one - confirm and consider a named constant.
 */
1041static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
1042	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1043	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
1044	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
1045	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1046	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans},
1047	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1048	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1049	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
1050	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
1051	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1052	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1053	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1054	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1055	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
1056	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_slt},
1057	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1058	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
1059	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
1060	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
1061	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1062	/* gap */
1063	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1064	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1065	/* gap */
1066	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1067	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1068	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1069	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1070	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1071	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1072	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans},
1073	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1074	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1075	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1076	/* gap */
1077	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1078	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
1079	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1080	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1081	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1082	{TGSI_OPCODE_DDX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1083	{TGSI_OPCODE_DDY,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1084	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},  /* predicated kill */
1085	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1086	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1087	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1088	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1089	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1090	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1091	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1092	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1093	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1094	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1095	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1096	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1097	{TGSI_OPCODE_TEX,	0, 0x10, tgsi_tex},
1098	{TGSI_OPCODE_TXD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1099	{TGSI_OPCODE_TXP,	0, 0x10, tgsi_tex},
1100	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1101	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1102	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1103	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1104	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1105	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1106	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1107	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1108	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1109	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1110	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* SGN */
1111	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1112	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1113	{TGSI_OPCODE_TXB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1114	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1115	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1116	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
1117	{TGSI_OPCODE_TXL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1118	{TGSI_OPCODE_BRK,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1119	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1120	/* gap */
1121	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1122	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1123	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1124	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1125	/* gap */
1126	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1127	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1128	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1129	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1130	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1131	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1132	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1133	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1134	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1135	/* gap */
1136	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1137	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1138	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1139	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1140	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1141	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1142	{TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1143	{TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1144	{TGSI_OPCODE_CONT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1145	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1146	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1147	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1148	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1149	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1150	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1151	/* gap */
1152	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1153	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1154	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1155	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1156	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1157	/* gap */
1158	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1159	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1160	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1161	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1162	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1163	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1164	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1165	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1166	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
1167	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
1168	/* gap */
1169	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1170	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1171	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1172	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1173	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1174	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1175	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1176	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1177	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1178	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1179	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1180	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1181	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1182	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1183	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1184	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1185	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1186	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1187	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1188	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1189	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1190	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1191	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1192	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1193	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1194	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1195	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1196	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
1197};
1198