r600_shader.c revision 460c5304aba646143938b76d536a6fc13a302ca8
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "pipe/p_shader_tokens.h"
24#include "tgsi/tgsi_parse.h"
25#include "tgsi/tgsi_scan.h"
26#include "tgsi/tgsi_dump.h"
27#include "util/u_format.h"
28#include "r600_screen.h"
29#include "r600_context.h"
30#include "r600_shader.h"
31#include "r600_asm.h"
32#include "r600_sq.h"
33#include "r600_opcodes.h"
34#include "r600d.h"
35#include <stdio.h>
36#include <errno.h>
37
38
39struct r600_shader_tgsi_instruction;
40
41struct r600_shader_ctx {
42	struct tgsi_shader_info			info;
43	struct tgsi_parse_context		parse;
44	const struct tgsi_token			*tokens;
45	unsigned				type;
46	unsigned				file_offset[TGSI_FILE_COUNT];
47	unsigned				temp_reg;
48	struct r600_shader_tgsi_instruction	*inst_info;
49	struct r600_bc				*bc;
50	struct r600_shader			*shader;
51	u32					value[4];
52	u32					*literals;
53	u32					nliterals;
54	u32                                     max_driver_temp_used;
55};
56
57struct r600_shader_tgsi_instruction {
58	unsigned	tgsi_opcode;
59	unsigned	is_op3;
60	unsigned	r600_opcode;
61	int (*process)(struct r600_shader_ctx *ctx);
62};
63
64static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[];
65static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
66static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
67
68static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader)
69{
70	struct r600_context *rctx = r600_context(ctx);
71	const struct util_format_description *desc;
72	enum pipe_format resource_format[160];
73	unsigned i, nresources = 0;
74	struct r600_bc *bc = &shader->bc;
75	struct r600_bc_cf *cf;
76	struct r600_bc_vtx *vtx;
77
78	if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
79		return 0;
80	for (i = 0; i < rctx->vertex_elements->count; i++) {
81		resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format;
82	}
83	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
84		switch (cf->inst) {
85		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
86		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
87			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
88				desc = util_format_description(resource_format[vtx->buffer_id]);
89				if (desc == NULL) {
90					R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
91					return -EINVAL;
92				}
93				vtx->dst_sel_x = desc->swizzle[0];
94				vtx->dst_sel_y = desc->swizzle[1];
95				vtx->dst_sel_z = desc->swizzle[2];
96				vtx->dst_sel_w = desc->swizzle[3];
97			}
98			break;
99		default:
100			break;
101		}
102	}
103	return r600_bc_build(&shader->bc);
104}
105
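/* Translate TGSI tokens into r600 bytecode and assemble it. */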
106int r600_pipe_shader_create(struct pipe_context *ctx,
107			struct r600_context_state *rpshader,
108			const struct tgsi_token *tokens)
109{
110	struct r600_screen *rscreen = r600_screen(ctx->screen);
111	int r;
112
113//fprintf(stderr, "--------------------------------------------------------------\n");
114//tgsi_dump(tokens, 0);
115	if (rpshader == NULL)
116		return -ENOMEM;
117	rpshader->shader.family = radeon_get_family(rscreen->rw);
118	rpshader->shader.use_mem_constant = rscreen->use_mem_constant;
119	r = r600_shader_from_tgsi(tokens, &rpshader->shader);
120	if (r) {
121		R600_ERR("translation from TGSI failed!\n");
122		return r;
123	}
124	r = r600_bc_build(&rpshader->shader.bc);
125	if (r) {
126		R600_ERR("building bytecode failed!\n");
127		return r;
128	}
129//fprintf(stderr, "______________________________________________________________\n");
130	return 0;
131}
132
133static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader)
134{
135	struct r600_context *rctx = r600_context(ctx);
136	struct radeon_state *state;
137
138	state = &rpshader->rstate[0];
139	radeon_state_fini(&rpshader->rstate[0]);
140
141	return rctx->vtbl->vs_shader(rctx, rpshader, state);
142}
143
144static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader)
145{
146	struct r600_context *rctx = r600_context(ctx);
147	struct radeon_state *state;
148
149	state = &rpshader->rstate[0];
150	radeon_state_fini(state);
151
152	return rctx->vtbl->ps_shader(rctx, rpshader, state);
153}
154
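/* Upload the assembled bytecode into a buffer object and build the
 * VS/PS hardware state that points at it.
 */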
155static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader)
156{
157	struct r600_screen *rscreen = r600_screen(ctx->screen);
158	struct r600_context *rctx = r600_context(ctx);
159	struct r600_shader *rshader = &rpshader->shader;
160	int r;
161
162	/* copy new shader */
163	radeon_bo_decref(rscreen->rw, rpshader->bo);
164	rpshader->bo = NULL;
165	rpshader->bo = radeon_bo(rscreen->rw, 0, rshader->bc.ndw * 4,
166				4096, NULL);
167	if (rpshader->bo == NULL) {
168		return -ENOMEM;
169	}
170	radeon_bo_map(rscreen->rw, rpshader->bo);
171	memcpy(rpshader->bo->data, rshader->bc.bytecode, rshader->bc.ndw * 4);
172	radeon_bo_unmap(rscreen->rw, rpshader->bo);
173	/* build state */
174	rshader->flat_shade = rctx->flat_shade;
175	switch (rshader->processor_type) {
176	case TGSI_PROCESSOR_VERTEX:
177		r = r600_pipe_shader_vs(ctx, rpshader);
178		break;
179	case TGSI_PROCESSOR_FRAGMENT:
180		r = r600_pipe_shader_ps(ctx, rpshader);
181		break;
182	default:
183		r = -EINVAL;
184		break;
185	}
186	return r;
187}
188
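/* Re-validate a shader against the currently bound vertex elements,
 * patch its fetch swizzles, then rebuild its bytecode and hardware state.
 */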
189int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader)
190{
191	struct r600_context *rctx = r600_context(ctx);
192	int r;
193
194	if (rpshader == NULL)
195		return -EINVAL;
196	/* there should be enough input */
197	if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) {
198		R600_ERR("%d resources provided, expecting %d\n",
199			rctx->vertex_elements->count, rpshader->shader.bc.nresource);
200		return -EINVAL;
201	}
202	r = r600_shader_update(ctx, &rpshader->shader);
203	if (r)
204		return r;
205	return r600_pipe_shader(ctx, rpshader);
206}
207
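/* Reject TGSI instructions the translator cannot handle yet:
 * multiple destinations, predicates, and dimensioned or absolute operands.
 */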
208static int tgsi_is_supported(struct r600_shader_ctx *ctx)
209{
210	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
211	int j;
212
213	if (i->Instruction.NumDstRegs > 1) {
214		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
215		return -EINVAL;
216	}
217	if (i->Instruction.Predicate) {
218		R600_ERR("predicate unsupported\n");
219		return -EINVAL;
220	}
221#if 0
222	if (i->Instruction.Label) {
223		R600_ERR("label unsupported\n");
224		return -EINVAL;
225	}
226#endif
227	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
228		if (i->Src[j].Register.Dimension ||
229			i->Src[j].Register.Absolute) {
230			R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j,
231				 i->Src[j].Register.Dimension,
232				 i->Src[j].Register.Absolute);
233			return -EINVAL;
234		}
235	}
236	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
237		if (i->Dst[j].Register.Dimension) {
238			R600_ERR("unsupported dst (dimension)\n");
239			return -EINVAL;
240		}
241	}
242	return 0;
243}
244
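/* Record input/output declarations in the shader; for vertex shaders each
 * input declaration also becomes a vertex fetch from buffer i.
 */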
245static int tgsi_declaration(struct r600_shader_ctx *ctx)
246{
247	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
248	struct r600_bc_vtx vtx;
249	unsigned i;
250	int r;
251
252	switch (d->Declaration.File) {
253	case TGSI_FILE_INPUT:
254		i = ctx->shader->ninput++;
255		ctx->shader->input[i].name = d->Semantic.Name;
256		ctx->shader->input[i].sid = d->Semantic.Index;
257		ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
258		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
259		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
260			/* turn input into fetch */
261			memset(&vtx, 0, sizeof(struct r600_bc_vtx));
262			vtx.inst = 0;
263			vtx.fetch_type = 0;
264			vtx.buffer_id = i;
265			/* register containing the index into the buffer */
266			vtx.src_gpr = 0;
267			vtx.src_sel_x = 0;
268			vtx.mega_fetch_count = 0x1F;
269			vtx.dst_gpr = ctx->shader->input[i].gpr;
270			vtx.dst_sel_x = 0;
271			vtx.dst_sel_y = 1;
272			vtx.dst_sel_z = 2;
273			vtx.dst_sel_w = 3;
274			r = r600_bc_add_vtx(ctx->bc, &vtx);
275			if (r)
276				return r;
277		}
278		break;
279	case TGSI_FILE_OUTPUT:
280		i = ctx->shader->noutput++;
281		ctx->shader->output[i].name = d->Semantic.Name;
282		ctx->shader->output[i].sid = d->Semantic.Index;
283		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
284		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
285		break;
286	case TGSI_FILE_CONSTANT:
287	case TGSI_FILE_TEMPORARY:
288	case TGSI_FILE_SAMPLER:
289	case TGSI_FILE_ADDRESS:
290		break;
291	default:
292		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
293		return -EINVAL;
294	}
295	return 0;
296}
297
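/* Allocate the next driver-reserved temporary GPR for the current instruction. */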
298static int r600_get_temp(struct r600_shader_ctx *ctx)
299{
300	return ctx->temp_reg + ctx->max_driver_temp_used++;
301}
302
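/* Main translation loop: gather immediates and declarations, translate each
 * instruction through the r600_shader_tgsi_instruction[] table, then append
 * the export instructions for every shader output.
 */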
303int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
304{
305	struct tgsi_full_immediate *immediate;
306	struct r600_shader_ctx ctx;
307	struct r600_bc_output output[32];
308	unsigned output_done, noutput;
309	unsigned opcode;
310	int i, r = 0, pos0;
311
312	ctx.bc = &shader->bc;
313	ctx.shader = shader;
314	r = r600_bc_init(ctx.bc, shader->family);
315	if (r)
316		return r;
317	ctx.bc->use_mem_constant = shader->use_mem_constant;
318	ctx.tokens = tokens;
319	tgsi_scan_shader(tokens, &ctx.info);
320	tgsi_parse_init(&ctx.parse, tokens);
321	ctx.type = ctx.parse.FullHeader.Processor.Processor;
322	shader->processor_type = ctx.type;
323
324	/* register allocations */
325	/* Values [0,127] correspond to GPR[0..127].
326	 * Values [128,159] correspond to constant buffer bank 0
327	 * Values [160,191] correspond to constant buffer bank 1
328	 * Values [256,511] correspond to cfile constants c[0..255].
329	 * Other special values are shown in the list below.
330	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
331	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
332	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
333	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
334	 * 248	SQ_ALU_SRC_0: special constant 0.0.
335	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
336	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
337	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
338	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
339	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
340	 * 254	SQ_ALU_SRC_PV: previous vector result.
341	 * 255	SQ_ALU_SRC_PS: previous scalar result.
342	 */
343	for (i = 0; i < TGSI_FILE_COUNT; i++) {
344		ctx.file_offset[i] = 0;
345	}
346	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
347		ctx.file_offset[TGSI_FILE_INPUT] = 1;
348	}
349	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
350						ctx.info.file_count[TGSI_FILE_INPUT];
351	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
352						ctx.info.file_count[TGSI_FILE_OUTPUT];
353	if (ctx.shader->use_mem_constant)
354		ctx.file_offset[TGSI_FILE_CONSTANT] = 128;
355	else
356		ctx.file_offset[TGSI_FILE_CONSTANT] = 256;
357
358	ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
359	ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
360			ctx.info.file_count[TGSI_FILE_TEMPORARY];
361
362	ctx.nliterals = 0;
363	ctx.literals = NULL;
364
365	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
366		tgsi_parse_token(&ctx.parse);
367		switch (ctx.parse.FullToken.Token.Type) {
368		case TGSI_TOKEN_TYPE_IMMEDIATE:
369			immediate = &ctx.parse.FullToken.FullImmediate;
370			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
371			if(ctx.literals == NULL) {
372				r = -ENOMEM;
373				goto out_err;
374			}
375			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
376			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
377			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
378			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
379			ctx.nliterals++;
380			break;
381		case TGSI_TOKEN_TYPE_DECLARATION:
382			r = tgsi_declaration(&ctx);
383			if (r)
384				goto out_err;
385			break;
386		case TGSI_TOKEN_TYPE_INSTRUCTION:
387			r = tgsi_is_supported(&ctx);
388			if (r)
389				goto out_err;
390			ctx.max_driver_temp_used = 0;
391			/* reserve first tmp for everyone */
392			r600_get_temp(&ctx);
393			opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
394			ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
395			r = ctx.inst_info->process(&ctx);
396			if (r)
397				goto out_err;
398			r = r600_bc_add_literal(ctx.bc, ctx.value);
399			if (r)
400				goto out_err;
401			break;
402		default:
403			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
404			r = -EINVAL;
405			goto out_err;
406		}
407	}
408	/* export output */
409	noutput = shader->noutput;
410	for (i = 0, pos0 = 0; i < noutput; i++) {
411		memset(&output[i], 0, sizeof(struct r600_bc_output));
412		output[i].gpr = shader->output[i].gpr;
413		output[i].elem_size = 3;
414		output[i].swizzle_x = 0;
415		output[i].swizzle_y = 1;
416		output[i].swizzle_z = 2;
417		output[i].swizzle_w = 3;
418		output[i].barrier = 1;
419		output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
420		output[i].array_base = i - pos0;
421		output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
422		switch (ctx.type) {
423		case TGSI_PROCESSOR_VERTEX:
424			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
425				output[i].array_base = 60;
426				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
427				/* position doesn't count in array_base */
428				pos0++;
429			}
430			if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
431				output[i].array_base = 61;
432				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
433				/* position doesn't count in array_base */
434				pos0++;
435			}
436			break;
437		case TGSI_PROCESSOR_FRAGMENT:
438			if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
439				output[i].array_base = shader->output[i].sid;
440				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
441			} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
442				output[i].array_base = 61;
443				output[i].swizzle_x = 2;
444				output[i].swizzle_y = output[i].swizzle_z = output[i].swizzle_w = 7;
445				output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
446			} else {
447				R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
448				r = -EINVAL;
449				goto out_err;
450			}
451			break;
452		default:
453			R600_ERR("unsupported processor type %d\n", ctx.type);
454			r = -EINVAL;
455			goto out_err;
456		}
457	}
458	/* add fake param output for vertex shader if no param is exported */
459	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
460		for (i = 0, pos0 = 0; i < noutput; i++) {
461			if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
462				pos0 = 1;
463				break;
464			}
465		}
466		if (!pos0) {
467			memset(&output[i], 0, sizeof(struct r600_bc_output));
468			output[i].gpr = 0;
469			output[i].elem_size = 3;
470			output[i].swizzle_x = 0;
471			output[i].swizzle_y = 1;
472			output[i].swizzle_z = 2;
473			output[i].swizzle_w = 3;
474			output[i].barrier = 1;
475			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
476			output[i].array_base = 0;
477			output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
478			noutput++;
479		}
480	}
481	/* add fake pixel export */
482	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
483		memset(&output[0], 0, sizeof(struct r600_bc_output));
484		output[0].gpr = 0;
485		output[0].elem_size = 3;
486		output[0].swizzle_x = 7;
487		output[0].swizzle_y = 7;
488		output[0].swizzle_z = 7;
489		output[0].swizzle_w = 7;
490		output[0].barrier = 1;
491		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
492		output[0].array_base = 0;
493		output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
494		noutput++;
495	}
496	/* set export done on last export of each type */
497	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
498		if (i == (noutput - 1)) {
499			output[i].end_of_program = 1;
500		}
501		if (!(output_done & (1 << output[i].type))) {
502			output_done |= (1 << output[i].type);
503			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
504		}
505	}
506	/* add output to bytecode */
507	for (i = 0; i < noutput; i++) {
508		r = r600_bc_add_output(ctx.bc, &output[i]);
509		if (r)
510			goto out_err;
511	}
512	free(ctx.literals);
513	tgsi_parse_free(&ctx.parse);
514	return 0;
515out_err:
516	free(ctx.literals);
517	tgsi_parse_free(&ctx.parse);
518	return r;
519}
520
521static int tgsi_unsupported(struct r600_shader_ctx *ctx)
522{
523	R600_ERR("unsupported tgsi opcode %d\n", ctx->inst_info->tgsi_opcode);
524	return -EINVAL;
525}
526
527static int tgsi_end(struct r600_shader_ctx *ctx)
528{
529	return 0;
530}
531
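/* Translate a TGSI source operand into an r600 ALU source; immediate
 * operands become literal sources and their values are staged in ctx->value
 * for the following r600_bc_add_literal().
 */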
532static int tgsi_src(struct r600_shader_ctx *ctx,
533			const struct tgsi_full_src_register *tgsi_src,
534			struct r600_bc_alu_src *r600_src)
535{
536	int index;
537	memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
538	r600_src->sel = tgsi_src->Register.Index;
539	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
540		r600_src->sel = 0;
541		index = tgsi_src->Register.Index;
542		ctx->value[0] = ctx->literals[index * 4 + 0];
543		ctx->value[1] = ctx->literals[index * 4 + 1];
544		ctx->value[2] = ctx->literals[index * 4 + 2];
545		ctx->value[3] = ctx->literals[index * 4 + 3];
546	}
547	if (tgsi_src->Register.Indirect)
548		r600_src->rel = V_SQ_REL_RELATIVE;
549	r600_src->neg = tgsi_src->Register.Negate;
550	r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
551	return 0;
552}
553
554static int tgsi_dst(struct r600_shader_ctx *ctx,
555			const struct tgsi_full_dst_register *tgsi_dst,
556			unsigned swizzle,
557			struct r600_bc_alu_dst *r600_dst)
558{
559	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
560
561	r600_dst->sel = tgsi_dst->Register.Index;
562	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
563	r600_dst->chan = swizzle;
564	r600_dst->write = 1;
565	if (tgsi_dst->Register.Indirect)
566		r600_dst->rel = V_SQ_REL_RELATIVE;
567	if (inst->Instruction.Saturate) {
568		r600_dst->clamp = 1;
569	}
570	return 0;
571}
572
573static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
574{
575	switch (swizzle) {
576	case 0:
577		return tgsi_src->Register.SwizzleX;
578	case 1:
579		return tgsi_src->Register.SwizzleY;
580	case 2:
581		return tgsi_src->Register.SwizzleZ;
582	case 3:
583		return tgsi_src->Register.SwizzleW;
584	default:
585		return 0;
586	}
587}
588
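/* Copy constant-file sources (beyond the first) into temporaries via MOV,
 * presumably because an ALU instruction here can't read more than one
 * constant operand directly.
 */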
589static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
590{
591	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
592	struct r600_bc_alu alu;
593	int i, j, k, nconst, r;
594
595	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
596		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
597			nconst++;
598		}
599		r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
600		if (r) {
601			return r;
602		}
603	}
604	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
605		if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) {
606			int treg = r600_get_temp(ctx);
607			for (k = 0; k < 4; k++) {
608				memset(&alu, 0, sizeof(struct r600_bc_alu));
609				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
610				alu.src[0].sel = r600_src[j].sel;
611				alu.src[0].chan = k;
612				alu.dst.sel = treg;
613				alu.dst.chan = k;
614				alu.dst.write = 1;
615				if (k == 3)
616					alu.last = 1;
617				r = r600_bc_add_alu(ctx->bc, &alu);
618				if (r)
619					return r;
620			}
621			r600_src[j].sel = treg;
622			j--;
623		}
624	}
625	return 0;
626}
627
628/* move any immediate operand into a temp - the trig helpers emit their own literals (the PI constants), so source immediates can't stay inline */
629static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
630{
631	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
632	struct r600_bc_alu alu;
633	int i, j, k, nliteral, r;
634
635	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
636		if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
637			nliteral++;
638		}
639	}
640	for (i = 0, j = 0; i < inst->Instruction.NumSrcRegs; i++) {
641		if (inst->Src[j].Register.File == TGSI_FILE_IMMEDIATE) {
642			int treg = r600_get_temp(ctx);
643			for (k = 0; k < 4; k++) {
644				memset(&alu, 0, sizeof(struct r600_bc_alu));
645				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
646				alu.src[0].sel = r600_src[j].sel;
647				alu.src[0].chan = k;
648				alu.dst.sel = treg;
649				alu.dst.chan = k;
650				alu.dst.write = 1;
651				if (k == 3)
652					alu.last = 1;
653				r = r600_bc_add_alu(ctx->bc, &alu);
654				if (r)
655					return r;
656			}
657			r = r600_bc_add_literal(ctx->bc, ctx->value);
658			if (r)
659				return r;
660			r600_src[j].sel = treg;
661			j++;
662		}
663	}
664	return 0;
665}
666
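/* Emit a two-source ALU op per enabled destination channel; 'swap' reverses
 * the operand order, and SUB/ABS are handled by flipping the neg/abs bits
 * on the sources.
 */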
667static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
668{
669	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
670	struct r600_bc_alu_src r600_src[3];
671	struct r600_bc_alu alu;
672	int i, j, r;
673	int lasti = 0;
674
675	for (i = 0; i < 4; i++) {
676		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
677			lasti = i;
678		}
679	}
680
681	r = tgsi_split_constant(ctx, r600_src);
682	if (r)
683		return r;
684	for (i = 0; i < lasti + 1; i++) {
685		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
686			continue;
687
688		memset(&alu, 0, sizeof(struct r600_bc_alu));
689		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
690		if (r)
691			return r;
692
693		alu.inst = ctx->inst_info->r600_opcode;
694		if (!swap) {
695			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
696				alu.src[j] = r600_src[j];
697				alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
698			}
699		} else {
700			alu.src[0] = r600_src[1];
701			alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
702
703			alu.src[1] = r600_src[0];
704			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
705		}
706		/* handle some special cases */
707		switch (ctx->inst_info->tgsi_opcode) {
708		case TGSI_OPCODE_SUB:
709			alu.src[1].neg = 1;
710			break;
711		case TGSI_OPCODE_ABS:
712			alu.src[0].abs = 1;
713			break;
714		default:
715			break;
716		}
717		if (i == lasti) {
718			alu.last = 1;
719		}
720		r = r600_bc_add_alu(ctx->bc, &alu);
721		if (r)
722			return r;
723	}
724	return 0;
725}
726
727static int tgsi_op2(struct r600_shader_ctx *ctx)
728{
729	return tgsi_op2_s(ctx, 0);
730}
731
732static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
733{
734	return tgsi_op2_s(ctx, 1);
735}
736
737/*
738 * r600 - trunc to -PI..PI range
739 * r700 - normalize by dividing by 2PI
740 * see fdo bug 27901
741 */
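/* Shared preamble for SIN/COS/SCS: computes fract(src * 1/(2*PI) + 0.5) and
 * rescales it to the range the hardware expects (-PI..PI on r600,
 * -0.5..0.5 on r700), leaving the wrapped angle in temp_reg.x.
 */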
742static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
743			   struct r600_bc_alu_src r600_src[3])
744{
745	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
746	int r;
747	uint32_t lit_vals[4];
748	struct r600_bc_alu alu;
749
750	memset(lit_vals, 0, 4*4);
751	r = tgsi_split_constant(ctx, r600_src);
752	if (r)
753		return r;
754
755	r = tgsi_split_literal_constant(ctx, r600_src);
756	if (r)
757		return r;
758
759	lit_vals[0] = fui(1.0 / (3.1415926535 * 2));
760	lit_vals[1] = fui(0.5f);
761
762	memset(&alu, 0, sizeof(struct r600_bc_alu));
763	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
764	alu.is_op3 = 1;
765
766	alu.dst.chan = 0;
767	alu.dst.sel = ctx->temp_reg;
768	alu.dst.write = 1;
769
770	alu.src[0] = r600_src[0];
771	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
772
773	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
774	alu.src[1].chan = 0;
775	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
776	alu.src[2].chan = 1;
777	alu.last = 1;
778	r = r600_bc_add_alu(ctx->bc, &alu);
779	if (r)
780		return r;
781	r = r600_bc_add_literal(ctx->bc, lit_vals);
782	if (r)
783		return r;
784
785	memset(&alu, 0, sizeof(struct r600_bc_alu));
786	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
787
788	alu.dst.chan = 0;
789	alu.dst.sel = ctx->temp_reg;
790	alu.dst.write = 1;
791
792	alu.src[0].sel = ctx->temp_reg;
793	alu.src[0].chan = 0;
794	alu.last = 1;
795	r = r600_bc_add_alu(ctx->bc, &alu);
796	if (r)
797		return r;
798
799	if (ctx->bc->chiprev == 0) {
800		lit_vals[0] = fui(3.1415926535897f * 2.0f);
801		lit_vals[1] = fui(-3.1415926535897f);
802	} else {
803		lit_vals[0] = fui(1.0f);
804		lit_vals[1] = fui(-0.5f);
805	}
806
807	memset(&alu, 0, sizeof(struct r600_bc_alu));
808	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
809	alu.is_op3 = 1;
810
811	alu.dst.chan = 0;
812	alu.dst.sel = ctx->temp_reg;
813	alu.dst.write = 1;
814
815	alu.src[0].sel = ctx->temp_reg;
816	alu.src[0].chan = 0;
817
818	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
819	alu.src[1].chan = 0;
820	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
821	alu.src[2].chan = 1;
822	alu.last = 1;
823	r = r600_bc_add_alu(ctx->bc, &alu);
824	if (r)
825		return r;
826	r = r600_bc_add_literal(ctx->bc, lit_vals);
827	if (r)
828		return r;
829	return 0;
830}
831
832static int tgsi_trig(struct r600_shader_ctx *ctx)
833{
834	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
835	struct r600_bc_alu_src r600_src[3];
836	struct r600_bc_alu alu;
837	int i, r;
838	int lasti = 0;
839
840	r = tgsi_setup_trig(ctx, r600_src);
841	if (r)
842		return r;
843
844	memset(&alu, 0, sizeof(struct r600_bc_alu));
845	alu.inst = ctx->inst_info->r600_opcode;
846	alu.dst.chan = 0;
847	alu.dst.sel = ctx->temp_reg;
848	alu.dst.write = 1;
849
850	alu.src[0].sel = ctx->temp_reg;
851	alu.src[0].chan = 0;
852	alu.last = 1;
853	r = r600_bc_add_alu(ctx->bc, &alu);
854	if (r)
855		return r;
856
857	/* replicate result */
858	for (i = 0; i < 4; i++) {
859		if (inst->Dst[0].Register.WriteMask & (1 << i))
860			lasti = i;
861	}
862	for (i = 0; i < lasti + 1; i++) {
863		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
864			continue;
865
866		memset(&alu, 0, sizeof(struct r600_bc_alu));
867		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
868
869		alu.src[0].sel = ctx->temp_reg;
870		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
871		if (r)
872			return r;
873		if (i == lasti)
874			alu.last = 1;
875		r = r600_bc_add_alu(ctx->bc, &alu);
876		if (r)
877			return r;
878	}
879	return 0;
880}
881
882static int tgsi_scs(struct r600_shader_ctx *ctx)
883{
884	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
885	struct r600_bc_alu_src r600_src[3];
886	struct r600_bc_alu alu;
887	int r;
888
889	r = tgsi_setup_trig(ctx, r600_src);
890	if (r)
891		return r;
892
893
894	/* dst.x = COS */
895	memset(&alu, 0, sizeof(struct r600_bc_alu));
896	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
897	r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
898	if (r)
899		return r;
900
901	alu.src[0].sel = ctx->temp_reg;
902	alu.src[0].chan = 0;
903	alu.last = 1;
904	r = r600_bc_add_alu(ctx->bc, &alu);
905	if (r)
906		return r;
907
908	/* dst.y = SIN */
909	memset(&alu, 0, sizeof(struct r600_bc_alu));
910	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
911	r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
912	if (r)
913		return r;
914
915	alu.src[0].sel = ctx->temp_reg;
916	alu.src[0].chan = 0;
917	alu.last = 1;
918	r = r600_bc_add_alu(ctx->bc, &alu);
919	if (r)
920		return r;
921	return 0;
922}
923
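/* KIL/KILP: emit a per-channel kill compare against 0. KILP uses -1 as the
 * second operand so the kill always fires; KIL kills where the source
 * component is negative.
 */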
924static int tgsi_kill(struct r600_shader_ctx *ctx)
925{
926	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
927	struct r600_bc_alu alu;
928	int i, r;
929
930	for (i = 0; i < 4; i++) {
931		memset(&alu, 0, sizeof(struct r600_bc_alu));
932		alu.inst = ctx->inst_info->r600_opcode;
933
934		alu.dst.chan = i;
935
936		alu.src[0].sel = V_SQ_ALU_SRC_0;
937
938		if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
939			alu.src[1].sel = V_SQ_ALU_SRC_1;
940			alu.src[1].neg = 1;
941		} else {
942			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
943			if (r)
944				return r;
945			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
946		}
947		if (i == 3) {
948			alu.last = 1;
949		}
950		r = r600_bc_add_alu(ctx->bc, &alu);
951		if (r)
952			return r;
953	}
954	r = r600_bc_add_literal(ctx->bc, ctx->value);
955	if (r)
956		return r;
957
958	/* kill must be last in ALU */
959	ctx->bc->force_add_cf = 1;
960	ctx->shader->uses_kill = TRUE;
961	return 0;
962}
963
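/* LIT: dst.x = 1, dst.y = max(src.x, 0), dst.w = 1; dst.z is computed via
 * LOG_CLAMPED / MUL_LIT / EXP_IEEE to give the pow(src.y, src.w) term
 * guarded by src.x, matching the TGSI LIT definition.
 */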
964static int tgsi_lit(struct r600_shader_ctx *ctx)
965{
966	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
967	struct r600_bc_alu alu;
968	struct r600_bc_alu_src r600_src[3];
969	int r;
970
971	r = tgsi_split_constant(ctx, r600_src);
972	if (r)
973		return r;
974	r = tgsi_split_literal_constant(ctx, r600_src);
975	if (r)
976		return r;
977
978	/* dst.x <- 1.0 */
979	memset(&alu, 0, sizeof(struct r600_bc_alu));
980	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
981	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
982	alu.src[0].chan = 0;
983	r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
984	if (r)
985		return r;
986	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
987	r = r600_bc_add_alu(ctx->bc, &alu);
988	if (r)
989		return r;
990
991	/* dst.y = max(src.x, 0.0) */
992	memset(&alu, 0, sizeof(struct r600_bc_alu));
993	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
994	alu.src[0] = r600_src[0];
995	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
996	alu.src[1].chan = 0;
997	r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
998	if (r)
999		return r;
1000	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
1001	r = r600_bc_add_alu(ctx->bc, &alu);
1002	if (r)
1003		return r;
1004
1005	/* dst.w <- 1.0 */
1006	memset(&alu, 0, sizeof(struct r600_bc_alu));
1007	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1008	alu.src[0].sel  = V_SQ_ALU_SRC_1;
1009	alu.src[0].chan = 0;
1010	r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
1011	if (r)
1012		return r;
1013	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
1014	alu.last = 1;
1015	r = r600_bc_add_alu(ctx->bc, &alu);
1016	if (r)
1017		return r;
1018
1019	r = r600_bc_add_literal(ctx->bc, ctx->value);
1020	if (r)
1021		return r;
1022
1023	if (inst->Dst[0].Register.WriteMask & (1 << 2))
1024	{
1025		int chan;
1026		int sel;
1027
1028		/* dst.z = log(src.y) */
1029		memset(&alu, 0, sizeof(struct r600_bc_alu));
1030		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
1031		alu.src[0] = r600_src[0];
1032		alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1033		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1034		if (r)
1035			return r;
1036		alu.last = 1;
1037		r = r600_bc_add_alu(ctx->bc, &alu);
1038		if (r)
1039			return r;
1040
1041		r = r600_bc_add_literal(ctx->bc, ctx->value);
1042		if (r)
1043			return r;
1044
1045		chan = alu.dst.chan;
1046		sel = alu.dst.sel;
1047
1048		/* tmp.x = MUL_LIT(src.w, dst.z, src.x) */
1049		memset(&alu, 0, sizeof(struct r600_bc_alu));
1050		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
1051		alu.src[0] = r600_src[0];
1052		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1053		alu.src[1].sel  = sel;
1054		alu.src[1].chan = chan;
1055
1056		alu.src[2] = r600_src[0];
1057		alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
1058		alu.dst.sel = ctx->temp_reg;
1059		alu.dst.chan = 0;
1060		alu.dst.write = 1;
1061		alu.is_op3 = 1;
1062		alu.last = 1;
1063		r = r600_bc_add_alu(ctx->bc, &alu);
1064		if (r)
1065			return r;
1066
1067		r = r600_bc_add_literal(ctx->bc, ctx->value);
1068		if (r)
1069			return r;
1070		/* dst.z = exp(tmp.x) */
1071		memset(&alu, 0, sizeof(struct r600_bc_alu));
1072		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1073		alu.src[0].sel = ctx->temp_reg;
1074		alu.src[0].chan = 0;
1075		r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
1076		if (r)
1077			return r;
1078		alu.last = 1;
1079		r = r600_bc_add_alu(ctx->bc, &alu);
1080		if (r)
1081			return r;
1082	}
1083	return 0;
1084}
1085
1086static int tgsi_rsq(struct r600_shader_ctx *ctx)
1087{
1088	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1089	struct r600_bc_alu alu;
1090	int i, r;
1091
1092	memset(&alu, 0, sizeof(struct r600_bc_alu));
1093	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE);
1094	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1095		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1096		if (r)
1097			return r;
1098		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1099		alu.src[i].abs = 1;
1100	}
1101	alu.dst.sel = ctx->temp_reg;
1102	alu.dst.write = 1;
1103	alu.last = 1;
1104	r = r600_bc_add_alu(ctx->bc, &alu);
1105	if (r)
1106		return r;
1107	r = r600_bc_add_literal(ctx->bc, ctx->value);
1108	if (r)
1109		return r;
1110	/* replicate result */
1111	return tgsi_helper_tempx_replicate(ctx);
1112}
1113
1114static int tgsi_trans(struct r600_shader_ctx *ctx)
1115{
1116	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1117	struct r600_bc_alu alu;
1118	int i, j, r;
1119
1120	for (i = 0; i < 4; i++) {
1121		memset(&alu, 0, sizeof(struct r600_bc_alu));
1122		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
1123			alu.inst = ctx->inst_info->r600_opcode;
1124			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1125				r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]);
1126				if (r)
1127					return r;
1128				alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1129			}
1130			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1131			if (r)
1132				return r;
1133			alu.last = 1;
1134			r = r600_bc_add_alu(ctx->bc, &alu);
1135			if (r)
1136				return r;
1137		}
1138	}
1139	return 0;
1140}
1141
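/* Replicate temp_reg.x into every destination channel enabled in the write mask. */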
1142static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
1143{
1144	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1145	struct r600_bc_alu alu;
1146	int i, r;
1147
1148	for (i = 0; i < 4; i++) {
1149		memset(&alu, 0, sizeof(struct r600_bc_alu));
1150		alu.src[0].sel = ctx->temp_reg;
1151		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1152		alu.dst.chan = i;
1153		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1154		if (r)
1155			return r;
1156		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
1157		if (i == 3)
1158			alu.last = 1;
1159		r = r600_bc_add_alu(ctx->bc, &alu);
1160		if (r)
1161			return r;
1162	}
1163	return 0;
1164}
1165
1166static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
1167{
1168	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1169	struct r600_bc_alu alu;
1170	int i, r;
1171
1172	memset(&alu, 0, sizeof(struct r600_bc_alu));
1173	alu.inst = ctx->inst_info->r600_opcode;
1174	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1175		r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
1176		if (r)
1177			return r;
1178		alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
1179	}
1180	alu.dst.sel = ctx->temp_reg;
1181	alu.dst.write = 1;
1182	alu.last = 1;
1183	r = r600_bc_add_alu(ctx->bc, &alu);
1184	if (r)
1185		return r;
1186	r = r600_bc_add_literal(ctx->bc, ctx->value);
1187	if (r)
1188		return r;
1189	/* replicate result */
1190	return tgsi_helper_tempx_replicate(ctx);
1191}
1192
1193static int tgsi_pow(struct r600_shader_ctx *ctx)
1194{
1195	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1196	struct r600_bc_alu alu;
1197	int r;
1198
1199	/* LOG2(a) */
1200	memset(&alu, 0, sizeof(struct r600_bc_alu));
1201	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
1202	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1203	if (r)
1204		return r;
1205	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1206	alu.dst.sel = ctx->temp_reg;
1207	alu.dst.write = 1;
1208	alu.last = 1;
1209	r = r600_bc_add_alu(ctx->bc, &alu);
1210	if (r)
1211		return r;
1212	r = r600_bc_add_literal(ctx->bc, ctx->value);
1213	if (r)
1214		return r;
1215	/* b * LOG2(a) */
1216	memset(&alu, 0, sizeof(struct r600_bc_alu));
1217	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE);
1218	r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
1219	if (r)
1220		return r;
1221	alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
1222	alu.src[1].sel = ctx->temp_reg;
1223	alu.dst.sel = ctx->temp_reg;
1224	alu.dst.write = 1;
1225	alu.last = 1;
1226	r = r600_bc_add_alu(ctx->bc, &alu);
1227	if (r)
1228		return r;
1229	r = r600_bc_add_literal(ctx->bc, ctx->value);
1230	if (r)
1231		return r;
1232	/* POW(a,b) = EXP2(b * LOG2(a)) */
1233	memset(&alu, 0, sizeof(struct r600_bc_alu));
1234	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1235	alu.src[0].sel = ctx->temp_reg;
1236	alu.dst.sel = ctx->temp_reg;
1237	alu.dst.write = 1;
1238	alu.last = 1;
1239	r = r600_bc_add_alu(ctx->bc, &alu);
1240	if (r)
1241		return r;
1242	r = r600_bc_add_literal(ctx->bc, ctx->value);
1243	if (r)
1244		return r;
1245	return tgsi_helper_tempx_replicate(ctx);
1246}
1247
1248static int tgsi_ssg(struct r600_shader_ctx *ctx)
1249{
1250	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1251	struct r600_bc_alu alu;
1252	struct r600_bc_alu_src r600_src[3];
1253	int i, r;
1254
1255	r = tgsi_split_constant(ctx, r600_src);
1256	if (r)
1257		return r;
1258
1259	/* tmp = (src > 0 ? 1 : src) */
1260	for (i = 0; i < 4; i++) {
1261		memset(&alu, 0, sizeof(struct r600_bc_alu));
1262		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1263		alu.is_op3 = 1;
1264
1265		alu.dst.sel = ctx->temp_reg;
1266		alu.dst.chan = i;
1267
1268		alu.src[0] = r600_src[0];
1269		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1270
1271		alu.src[1].sel = V_SQ_ALU_SRC_1;
1272
1273		alu.src[2] = r600_src[0];
1274		alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
1275		if (i == 3)
1276			alu.last = 1;
1277		r = r600_bc_add_alu(ctx->bc, &alu);
1278		if (r)
1279			return r;
1280	}
1281	r = r600_bc_add_literal(ctx->bc, ctx->value);
1282	if (r)
1283		return r;
1284
1285	/* dst = (-tmp > 0 ? -1 : tmp) */
1286	for (i = 0; i < 4; i++) {
1287		memset(&alu, 0, sizeof(struct r600_bc_alu));
1288		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
1289		alu.is_op3 = 1;
1290		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1291		if (r)
1292			return r;
1293
1294		alu.src[0].sel = ctx->temp_reg;
1295		alu.src[0].chan = i;
1296		alu.src[0].neg = 1;
1297
1298		alu.src[1].sel = V_SQ_ALU_SRC_1;
1299		alu.src[1].neg = 1;
1300
1301		alu.src[2].sel = ctx->temp_reg;
1302		alu.src[2].chan = i;
1303
1304		if (i == 3)
1305			alu.last = 1;
1306		r = r600_bc_add_alu(ctx->bc, &alu);
1307		if (r)
1308			return r;
1309	}
1310	return 0;
1311}
1312
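/* Move the result accumulated in temp_reg into the real destination;
 * channels not in the write mask are emitted as NOPs.
 */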
1313static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
1314{
1315	struct r600_bc_alu alu;
1316	int i, r;
1317
1318	r = r600_bc_add_literal(ctx->bc, ctx->value);
1319	if (r)
1320		return r;
1321	for (i = 0; i < 4; i++) {
1322		memset(&alu, 0, sizeof(struct r600_bc_alu));
1323		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
1324			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
1325			alu.dst.chan = i;
1326		} else {
1327			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1328			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1329			if (r)
1330				return r;
1331			alu.src[0].sel = ctx->temp_reg;
1332			alu.src[0].chan = i;
1333		}
1334		if (i == 3) {
1335			alu.last = 1;
1336		}
1337		r = r600_bc_add_alu(ctx->bc, &alu);
1338		if (r)
1339			return r;
1340	}
1341	return 0;
1342}
1343
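/* Generic three-operand ops (e.g. MAD): computed into temp_reg because OP3
 * has no write mask, then copied out by tgsi_helper_copy().
 */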
1344static int tgsi_op3(struct r600_shader_ctx *ctx)
1345{
1346	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1347	struct r600_bc_alu_src r600_src[3];
1348	struct r600_bc_alu alu;
1349	int i, j, r;
1350
1351	r = tgsi_split_constant(ctx, r600_src);
1352	if (r)
1353		return r;
1354	/* do it in 2 steps as op3 doesn't support a writemask */
1355	for (i = 0; i < 4; i++) {
1356		memset(&alu, 0, sizeof(struct r600_bc_alu));
1357		alu.inst = ctx->inst_info->r600_opcode;
1358		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1359			alu.src[j] = r600_src[j];
1360			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1361		}
1362		alu.dst.sel = ctx->temp_reg;
1363		alu.dst.chan = i;
1364		alu.dst.write = 1;
1365		alu.is_op3 = 1;
1366		if (i == 3) {
1367			alu.last = 1;
1368		}
1369		r = r600_bc_add_alu(ctx->bc, &alu);
1370		if (r)
1371			return r;
1372	}
1373	return tgsi_helper_copy(ctx, inst);
1374}
1375
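/* Dot products (DP2/DP3/DP4/DPH) emitted as four slots of the hardware
 * dot-product op: unused channels are zeroed for DP2/DP3 and src0.w is
 * forced to 1 for DPH.
 */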
1376static int tgsi_dp(struct r600_shader_ctx *ctx)
1377{
1378	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1379	struct r600_bc_alu_src r600_src[3];
1380	struct r600_bc_alu alu;
1381	int i, j, r;
1382
1383	r = tgsi_split_constant(ctx, r600_src);
1384	if (r)
1385		return r;
1386	for (i = 0; i < 4; i++) {
1387		memset(&alu, 0, sizeof(struct r600_bc_alu));
1388		alu.inst = ctx->inst_info->r600_opcode;
1389		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
1390			alu.src[j] = r600_src[j];
1391			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
1392		}
1393		alu.dst.sel = ctx->temp_reg;
1394		alu.dst.chan = i;
1395		alu.dst.write = 1;
1396		/* handle some special cases */
1397		switch (ctx->inst_info->tgsi_opcode) {
1398		case TGSI_OPCODE_DP2:
1399			if (i > 1) {
1400				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1401				alu.src[0].chan = alu.src[1].chan = 0;
1402			}
1403			break;
1404		case TGSI_OPCODE_DP3:
1405			if (i > 2) {
1406				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
1407				alu.src[0].chan = alu.src[1].chan = 0;
1408			}
1409			break;
1410		case TGSI_OPCODE_DPH:
1411			if (i == 3) {
1412				alu.src[0].sel = V_SQ_ALU_SRC_1;
1413				alu.src[0].chan = 0;
1414				alu.src[0].neg = 0;
1415			}
1416			break;
1417		default:
1418			break;
1419		}
1420		if (i == 3) {
1421			alu.last = 1;
1422		}
1423		r = r600_bc_add_alu(ctx->bc, &alu);
1424		if (r)
1425			return r;
1426	}
1427	return tgsi_helper_copy(ctx, inst);
1428}
1429
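/* Texture sampling. TXP first does the perspective divide, cube maps go
 * through the CUBE instruction plus face-coordinate fixup, and shadow
 * samplers switch to SAMPLE_C with the compare value taken from the Z
 * coordinate.
 */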
1430static int tgsi_tex(struct r600_shader_ctx *ctx)
1431{
1432	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1433	struct r600_bc_tex tex;
1434	struct r600_bc_alu alu;
1435	unsigned src_gpr;
1436	int r, i;
1437	int opcode;
1438	boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY;
1439	uint32_t lit_vals[4];
1440
1441	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
1442
1443	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1444		/* Add perspective divide */
1445		memset(&alu, 0, sizeof(struct r600_bc_alu));
1446		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1447		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1448		if (r)
1449			return r;
1450
1451		alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
1452		alu.dst.sel = ctx->temp_reg;
1453		alu.dst.chan = 3;
1454		alu.last = 1;
1455		alu.dst.write = 1;
1456		r = r600_bc_add_alu(ctx->bc, &alu);
1457		if (r)
1458			return r;
1459
1460		for (i = 0; i < 3; i++) {
1461			memset(&alu, 0, sizeof(struct r600_bc_alu));
1462			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1463			alu.src[0].sel = ctx->temp_reg;
1464			alu.src[0].chan = 3;
1465			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1466			if (r)
1467				return r;
1468			alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1469			alu.dst.sel = ctx->temp_reg;
1470			alu.dst.chan = i;
1471			alu.dst.write = 1;
1472			r = r600_bc_add_alu(ctx->bc, &alu);
1473			if (r)
1474				return r;
1475		}
1476		memset(&alu, 0, sizeof(struct r600_bc_alu));
1477		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1478		alu.src[0].sel = V_SQ_ALU_SRC_1;
1479		alu.src[0].chan = 0;
1480		alu.dst.sel = ctx->temp_reg;
1481		alu.dst.chan = 3;
1482		alu.last = 1;
1483		alu.dst.write = 1;
1484		r = r600_bc_add_alu(ctx->bc, &alu);
1485		if (r)
1486			return r;
1487		src_not_temp = false;
1488		src_gpr = ctx->temp_reg;
1489	}
1490
1491	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1492		int src_chan, src2_chan;
1493
1494		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
1495		for (i = 0; i < 4; i++) {
1496			memset(&alu, 0, sizeof(struct r600_bc_alu));
1497			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
1498			switch (i) {
1499			case 0:
1500				src_chan = 2;
1501				src2_chan = 1;
1502				break;
1503			case 1:
1504				src_chan = 2;
1505				src2_chan = 0;
1506				break;
1507			case 2:
1508				src_chan = 0;
1509				src2_chan = 2;
1510				break;
1511			case 3:
1512				src_chan = 1;
1513				src2_chan = 2;
1514				break;
1515			}
1516			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1517			if (r)
1518				return r;
1519			alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
1520			r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
1521			if (r)
1522				return r;
1523			alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
1524			alu.dst.sel = ctx->temp_reg;
1525			alu.dst.chan = i;
1526			if (i == 3)
1527				alu.last = 1;
1528			alu.dst.write = 1;
1529			r = r600_bc_add_alu(ctx->bc, &alu);
1530			if (r)
1531				return r;
1532		}
1533
1534		/* tmp1.z = RCP_e(|tmp1.z|) */
1535		memset(&alu, 0, sizeof(struct r600_bc_alu));
1536		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
1537		alu.src[0].sel = ctx->temp_reg;
1538		alu.src[0].chan = 2;
1539		alu.src[0].abs = 1;
1540		alu.dst.sel = ctx->temp_reg;
1541		alu.dst.chan = 2;
1542		alu.dst.write = 1;
1543		alu.last = 1;
1544		r = r600_bc_add_alu(ctx->bc, &alu);
1545		if (r)
1546			return r;
1547
1548		/* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
1549		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
1550		 * muladd has no writemask, have to use another temp
1551		 */
1552		memset(&alu, 0, sizeof(struct r600_bc_alu));
1553		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1554		alu.is_op3 = 1;
1555
1556		alu.src[0].sel = ctx->temp_reg;
1557		alu.src[0].chan = 0;
1558		alu.src[1].sel = ctx->temp_reg;
1559		alu.src[1].chan = 2;
1560
1561		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1562		alu.src[2].chan = 0;
1563
1564		alu.dst.sel = ctx->temp_reg;
1565		alu.dst.chan = 0;
1566		alu.dst.write = 1;
1567
1568		r = r600_bc_add_alu(ctx->bc, &alu);
1569		if (r)
1570			return r;
1571
1572		memset(&alu, 0, sizeof(struct r600_bc_alu));
1573		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1574		alu.is_op3 = 1;
1575
1576		alu.src[0].sel = ctx->temp_reg;
1577		alu.src[0].chan = 1;
1578		alu.src[1].sel = ctx->temp_reg;
1579		alu.src[1].chan = 2;
1580
1581		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
1582		alu.src[2].chan = 0;
1583
1584		alu.dst.sel = ctx->temp_reg;
1585		alu.dst.chan = 1;
1586		alu.dst.write = 1;
1587
1588		alu.last = 1;
1589		r = r600_bc_add_alu(ctx->bc, &alu);
1590		if (r)
1591			return r;
1592
1593		lit_vals[0] = fui(1.5f);
1594
1595		r = r600_bc_add_literal(ctx->bc, lit_vals);
1596		if (r)
1597			return r;
1598		src_not_temp = false;
1599		src_gpr = ctx->temp_reg;
1600	}
1601
1602	if (src_not_temp) {
1603		for (i = 0; i < 4; i++) {
1604			memset(&alu, 0, sizeof(struct r600_bc_alu));
1605			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
1606			alu.src[0].sel = src_gpr;
1607			alu.src[0].chan = i;
1608			alu.dst.sel = ctx->temp_reg;
1609			alu.dst.chan = i;
1610			if (i == 3)
1611				alu.last = 1;
1612			alu.dst.write = 1;
1613			r = r600_bc_add_alu(ctx->bc, &alu);
1614			if (r)
1615				return r;
1616		}
1617		src_gpr = ctx->temp_reg;
1618	}
1619
1620	opcode = ctx->inst_info->r600_opcode;
1621	if (opcode == SQ_TEX_INST_SAMPLE &&
1622	    (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
1623		opcode = SQ_TEX_INST_SAMPLE_C;
1624
1625	memset(&tex, 0, sizeof(struct r600_bc_tex));
1626	tex.inst = opcode;
1627	tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
1628	tex.sampler_id = tex.resource_id;
1629	tex.src_gpr = src_gpr;
1630	tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
1631	tex.dst_sel_x = 0;
1632	tex.dst_sel_y = 1;
1633	tex.dst_sel_z = 2;
1634	tex.dst_sel_w = 3;
1635	tex.src_sel_x = 0;
1636	tex.src_sel_y = 1;
1637	tex.src_sel_z = 2;
1638	tex.src_sel_w = 3;
1639
1640	if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
1641		tex.src_sel_x = 1;
1642		tex.src_sel_y = 0;
1643		tex.src_sel_z = 3;
1644		tex.src_sel_w = 1;
1645	}
1646
1647	if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
1648		tex.coord_type_x = 1;
1649		tex.coord_type_y = 1;
1650		tex.coord_type_z = 1;
1651		tex.coord_type_w = 1;
1652	}
1653
1654	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
1655		tex.src_sel_w = 2;
1656
1657	r = r600_bc_add_tex(ctx->bc, &tex);
1658	if (r)
1659		return r;
1660
1661	/* add shadow ambient support  - gallium doesn't do it yet */
1662	return 0;
1663
1664}
1665
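/* LRP: dst = src0 * src1 + (1 - src0) * src2, built in three passes
 * through temp_reg (ADD, MUL, then MULADD).
 */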
1666static int tgsi_lrp(struct r600_shader_ctx *ctx)
1667{
1668	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1669	struct r600_bc_alu_src r600_src[3];
1670	struct r600_bc_alu alu;
1671	unsigned i;
1672	int r;
1673
1674	r = tgsi_split_constant(ctx, r600_src);
1675	if (r)
1676		return r;
1677	/* 1 - src0 */
1678	for (i = 0; i < 4; i++) {
1679		memset(&alu, 0, sizeof(struct r600_bc_alu));
1680		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
1681		alu.src[0].sel = V_SQ_ALU_SRC_1;
1682		alu.src[0].chan = 0;
1683		alu.src[1] = r600_src[0];
1684		alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
1685		alu.src[1].neg = 1;
1686		alu.dst.sel = ctx->temp_reg;
1687		alu.dst.chan = i;
1688		if (i == 3) {
1689			alu.last = 1;
1690		}
1691		alu.dst.write = 1;
1692		r = r600_bc_add_alu(ctx->bc, &alu);
1693		if (r)
1694			return r;
1695	}
1696	r = r600_bc_add_literal(ctx->bc, ctx->value);
1697	if (r)
1698		return r;
1699
1700	/* (1 - src0) * src2 */
1701	for (i = 0; i < 4; i++) {
1702		memset(&alu, 0, sizeof(struct r600_bc_alu));
1703		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1704		alu.src[0].sel = ctx->temp_reg;
1705		alu.src[0].chan = i;
1706		alu.src[1] = r600_src[2];
1707		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1708		alu.dst.sel = ctx->temp_reg;
1709		alu.dst.chan = i;
1710		if (i == 3) {
1711			alu.last = 1;
1712		}
1713		alu.dst.write = 1;
1714		r = r600_bc_add_alu(ctx->bc, &alu);
1715		if (r)
1716			return r;
1717	}
1718	r = r600_bc_add_literal(ctx->bc, ctx->value);
1719	if (r)
1720		return r;
1721
1722	/* src0 * src1 + (1 - src0) * src2 */
1723	for (i = 0; i < 4; i++) {
1724		memset(&alu, 0, sizeof(struct r600_bc_alu));
1725		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1726		alu.is_op3 = 1;
1727		alu.src[0] = r600_src[0];
1728		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1729		alu.src[1] = r600_src[1];
1730		alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
1731		alu.src[2].sel = ctx->temp_reg;
1732		alu.src[2].chan = i;
1733		alu.dst.sel = ctx->temp_reg;
1734		alu.dst.chan = i;
1735		if (i == 3) {
1736			alu.last = 1;
1737		}
1738		r = r600_bc_add_alu(ctx->bc, &alu);
1739		if (r)
1740			return r;
1741	}
1742	return tgsi_helper_copy(ctx, inst);
1743}
1744
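/* CMP: dst = (src0 < 0) ? src1 : src2, mapped onto CNDGE with the second
 * and third operands swapped.
 */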
1745static int tgsi_cmp(struct r600_shader_ctx *ctx)
1746{
1747	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1748	struct r600_bc_alu_src r600_src[3];
1749	struct r600_bc_alu alu;
1750	int use_temp = 0;
1751	int i, r;
1752
1753	r = tgsi_split_constant(ctx, r600_src);
1754	if (r)
1755		return r;
1756
1757	if (inst->Dst[0].Register.WriteMask != 0xf)
1758		use_temp = 1;
1759
1760	for (i = 0; i < 4; i++) {
1761		memset(&alu, 0, sizeof(struct r600_bc_alu));
1762		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
1763		alu.src[0] = r600_src[0];
1764		alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
1765
1766		alu.src[1] = r600_src[2];
1767		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
1768
1769		alu.src[2] = r600_src[1];
1770		alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
1771
1772		if (use_temp)
1773			alu.dst.sel = ctx->temp_reg;
1774		else {
1775			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1776			if (r)
1777				return r;
1778		}
1779		alu.dst.chan = i;
1780		alu.dst.write = 1;
1781		alu.is_op3 = 1;
1782		if (i == 3)
1783			alu.last = 1;
1784		r = r600_bc_add_alu(ctx->bc, &alu);
1785		if (r)
1786			return r;
1787	}
1788	if (use_temp)
1789		return tgsi_helper_copy(ctx, inst);
1790	return 0;
1791}
1792
1793static int tgsi_xpd(struct r600_shader_ctx *ctx)
1794{
1795	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1796	struct r600_bc_alu_src r600_src[3];
1797	struct r600_bc_alu alu;
1798	uint32_t use_temp = 0;
1799	int i, r;
1800
1801	if (inst->Dst[0].Register.WriteMask != 0xf)
1802		use_temp = 1;
1803
1804	r = tgsi_split_constant(ctx, r600_src);
1805	if (r)
1806		return r;
1807
1808	for (i = 0; i < 4; i++) {
1809		memset(&alu, 0, sizeof(struct r600_bc_alu));
1810		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
1811
1812		alu.src[0] = r600_src[0];
1813		switch (i) {
1814		case 0:
1815			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
1816			break;
1817		case 1:
1818			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1819			break;
1820		case 2:
1821			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1822			break;
1823		case 3:
1824			alu.src[0].sel = V_SQ_ALU_SRC_0;
1825			alu.src[0].chan = i;
1826		}
1827
1828		alu.src[1] = r600_src[1];
1829		switch (i) {
1830		case 0:
1831			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
1832			break;
1833		case 1:
1834			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
1835			break;
1836		case 2:
1837			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
1838			break;
1839		case 3:
1840			alu.src[1].sel = V_SQ_ALU_SRC_0;
1841			alu.src[1].chan = i;
1842		}
1843
1844		alu.dst.sel = ctx->temp_reg;
1845		alu.dst.chan = i;
1846		alu.dst.write = 1;
1847
1848		if (i == 3)
1849			alu.last = 1;
1850		r = r600_bc_add_alu(ctx->bc, &alu);
1851		if (r)
1852			return r;
1853	}
1854
1855	for (i = 0; i < 4; i++) {
1856		memset(&alu, 0, sizeof(struct r600_bc_alu));
1857		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
1858
1859		alu.src[0] = r600_src[0];
1860		switch (i) {
1861		case 0:
1862			alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
1863			break;
1864		case 1:
1865			alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
1866			break;
1867		case 2:
1868			alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1869			break;
1870		case 3:
1871			alu.src[0].sel = V_SQ_ALU_SRC_0;
1872			alu.src[0].chan = i;
1873		}
1874
1875		alu.src[1] = r600_src[1];
1876		switch (i) {
1877		case 0:
1878			alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
1879			break;
1880		case 1:
1881			alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
1882			break;
1883		case 2:
1884			alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
1885			break;
1886		case 3:
1887			alu.src[1].sel = V_SQ_ALU_SRC_0;
1888			alu.src[1].chan = i;
1889		}
1890
1891		alu.src[2].sel = ctx->temp_reg;
1892		alu.src[2].neg = 1;
1893		alu.src[2].chan = i;
1894
1895		if (use_temp)
1896			alu.dst.sel = ctx->temp_reg;
1897		else {
1898			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1899			if (r)
1900				return r;
1901		}
1902		alu.dst.chan = i;
1903		alu.dst.write = 1;
1904		alu.is_op3 = 1;
1905		if (i == 3)
1906			alu.last = 1;
1907		r = r600_bc_add_alu(ctx->bc, &alu);
1908		if (r)
1909			return r;
1910	}
1911	if (use_temp)
1912		return tgsi_helper_copy(ctx, inst);
1913	return 0;
1914}
1915
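/* EXP: dst.x = 2^floor(src.x), dst.y = fract(src.x),
 * dst.z = 2^src.x (rough approximation), dst.w = 1.0.
 */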
1916static int tgsi_exp(struct r600_shader_ctx *ctx)
1917{
1918	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
1919	struct r600_bc_alu_src r600_src[3];
1920	struct r600_bc_alu alu;
1921	int r;
1922
1923	/* result.x = 2^floor(src); */
1924	if (inst->Dst[0].Register.WriteMask & 1) {
1925		memset(&alu, 0, sizeof(struct r600_bc_alu));
1926
1927		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
1928		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1929		if (r)
1930			return r;
1931
1932		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1933
1934		alu.dst.sel = ctx->temp_reg;
1935		alu.dst.chan = 0;
1936		alu.dst.write = 1;
1937		alu.last = 1;
1938		r = r600_bc_add_alu(ctx->bc, &alu);
1939		if (r)
1940			return r;
1941
1942		r = r600_bc_add_literal(ctx->bc, ctx->value);
1943		if (r)
1944			return r;
1945
1946		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1947		alu.src[0].sel = ctx->temp_reg;
1948		alu.src[0].chan = 0;
1949
1950		alu.dst.sel = ctx->temp_reg;
1951		alu.dst.chan = 0;
1952		alu.dst.write = 1;
1953		alu.last = 1;
1954		r = r600_bc_add_alu(ctx->bc, &alu);
1955		if (r)
1956			return r;
1957
1958		r = r600_bc_add_literal(ctx->bc, ctx->value);
1959		if (r)
1960			return r;
1961	}
1962
1963	/* result.y = src.x - floor(src.x); */
1964	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
1965		memset(&alu, 0, sizeof(struct r600_bc_alu));
1966
1967		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
1969		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1970		if (r)
1971			return r;
1972		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1973
1974		alu.dst.sel = ctx->temp_reg;
1975//		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
1976//		if (r)
1977//			return r;
1978		alu.dst.write = 1;
1979		alu.dst.chan = 1;
1980
1981		alu.last = 1;
1982
1983		r = r600_bc_add_alu(ctx->bc, &alu);
1984		if (r)
1985			return r;
1986		r = r600_bc_add_literal(ctx->bc, ctx->value);
1987		if (r)
1988			return r;
1989	}
1990
1991	/* result.z = RoughApprox2ToX(src.x); */
1992	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
1993		memset(&alu, 0, sizeof(struct r600_bc_alu));
1994		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
1995		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
1996		if (r)
1997			return r;
1998		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
1999
2000		alu.dst.sel = ctx->temp_reg;
2001		alu.dst.write = 1;
2002		alu.dst.chan = 2;
2003
2004		alu.last = 1;
2005
2006		r = r600_bc_add_alu(ctx->bc, &alu);
2007		if (r)
2008			return r;
2009		r = r600_bc_add_literal(ctx->bc, ctx->value);
2010		if (r)
2011			return r;
2012	}
2013
2014	/* result.w = 1.0; */
2015	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
2016		memset(&alu, 0, sizeof(struct r600_bc_alu));
2017
2018		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2019		alu.src[0].sel = V_SQ_ALU_SRC_1;
2020		alu.src[0].chan = 0;
2021
2022		alu.dst.sel = ctx->temp_reg;
2023		alu.dst.chan = 3;
2024		alu.dst.write = 1;
2025		alu.last = 1;
2026		r = r600_bc_add_alu(ctx->bc, &alu);
2027		if (r)
2028			return r;
2029		r = r600_bc_add_literal(ctx->bc, ctx->value);
2030		if (r)
2031			return r;
2032	}
2033	return tgsi_helper_copy(ctx, inst);
2034}
2035
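/* LOG, expanded per enabled writemask channel:
 * x = floor(log2(src.x)), z = log2(src.x), w = 1.0;
 * y should be src.x / 2^floor(log2(src.x)) but is currently written as 1.0
 * (see the FIXME below).
 */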
2036static int tgsi_log(struct r600_shader_ctx *ctx)
2037{
2038	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2040	struct r600_bc_alu alu;
2041	int r;
2042
2043	/* result.x = floor(log2(src.x)); */
2044	if (inst->Dst[0].Register.WriteMask & 1) {
2045		memset(&alu, 0, sizeof(struct r600_bc_alu));
2046
2047		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2048		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2049		if (r)
2050			return r;
2051
2052		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2053
2054		alu.dst.sel = ctx->temp_reg;
2055		alu.dst.chan = 0;
2056		alu.dst.write = 1;
2057		alu.last = 1;
2058		r = r600_bc_add_alu(ctx->bc, &alu);
2059		if (r)
2060			return r;
2061
2062		r = r600_bc_add_literal(ctx->bc, ctx->value);
2063		if (r)
2064			return r;
2065
2066		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
2067		alu.src[0].sel = ctx->temp_reg;
2068		alu.src[0].chan = 0;
2069
2070		alu.dst.sel = ctx->temp_reg;
2071		alu.dst.chan = 0;
2072		alu.dst.write = 1;
2073		alu.last = 1;
2074
2075		r = r600_bc_add_alu(ctx->bc, &alu);
2076		if (r)
2077			return r;
2078
2079		r = r600_bc_add_literal(ctx->bc, ctx->value);
2080		if (r)
2081			return r;
2082	}
2083
2084	/* result.y = src.x / (2^floor(log2(src.x))); FIXME: 1.0 is written as a placeholder */
2085	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
2086		memset(&alu, 0, sizeof(struct r600_bc_alu));
2087
2088		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2089		alu.src[0].sel = V_SQ_ALU_SRC_1;
2090		alu.src[0].chan = 0;
2091
2092		alu.dst.sel = ctx->temp_reg;
2093		alu.dst.chan = 1;
2094		alu.dst.write = 1;
2095		alu.last = 1;
2096
2097		r = r600_bc_add_alu(ctx->bc, &alu);
2098		if (r)
2099			return r;
2100
2101		r = r600_bc_add_literal(ctx->bc, ctx->value);
2102		if (r)
2103			return r;
2104	}
2105
2106	/* result.z = log2(src.x); */
2107	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
2108		memset(&alu, 0, sizeof(struct r600_bc_alu));
2109
2110		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
2111		r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2112		if (r)
2113			return r;
2114
2115		alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2116
2117		alu.dst.sel = ctx->temp_reg;
2118		alu.dst.write = 1;
2119		alu.dst.chan = 2;
2120		alu.last = 1;
2121
2122		r = r600_bc_add_alu(ctx->bc, &alu);
2123		if (r)
2124			return r;
2125
2126		r = r600_bc_add_literal(ctx->bc, ctx->value);
2127		if (r)
2128			return r;
2129	}
2130
2131	/* result.w = 1.0; */
2132	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
2133		memset(&alu, 0, sizeof(struct r600_bc_alu));
2134
2135		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
2136		alu.src[0].sel = V_SQ_ALU_SRC_1;
2137		alu.src[0].chan = 0;
2138
2139		alu.dst.sel = ctx->temp_reg;
2140		alu.dst.chan = 3;
2141		alu.dst.write = 1;
2142		alu.last = 1;
2143
2144		r = r600_bc_add_alu(ctx->bc, &alu);
2145		if (r)
2146			return r;
2147
2148		r = r600_bc_add_literal(ctx->bc, ctx->value);
2149		if (r)
2150			return r;
2151	}
2152
2153	return tgsi_helper_copy(ctx, inst);
2154}
2155
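/* ARL: load floor(src.x) into the address register with MOVA_FLOOR.
 * Note the opcode is used directly here rather than going through CTX_INST
 * like the other handlers.
 */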
2156static int tgsi_arl(struct r600_shader_ctx *ctx)
2157{
2158	/* TODO from r600c, ar values don't persist between clauses */
2159	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2160	struct r600_bc_alu alu;
2161	int r;
2162	memset(&alu, 0, sizeof(struct r600_bc_alu));
2163
2164	alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
2165
2166	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2167	if (r)
2168		return r;
2169	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2170
2171	alu.last = 1;
2172
2173	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
2174	if (r)
2175		return r;
2176	return 0;
2177}
2178
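/* DST: implemented as a per-channel MUL with 1.0 substituted for the unused
 * operands, giving dst = (1.0, src0.y * src1.y, src0.z, src1.w).
 */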
2179static int tgsi_opdst(struct r600_shader_ctx *ctx)
2180{
2181	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2182	struct r600_bc_alu alu;
2183	int i, r = 0;
2184
2185	for (i = 0; i < 4; i++) {
2186		memset(&alu, 0, sizeof(struct r600_bc_alu));
2187
2188		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
2189		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
2190		if (r)
2191			return r;
2192
2193		if (i == 0 || i == 3) {
2194			alu.src[0].sel = V_SQ_ALU_SRC_1;
2195		} else {
2196			r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2197			if (r)
2198				return r;
2199			alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
2200		}
2201
2202		if (i == 0 || i == 2) {
2203			alu.src[1].sel = V_SQ_ALU_SRC_1;
2204		} else {
2205			r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
2206			if (r)
2207				return r;
2208			alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
2209		}
2210		if (i == 3)
2211			alu.last = 1;
2212		r = r600_bc_add_alu(ctx->bc, &alu);
2213		if (r)
2214			return r;
2215	}
2216	return 0;
2217}
2218
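/* Emit a predicate-setting ALU op (ALU_PUSH_BEFORE) that compares src.x
 * against 0; used to open conditional blocks such as IF.
 */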
2219static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
2220{
2221	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
2222	struct r600_bc_alu alu;
2223	int r;
2224
2225	memset(&alu, 0, sizeof(struct r600_bc_alu));
2226	alu.inst = opcode;
2227	alu.predicate = 1;
2228
2229	alu.dst.sel = ctx->temp_reg;
2230	alu.dst.write = 1;
2231	alu.dst.chan = 0;
2232
2233	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
2234	if (r)
2235		return r;
2236	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
2237	alu.src[1].sel = V_SQ_ALU_SRC_0;
2238	alu.src[1].chan = 0;
2239
2240	alu.last = 1;
2241
2242	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
2243	if (r)
2244		return r;
2245	return 0;
2246}
2247
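/* Emit a POP control-flow instruction that pops 'pops' levels off the stack. */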
2248static int pops(struct r600_shader_ctx *ctx, int pops)
2249{
2250	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
2251	ctx->bc->cf_last->pop_count = pops;
2252	return 0;
2253}
2254
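/* Book-keeping for the control-flow stack usage of the current call level:
 * VPM pushes count 1 entry, WQM/LOOP pushes count 4.
 */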
2255static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
2256{
2257	switch(reason) {
2258	case FC_PUSH_VPM:
2259		ctx->bc->callstack[ctx->bc->call_sp].current--;
2260		break;
2261	case FC_PUSH_WQM:
2262	case FC_LOOP:
2263		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
2264		break;
2265	case FC_REP:
2266		/* TODO : for a 16 vp asic this should be -= 2 */
2267		ctx->bc->callstack[ctx->bc->call_sp].current--;
2268		break;
2269	}
2270}
2271
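/* Increase the tracked stack depth for the given push reason (or, with
 * check_max_only, just verify the would-be depth) and update the recorded
 * maximum.
 */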
2272static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
2273{
2274	if (check_max_only) {
2275		int diff = 0;
2276		switch (reason) {
2277		case FC_PUSH_VPM:
2278			diff = 1;
2279			break;
2280		case FC_PUSH_WQM:
2281			diff = 4;
2282			break;
2283		}
2284		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
2285		    ctx->bc->callstack[ctx->bc->call_sp].max) {
2286			ctx->bc->callstack[ctx->bc->call_sp].max =
2287				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
2288		}
2289		return;
2290	}
2291	switch (reason) {
2292	case FC_PUSH_VPM:
2293		ctx->bc->callstack[ctx->bc->call_sp].current++;
2294		break;
2295	case FC_PUSH_WQM:
2296	case FC_LOOP:
2297		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
2298		break;
2299	case FC_REP:
2300		ctx->bc->callstack[ctx->bc->call_sp].current++;
2301		break;
2302	}
2303
2304	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
2305	    ctx->bc->callstack[ctx->bc->call_sp].max) {
2306		ctx->bc->callstack[ctx->bc->call_sp].max =
2307			ctx->bc->callstack[ctx->bc->call_sp].current;
2308	}
2309}
2310
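/* Remember the last CF instruction (e.g. ELSE or a loop break/continue) as a
 * "mid" point of the flow-control block at fc_sp so its target address can be
 * patched when the block is closed.
 */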
2311static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
2312{
2313	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
2314
2315	sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
2316						sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
2317	sp->mid[sp->num_mid] = ctx->bc->cf_last;
2318	sp->num_mid++;
2319}
2320
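/* Push/pop an entry on the flow-control stack, recording the CF instruction
 * that opened the block.
 */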
2321static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
2322{
2323	ctx->bc->fc_sp++;
2324	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
2325	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
2326}
2327
2328static void fc_poplevel(struct r600_shader_ctx *ctx)
2329{
2330	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
2331	if (sp->mid) {
2332		free(sp->mid);
2333		sp->mid = NULL;
2334	}
2335	sp->num_mid = 0;
2336	sp->start = NULL;
2337	sp->type = 0;
2338	ctx->bc->fc_sp--;
2339}
2340
2341#if 0
2342static int emit_return(struct r600_shader_ctx *ctx)
2343{
2344	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
2345	return 0;
2346}
2347
2348static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
2349{
2350
2351	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
2352	ctx->bc->cf_last->pop_count = pops;
2353	/* TODO work out offset */
2354	return 0;
2355}
2356
2357static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
2358{
2359	return 0;
2360}
2361
2362static void emit_testflag(struct r600_shader_ctx *ctx)
2363{
2364
2365}
2366
2367static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
2368{
2369	emit_testflag(ctx);
2370	emit_jump_to_offset(ctx, 1, 4);
2371	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
2372	pops(ctx, ifidx + 1);
2373	emit_return(ctx);
2374}
2375
2376static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
2377{
2378	emit_testflag(ctx);
2379
2380	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2381	ctx->bc->cf_last->pop_count = 1;
2382
2383	fc_set_mid(ctx, fc_sp);
2384
2385	pops(ctx, 1);
2386}
2387#endif
2388
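/* IF: emit PRED_SETNE with a stack push, followed by a JUMP whose target is
 * patched later by tgsi_else/tgsi_endif.
 */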
2389static int tgsi_if(struct r600_shader_ctx *ctx)
2390{
2391	emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
2392
2393	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
2394
2395	fc_pushlevel(ctx, FC_IF);
2396
2397	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
2398	return 0;
2399}
2400
2401static int tgsi_else(struct r600_shader_ctx *ctx)
2402{
2403	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
2404	ctx->bc->cf_last->pop_count = 1;
2405
2406	fc_set_mid(ctx, ctx->bc->fc_sp);
2407	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
2408	return 0;
2409}
2410
2411static int tgsi_endif(struct r600_shader_ctx *ctx)
2412{
2413	pops(ctx, 1);
2414	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
2415		R600_ERR("if/endif unbalanced in shader\n");
2416		return -EINVAL;
2417	}
2418
2419	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
2420		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2421		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
2422	} else {
2423		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
2424	}
2425	fc_poplevel(ctx);
2426
2427	callstack_decrease_current(ctx, FC_PUSH_VPM);
2428	return 0;
2429}
2430
2431static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
2432{
2433	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
2434
2435	fc_pushlevel(ctx, FC_LOOP);
2436
2437	/* check stack depth */
2438	callstack_check_depth(ctx, FC_LOOP, 0);
2439	return 0;
2440}
2441
2442static int tgsi_endloop(struct r600_shader_ctx *ctx)
2443{
2444	int i;
2445
2446	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
2447
2448	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
2449		R600_ERR("loop/endloop in shader code are not paired.\n");
2450		return -EINVAL;
2451	}
2452
2453	/* fixup loop pointers - from r600isa
2454	   LOOP END points to CF after LOOP START,
2455	   LOOP START points to CF after LOOP END
2456	   BRK/CONT point to LOOP END CF
2457	*/
2458	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
2459
2460	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
2461
2462	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
2463		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
2464	}
2465	/* TODO add LOOPRET support */
2466	fc_poplevel(ctx);
2467	callstack_decrease_current(ctx, FC_LOOP);
2468	return 0;
2469}
2470
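/* BRK/CONT: find the innermost enclosing loop on the flow-control stack, emit
 * LOOP_BREAK/LOOP_CONTINUE and register it as a mid CF so tgsi_endloop can
 * patch its target address.
 */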
2471static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
2472{
2473	unsigned int fscp;
2474
2475	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
2476	{
2477		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
2478			break;
2479	}
2480
2481	if (fscp == 0) {
2482		R600_ERR("Break/continue not inside loop/endloop pair\n");
2483		return -EINVAL;
2484	}
2485
2486	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
2487	ctx->bc->cf_last->pop_count = 1;
2488
2489	fc_set_mid(ctx, fscp);
2490
2491	pops(ctx, 1);
2492	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
2493	return 0;
2494}
2495
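/* Dispatch table indexed by TGSI opcode: {tgsi opcode, is_op3, r600 opcode,
 * handler}. Gaps in the TGSI opcode space and unimplemented opcodes map to
 * tgsi_unsupported.
 */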
2496static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
2497	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl},
2498	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2499	{TGSI_OPCODE_LIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
2500	{TGSI_OPCODE_RCP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
2501	{TGSI_OPCODE_RSQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
2502	{TGSI_OPCODE_EXP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
2503	{TGSI_OPCODE_LOG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
2504	{TGSI_OPCODE_MUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
2505	{TGSI_OPCODE_ADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2506	{TGSI_OPCODE_DP3,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2507	{TGSI_OPCODE_DP4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2508	{TGSI_OPCODE_DST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
2509	{TGSI_OPCODE_MIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
2510	{TGSI_OPCODE_MAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
2511	{TGSI_OPCODE_SLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
2512	{TGSI_OPCODE_SGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
2513	{TGSI_OPCODE_MAD,	1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
2514	{TGSI_OPCODE_SUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
2515	{TGSI_OPCODE_LRP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
2516	{TGSI_OPCODE_CND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2517	/* gap */
2518	{20,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2519	{TGSI_OPCODE_DP2A,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2520	/* gap */
2521	{22,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2522	{23,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2523	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
2524	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2525	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
2526	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2527	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
2528	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
2529	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
2530	{TGSI_OPCODE_XPD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
2531	/* gap */
2532	{32,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2533	{TGSI_OPCODE_ABS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
2534	{TGSI_OPCODE_RCC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2535	{TGSI_OPCODE_DPH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2536	{TGSI_OPCODE_COS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
2537	{TGSI_OPCODE_DDX,	0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
2538	{TGSI_OPCODE_DDY,	0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
2539	{TGSI_OPCODE_KILP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* predicated kill */
2540	{TGSI_OPCODE_PK2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2541	{TGSI_OPCODE_PK2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2542	{TGSI_OPCODE_PK4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2543	{TGSI_OPCODE_PK4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2544	{TGSI_OPCODE_RFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2545	{TGSI_OPCODE_SEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
2546	{TGSI_OPCODE_SFL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2547	{TGSI_OPCODE_SGT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
2548	{TGSI_OPCODE_SIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
2549	{TGSI_OPCODE_SLE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
2550	{TGSI_OPCODE_SNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
2551	{TGSI_OPCODE_STR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2552	{TGSI_OPCODE_TEX,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2553	{TGSI_OPCODE_TXD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2554	{TGSI_OPCODE_TXP,	0, SQ_TEX_INST_SAMPLE, tgsi_tex},
2555	{TGSI_OPCODE_UP2H,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2556	{TGSI_OPCODE_UP2US,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2557	{TGSI_OPCODE_UP4B,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2558	{TGSI_OPCODE_UP4UB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2559	{TGSI_OPCODE_X2D,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2560	{TGSI_OPCODE_ARA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2561	{TGSI_OPCODE_ARR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2562	{TGSI_OPCODE_BRA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2563	{TGSI_OPCODE_CAL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2564	{TGSI_OPCODE_RET,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2565	{TGSI_OPCODE_SSG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
2566	{TGSI_OPCODE_CMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
2567	{TGSI_OPCODE_SCS,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
2568	{TGSI_OPCODE_TXB,	0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
2569	{TGSI_OPCODE_NRM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2570	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2571	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
2572	{TGSI_OPCODE_TXL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2573	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
2574	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
2575	/* gap */
2576	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2577	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2578	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
2579	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
2580	/* gap */
2581	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2582	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2583	{TGSI_OPCODE_PUSHA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2584	{TGSI_OPCODE_POPA,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2585	{TGSI_OPCODE_CEIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2586	{TGSI_OPCODE_I2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2587	{TGSI_OPCODE_NOT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2588	{TGSI_OPCODE_TRUNC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
2589	{TGSI_OPCODE_SHL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2590	/* gap */
2591	{88,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2592	{TGSI_OPCODE_AND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2593	{TGSI_OPCODE_OR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2594	{TGSI_OPCODE_MOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2595	{TGSI_OPCODE_XOR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2596	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2597	{TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2598	{TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2599	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
2600	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2601	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2602	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
2603	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2604	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
2605	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2606	/* gap */
2607	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2608	{104,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2609	{105,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2610	{106,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2611	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2612	/* gap */
2613	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2614	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2615	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2616	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2617	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2618	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2619	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2620	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2621	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
2622	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
2623	/* gap */
2624	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2625	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2626	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2627	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2628	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2629	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2630	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2631	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2632	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2633	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2634	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2635	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2636	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2637	{TGSI_OPCODE_UMAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2638	{TGSI_OPCODE_UMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2639	{TGSI_OPCODE_UMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2640	{TGSI_OPCODE_UMOD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2641	{TGSI_OPCODE_UMUL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2642	{TGSI_OPCODE_USEQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2643	{TGSI_OPCODE_USGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2644	{TGSI_OPCODE_USHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2645	{TGSI_OPCODE_USLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2646	{TGSI_OPCODE_USNE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2647	{TGSI_OPCODE_SWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2648	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2649	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2650	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2651	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
2652};
2653