1/*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
5 * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
6 *
7 * All Rights Reserved.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining
10 * a copy of this software and associated documentation files (the
11 * "Software"), to deal in the Software without restriction, including
12 * without limitation the rights to use, copy, modify, merge, publish,
13 * distribute, sublicense, and/or sell copies of the Software, and to
14 * permit persons to whom the Software is furnished to do so, subject to
15 * the following conditions:
16 *
17 * The above copyright notice and this permission notice (including the
18 * next paragraph) shall be included in all copies or substantial
19 * portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 *
29 */
30
31/**
32 * \file
33 *
34 * \author Ben Skeggs <darktama@iinet.net.au>
35 *
36 * \author Jerome Glisse <j.glisse@gmail.com>
37 *
38 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
39 *
40 */
41
42#include "r500_fragprog.h"
43
44#include "r300_reg.h"
45
46#include "radeon_program_pair.h"
47
/*
 * Declares a local `code` pointer to the R500 code of the fragment program
 * compiler `c`.  A variable named `c` must be in scope at the point of use.
 */
#define PROG_CODE \
	struct r500_fragment_program_code *code = &(c->code->code.r500)
50
/*
 * Report a compile error on the compiler `c` that is in scope at the call
 * site.  The message is prefixed with the source file and function name and
 * a newline is appended after it.
 */
#define error(fmt, ...) \
	do { \
		rc_error(&c->Base, "%s::%s(): " fmt "\n", \
			 __FILE__, __FUNCTION__, ##__VA_ARGS__); \
	} while (0)
55
56
/*
 * Bookkeeping for one open IF block: the instruction slots claimed for its
 * IF, ELSE and ENDIF instructions.  Else and Endif are set to -1 when the IF
 * is emitted and filled in when the matching instruction is seen.
 */
struct branch_info {
	int If;    /* slot of the IF instruction */
	int Else;  /* slot of the ELSE instruction, or -1 if there is none (yet) */
	int Endif; /* slot of the ENDIF instruction, or -1 until it is emitted */
};
62
/*
 * Bookkeeping for one open loop.  BRK and CONT instructions are recorded
 * here so their jump addresses can be patched when ENDLOOP is emitted.
 */
struct r500_loop_info {
	/* Slot of the BGNLOOP instruction. */
	int BgnLoop;

	/* Branch depth at the time BGNLOOP was emitted. */
	int BranchDepth;

	/* Slots of the BRK instructions seen inside this loop. */
	int *Brks;
	int BrkCount;
	/* Passed to memory_pool_array_reserve together with Brks/BrkCount. */
	int BrkReserved;

	/* Slots of the CONT instructions seen inside this loop. */
	int *Conts;
	int ContCount;
	/* Passed to memory_pool_array_reserve together with Conts/ContCount. */
	int ContReserved;
};
75
/*
 * State carried across flow-control instructions while emitting one
 * fragment program.
 */
struct emit_state {
	/* Compiler used for error reporting and pool allocation. */
	struct radeon_compiler *C;
	/* Hardware code being written. */
	struct r500_fragment_program_code *Code;

	/* Stack of currently open IF blocks. */
	struct branch_info *Branches;
	unsigned int CurrentBranchDepth;
	unsigned int BranchesReserved;

	/* Stack of currently open loops. */
	struct r500_loop_info *Loops;
	unsigned int CurrentLoopDepth;
	unsigned int LoopsReserved;

	/* Deepest IF nesting reached so far. */
	unsigned int MaxBranchDepth;

};
91
92static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
93{
94	switch(opcode) {
95	case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
96	case RC_OPCODE_CND: return R500_ALU_RGBA_OP_CND;
97	case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
98	case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
99	case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
100	case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
101	case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
102	default:
103		error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
104		/* fall through */
105	case RC_OPCODE_NOP:
106		/* fall through */
107	case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
108	case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
109	case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
110	case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
111	}
112}
113
114static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
115{
116	switch(opcode) {
117	case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP;
118	case RC_OPCODE_CND: return R500_ALPHA_OP_CND;
119	case RC_OPCODE_COS: return R500_ALPHA_OP_COS;
120	case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH;
121	case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV;
122	case RC_OPCODE_DP3: return R500_ALPHA_OP_DP;
123	case RC_OPCODE_DP4: return R500_ALPHA_OP_DP;
124	case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2;
125	case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC;
126	case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2;
127	default:
128		error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
129		/* fall through */
130	case RC_OPCODE_NOP:
131		/* fall through */
132	case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD;
133	case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX;
134	case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN;
135	case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP;
136	case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
137	case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN;
138	}
139}
140
141static unsigned int fix_hw_swizzle(unsigned int swz)
142{
143    switch (swz) {
144        case RC_SWIZZLE_ZERO:
145        case RC_SWIZZLE_UNUSED:
146            swz = 4;
147            break;
148        case RC_SWIZZLE_HALF:
149            swz = 5;
150            break;
151        case RC_SWIZZLE_ONE:
152            swz = 6;
153            break;
154    }
155
156	return swz;
157}
158
159static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
160{
161	unsigned int t = inst->RGB.Arg[arg].Source;
162	int comp;
163	t |= inst->RGB.Arg[arg].Negate << 11;
164	t |= inst->RGB.Arg[arg].Abs << 12;
165
166	for(comp = 0; comp < 3; ++comp)
167		t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
168
169	return t;
170}
171
172static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
173{
174	unsigned int t = inst->Alpha.Arg[i].Source;
175	t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2;
176	t |= inst->Alpha.Arg[i].Negate << 5;
177	t |= inst->Alpha.Arg[i].Abs << 6;
178	return t;
179}
180
181static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
182{
183	switch(func) {
184	case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ;
185	case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT;
186	case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE;
187	case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE;
188	default:
189		rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
190		return 0;
191	}
192}
193
194static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
195{
196	if (index > code->max_temp_idx)
197		code->max_temp_idx = index;
198}
199
200static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
201{
202	/* From docs:
203	 *   Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST.
204	 * MSB = 1 << 7 */
205	if (!src.Used)
206		return 1 << 7;
207
208	if (src.File == RC_FILE_CONSTANT) {
209		return src.Index | R500_RGB_ADDR0_CONST;
210	} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
211		use_temporary(code, src.Index);
212		return src.Index;
213	} else if (src.File == RC_FILE_INLINE) {
214		return src.Index | (1 << 7);
215	}
216
217	return 0;
218}
219
220/**
221 * NOP the specified instruction if it is not a texture lookup.
222 */
223static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
224{
225	PROG_CODE;
226
227	if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
228		code->inst[ip].inst0 |= R500_INST_NOP;
229	}
230}
231
/**
 * Emit a paired ALU instruction.
 *
 * Claims the next instruction slot of the R500 code and encodes the RGB and
 * alpha halves of `inst` into the slot's instruction words inst0..inst5.
 *
 * \param c    fragment program compiler that owns the code being written
 * \param inst paired RGB/alpha instruction to encode
 *
 * An error is reported, and the function returns early, when the instruction
 * limit has been reached or when the instruction writes both an output and
 * the ALU result.
 */
static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
{
	int ip;
	PROG_CODE;

	if (code->inst_end >= c->Base.max_alu_insts-1) {
		error("emit_alu: Too many instructions");
		return;
	}

	/* Claim the next slot; inst_end always names the last emitted instruction. */
	ip = ++code->inst_end;

	/* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
	if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
		inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
		if (ip > 0) {
			alu_nop(c, ip - 1);
		}
	}

	/* inst5 and inst4 start out as the bare opcodes; all later fields are
	 * OR-ed into them. */
	code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
	code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);

	/* Any output or depth write makes this an OUT-type instruction. */
	if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
		code->inst[ip].inst0 = R500_INST_TYPE_OUT;
		if (inst->WriteALUResult) {
			/* NOTE(review): the slot at ip has already been claimed
			 * and partly written when this error path returns. */
			error("Cannot write output and ALU result at the same time");
			return;
		}
	} else {
		code->inst[ip].inst0 = R500_INST_TYPE_ALU;
	}
	code->inst[ip].inst0 |= (inst->SemWait << R500_INST_TEX_SEM_WAIT_SHIFT);

	/* Register write masks (RGB at bit 11, alpha at bit 14) and output
	 * write masks (RGB at bit 15, alpha at bit 18). */
	code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11);
	code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0;
	code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
	if (inst->Nop) {
		code->inst[ip].inst0 |= R500_INST_NOP;
	}
	if (inst->Alpha.DepthWriteMask) {
		code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
		c->code->writes_depth = 1;
	}

	/* Destination registers; both are recorded as used temporaries
	 * regardless of the write masks. */
	code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
	code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
	use_temporary(code, inst->Alpha.DestIndex);
	use_temporary(code, inst->RGB.DestIndex);

	if (inst->RGB.Saturate)
		code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
	if (inst->Alpha.Saturate)
		code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;

	/* NOTE(review): inst1, inst2 and inst3 are only ever OR-ed into in this
	 * function, never assigned. This presumably relies on the code struct
	 * having been zeroed before emission starts
	 * (r500BuildFragmentProgramHwCode memsets it). */

	/* Set the presubtract operation. */
	switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
		case RC_PRESUB_BIAS:
			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
			break;
		case RC_PRESUB_SUB:
			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
			break;
		case RC_PRESUB_ADD:
			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
			break;
		case RC_PRESUB_INV:
			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
			break;
		default:
			break;
	}
	switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
		case RC_PRESUB_BIAS:
			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
			break;
		case RC_PRESUB_SUB:
			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
			break;
		case RC_PRESUB_ADD:
			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
			break;
		case RC_PRESUB_INV:
			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
			break;
		default:
			break;
	}

	/* Set the output modifier */
	code->inst[ip].inst3 |= inst->RGB.Omod << R500_ALU_RGB_OMOD_SHIFT;
	code->inst[ip].inst4 |= inst->Alpha.Omod << R500_ALPHA_OMOD_SHIFT;

	/* Source addresses: RGB sources go into inst1, alpha sources into inst2. */
	code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
	code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
	code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));

	code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
	code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
	code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));

	/* Argument selects: arguments A and B share a word with their channel
	 * (inst3 for RGB, inst4 for alpha), while both C arguments go into inst5. */
	code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
	code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
	code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;

	code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
	code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
	code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;

	code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
	code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);

	if (inst->WriteALUResult) {
		code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;

		/* Any value other than RC_ALURESULT_X selects the alpha result. */
		if (inst->WriteALUResult == RC_ALURESULT_X)
			code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
		else
			code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;

		code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
	}
}
358
/**
 * Pack the four components of a swizzle into two bits each: component i
 * lands at bits 2*i and 2*i+1.  Only the low two bits of each selector are
 * kept.
 */
static unsigned int translate_strq_swizzle(unsigned int swizzle)
{
	unsigned int packed = 0;
	int chan = 0;

	while (chan < 4) {
		unsigned int sel = GET_SWZ(swizzle, chan) & 0x3;

		packed |= sel << (2 * chan);
		chan++;
	}

	return packed;
}
367
368/**
369 * Emit a single TEX instruction
370 */
371static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
372{
373	int ip;
374	PROG_CODE;
375
376	if (code->inst_end >= c->Base.max_alu_insts-1) {
377		error("emit_tex: Too many instructions");
378		return 0;
379	}
380
381	ip = ++code->inst_end;
382
383	code->inst[ip].inst0 = R500_INST_TYPE_TEX
384		| (inst->DstReg.WriteMask << 11)
385		| (inst->TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
386	code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
387		| (inst->TexSemAcquire << R500_TEX_SEM_ACQUIRE_SHIFT);
388
389	if (inst->TexSrcTarget == RC_TEXTURE_RECT)
390		code->inst[ip].inst1 |= R500_TEX_UNSCALED;
391
392	switch (inst->Opcode) {
393	case RC_OPCODE_KIL:
394		code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
395		break;
396	case RC_OPCODE_TEX:
397		code->inst[ip].inst1 |= R500_TEX_INST_LD;
398		break;
399	case RC_OPCODE_TXB:
400		code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
401		break;
402	case RC_OPCODE_TXP:
403		code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
404		break;
405	case RC_OPCODE_TXD:
406		code->inst[ip].inst1 |= R500_TEX_INST_DXDY;
407		break;
408	case RC_OPCODE_TXL:
409		code->inst[ip].inst1 |= R500_TEX_INST_LOD;
410		break;
411	default:
412		error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
413	}
414
415	use_temporary(code, inst->SrcReg[0].Index);
416	if (inst->Opcode != RC_OPCODE_KIL)
417		use_temporary(code, inst->DstReg.Index);
418
419	code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
420		| (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
421		| R500_TEX_DST_ADDR(inst->DstReg.Index)
422		| (GET_SWZ(inst->TexSwizzle, 0) << 24)
423		| (GET_SWZ(inst->TexSwizzle, 1) << 26)
424		| (GET_SWZ(inst->TexSwizzle, 2) << 28)
425		| (GET_SWZ(inst->TexSwizzle, 3) << 30)
426		;
427
428	if (inst->Opcode == RC_OPCODE_TXD) {
429		use_temporary(code, inst->SrcReg[1].Index);
430		use_temporary(code, inst->SrcReg[2].Index);
431
432		/* DX and DY parameters are specified in a separate register. */
433		code->inst[ip].inst3 =
434			R500_DX_ADDR(inst->SrcReg[1].Index) |
435			(translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) |
436			R500_DY_ADDR(inst->SrcReg[2].Index) |
437			(translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24);
438	}
439
440	return 1;
441}
442
443static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
444{
445	unsigned int newip;
446
447	if (s->Code->inst_end >= s->C->max_alu_insts-1) {
448		rc_error(s->C, "emit_tex: Too many instructions");
449		return;
450	}
451
452	newip = ++s->Code->inst_end;
453
454	/* Currently all loops use the same integer constant to intialize
455	 * the loop variables. */
456	if(!s->Code->int_constants[0]) {
457		s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
458		s->Code->int_constant_count = 1;
459	}
460	s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
461
462	switch(inst->U.I.Opcode){
463	struct branch_info * branch;
464	struct r500_loop_info * loop;
465	case RC_OPCODE_BGNLOOP:
466		memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info,
467			s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
468
469		loop = &s->Loops[s->CurrentLoopDepth++];
470		memset(loop, 0, sizeof(struct r500_loop_info));
471		loop->BranchDepth = s->CurrentBranchDepth;
472		loop->BgnLoop = newip;
473
474		s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
475			| R500_FC_JUMP_FUNC(0x00)
476			| R500_FC_IGNORE_UNCOVERED
477			;
478		break;
479	case RC_OPCODE_BRK:
480		loop = &s->Loops[s->CurrentLoopDepth - 1];
481		memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
482					loop->BrkCount, loop->BrkReserved, 1);
483
484		loop->Brks[loop->BrkCount++] = newip;
485		s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
486			| R500_FC_JUMP_FUNC(0xff)
487			| R500_FC_B_OP1_DECR
488			| R500_FC_B_POP_CNT(
489				s->CurrentBranchDepth - loop->BranchDepth)
490			| R500_FC_IGNORE_UNCOVERED
491			;
492		break;
493
494	case RC_OPCODE_CONT:
495		loop = &s->Loops[s->CurrentLoopDepth - 1];
496		memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
497					loop->ContCount, loop->ContReserved, 1);
498		loop->Conts[loop->ContCount++] = newip;
499		s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
500			| R500_FC_JUMP_FUNC(0xff)
501			| R500_FC_B_OP1_DECR
502			| R500_FC_B_POP_CNT(
503				s->CurrentBranchDepth -	loop->BranchDepth)
504			| R500_FC_IGNORE_UNCOVERED
505			;
506		break;
507
508	case RC_OPCODE_ENDLOOP:
509	{
510		loop = &s->Loops[s->CurrentLoopDepth - 1];
511		/* Emit ENDLOOP */
512		s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
513			| R500_FC_JUMP_FUNC(0xff)
514			| R500_FC_JUMP_ANY
515			| R500_FC_IGNORE_UNCOVERED
516			;
517		/* The constant integer at index 0 is used by all loops. */
518		s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
519			| R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
520			;
521
522		/* Set jump address and int constant for BGNLOOP */
523		s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
524			| R500_FC_JUMP_ADDR(newip)
525			;
526
527		/* Set jump address for the BRK instructions. */
528		while(loop->BrkCount--) {
529			s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
530						R500_FC_JUMP_ADDR(newip + 1);
531		}
532
533		/* Set jump address for CONT instructions. */
534		while(loop->ContCount--) {
535			s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
536						R500_FC_JUMP_ADDR(newip);
537		}
538		s->CurrentLoopDepth--;
539		break;
540	}
541	case RC_OPCODE_IF:
542		if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
543			rc_error(s->C, "Branch depth exceeds hardware limit");
544			return;
545		}
546		memory_pool_array_reserve(&s->C->Pool, struct branch_info,
547				s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
548
549		branch = &s->Branches[s->CurrentBranchDepth++];
550		branch->If = newip;
551		branch->Else = -1;
552		branch->Endif = -1;
553
554		if (s->CurrentBranchDepth > s->MaxBranchDepth)
555			s->MaxBranchDepth = s->CurrentBranchDepth;
556
557		/* actual instruction is filled in at ENDIF time */
558		break;
559
560	case RC_OPCODE_ELSE:
561		if (!s->CurrentBranchDepth) {
562			rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
563			return;
564		}
565
566		branch = &s->Branches[s->CurrentBranchDepth - 1];
567		branch->Else = newip;
568
569		/* actual instruction is filled in at ENDIF time */
570		break;
571
572	case RC_OPCODE_ENDIF:
573		if (!s->CurrentBranchDepth) {
574			rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
575			return;
576		}
577
578		branch = &s->Branches[s->CurrentBranchDepth - 1];
579		branch->Endif = newip;
580
581		s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
582			| R500_FC_A_OP_NONE /* no address stack */
583			| R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
584			| R500_FC_B_OP0_DECR /* decrement branch counter if stay */
585			| R500_FC_B_OP1_NONE /* no branch counter if stay */
586			| R500_FC_B_POP_CNT(1)
587			;
588		s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
589		s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
590			| R500_FC_A_OP_NONE /* no address stack */
591			| R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
592			| R500_FC_B_OP0_INCR /* increment branch counter if stay */
593			| R500_FC_IGNORE_UNCOVERED
594		;
595
596		if (branch->Else >= 0) {
597			/* increment branch counter also if jump */
598			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
599			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
600
601			s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
602				| R500_FC_A_OP_NONE /* no address stack */
603				| R500_FC_B_ELSE /* all active pixels want to jump */
604				| R500_FC_B_OP0_NONE /* no counter op if stay */
605				| R500_FC_B_OP1_DECR /* decrement branch counter if jump */
606				| R500_FC_B_POP_CNT(1)
607			;
608			s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
609		} else {
610			/* don't touch branch counter on jump */
611			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
612			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
613		}
614
615
616		s->CurrentBranchDepth--;
617		break;
618	default:
619		rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
620	}
621}
622
623void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
624{
625	struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
626	struct emit_state s;
627	struct r500_fragment_program_code *code = &compiler->code->code.r500;
628
629	memset(&s, 0, sizeof(s));
630	s.C = &compiler->Base;
631	s.Code = code;
632
633	memset(code, 0, sizeof(*code));
634	code->max_temp_idx = 1;
635	code->inst_end = -1;
636
637	for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
638	    inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
639	    inst = inst->Next) {
640		if (inst->Type == RC_INSTRUCTION_NORMAL) {
641			const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
642
643			if (opcode->IsFlowControl) {
644				emit_flowcontrol(&s, inst);
645			} else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
646				continue;
647			} else {
648				emit_tex(compiler, &inst->U.I);
649			}
650		} else {
651			emit_paired(compiler, &inst->U.P);
652		}
653	}
654
655	if (code->max_temp_idx >= compiler->Base.max_temp_regs)
656		rc_error(&compiler->Base, "Too many hardware temporaries used");
657
658	if (compiler->Base.Error)
659		return;
660
661	if (code->inst_end == -1 ||
662	    (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
663		int ip;
664
665		/* This may happen when dead-code elimination is disabled or
666		 * when most of the fragment program logic is leading to a KIL */
667		if (code->inst_end >= compiler->Base.max_alu_insts-1) {
668			rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
669			return;
670		}
671
672		ip = ++code->inst_end;
673		code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
674	}
675
676	/* Make sure TEX_SEM_WAIT is set on the last instruction */
677	code->inst[code->inst_end].inst0 |= R500_INST_TEX_SEM_WAIT;
678
679	/* Enable full flow control mode if we are using loops or have if
680	 * statements nested at least four deep. */
681	if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
682		if (code->max_temp_idx < 1)
683			code->max_temp_idx = 1;
684
685		code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
686	}
687}
688