1/* 2 * Copyright (C) 2005 Ben Skeggs. 3 * 4 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> 5 * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets. 6 * 7 * All Rights Reserved. 8 * 9 * Permission is hereby granted, free of charge, to any person obtaining 10 * a copy of this software and associated documentation files (the 11 * "Software"), to deal in the Software without restriction, including 12 * without limitation the rights to use, copy, modify, merge, publish, 13 * distribute, sublicense, and/or sell copies of the Software, and to 14 * permit persons to whom the Software is furnished to do so, subject to 15 * the following conditions: 16 * 17 * The above copyright notice and this permission notice (including the 18 * next paragraph) shall be included in all copies or substantial 19 * portions of the Software. 20 * 21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 23 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 24 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 25 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 26 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 27 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 28 * 29 */ 30 31/** 32 * \file 33 * 34 * \author Ben Skeggs <darktama@iinet.net.au> 35 * 36 * \author Jerome Glisse <j.glisse@gmail.com> 37 * 38 * \author Corbin Simpson <MostAwesomeDude@gmail.com> 39 * 40 */ 41 42#include "r500_fragprog.h" 43 44#include "../r300_reg.h" 45 46#include "radeon_program_pair.h" 47 48#define PROG_CODE \ 49 struct r500_fragment_program_code *code = &c->code->code.r500 50 51#define error(fmt, args...) do { \ 52 rc_error(&c->Base, "%s::%s(): " fmt "\n", \ 53 __FILE__, __FUNCTION__, ##args); \ 54 } while(0) 55 56 57struct branch_info { 58 int If; 59 int Else; 60 int Endif; 61}; 62 63struct r500_loop_info { 64 int BgnLoop; 65 66 int BranchDepth; 67 int * Brks; 68 int BrkCount; 69 int BrkReserved; 70 71 int * Conts; 72 int ContCount; 73 int ContReserved; 74}; 75 76struct emit_state { 77 struct radeon_compiler * C; 78 struct r500_fragment_program_code * Code; 79 80 struct branch_info * Branches; 81 unsigned int CurrentBranchDepth; 82 unsigned int BranchesReserved; 83 84 struct r500_loop_info * Loops; 85 unsigned int CurrentLoopDepth; 86 unsigned int LoopsReserved; 87 88 unsigned int MaxBranchDepth; 89 90}; 91 92static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) 93{ 94 switch(opcode) { 95 case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; 96 case RC_OPCODE_CND: return R500_ALU_RGBA_OP_CND; 97 case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; 98 case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; 99 case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; 100 case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; 101 case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; 102 default: 103 error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name); 104 /* fall through */ 105 case RC_OPCODE_NOP: 106 /* fall through */ 107 case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD; 108 case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX; 109 case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN; 110 case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP; 111 } 112} 113 114static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) 115{ 116 switch(opcode) { 117 case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP; 118 case RC_OPCODE_CND: return R500_ALPHA_OP_CND; 119 case RC_OPCODE_COS: return R500_ALPHA_OP_COS; 120 case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH; 121 case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV; 122 case RC_OPCODE_DP3: return R500_ALPHA_OP_DP; 123 case RC_OPCODE_DP4: return R500_ALPHA_OP_DP; 124 case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2; 125 case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC; 126 case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2; 127 default: 128 error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name); 129 /* fall through */ 130 case RC_OPCODE_NOP: 131 /* fall through */ 132 case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD; 133 case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX; 134 case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN; 135 case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP; 136 case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ; 137 case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN; 138 } 139} 140 141static unsigned int fix_hw_swizzle(unsigned int swz) 142{ 143 switch (swz) { 144 case RC_SWIZZLE_ZERO: 145 case RC_SWIZZLE_UNUSED: 146 swz = 4; 147 break; 148 case RC_SWIZZLE_HALF: 149 swz = 5; 150 break; 151 case RC_SWIZZLE_ONE: 152 swz = 6; 153 break; 154 } 155 156 return swz; 157} 158 159static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg) 160{ 161 unsigned int t = inst->RGB.Arg[arg].Source; 162 int comp; 163 t |= inst->RGB.Arg[arg].Negate << 11; 164 t |= inst->RGB.Arg[arg].Abs << 12; 165 166 for(comp = 0; comp < 3; ++comp) 167 t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2); 168 169 return t; 170} 171 172static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i) 173{ 174 unsigned int t = inst->Alpha.Arg[i].Source; 175 t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2; 176 t |= inst->Alpha.Arg[i].Negate << 5; 177 t |= inst->Alpha.Arg[i].Abs << 6; 178 return t; 179} 180 181static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func) 182{ 183 switch(func) { 184 case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ; 185 case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT; 186 case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE; 187 case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE; 188 default: 189 rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func); 190 return 0; 191 } 192} 193 194static void use_temporary(struct r500_fragment_program_code* code, unsigned int index) 195{ 196 if (index > code->max_temp_idx) 197 code->max_temp_idx = index; 198} 199 200static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src) 201{ 202 /* From docs: 203 * Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST. 204 * MSB = 1 << 7 */ 205 if (!src.Used) 206 return 1 << 7; 207 208 if (src.File == RC_FILE_CONSTANT) { 209 return src.Index | R500_RGB_ADDR0_CONST; 210 } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { 211 use_temporary(code, src.Index); 212 return src.Index; 213 } else if (src.File == RC_FILE_INLINE) { 214 return src.Index | (1 << 7); 215 } 216 217 return 0; 218} 219 220/** 221 * NOP the specified instruction if it is not a texture lookup. 222 */ 223static void alu_nop(struct r300_fragment_program_compiler *c, int ip) 224{ 225 PROG_CODE; 226 227 if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) { 228 code->inst[ip].inst0 |= R500_INST_NOP; 229 } 230} 231 232/** 233 * Emit a paired ALU instruction. 234 */ 235static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst) 236{ 237 int ip; 238 PROG_CODE; 239 240 if (code->inst_end >= c->Base.max_alu_insts-1) { 241 error("emit_alu: Too many instructions"); 242 return; 243 } 244 245 ip = ++code->inst_end; 246 247 /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */ 248 if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX || 249 inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) { 250 if (ip > 0) { 251 alu_nop(c, ip - 1); 252 } 253 } 254 255 code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode); 256 code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode); 257 258 if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) { 259 code->inst[ip].inst0 = R500_INST_TYPE_OUT; 260 if (inst->WriteALUResult) { 261 error("Cannot write output and ALU result at the same time"); 262 return; 263 } 264 } else { 265 code->inst[ip].inst0 = R500_INST_TYPE_ALU; 266 } 267 code->inst[ip].inst0 |= (inst->SemWait << R500_INST_TEX_SEM_WAIT_SHIFT); 268 269 code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11); 270 code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0; 271 code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); 272 if (inst->Nop) { 273 code->inst[ip].inst0 |= R500_INST_NOP; 274 } 275 if (inst->Alpha.DepthWriteMask) { 276 code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; 277 c->code->writes_depth = 1; 278 } 279 280 code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex); 281 code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex); 282 use_temporary(code, inst->Alpha.DestIndex); 283 use_temporary(code, inst->RGB.DestIndex); 284 285 if (inst->RGB.Saturate) 286 code->inst[ip].inst0 |= R500_INST_RGB_CLAMP; 287 if (inst->Alpha.Saturate) 288 code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP; 289 290 /* Set the presubtract operation. */ 291 switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { 292 case RC_PRESUB_BIAS: 293 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0; 294 break; 295 case RC_PRESUB_SUB: 296 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0; 297 break; 298 case RC_PRESUB_ADD: 299 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0; 300 break; 301 case RC_PRESUB_INV: 302 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0; 303 break; 304 default: 305 break; 306 } 307 switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { 308 case RC_PRESUB_BIAS: 309 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0; 310 break; 311 case RC_PRESUB_SUB: 312 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0; 313 break; 314 case RC_PRESUB_ADD: 315 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0; 316 break; 317 case RC_PRESUB_INV: 318 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0; 319 break; 320 default: 321 break; 322 } 323 324 /* Set the output modifier */ 325 code->inst[ip].inst3 |= inst->RGB.Omod << R500_ALU_RGB_OMOD_SHIFT; 326 code->inst[ip].inst4 |= inst->Alpha.Omod << R500_ALPHA_OMOD_SHIFT; 327 328 code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0])); 329 code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1])); 330 code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2])); 331 332 code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0])); 333 code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1])); 334 code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2])); 335 336 code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT; 337 code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT; 338 code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT; 339 340 code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT; 341 code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; 342 code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; 343 344 code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target); 345 code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target); 346 347 if (inst->WriteALUResult) { 348 code->inst[ip].inst3 |= R500_ALU_RGB_WMASK; 349 350 if (inst->WriteALUResult == RC_ALURESULT_X) 351 code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED; 352 else 353 code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA; 354 355 code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare); 356 } 357} 358 359static unsigned int translate_strq_swizzle(unsigned int swizzle) 360{ 361 unsigned int swiz = 0; 362 int i; 363 for (i = 0; i < 4; i++) 364 swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2; 365 return swiz; 366} 367 368/** 369 * Emit a single TEX instruction 370 */ 371static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst) 372{ 373 int ip; 374 PROG_CODE; 375 376 if (code->inst_end >= c->Base.max_alu_insts-1) { 377 error("emit_tex: Too many instructions"); 378 return 0; 379 } 380 381 ip = ++code->inst_end; 382 383 code->inst[ip].inst0 = R500_INST_TYPE_TEX 384 | (inst->DstReg.WriteMask << 11) 385 | (inst->TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT); 386 code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) 387 | (inst->TexSemAcquire << R500_TEX_SEM_ACQUIRE_SHIFT); 388 389 if (inst->TexSrcTarget == RC_TEXTURE_RECT) 390 code->inst[ip].inst1 |= R500_TEX_UNSCALED; 391 392 switch (inst->Opcode) { 393 case RC_OPCODE_KIL: 394 code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL; 395 break; 396 case RC_OPCODE_TEX: 397 code->inst[ip].inst1 |= R500_TEX_INST_LD; 398 break; 399 case RC_OPCODE_TXB: 400 code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS; 401 break; 402 case RC_OPCODE_TXP: 403 code->inst[ip].inst1 |= R500_TEX_INST_PROJ; 404 break; 405 case RC_OPCODE_TXD: 406 code->inst[ip].inst1 |= R500_TEX_INST_DXDY; 407 break; 408 case RC_OPCODE_TXL: 409 code->inst[ip].inst1 |= R500_TEX_INST_LOD; 410 break; 411 default: 412 error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name); 413 } 414 415 use_temporary(code, inst->SrcReg[0].Index); 416 if (inst->Opcode != RC_OPCODE_KIL) 417 use_temporary(code, inst->DstReg.Index); 418 419 code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) 420 | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8) 421 | R500_TEX_DST_ADDR(inst->DstReg.Index) 422 | (GET_SWZ(inst->TexSwizzle, 0) << 24) 423 | (GET_SWZ(inst->TexSwizzle, 1) << 26) 424 | (GET_SWZ(inst->TexSwizzle, 2) << 28) 425 | (GET_SWZ(inst->TexSwizzle, 3) << 30) 426 ; 427 428 if (inst->Opcode == RC_OPCODE_TXD) { 429 use_temporary(code, inst->SrcReg[1].Index); 430 use_temporary(code, inst->SrcReg[2].Index); 431 432 /* DX and DY parameters are specified in a separate register. */ 433 code->inst[ip].inst3 = 434 R500_DX_ADDR(inst->SrcReg[1].Index) | 435 (translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) | 436 R500_DY_ADDR(inst->SrcReg[2].Index) | 437 (translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24); 438 } 439 440 return 1; 441} 442 443static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst) 444{ 445 unsigned int newip; 446 447 if (s->Code->inst_end >= s->C->max_alu_insts-1) { 448 rc_error(s->C, "emit_tex: Too many instructions"); 449 return; 450 } 451 452 newip = ++s->Code->inst_end; 453 454 /* Currently all loops use the same integer constant to intialize 455 * the loop variables. */ 456 if(!s->Code->int_constants[0]) { 457 s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff); 458 s->Code->int_constant_count = 1; 459 } 460 s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT; 461 462 switch(inst->U.I.Opcode){ 463 struct branch_info * branch; 464 struct r500_loop_info * loop; 465 case RC_OPCODE_BGNLOOP: 466 memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info, 467 s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1); 468 469 loop = &s->Loops[s->CurrentLoopDepth++]; 470 memset(loop, 0, sizeof(struct r500_loop_info)); 471 loop->BranchDepth = s->CurrentBranchDepth; 472 loop->BgnLoop = newip; 473 474 s->Code->inst[newip].inst2 = R500_FC_OP_LOOP 475 | R500_FC_JUMP_FUNC(0x00) 476 | R500_FC_IGNORE_UNCOVERED 477 ; 478 break; 479 case RC_OPCODE_BRK: 480 loop = &s->Loops[s->CurrentLoopDepth - 1]; 481 memory_pool_array_reserve(&s->C->Pool, int, loop->Brks, 482 loop->BrkCount, loop->BrkReserved, 1); 483 484 loop->Brks[loop->BrkCount++] = newip; 485 s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP 486 | R500_FC_JUMP_FUNC(0xff) 487 | R500_FC_B_OP1_DECR 488 | R500_FC_B_POP_CNT( 489 s->CurrentBranchDepth - loop->BranchDepth) 490 | R500_FC_IGNORE_UNCOVERED 491 ; 492 break; 493 494 case RC_OPCODE_CONT: 495 loop = &s->Loops[s->CurrentLoopDepth - 1]; 496 memory_pool_array_reserve(&s->C->Pool, int, loop->Conts, 497 loop->ContCount, loop->ContReserved, 1); 498 loop->Conts[loop->ContCount++] = newip; 499 s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE 500 | R500_FC_JUMP_FUNC(0xff) 501 | R500_FC_B_OP1_DECR 502 | R500_FC_B_POP_CNT( 503 s->CurrentBranchDepth - loop->BranchDepth) 504 | R500_FC_IGNORE_UNCOVERED 505 ; 506 break; 507 508 case RC_OPCODE_ENDLOOP: 509 { 510 loop = &s->Loops[s->CurrentLoopDepth - 1]; 511 /* Emit ENDLOOP */ 512 s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP 513 | R500_FC_JUMP_FUNC(0xff) 514 | R500_FC_JUMP_ANY 515 | R500_FC_IGNORE_UNCOVERED 516 ; 517 /* The constant integer at index 0 is used by all loops. */ 518 s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0) 519 | R500_FC_JUMP_ADDR(loop->BgnLoop + 1) 520 ; 521 522 /* Set jump address and int constant for BGNLOOP */ 523 s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0) 524 | R500_FC_JUMP_ADDR(newip) 525 ; 526 527 /* Set jump address for the BRK instructions. */ 528 while(loop->BrkCount--) { 529 s->Code->inst[loop->Brks[loop->BrkCount]].inst3 = 530 R500_FC_JUMP_ADDR(newip + 1); 531 } 532 533 /* Set jump address for CONT instructions. */ 534 while(loop->ContCount--) { 535 s->Code->inst[loop->Conts[loop->ContCount]].inst3 = 536 R500_FC_JUMP_ADDR(newip); 537 } 538 s->CurrentLoopDepth--; 539 break; 540 } 541 case RC_OPCODE_IF: 542 if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) { 543 rc_error(s->C, "Branch depth exceeds hardware limit"); 544 return; 545 } 546 memory_pool_array_reserve(&s->C->Pool, struct branch_info, 547 s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1); 548 549 branch = &s->Branches[s->CurrentBranchDepth++]; 550 branch->If = newip; 551 branch->Else = -1; 552 branch->Endif = -1; 553 554 if (s->CurrentBranchDepth > s->MaxBranchDepth) 555 s->MaxBranchDepth = s->CurrentBranchDepth; 556 557 /* actual instruction is filled in at ENDIF time */ 558 break; 559 560 case RC_OPCODE_ELSE: 561 if (!s->CurrentBranchDepth) { 562 rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); 563 return; 564 } 565 566 branch = &s->Branches[s->CurrentBranchDepth - 1]; 567 branch->Else = newip; 568 569 /* actual instruction is filled in at ENDIF time */ 570 break; 571 572 case RC_OPCODE_ENDIF: 573 if (!s->CurrentBranchDepth) { 574 rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); 575 return; 576 } 577 578 branch = &s->Branches[s->CurrentBranchDepth - 1]; 579 branch->Endif = newip; 580 581 s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP 582 | R500_FC_A_OP_NONE /* no address stack */ 583 | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */ 584 | R500_FC_B_OP0_DECR /* decrement branch counter if stay */ 585 | R500_FC_B_OP1_NONE /* no branch counter if stay */ 586 | R500_FC_B_POP_CNT(1) 587 ; 588 s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); 589 s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP 590 | R500_FC_A_OP_NONE /* no address stack */ 591 | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */ 592 | R500_FC_B_OP0_INCR /* increment branch counter if stay */ 593 | R500_FC_IGNORE_UNCOVERED 594 ; 595 596 if (branch->Else >= 0) { 597 /* increment branch counter also if jump */ 598 s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR; 599 s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1); 600 601 s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP 602 | R500_FC_A_OP_NONE /* no address stack */ 603 | R500_FC_B_ELSE /* all active pixels want to jump */ 604 | R500_FC_B_OP0_NONE /* no counter op if stay */ 605 | R500_FC_B_OP1_DECR /* decrement branch counter if jump */ 606 | R500_FC_B_POP_CNT(1) 607 ; 608 s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); 609 } else { 610 /* don't touch branch counter on jump */ 611 s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE; 612 s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); 613 } 614 615 616 s->CurrentBranchDepth--; 617 break; 618 default: 619 rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name); 620 } 621} 622 623void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) 624{ 625 struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; 626 struct emit_state s; 627 struct r500_fragment_program_code *code = &compiler->code->code.r500; 628 629 memset(&s, 0, sizeof(s)); 630 s.C = &compiler->Base; 631 s.Code = code; 632 633 memset(code, 0, sizeof(*code)); 634 code->max_temp_idx = 1; 635 code->inst_end = -1; 636 637 for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; 638 inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; 639 inst = inst->Next) { 640 if (inst->Type == RC_INSTRUCTION_NORMAL) { 641 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 642 643 if (opcode->IsFlowControl) { 644 emit_flowcontrol(&s, inst); 645 } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { 646 continue; 647 } else { 648 emit_tex(compiler, &inst->U.I); 649 } 650 } else { 651 emit_paired(compiler, &inst->U.P); 652 } 653 } 654 655 if (code->max_temp_idx >= compiler->Base.max_temp_regs) 656 rc_error(&compiler->Base, "Too many hardware temporaries used"); 657 658 if (compiler->Base.Error) 659 return; 660 661 if (code->inst_end == -1 || 662 (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { 663 int ip; 664 665 /* This may happen when dead-code elimination is disabled or 666 * when most of the fragment program logic is leading to a KIL */ 667 if (code->inst_end >= compiler->Base.max_alu_insts-1) { 668 rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions"); 669 return; 670 } 671 672 ip = ++code->inst_end; 673 code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; 674 } 675 676 /* Make sure TEX_SEM_WAIT is set on the last instruction */ 677 code->inst[code->inst_end].inst0 |= R500_INST_TEX_SEM_WAIT; 678 679 /* Enable full flow control mode if we are using loops or have if 680 * statements nested at least four deep. */ 681 if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) { 682 if (code->max_temp_idx < 1) 683 code->max_temp_idx = 1; 684 685 code->us_fc_ctrl |= R500_FC_FULL_FC_EN; 686 } 687} 688