1/* 2 * Copyright 2010 Tom Stellard <tstellar@gmail.com> 3 * 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining 7 * a copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sublicense, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial 16 * portions of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 */ 27 28/** 29 * \file 30 */ 31 32#include <stdio.h> 33#include "c99_math.h" 34#include "radeon_emulate_loops.h" 35#include "radeon_compiler.h" 36#include "radeon_compiler_util.h" 37#include "radeon_dataflow.h" 38 39#define VERBOSE 0 40 41#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) 42 43struct const_value { 44 struct radeon_compiler * C; 45 struct rc_src_register * Src; 46 float Value; 47 int HasValue; 48}; 49 50struct count_inst { 51 struct radeon_compiler * C; 52 int Index; 53 rc_swizzle Swz; 54 float Amount; 55 int Unknown; 56 unsigned BranchDepth; 57}; 58 59static unsigned int loop_max_possible_iterations(struct radeon_compiler *c, 60 struct loop_info * loop) 61{ 62 unsigned int total_i = rc_recompute_ips(c); 63 unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1; 64 /* +1 because the program already has one iteration of the loop. */ 65 return 1 + ((c->max_alu_insts - total_i) / loop_i); 66} 67 68static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop, 69 unsigned int iterations) 70{ 71 unsigned int i; 72 struct rc_instruction * ptr; 73 struct rc_instruction * first = loop->BeginLoop->Next; 74 struct rc_instruction * last = loop->EndLoop->Prev; 75 struct rc_instruction * append_to = last; 76 rc_remove_instruction(loop->BeginLoop); 77 rc_remove_instruction(loop->EndLoop); 78 for( i = 1; i < iterations; i++){ 79 for(ptr = first; ptr != last->Next; ptr = ptr->Next){ 80 struct rc_instruction *new = rc_alloc_instruction(c); 81 memcpy(new, ptr, sizeof(struct rc_instruction)); 82 rc_insert_instruction(append_to, new); 83 append_to = new; 84 } 85 } 86} 87 88 89static void update_const_value(void * data, struct rc_instruction * inst, 90 rc_register_file file, unsigned int index, unsigned int mask) 91{ 92 struct const_value * value = data; 93 if(value->Src->File != file || 94 value->Src->Index != index || 95 !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){ 96 return; 97 } 98 switch(inst->U.I.Opcode){ 99 case RC_OPCODE_MOV: 100 if(!rc_src_reg_is_immediate(value->C, inst->U.I.SrcReg[0].File, 101 inst->U.I.SrcReg[0].Index)){ 102 return; 103 } 104 value->HasValue = 1; 105 value->Value = 106 rc_get_constant_value(value->C, 107 inst->U.I.SrcReg[0].Index, 108 inst->U.I.SrcReg[0].Swizzle, 109 inst->U.I.SrcReg[0].Negate, 0); 110 break; 111 } 112} 113 114static void get_incr_amount(void * data, struct rc_instruction * inst, 115 rc_register_file file, unsigned int index, unsigned int mask) 116{ 117 struct count_inst * count_inst = data; 118 int amnt_src_index; 119 const struct rc_opcode_info * opcode; 120 float amount; 121 122 if(file != RC_FILE_TEMPORARY || 123 count_inst->Index != index || 124 (1 << GET_SWZ(count_inst->Swz,0) != mask)){ 125 return; 126 } 127 128 /* XXX: Give up if the counter is modified within an IF block. We 129 * could handle this case with better analysis. */ 130 if (count_inst->BranchDepth > 0) { 131 count_inst->Unknown = 1; 132 return; 133 } 134 135 /* Find the index of the counter register. */ 136 opcode = rc_get_opcode_info(inst->U.I.Opcode); 137 if(opcode->NumSrcRegs != 2){ 138 count_inst->Unknown = 1; 139 return; 140 } 141 if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY && 142 inst->U.I.SrcReg[0].Index == count_inst->Index && 143 inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){ 144 amnt_src_index = 1; 145 } else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY && 146 inst->U.I.SrcReg[1].Index == count_inst->Index && 147 inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){ 148 amnt_src_index = 0; 149 } 150 else{ 151 count_inst->Unknown = 1; 152 return; 153 } 154 if(rc_src_reg_is_immediate(count_inst->C, 155 inst->U.I.SrcReg[amnt_src_index].File, 156 inst->U.I.SrcReg[amnt_src_index].Index)){ 157 amount = rc_get_constant_value(count_inst->C, 158 inst->U.I.SrcReg[amnt_src_index].Index, 159 inst->U.I.SrcReg[amnt_src_index].Swizzle, 160 inst->U.I.SrcReg[amnt_src_index].Negate, 0); 161 } 162 else{ 163 count_inst->Unknown = 1 ; 164 return; 165 } 166 switch(inst->U.I.Opcode){ 167 case RC_OPCODE_ADD: 168 count_inst->Amount += amount; 169 break; 170 case RC_OPCODE_SUB: 171 if(amnt_src_index == 0){ 172 count_inst->Unknown = 0; 173 return; 174 } 175 count_inst->Amount -= amount; 176 break; 177 default: 178 count_inst->Unknown = 1; 179 return; 180 } 181} 182 183/** 184 * If c->max_alu_inst is -1, then all eligible loops will be unrolled regardless 185 * of how many iterations they have. 186 */ 187static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop) 188{ 189 int end_loops; 190 int iterations; 191 struct count_inst count_inst; 192 float limit_value; 193 struct rc_src_register * counter; 194 struct rc_src_register * limit; 195 struct const_value counter_value; 196 struct rc_instruction * inst; 197 198 /* Find the counter and the upper limit */ 199 200 if(rc_src_reg_is_immediate(c, loop->Cond->U.I.SrcReg[0].File, 201 loop->Cond->U.I.SrcReg[0].Index)){ 202 limit = &loop->Cond->U.I.SrcReg[0]; 203 counter = &loop->Cond->U.I.SrcReg[1]; 204 } 205 else if(rc_src_reg_is_immediate(c, loop->Cond->U.I.SrcReg[1].File, 206 loop->Cond->U.I.SrcReg[1].Index)){ 207 limit = &loop->Cond->U.I.SrcReg[1]; 208 counter = &loop->Cond->U.I.SrcReg[0]; 209 } 210 else{ 211 DBG("No constant limit.\n"); 212 return 0; 213 } 214 215 /* Find the initial value of the counter */ 216 counter_value.Src = counter; 217 counter_value.Value = 0.0f; 218 counter_value.HasValue = 0; 219 counter_value.C = c; 220 for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop; 221 inst = inst->Next){ 222 rc_for_all_writes_mask(inst, update_const_value, &counter_value); 223 } 224 if(!counter_value.HasValue){ 225 DBG("Initial counter value cannot be determined.\n"); 226 return 0; 227 } 228 DBG("Initial counter value is %f\n", counter_value.Value); 229 /* Determine how the counter is modified each loop */ 230 count_inst.C = c; 231 count_inst.Index = counter->Index; 232 count_inst.Swz = counter->Swizzle; 233 count_inst.Amount = 0.0f; 234 count_inst.Unknown = 0; 235 count_inst.BranchDepth = 0; 236 end_loops = 1; 237 for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){ 238 switch(inst->U.I.Opcode){ 239 /* XXX In the future we might want to try to unroll nested 240 * loops here.*/ 241 case RC_OPCODE_BGNLOOP: 242 end_loops++; 243 break; 244 case RC_OPCODE_ENDLOOP: 245 loop->EndLoop = inst; 246 end_loops--; 247 break; 248 case RC_OPCODE_BRK: 249 /* Don't unroll loops if it has a BRK instruction 250 * other one used when testing the main conditional 251 * of the loop. */ 252 253 /* Make sure we haven't entered a nested loops. */ 254 if(inst != loop->Brk && end_loops == 1) { 255 return 0; 256 } 257 break; 258 case RC_OPCODE_IF: 259 count_inst.BranchDepth++; 260 break; 261 case RC_OPCODE_ENDIF: 262 count_inst.BranchDepth--; 263 break; 264 default: 265 rc_for_all_writes_mask(inst, get_incr_amount, &count_inst); 266 if(count_inst.Unknown){ 267 return 0; 268 } 269 break; 270 } 271 } 272 /* Infinite loop */ 273 if(count_inst.Amount == 0.0f){ 274 return 0; 275 } 276 DBG("Counter is increased by %f each iteration.\n", count_inst.Amount); 277 /* Calculate the number of iterations of this loop. Keeping this 278 * simple, since we only support increment and decrement loops. 279 */ 280 limit_value = rc_get_constant_value(c, limit->Index, limit->Swizzle, 281 limit->Negate, 0); 282 DBG("Limit is %f.\n", limit_value); 283 /* The iteration calculations are opposite of what you would expect. 284 * In a normal loop, if the condition is met, then loop continues, but 285 * with our loops, if the condition is met, the is exited. */ 286 switch(loop->Cond->U.I.Opcode){ 287 case RC_OPCODE_SGE: 288 case RC_OPCODE_SLE: 289 iterations = (int) ceilf((limit_value - counter_value.Value) / 290 count_inst.Amount); 291 break; 292 293 case RC_OPCODE_SGT: 294 case RC_OPCODE_SLT: 295 iterations = (int) floorf((limit_value - counter_value.Value) / 296 count_inst.Amount) + 1; 297 break; 298 default: 299 return 0; 300 } 301 302 if (c->max_alu_insts > 0 303 && iterations > loop_max_possible_iterations(c, loop)) { 304 return 0; 305 } 306 307 DBG("Loop will have %d iterations.\n", iterations); 308 309 /* Prepare loop for unrolling */ 310 rc_remove_instruction(loop->Cond); 311 rc_remove_instruction(loop->If); 312 rc_remove_instruction(loop->Brk); 313 rc_remove_instruction(loop->EndIf); 314 315 unroll_loop(c, loop, iterations); 316 loop->EndLoop = NULL; 317 return 1; 318} 319 320/** 321 * @param c 322 * @param loop 323 * @param inst A pointer to a BGNLOOP instruction. 324 * @return 1 if all of the members of loop where set. 325 * @return 0 if there was an error and some members of loop are still NULL. 326 */ 327static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop, 328 struct rc_instruction * inst) 329{ 330 struct rc_instruction * ptr; 331 332 if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){ 333 rc_error(c, "%s: expected BGNLOOP", __FUNCTION__); 334 return 0; 335 } 336 337 memset(loop, 0, sizeof(struct loop_info)); 338 339 loop->BeginLoop = inst; 340 341 for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) { 342 343 if (ptr == &c->Program.Instructions) { 344 rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n", 345 __FUNCTION__); 346 return 0; 347 } 348 349 switch(ptr->U.I.Opcode){ 350 case RC_OPCODE_BGNLOOP: 351 { 352 /* Nested loop, skip ahead to the end. */ 353 unsigned int loop_depth = 1; 354 for(ptr = ptr->Next; ptr != &c->Program.Instructions; 355 ptr = ptr->Next){ 356 if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) { 357 loop_depth++; 358 } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) { 359 if (!--loop_depth) { 360 break; 361 } 362 } 363 } 364 if (ptr == &c->Program.Instructions) { 365 rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n", 366 __FUNCTION__); 367 return 0; 368 } 369 break; 370 } 371 case RC_OPCODE_BRK: 372 { 373 struct rc_src_register *src; 374 if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF 375 || ptr->Prev->U.I.Opcode != RC_OPCODE_IF 376 || loop->Brk){ 377 continue; 378 } 379 loop->Brk = ptr; 380 loop->If = ptr->Prev; 381 loop->EndIf = ptr->Next; 382 src = &loop->If->U.I.SrcReg[0]; 383 384 for (loop->Cond = loop->If->Prev; 385 loop->Cond->U.I.Opcode != RC_OPCODE_BGNLOOP; 386 loop->Cond = loop->Cond->Prev) { 387 388 const struct rc_dst_register *dst = &loop->Cond->U.I.DstReg; 389 if (dst->File == src->File && 390 dst->Index == src->Index && 391 dst->WriteMask & (rc_swizzle_to_writemask(src->Swizzle))) { 392 if (loop->Cond->U.I.Opcode == RC_OPCODE_CMP) { 393 src = &loop->Cond->U.I.SrcReg[0]; 394 continue; 395 } 396 break; 397 } 398 } 399 400 if (loop->Cond->U.I.Opcode == RC_OPCODE_BGNLOOP) { 401 rc_error(c, "%s: Cannot find condition for if\n", __FUNCTION__); 402 return 0; 403 } 404 break; 405 } 406 case RC_OPCODE_ENDLOOP: 407 loop->EndLoop = ptr; 408 break; 409 } 410 } 411 412 if (loop->BeginLoop && loop->Brk && loop->If && loop->EndIf 413 && loop->Cond && loop->EndLoop) { 414 return 1; 415 } 416 return 0; 417} 418 419/** 420 * This function prepares a loop to be unrolled by converting it into an if 421 * statement. Here is an outline of the conversion process: 422 * BGNLOOP; -> BGNLOOP; 423 * <Additional conditional code> -> <Additional conditional code> 424 * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2]; 425 * IF temp[0]; -> IF temp[0]; 426 * BRK; -> 427 * ENDIF; -> <Loop Body> 428 * <Loop Body> -> ENDIF; 429 * ENDLOOP; -> ENDLOOP 430 * 431 * @param inst A pointer to a BGNLOOP instruction. 432 * @return 1 for success, 0 for failure 433 */ 434static int transform_loop(struct emulate_loop_state * s, 435 struct rc_instruction * inst) 436{ 437 struct loop_info * loop; 438 439 memory_pool_array_reserve(&s->C->Pool, struct loop_info, 440 s->Loops, s->LoopCount, s->LoopReserved, 1); 441 442 loop = &s->Loops[s->LoopCount++]; 443 444 if (!build_loop_info(s->C, loop, inst)) { 445 rc_error(s->C, "Failed to build loop info\n"); 446 return 0; 447 } 448 449 if(try_unroll_loop(s->C, loop)){ 450 return 1; 451 } 452 453 /* Reverse the conditional instruction */ 454 switch(loop->Cond->U.I.Opcode){ 455 case RC_OPCODE_SGE: 456 loop->Cond->U.I.Opcode = RC_OPCODE_SLT; 457 break; 458 case RC_OPCODE_SLT: 459 loop->Cond->U.I.Opcode = RC_OPCODE_SGE; 460 break; 461 case RC_OPCODE_SLE: 462 loop->Cond->U.I.Opcode = RC_OPCODE_SGT; 463 break; 464 case RC_OPCODE_SGT: 465 loop->Cond->U.I.Opcode = RC_OPCODE_SLE; 466 break; 467 case RC_OPCODE_SEQ: 468 loop->Cond->U.I.Opcode = RC_OPCODE_SNE; 469 break; 470 case RC_OPCODE_SNE: 471 loop->Cond->U.I.Opcode = RC_OPCODE_SEQ; 472 break; 473 default: 474 rc_error(s->C, "loop->Cond is not a conditional.\n"); 475 return 0; 476 } 477 478 /* Prepare the loop to be emulated */ 479 rc_remove_instruction(loop->Brk); 480 rc_remove_instruction(loop->EndIf); 481 rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf); 482 return 1; 483} 484 485void rc_transform_loops(struct radeon_compiler *c, void *user) 486{ 487 struct emulate_loop_state * s = &c->loop_state; 488 struct rc_instruction * ptr; 489 490 memset(s, 0, sizeof(struct emulate_loop_state)); 491 s->C = c; 492 for(ptr = s->C->Program.Instructions.Next; 493 ptr != &s->C->Program.Instructions; ptr = ptr->Next) { 494 if(ptr->Type == RC_INSTRUCTION_NORMAL && 495 ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ 496 if (!transform_loop(s, ptr)) 497 return; 498 } 499 } 500} 501 502void rc_unroll_loops(struct radeon_compiler *c, void *user) 503{ 504 struct rc_instruction * inst; 505 struct loop_info loop; 506 507 for(inst = c->Program.Instructions.Next; 508 inst != &c->Program.Instructions; inst = inst->Next) { 509 510 if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { 511 if (build_loop_info(c, &loop, inst)) { 512 try_unroll_loop(c, &loop); 513 } 514 } 515 } 516} 517 518void rc_emulate_loops(struct radeon_compiler *c, void *user) 519{ 520 struct emulate_loop_state * s = &c->loop_state; 521 int i; 522 /* Iterate backwards of the list of loops so that loops that nested 523 * loops are unrolled first. 524 */ 525 for( i = s->LoopCount - 1; i >= 0; i-- ){ 526 unsigned int iterations; 527 528 if(!s->Loops[i].EndLoop){ 529 continue; 530 } 531 iterations = loop_max_possible_iterations(s->C, &s->Loops[i]); 532 unroll_loop(s->C, &s->Loops[i], iterations); 533 } 534} 535