1/* 2 * Copyright 2010 Tom Stellard <tstellar@gmail.com> 3 * 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining 7 * a copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sublicense, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial 16 * portions of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 */ 27 28/** 29 * \file 30 */ 31 32#include "radeon_emulate_loops.h" 33 34#include "radeon_compiler.h" 35#include "radeon_compiler_util.h" 36#include "radeon_dataflow.h" 37 38#define VERBOSE 0 39 40#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) 41 42struct const_value { 43 struct radeon_compiler * C; 44 struct rc_src_register * Src; 45 float Value; 46 int HasValue; 47}; 48 49struct count_inst { 50 struct radeon_compiler * C; 51 int Index; 52 rc_swizzle Swz; 53 float Amount; 54 int Unknown; 55 unsigned BranchDepth; 56}; 57 58static unsigned int loop_max_possible_iterations(struct radeon_compiler *c, 59 struct loop_info * loop) 60{ 61 unsigned int total_i = rc_recompute_ips(c); 62 unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1; 63 /* +1 because the program already has one iteration of the loop. */ 64 return 1 + ((c->max_alu_insts - total_i) / loop_i); 65} 66 67static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop, 68 unsigned int iterations) 69{ 70 unsigned int i; 71 struct rc_instruction * ptr; 72 struct rc_instruction * first = loop->BeginLoop->Next; 73 struct rc_instruction * last = loop->EndLoop->Prev; 74 struct rc_instruction * append_to = last; 75 rc_remove_instruction(loop->BeginLoop); 76 rc_remove_instruction(loop->EndLoop); 77 for( i = 1; i < iterations; i++){ 78 for(ptr = first; ptr != last->Next; ptr = ptr->Next){ 79 struct rc_instruction *new = rc_alloc_instruction(c); 80 memcpy(new, ptr, sizeof(struct rc_instruction)); 81 rc_insert_instruction(append_to, new); 82 append_to = new; 83 } 84 } 85} 86 87 88static void update_const_value(void * data, struct rc_instruction * inst, 89 rc_register_file file, unsigned int index, unsigned int mask) 90{ 91 struct const_value * value = data; 92 if(value->Src->File != file || 93 value->Src->Index != index || 94 !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){ 95 return; 96 } 97 switch(inst->U.I.Opcode){ 98 case RC_OPCODE_MOV: 99 if(!rc_src_reg_is_immediate(value->C, inst->U.I.SrcReg[0].File, 100 inst->U.I.SrcReg[0].Index)){ 101 return; 102 } 103 value->HasValue = 1; 104 value->Value = 105 rc_get_constant_value(value->C, 106 inst->U.I.SrcReg[0].Index, 107 inst->U.I.SrcReg[0].Swizzle, 108 inst->U.I.SrcReg[0].Negate, 0); 109 break; 110 } 111} 112 113static void get_incr_amount(void * data, struct rc_instruction * inst, 114 rc_register_file file, unsigned int index, unsigned int mask) 115{ 116 struct count_inst * count_inst = data; 117 int amnt_src_index; 118 const struct rc_opcode_info * opcode; 119 float amount; 120 121 if(file != RC_FILE_TEMPORARY || 122 count_inst->Index != index || 123 (1 << GET_SWZ(count_inst->Swz,0) != mask)){ 124 return; 125 } 126 127 /* XXX: Give up if the counter is modified within an IF block. We 128 * could handle this case with better analysis. */ 129 if (count_inst->BranchDepth > 0) { 130 count_inst->Unknown = 1; 131 return; 132 } 133 134 /* Find the index of the counter register. */ 135 opcode = rc_get_opcode_info(inst->U.I.Opcode); 136 if(opcode->NumSrcRegs != 2){ 137 count_inst->Unknown = 1; 138 return; 139 } 140 if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY && 141 inst->U.I.SrcReg[0].Index == count_inst->Index && 142 inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){ 143 amnt_src_index = 1; 144 } else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY && 145 inst->U.I.SrcReg[1].Index == count_inst->Index && 146 inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){ 147 amnt_src_index = 0; 148 } 149 else{ 150 count_inst->Unknown = 1; 151 return; 152 } 153 if(rc_src_reg_is_immediate(count_inst->C, 154 inst->U.I.SrcReg[amnt_src_index].File, 155 inst->U.I.SrcReg[amnt_src_index].Index)){ 156 amount = rc_get_constant_value(count_inst->C, 157 inst->U.I.SrcReg[amnt_src_index].Index, 158 inst->U.I.SrcReg[amnt_src_index].Swizzle, 159 inst->U.I.SrcReg[amnt_src_index].Negate, 0); 160 } 161 else{ 162 count_inst->Unknown = 1 ; 163 return; 164 } 165 switch(inst->U.I.Opcode){ 166 case RC_OPCODE_ADD: 167 count_inst->Amount += amount; 168 break; 169 case RC_OPCODE_SUB: 170 if(amnt_src_index == 0){ 171 count_inst->Unknown = 0; 172 return; 173 } 174 count_inst->Amount -= amount; 175 break; 176 default: 177 count_inst->Unknown = 1; 178 return; 179 } 180} 181 182/** 183 * If c->max_alu_inst is -1, then all eligible loops will be unrolled regardless 184 * of how many iterations they have. 185 */ 186static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop) 187{ 188 int end_loops; 189 int iterations; 190 struct count_inst count_inst; 191 float limit_value; 192 struct rc_src_register * counter; 193 struct rc_src_register * limit; 194 struct const_value counter_value; 195 struct rc_instruction * inst; 196 197 /* Find the counter and the upper limit */ 198 199 if(rc_src_reg_is_immediate(c, loop->Cond->U.I.SrcReg[0].File, 200 loop->Cond->U.I.SrcReg[0].Index)){ 201 limit = &loop->Cond->U.I.SrcReg[0]; 202 counter = &loop->Cond->U.I.SrcReg[1]; 203 } 204 else if(rc_src_reg_is_immediate(c, loop->Cond->U.I.SrcReg[1].File, 205 loop->Cond->U.I.SrcReg[1].Index)){ 206 limit = &loop->Cond->U.I.SrcReg[1]; 207 counter = &loop->Cond->U.I.SrcReg[0]; 208 } 209 else{ 210 DBG("No constant limit.\n"); 211 return 0; 212 } 213 214 /* Find the initial value of the counter */ 215 counter_value.Src = counter; 216 counter_value.Value = 0.0f; 217 counter_value.HasValue = 0; 218 counter_value.C = c; 219 for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop; 220 inst = inst->Next){ 221 rc_for_all_writes_mask(inst, update_const_value, &counter_value); 222 } 223 if(!counter_value.HasValue){ 224 DBG("Initial counter value cannot be determined.\n"); 225 return 0; 226 } 227 DBG("Initial counter value is %f\n", counter_value.Value); 228 /* Determine how the counter is modified each loop */ 229 count_inst.C = c; 230 count_inst.Index = counter->Index; 231 count_inst.Swz = counter->Swizzle; 232 count_inst.Amount = 0.0f; 233 count_inst.Unknown = 0; 234 count_inst.BranchDepth = 0; 235 end_loops = 1; 236 for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){ 237 switch(inst->U.I.Opcode){ 238 /* XXX In the future we might want to try to unroll nested 239 * loops here.*/ 240 case RC_OPCODE_BGNLOOP: 241 end_loops++; 242 break; 243 case RC_OPCODE_ENDLOOP: 244 loop->EndLoop = inst; 245 end_loops--; 246 break; 247 case RC_OPCODE_BRK: 248 /* Don't unroll loops if it has a BRK instruction 249 * other one used when testing the main conditional 250 * of the loop. */ 251 252 /* Make sure we haven't entered a nested loops. */ 253 if(inst != loop->Brk && end_loops == 1) { 254 return 0; 255 } 256 break; 257 case RC_OPCODE_IF: 258 count_inst.BranchDepth++; 259 break; 260 case RC_OPCODE_ENDIF: 261 count_inst.BranchDepth--; 262 break; 263 default: 264 rc_for_all_writes_mask(inst, get_incr_amount, &count_inst); 265 if(count_inst.Unknown){ 266 return 0; 267 } 268 break; 269 } 270 } 271 /* Infinite loop */ 272 if(count_inst.Amount == 0.0f){ 273 return 0; 274 } 275 DBG("Counter is increased by %f each iteration.\n", count_inst.Amount); 276 /* Calculate the number of iterations of this loop. Keeping this 277 * simple, since we only support increment and decrement loops. 278 */ 279 limit_value = rc_get_constant_value(c, limit->Index, limit->Swizzle, 280 limit->Negate, 0); 281 DBG("Limit is %f.\n", limit_value); 282 /* The iteration calculations are opposite of what you would expect. 283 * In a normal loop, if the condition is met, then loop continues, but 284 * with our loops, if the condition is met, the is exited. */ 285 switch(loop->Cond->U.I.Opcode){ 286 case RC_OPCODE_SGE: 287 case RC_OPCODE_SLE: 288 iterations = (int) ceilf((limit_value - counter_value.Value) / 289 count_inst.Amount); 290 break; 291 292 case RC_OPCODE_SGT: 293 case RC_OPCODE_SLT: 294 iterations = (int) floorf((limit_value - counter_value.Value) / 295 count_inst.Amount) + 1; 296 break; 297 default: 298 return 0; 299 } 300 301 if (c->max_alu_insts > 0 302 && iterations > loop_max_possible_iterations(c, loop)) { 303 return 0; 304 } 305 306 DBG("Loop will have %d iterations.\n", iterations); 307 308 /* Prepare loop for unrolling */ 309 rc_remove_instruction(loop->Cond); 310 rc_remove_instruction(loop->If); 311 rc_remove_instruction(loop->Brk); 312 rc_remove_instruction(loop->EndIf); 313 314 unroll_loop(c, loop, iterations); 315 loop->EndLoop = NULL; 316 return 1; 317} 318 319/** 320 * @param c 321 * @param loop 322 * @param inst A pointer to a BGNLOOP instruction. 323 * @return 1 if all of the members of loop where set. 324 * @return 0 if there was an error and some members of loop are still NULL. 325 */ 326static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop, 327 struct rc_instruction * inst) 328{ 329 struct rc_instruction * ptr; 330 331 if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){ 332 rc_error(c, "%s: expected BGNLOOP", __FUNCTION__); 333 return 0; 334 } 335 336 memset(loop, 0, sizeof(struct loop_info)); 337 338 loop->BeginLoop = inst; 339 340 for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) { 341 342 if (ptr == &c->Program.Instructions) { 343 rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n", 344 __FUNCTION__); 345 return 0; 346 } 347 348 switch(ptr->U.I.Opcode){ 349 case RC_OPCODE_BGNLOOP: 350 { 351 /* Nested loop, skip ahead to the end. */ 352 unsigned int loop_depth = 1; 353 for(ptr = ptr->Next; ptr != &c->Program.Instructions; 354 ptr = ptr->Next){ 355 if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) { 356 loop_depth++; 357 } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) { 358 if (!--loop_depth) { 359 break; 360 } 361 } 362 } 363 if (ptr == &c->Program.Instructions) { 364 rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n", 365 __FUNCTION__); 366 return 0; 367 } 368 break; 369 } 370 case RC_OPCODE_BRK: 371 if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF 372 || ptr->Prev->U.I.Opcode != RC_OPCODE_IF 373 || loop->Brk){ 374 continue; 375 } 376 loop->Brk = ptr; 377 loop->If = ptr->Prev; 378 loop->EndIf = ptr->Next; 379 switch(loop->If->Prev->U.I.Opcode){ 380 case RC_OPCODE_SLT: 381 case RC_OPCODE_SGE: 382 case RC_OPCODE_SGT: 383 case RC_OPCODE_SLE: 384 case RC_OPCODE_SEQ: 385 case RC_OPCODE_SNE: 386 break; 387 default: 388 return 0; 389 } 390 loop->Cond = loop->If->Prev; 391 break; 392 393 case RC_OPCODE_ENDLOOP: 394 loop->EndLoop = ptr; 395 break; 396 } 397 } 398 399 if (loop->BeginLoop && loop->Brk && loop->If && loop->EndIf 400 && loop->Cond && loop->EndLoop) { 401 return 1; 402 } 403 return 0; 404} 405 406/** 407 * This function prepares a loop to be unrolled by converting it into an if 408 * statement. Here is an outline of the conversion process: 409 * BGNLOOP; -> BGNLOOP; 410 * <Additional conditional code> -> <Additional conditional code> 411 * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2]; 412 * IF temp[0]; -> IF temp[0]; 413 * BRK; -> 414 * ENDIF; -> <Loop Body> 415 * <Loop Body> -> ENDIF; 416 * ENDLOOP; -> ENDLOOP 417 * 418 * @param inst A pointer to a BGNLOOP instruction. 419 * @return 1 for success, 0 for failure 420 */ 421static int transform_loop(struct emulate_loop_state * s, 422 struct rc_instruction * inst) 423{ 424 struct loop_info * loop; 425 426 memory_pool_array_reserve(&s->C->Pool, struct loop_info, 427 s->Loops, s->LoopCount, s->LoopReserved, 1); 428 429 loop = &s->Loops[s->LoopCount++]; 430 431 if (!build_loop_info(s->C, loop, inst)) { 432 rc_error(s->C, "Failed to build loop info\n"); 433 return 0; 434 } 435 436 if(try_unroll_loop(s->C, loop)){ 437 return 1; 438 } 439 440 /* Reverse the conditional instruction */ 441 switch(loop->Cond->U.I.Opcode){ 442 case RC_OPCODE_SGE: 443 loop->Cond->U.I.Opcode = RC_OPCODE_SLT; 444 break; 445 case RC_OPCODE_SLT: 446 loop->Cond->U.I.Opcode = RC_OPCODE_SGE; 447 break; 448 case RC_OPCODE_SLE: 449 loop->Cond->U.I.Opcode = RC_OPCODE_SGT; 450 break; 451 case RC_OPCODE_SGT: 452 loop->Cond->U.I.Opcode = RC_OPCODE_SLE; 453 break; 454 case RC_OPCODE_SEQ: 455 loop->Cond->U.I.Opcode = RC_OPCODE_SNE; 456 break; 457 case RC_OPCODE_SNE: 458 loop->Cond->U.I.Opcode = RC_OPCODE_SEQ; 459 break; 460 default: 461 rc_error(s->C, "loop->Cond is not a conditional.\n"); 462 return 0; 463 } 464 465 /* Prepare the loop to be emulated */ 466 rc_remove_instruction(loop->Brk); 467 rc_remove_instruction(loop->EndIf); 468 rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf); 469 return 1; 470} 471 472void rc_transform_loops(struct radeon_compiler *c, void *user) 473{ 474 struct emulate_loop_state * s = &c->loop_state; 475 struct rc_instruction * ptr; 476 477 memset(s, 0, sizeof(struct emulate_loop_state)); 478 s->C = c; 479 for(ptr = s->C->Program.Instructions.Next; 480 ptr != &s->C->Program.Instructions; ptr = ptr->Next) { 481 if(ptr->Type == RC_INSTRUCTION_NORMAL && 482 ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ 483 if (!transform_loop(s, ptr)) 484 return; 485 } 486 } 487} 488 489void rc_unroll_loops(struct radeon_compiler *c, void *user) 490{ 491 struct rc_instruction * inst; 492 struct loop_info loop; 493 494 for(inst = c->Program.Instructions.Next; 495 inst != &c->Program.Instructions; inst = inst->Next) { 496 497 if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { 498 if (build_loop_info(c, &loop, inst)) { 499 try_unroll_loop(c, &loop); 500 } 501 } 502 } 503} 504 505void rc_emulate_loops(struct radeon_compiler *c, void *user) 506{ 507 struct emulate_loop_state * s = &c->loop_state; 508 int i; 509 /* Iterate backwards of the list of loops so that loops that nested 510 * loops are unrolled first. 511 */ 512 for( i = s->LoopCount - 1; i >= 0; i-- ){ 513 unsigned int iterations; 514 515 if(!s->Loops[i].EndLoop){ 516 continue; 517 } 518 iterations = loop_max_possible_iterations(s->C, &s->Loops[i]); 519 unroll_loop(s->C, &s->Loops[i], iterations); 520 } 521} 522