1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24#include "compiler/glsl_types.h" 25#include "loop_analysis.h" 26#include "ir_hierarchical_visitor.h" 27 28#include "main/mtypes.h" 29 30namespace { 31 32class loop_unroll_visitor : public ir_hierarchical_visitor { 33public: 34 loop_unroll_visitor(loop_state *state, 35 const struct gl_shader_compiler_options *options) 36 { 37 this->state = state; 38 this->progress = false; 39 this->options = options; 40 } 41 42 virtual ir_visitor_status visit_leave(ir_loop *ir); 43 void simple_unroll(ir_loop *ir, int iterations); 44 void complex_unroll(ir_loop *ir, int iterations, 45 bool continue_from_then_branch); 46 void splice_post_if_instructions(ir_if *ir_if, exec_list *splice_dest); 47 48 loop_state *state; 49 50 bool progress; 51 const struct gl_shader_compiler_options *options; 52}; 53 54} /* anonymous namespace */ 55 56static bool 57is_break(ir_instruction *ir) 58{ 59 return ir != NULL && ir->ir_type == ir_type_loop_jump 60 && ((ir_loop_jump *) ir)->is_break(); 61} 62 63class loop_unroll_count : public ir_hierarchical_visitor { 64public: 65 int nodes; 66 bool unsupported_variable_indexing; 67 bool array_indexed_by_induction_var_with_exact_iterations; 68 /* If there are nested loops, the node count will be inaccurate. */ 69 bool nested_loop; 70 71 loop_unroll_count(exec_list *list, loop_variable_state *ls, 72 const struct gl_shader_compiler_options *options) 73 : ls(ls), options(options) 74 { 75 nodes = 0; 76 nested_loop = false; 77 unsupported_variable_indexing = false; 78 array_indexed_by_induction_var_with_exact_iterations = false; 79 80 run(list); 81 } 82 83 virtual ir_visitor_status visit_enter(ir_assignment *) 84 { 85 nodes++; 86 return visit_continue; 87 } 88 89 virtual ir_visitor_status visit_enter(ir_expression *) 90 { 91 nodes++; 92 return visit_continue; 93 } 94 95 virtual ir_visitor_status visit_enter(ir_loop *) 96 { 97 nested_loop = true; 98 return visit_continue; 99 } 100 101 virtual ir_visitor_status visit_enter(ir_dereference_array *ir) 102 { 103 /* Force unroll in case of dynamic indexing with sampler arrays 104 * when EmitNoIndirectSampler is set. 105 */ 106 if (options->EmitNoIndirectSampler) { 107 if ((ir->array->type->is_array() && 108 ir->array->type->contains_sampler()) && 109 !ir->array_index->constant_expression_value()) { 110 unsupported_variable_indexing = true; 111 return visit_continue; 112 } 113 } 114 115 /* Check for arrays variably-indexed by a loop induction variable. 116 * Unrolling the loop may convert that access into constant-indexing. 117 * 118 * Many drivers don't support particular kinds of variable indexing, 119 * and have to resort to using lower_variable_index_to_cond_assign to 120 * handle it. This results in huge amounts of horrible code, so we'd 121 * like to avoid that if possible. Here, we just note that it will 122 * happen. 123 */ 124 if ((ir->array->type->is_array() || ir->array->type->is_matrix()) && 125 !ir->array_index->as_constant()) { 126 ir_variable *array = ir->array->variable_referenced(); 127 loop_variable *lv = ls->get(ir->array_index->variable_referenced()); 128 if (array && lv && lv->is_induction_var()) { 129 /* If an array is indexed by a loop induction variable, and the 130 * array size is exactly the number of loop iterations, this is 131 * probably a simple for-loop trying to access each element in 132 * turn; the application may expect it to be unrolled. 133 */ 134 if (int(array->type->length) == ls->limiting_terminator->iterations) 135 array_indexed_by_induction_var_with_exact_iterations = true; 136 137 switch (array->data.mode) { 138 case ir_var_auto: 139 case ir_var_temporary: 140 case ir_var_const_in: 141 case ir_var_function_in: 142 case ir_var_function_out: 143 case ir_var_function_inout: 144 if (options->EmitNoIndirectTemp) 145 unsupported_variable_indexing = true; 146 break; 147 case ir_var_uniform: 148 case ir_var_shader_storage: 149 if (options->EmitNoIndirectUniform) 150 unsupported_variable_indexing = true; 151 break; 152 case ir_var_shader_in: 153 if (options->EmitNoIndirectInput) 154 unsupported_variable_indexing = true; 155 break; 156 case ir_var_shader_out: 157 if (options->EmitNoIndirectOutput) 158 unsupported_variable_indexing = true; 159 break; 160 } 161 } 162 } 163 return visit_continue; 164 } 165 166private: 167 loop_variable_state *ls; 168 const struct gl_shader_compiler_options *options; 169}; 170 171 172/** 173 * Unroll a loop which does not contain any jumps. For example, if the input 174 * is: 175 * 176 * (loop (...) ...instrs...) 177 * 178 * And the iteration count is 3, the output will be: 179 * 180 * ...instrs... ...instrs... ...instrs... 181 */ 182void 183loop_unroll_visitor::simple_unroll(ir_loop *ir, int iterations) 184{ 185 void *const mem_ctx = ralloc_parent(ir); 186 187 for (int i = 0; i < iterations; i++) { 188 exec_list copy_list; 189 190 copy_list.make_empty(); 191 clone_ir_list(mem_ctx, ©_list, &ir->body_instructions); 192 193 ir->insert_before(©_list); 194 } 195 196 /* The loop has been replaced by the unrolled copies. Remove the original 197 * loop from the IR sequence. 198 */ 199 ir->remove(); 200 201 this->progress = true; 202} 203 204 205/** 206 * Unroll a loop whose last statement is an ir_if. If \c 207 * continue_from_then_branch is true, the loop is repeated only when the 208 * "then" branch of the if is taken; otherwise it is repeated only when the 209 * "else" branch of the if is taken. 210 * 211 * For example, if the input is: 212 * 213 * (loop (...) 214 * ...body... 215 * (if (cond) 216 * (...then_instrs...) 217 * (...else_instrs...))) 218 * 219 * And the iteration count is 3, and \c continue_from_then_branch is true, 220 * then the output will be: 221 * 222 * ...body... 223 * (if (cond) 224 * (...then_instrs... 225 * ...body... 226 * (if (cond) 227 * (...then_instrs... 228 * ...body... 229 * (if (cond) 230 * (...then_instrs...) 231 * (...else_instrs...))) 232 * (...else_instrs...))) 233 * (...else_instrs)) 234 */ 235void 236loop_unroll_visitor::complex_unroll(ir_loop *ir, int iterations, 237 bool continue_from_then_branch) 238{ 239 void *const mem_ctx = ralloc_parent(ir); 240 ir_instruction *ir_to_replace = ir; 241 242 for (int i = 0; i < iterations; i++) { 243 exec_list copy_list; 244 245 copy_list.make_empty(); 246 clone_ir_list(mem_ctx, ©_list, &ir->body_instructions); 247 248 ir_if *ir_if = ((ir_instruction *) copy_list.get_tail())->as_if(); 249 assert(ir_if != NULL); 250 251 ir_to_replace->insert_before(©_list); 252 ir_to_replace->remove(); 253 254 /* placeholder that will be removed in the next iteration */ 255 ir_to_replace = 256 new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_continue); 257 258 exec_list *const list = (continue_from_then_branch) 259 ? &ir_if->then_instructions : &ir_if->else_instructions; 260 261 list->push_tail(ir_to_replace); 262 } 263 264 ir_to_replace->remove(); 265 266 this->progress = true; 267} 268 269 270/** 271 * Move all of the instructions which follow \c ir_if to the end of 272 * \c splice_dest. 273 * 274 * For example, in the code snippet: 275 * 276 * (if (cond) 277 * (...then_instructions... 278 * break) 279 * (...else_instructions...)) 280 * ...post_if_instructions... 281 * 282 * If \c ir_if points to the "if" instruction, and \c splice_dest points to 283 * (...else_instructions...), the code snippet is transformed into: 284 * 285 * (if (cond) 286 * (...then_instructions... 287 * break) 288 * (...else_instructions... 289 * ...post_if_instructions...)) 290 */ 291void 292loop_unroll_visitor::splice_post_if_instructions(ir_if *ir_if, 293 exec_list *splice_dest) 294{ 295 while (!ir_if->get_next()->is_tail_sentinel()) { 296 ir_instruction *move_ir = (ir_instruction *) ir_if->get_next(); 297 298 move_ir->remove(); 299 splice_dest->push_tail(move_ir); 300 } 301} 302 303 304ir_visitor_status 305loop_unroll_visitor::visit_leave(ir_loop *ir) 306{ 307 loop_variable_state *const ls = this->state->get(ir); 308 int iterations; 309 310 /* If we've entered a loop that hasn't been analyzed, something really, 311 * really bad has happened. 312 */ 313 if (ls == NULL) { 314 assert(ls != NULL); 315 return visit_continue; 316 } 317 318 if (ls->limiting_terminator == NULL) { 319 ir_instruction *last_ir = 320 (ir_instruction *) ir->body_instructions.get_tail(); 321 322 /* If a loop has no induction variable and the last instruction is 323 * a break, unroll the loop with a count of 1. This is the classic 324 * 325 * do { 326 * // ... 327 * } while (false) 328 * 329 * that is used to wrap multi-line macros. 330 * 331 * If num_loop_jumps is not zero, last_ir cannot be NULL... there has to 332 * be at least num_loop_jumps instructions in the loop. 333 */ 334 if (ls->num_loop_jumps == 1 && is_break(last_ir)) { 335 last_ir->remove(); 336 337 simple_unroll(ir, 1); 338 } 339 340 /* Don't try to unroll loops where the number of iterations is not known 341 * at compile-time. 342 */ 343 return visit_continue; 344 } 345 346 iterations = ls->limiting_terminator->iterations; 347 348 const int max_iterations = options->MaxUnrollIterations; 349 350 /* Don't try to unroll loops that have zillions of iterations either. 351 */ 352 if (iterations > max_iterations) 353 return visit_continue; 354 355 /* Don't try to unroll nested loops and loops with a huge body. 356 */ 357 loop_unroll_count count(&ir->body_instructions, ls, options); 358 359 bool loop_too_large = 360 count.nested_loop || count.nodes * iterations > max_iterations * 5; 361 362 if (loop_too_large && !count.unsupported_variable_indexing && 363 !count.array_indexed_by_induction_var_with_exact_iterations) 364 return visit_continue; 365 366 /* Note: the limiting terminator contributes 1 to ls->num_loop_jumps. 367 * We'll be removing the limiting terminator before we unroll. 368 */ 369 assert(ls->num_loop_jumps > 0); 370 unsigned predicted_num_loop_jumps = ls->num_loop_jumps - 1; 371 372 if (predicted_num_loop_jumps > 1) 373 return visit_continue; 374 375 if (predicted_num_loop_jumps == 0) { 376 ls->limiting_terminator->ir->remove(); 377 simple_unroll(ir, iterations); 378 return visit_continue; 379 } 380 381 ir_instruction *last_ir = (ir_instruction *) ir->body_instructions.get_tail(); 382 assert(last_ir != NULL); 383 384 if (is_break(last_ir)) { 385 /* If the only loop-jump is a break at the end of the loop, the loop 386 * will execute exactly once. Remove the break and use the simple 387 * unroller with an iteration count of 1. 388 */ 389 last_ir->remove(); 390 391 ls->limiting_terminator->ir->remove(); 392 simple_unroll(ir, 1); 393 return visit_continue; 394 } 395 396 /* recognize loops in the form produced by ir_lower_jumps */ 397 foreach_in_list(ir_instruction, cur_ir, &ir->body_instructions) { 398 /* Skip the limiting terminator, since it will go away when we 399 * unroll. 400 */ 401 if (cur_ir == ls->limiting_terminator->ir) 402 continue; 403 404 ir_if *ir_if = cur_ir->as_if(); 405 if (ir_if != NULL) { 406 /* Determine which if-statement branch, if any, ends with a 407 * break. The branch that did *not* have the break will get a 408 * temporary continue inserted in each iteration of the loop 409 * unroll. 410 * 411 * Note that since ls->num_loop_jumps is <= 1, it is impossible 412 * for both branches to end with a break. 413 */ 414 ir_instruction *ir_if_last = 415 (ir_instruction *) ir_if->then_instructions.get_tail(); 416 417 if (is_break(ir_if_last)) { 418 ls->limiting_terminator->ir->remove(); 419 splice_post_if_instructions(ir_if, &ir_if->else_instructions); 420 ir_if_last->remove(); 421 complex_unroll(ir, iterations, false); 422 return visit_continue; 423 } else { 424 ir_if_last = 425 (ir_instruction *) ir_if->else_instructions.get_tail(); 426 427 if (is_break(ir_if_last)) { 428 ls->limiting_terminator->ir->remove(); 429 splice_post_if_instructions(ir_if, &ir_if->then_instructions); 430 ir_if_last->remove(); 431 complex_unroll(ir, iterations, true); 432 return visit_continue; 433 } 434 } 435 } 436 } 437 438 /* Did not find the break statement. It must be in a complex if-nesting, 439 * so don't try to unroll. 440 */ 441 return visit_continue; 442} 443 444 445bool 446unroll_loops(exec_list *instructions, loop_state *ls, 447 const struct gl_shader_compiler_options *options) 448{ 449 loop_unroll_visitor v(ls, options); 450 451 v.run(instructions); 452 453 return v.progress; 454} 455