1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24/** 25 * \file opt_algebraic.cpp 26 * 27 * Takes advantage of association, commutivity, and other algebraic 28 * properties to simplify expressions. 29 */ 30 31#include "ir.h" 32#include "ir_visitor.h" 33#include "ir_rvalue_visitor.h" 34#include "ir_optimization.h" 35#include "ir_builder.h" 36#include "compiler/glsl_types.h" 37 38using namespace ir_builder; 39 40namespace { 41 42/** 43 * Visitor class for replacing expressions with ir_constant values. 44 */ 45 46class ir_algebraic_visitor : public ir_rvalue_visitor { 47public: 48 ir_algebraic_visitor(bool native_integers, 49 const struct gl_shader_compiler_options *options) 50 : options(options) 51 { 52 this->progress = false; 53 this->mem_ctx = NULL; 54 this->native_integers = native_integers; 55 } 56 57 virtual ~ir_algebraic_visitor() 58 { 59 } 60 61 virtual ir_visitor_status visit_enter(ir_assignment *ir); 62 63 ir_rvalue *handle_expression(ir_expression *ir); 64 void handle_rvalue(ir_rvalue **rvalue); 65 bool reassociate_constant(ir_expression *ir1, 66 int const_index, 67 ir_constant *constant, 68 ir_expression *ir2); 69 void reassociate_operands(ir_expression *ir1, 70 int op1, 71 ir_expression *ir2, 72 int op2); 73 ir_rvalue *swizzle_if_required(ir_expression *expr, 74 ir_rvalue *operand); 75 76 const struct gl_shader_compiler_options *options; 77 void *mem_ctx; 78 79 bool native_integers; 80 bool progress; 81}; 82 83} /* unnamed namespace */ 84 85ir_visitor_status 86ir_algebraic_visitor::visit_enter(ir_assignment *ir) 87{ 88 ir_variable *var = ir->lhs->variable_referenced(); 89 if (var->data.invariant || var->data.precise) { 90 /* If we're assigning to an invariant or precise variable, just bail. 91 * Most of the algebraic optimizations aren't precision-safe. 92 * 93 * FINISHME: Find out which optimizations are precision-safe and enable 94 * then only for invariant or precise trees. 95 */ 96 return visit_continue_with_parent; 97 } else { 98 return visit_continue; 99 } 100} 101 102static inline bool 103is_vec_zero(ir_constant *ir) 104{ 105 return (ir == NULL) ? false : ir->is_zero(); 106} 107 108static inline bool 109is_vec_one(ir_constant *ir) 110{ 111 return (ir == NULL) ? false : ir->is_one(); 112} 113 114static inline bool 115is_vec_two(ir_constant *ir) 116{ 117 return (ir == NULL) ? false : ir->is_value(2.0, 2); 118} 119 120static inline bool 121is_vec_four(ir_constant *ir) 122{ 123 return (ir == NULL) ? false : ir->is_value(4.0, 4); 124} 125 126static inline bool 127is_vec_negative_one(ir_constant *ir) 128{ 129 return (ir == NULL) ? false : ir->is_negative_one(); 130} 131 132static inline bool 133is_valid_vec_const(ir_constant *ir) 134{ 135 if (ir == NULL) 136 return false; 137 138 if (!ir->type->is_scalar() && !ir->type->is_vector()) 139 return false; 140 141 return true; 142} 143 144static inline bool 145is_less_than_one(ir_constant *ir) 146{ 147 assert(ir->type->base_type == GLSL_TYPE_FLOAT); 148 149 if (!is_valid_vec_const(ir)) 150 return false; 151 152 unsigned component = 0; 153 for (int c = 0; c < ir->type->vector_elements; c++) { 154 if (ir->get_float_component(c) < 1.0f) 155 component++; 156 } 157 158 return (component == ir->type->vector_elements); 159} 160 161static inline bool 162is_greater_than_zero(ir_constant *ir) 163{ 164 assert(ir->type->base_type == GLSL_TYPE_FLOAT); 165 166 if (!is_valid_vec_const(ir)) 167 return false; 168 169 unsigned component = 0; 170 for (int c = 0; c < ir->type->vector_elements; c++) { 171 if (ir->get_float_component(c) > 0.0f) 172 component++; 173 } 174 175 return (component == ir->type->vector_elements); 176} 177 178static void 179update_type(ir_expression *ir) 180{ 181 if (ir->operands[0]->type->is_vector()) 182 ir->type = ir->operands[0]->type; 183 else 184 ir->type = ir->operands[1]->type; 185} 186 187/* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */ 188static ir_expression * 189try_replace_with_dot(ir_expression *expr0, ir_expression *expr1, void *mem_ctx) 190{ 191 if (expr0 && expr0->operation == ir_binop_add && 192 expr0->type->is_float() && 193 expr1 && expr1->operation == ir_binop_add && 194 expr1->type->is_float()) { 195 ir_swizzle *x = expr0->operands[0]->as_swizzle(); 196 ir_swizzle *y = expr0->operands[1]->as_swizzle(); 197 ir_swizzle *z = expr1->operands[0]->as_swizzle(); 198 ir_swizzle *w = expr1->operands[1]->as_swizzle(); 199 200 if (!x || x->mask.num_components != 1 || 201 !y || y->mask.num_components != 1 || 202 !z || z->mask.num_components != 1 || 203 !w || w->mask.num_components != 1) { 204 return NULL; 205 } 206 207 bool swiz_seen[4] = {false, false, false, false}; 208 swiz_seen[x->mask.x] = true; 209 swiz_seen[y->mask.x] = true; 210 swiz_seen[z->mask.x] = true; 211 swiz_seen[w->mask.x] = true; 212 213 if (!swiz_seen[0] || !swiz_seen[1] || 214 !swiz_seen[2] || !swiz_seen[3]) { 215 return NULL; 216 } 217 218 if (x->val->equals(y->val) && 219 x->val->equals(z->val) && 220 x->val->equals(w->val)) { 221 return dot(x->val, new(mem_ctx) ir_constant(1.0f, 4)); 222 } 223 } 224 return NULL; 225} 226 227void 228ir_algebraic_visitor::reassociate_operands(ir_expression *ir1, 229 int op1, 230 ir_expression *ir2, 231 int op2) 232{ 233 ir_rvalue *temp = ir2->operands[op2]; 234 ir2->operands[op2] = ir1->operands[op1]; 235 ir1->operands[op1] = temp; 236 237 /* Update the type of ir2. The type of ir1 won't have changed -- 238 * base types matched, and at least one of the operands of the 2 239 * binops is still a vector if any of them were. 240 */ 241 update_type(ir2); 242 243 this->progress = true; 244} 245 246/** 247 * Reassociates a constant down a tree of adds or multiplies. 248 * 249 * Consider (2 * (a * (b * 0.5))). We want to send up with a * b. 250 */ 251bool 252ir_algebraic_visitor::reassociate_constant(ir_expression *ir1, int const_index, 253 ir_constant *constant, 254 ir_expression *ir2) 255{ 256 if (!ir2 || ir1->operation != ir2->operation) 257 return false; 258 259 /* Don't want to even think about matrices. */ 260 if (ir1->operands[0]->type->is_matrix() || 261 ir1->operands[1]->type->is_matrix() || 262 ir2->operands[0]->type->is_matrix() || 263 ir2->operands[1]->type->is_matrix()) 264 return false; 265 266 ir_constant *ir2_const[2]; 267 ir2_const[0] = ir2->operands[0]->constant_expression_value(); 268 ir2_const[1] = ir2->operands[1]->constant_expression_value(); 269 270 if (ir2_const[0] && ir2_const[1]) 271 return false; 272 273 if (ir2_const[0]) { 274 reassociate_operands(ir1, const_index, ir2, 1); 275 return true; 276 } else if (ir2_const[1]) { 277 reassociate_operands(ir1, const_index, ir2, 0); 278 return true; 279 } 280 281 if (reassociate_constant(ir1, const_index, constant, 282 ir2->operands[0]->as_expression())) { 283 update_type(ir2); 284 return true; 285 } 286 287 if (reassociate_constant(ir1, const_index, constant, 288 ir2->operands[1]->as_expression())) { 289 update_type(ir2); 290 return true; 291 } 292 293 return false; 294} 295 296/* When eliminating an expression and just returning one of its operands, 297 * we may need to swizzle that operand out to a vector if the expression was 298 * vector type. 299 */ 300ir_rvalue * 301ir_algebraic_visitor::swizzle_if_required(ir_expression *expr, 302 ir_rvalue *operand) 303{ 304 if (expr->type->is_vector() && operand->type->is_scalar()) { 305 return new(mem_ctx) ir_swizzle(operand, 0, 0, 0, 0, 306 expr->type->vector_elements); 307 } else 308 return operand; 309} 310 311ir_rvalue * 312ir_algebraic_visitor::handle_expression(ir_expression *ir) 313{ 314 ir_constant *op_const[4] = {NULL, NULL, NULL, NULL}; 315 ir_expression *op_expr[4] = {NULL, NULL, NULL, NULL}; 316 unsigned int i; 317 318 if (ir->operation == ir_binop_mul && 319 ir->operands[0]->type->is_matrix() && 320 ir->operands[1]->type->is_vector()) { 321 ir_expression *matrix_mul = ir->operands[0]->as_expression(); 322 323 if (matrix_mul && matrix_mul->operation == ir_binop_mul && 324 matrix_mul->operands[0]->type->is_matrix() && 325 matrix_mul->operands[1]->type->is_matrix()) { 326 327 return mul(matrix_mul->operands[0], 328 mul(matrix_mul->operands[1], ir->operands[1])); 329 } 330 } 331 332 assert(ir->get_num_operands() <= 4); 333 for (i = 0; i < ir->get_num_operands(); i++) { 334 if (ir->operands[i]->type->is_matrix()) 335 return ir; 336 337 op_const[i] = ir->operands[i]->constant_expression_value(); 338 op_expr[i] = ir->operands[i]->as_expression(); 339 } 340 341 if (this->mem_ctx == NULL) 342 this->mem_ctx = ralloc_parent(ir); 343 344 switch (ir->operation) { 345 case ir_unop_bit_not: 346 if (op_expr[0] && op_expr[0]->operation == ir_unop_bit_not) 347 return op_expr[0]->operands[0]; 348 break; 349 350 case ir_unop_abs: 351 if (op_expr[0] == NULL) 352 break; 353 354 switch (op_expr[0]->operation) { 355 case ir_unop_abs: 356 case ir_unop_neg: 357 return abs(op_expr[0]->operands[0]); 358 default: 359 break; 360 } 361 break; 362 363 case ir_unop_neg: 364 if (op_expr[0] == NULL) 365 break; 366 367 if (op_expr[0]->operation == ir_unop_neg) { 368 return op_expr[0]->operands[0]; 369 } 370 break; 371 372 case ir_unop_exp: 373 if (op_expr[0] == NULL) 374 break; 375 376 if (op_expr[0]->operation == ir_unop_log) { 377 return op_expr[0]->operands[0]; 378 } 379 break; 380 381 case ir_unop_log: 382 if (op_expr[0] == NULL) 383 break; 384 385 if (op_expr[0]->operation == ir_unop_exp) { 386 return op_expr[0]->operands[0]; 387 } 388 break; 389 390 case ir_unop_exp2: 391 if (op_expr[0] == NULL) 392 break; 393 394 if (op_expr[0]->operation == ir_unop_log2) { 395 return op_expr[0]->operands[0]; 396 } 397 398 if (!options->EmitNoPow && op_expr[0]->operation == ir_binop_mul) { 399 for (int log2_pos = 0; log2_pos < 2; log2_pos++) { 400 ir_expression *log2_expr = 401 op_expr[0]->operands[log2_pos]->as_expression(); 402 403 if (log2_expr && log2_expr->operation == ir_unop_log2) { 404 return new(mem_ctx) ir_expression(ir_binop_pow, 405 ir->type, 406 log2_expr->operands[0], 407 op_expr[0]->operands[1 - log2_pos]); 408 } 409 } 410 } 411 break; 412 413 case ir_unop_log2: 414 if (op_expr[0] == NULL) 415 break; 416 417 if (op_expr[0]->operation == ir_unop_exp2) { 418 return op_expr[0]->operands[0]; 419 } 420 break; 421 422 case ir_unop_f2i: 423 case ir_unop_f2u: 424 if (op_expr[0] && op_expr[0]->operation == ir_unop_trunc) { 425 return new(mem_ctx) ir_expression(ir->operation, 426 ir->type, 427 op_expr[0]->operands[0]); 428 } 429 break; 430 431 case ir_unop_logic_not: { 432 enum ir_expression_operation new_op = ir_unop_logic_not; 433 434 if (op_expr[0] == NULL) 435 break; 436 437 switch (op_expr[0]->operation) { 438 case ir_binop_less: new_op = ir_binop_gequal; break; 439 case ir_binop_greater: new_op = ir_binop_lequal; break; 440 case ir_binop_lequal: new_op = ir_binop_greater; break; 441 case ir_binop_gequal: new_op = ir_binop_less; break; 442 case ir_binop_equal: new_op = ir_binop_nequal; break; 443 case ir_binop_nequal: new_op = ir_binop_equal; break; 444 case ir_binop_all_equal: new_op = ir_binop_any_nequal; break; 445 case ir_binop_any_nequal: new_op = ir_binop_all_equal; break; 446 447 default: 448 /* The default case handler is here to silence a warning from GCC. 449 */ 450 break; 451 } 452 453 if (new_op != ir_unop_logic_not) { 454 return new(mem_ctx) ir_expression(new_op, 455 ir->type, 456 op_expr[0]->operands[0], 457 op_expr[0]->operands[1]); 458 } 459 460 break; 461 } 462 463 case ir_unop_saturate: 464 if (op_expr[0] && op_expr[0]->operation == ir_binop_add) { 465 ir_expression *b2f_0 = op_expr[0]->operands[0]->as_expression(); 466 ir_expression *b2f_1 = op_expr[0]->operands[1]->as_expression(); 467 468 if (b2f_0 && b2f_0->operation == ir_unop_b2f && 469 b2f_1 && b2f_1->operation == ir_unop_b2f) { 470 return b2f(logic_or(b2f_0->operands[0], b2f_1->operands[0])); 471 } 472 } 473 break; 474 475 case ir_binop_add: 476 if (is_vec_zero(op_const[0])) 477 return ir->operands[1]; 478 if (is_vec_zero(op_const[1])) 479 return ir->operands[0]; 480 481 /* Reassociate addition of constants so that we can do constant 482 * folding. 483 */ 484 if (op_const[0] && !op_const[1]) 485 reassociate_constant(ir, 0, op_const[0], op_expr[1]); 486 if (op_const[1] && !op_const[0]) 487 reassociate_constant(ir, 1, op_const[1], op_expr[0]); 488 489 /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */ 490 if (options->OptimizeForAOS) { 491 ir_expression *expr = try_replace_with_dot(op_expr[0], op_expr[1], 492 mem_ctx); 493 if (expr) 494 return expr; 495 } 496 497 /* Replace (-x + y) * a + x and commutative variations with lrp(x, y, a). 498 * 499 * (-x + y) * a + x 500 * (x * -a) + (y * a) + x 501 * x + (x * -a) + (y * a) 502 * x * (1 - a) + y * a 503 * lrp(x, y, a) 504 */ 505 for (int mul_pos = 0; mul_pos < 2; mul_pos++) { 506 ir_expression *mul = op_expr[mul_pos]; 507 508 if (!mul || mul->operation != ir_binop_mul) 509 continue; 510 511 /* Multiply found on one of the operands. Now check for an 512 * inner addition operation. 513 */ 514 for (int inner_add_pos = 0; inner_add_pos < 2; inner_add_pos++) { 515 ir_expression *inner_add = 516 mul->operands[inner_add_pos]->as_expression(); 517 518 if (!inner_add || inner_add->operation != ir_binop_add) 519 continue; 520 521 /* Inner addition found on one of the operands. Now check for 522 * one of the operands of the inner addition to be the negative 523 * of x_operand. 524 */ 525 for (int neg_pos = 0; neg_pos < 2; neg_pos++) { 526 ir_expression *neg = 527 inner_add->operands[neg_pos]->as_expression(); 528 529 if (!neg || neg->operation != ir_unop_neg) 530 continue; 531 532 ir_rvalue *x_operand = ir->operands[1 - mul_pos]; 533 534 if (!neg->operands[0]->equals(x_operand)) 535 continue; 536 537 ir_rvalue *y_operand = inner_add->operands[1 - neg_pos]; 538 ir_rvalue *a_operand = mul->operands[1 - inner_add_pos]; 539 540 if (x_operand->type != y_operand->type || 541 x_operand->type != a_operand->type) 542 continue; 543 544 return lrp(x_operand, y_operand, a_operand); 545 } 546 } 547 } 548 549 break; 550 551 case ir_binop_sub: 552 if (is_vec_zero(op_const[0])) 553 return neg(ir->operands[1]); 554 if (is_vec_zero(op_const[1])) 555 return ir->operands[0]; 556 break; 557 558 case ir_binop_mul: 559 if (is_vec_one(op_const[0])) 560 return ir->operands[1]; 561 if (is_vec_one(op_const[1])) 562 return ir->operands[0]; 563 564 if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) 565 return ir_constant::zero(ir, ir->type); 566 567 if (is_vec_negative_one(op_const[0])) 568 return neg(ir->operands[1]); 569 if (is_vec_negative_one(op_const[1])) 570 return neg(ir->operands[0]); 571 572 if (op_expr[0] && op_expr[0]->operation == ir_unop_b2f && 573 op_expr[1] && op_expr[1]->operation == ir_unop_b2f) { 574 return b2f(logic_and(op_expr[0]->operands[0], op_expr[1]->operands[0])); 575 } 576 577 /* Reassociate multiplication of constants so that we can do 578 * constant folding. 579 */ 580 if (op_const[0] && !op_const[1]) 581 reassociate_constant(ir, 0, op_const[0], op_expr[1]); 582 if (op_const[1] && !op_const[0]) 583 reassociate_constant(ir, 1, op_const[1], op_expr[0]); 584 585 /* Optimizes 586 * 587 * (mul (floor (add (abs x) 0.5) (sign x))) 588 * 589 * into 590 * 591 * (trunc (add x (mul (sign x) 0.5))) 592 */ 593 for (int i = 0; i < 2; i++) { 594 ir_expression *sign_expr = ir->operands[i]->as_expression(); 595 ir_expression *floor_expr = ir->operands[1 - i]->as_expression(); 596 597 if (!sign_expr || sign_expr->operation != ir_unop_sign || 598 !floor_expr || floor_expr->operation != ir_unop_floor) 599 continue; 600 601 ir_expression *add_expr = floor_expr->operands[0]->as_expression(); 602 if (!add_expr || add_expr->operation != ir_binop_add) 603 continue; 604 605 for (int j = 0; j < 2; j++) { 606 ir_expression *abs_expr = add_expr->operands[j]->as_expression(); 607 if (!abs_expr || abs_expr->operation != ir_unop_abs) 608 continue; 609 610 ir_constant *point_five = add_expr->operands[1 - j]->as_constant(); 611 if (!point_five || !point_five->is_value(0.5, 0)) 612 continue; 613 614 if (abs_expr->operands[0]->equals(sign_expr->operands[0])) { 615 return trunc(add(abs_expr->operands[0], 616 mul(sign_expr, point_five))); 617 } 618 } 619 } 620 break; 621 622 case ir_binop_div: 623 if (is_vec_one(op_const[0]) && ( 624 ir->type->base_type == GLSL_TYPE_FLOAT || 625 ir->type->base_type == GLSL_TYPE_DOUBLE)) { 626 return new(mem_ctx) ir_expression(ir_unop_rcp, 627 ir->operands[1]->type, 628 ir->operands[1], 629 NULL); 630 } 631 if (is_vec_one(op_const[1])) 632 return ir->operands[0]; 633 break; 634 635 case ir_binop_dot: 636 if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) 637 return ir_constant::zero(mem_ctx, ir->type); 638 639 for (int i = 0; i < 2; i++) { 640 if (!op_const[i]) 641 continue; 642 643 unsigned components[4] = { 0 }, count = 0; 644 645 for (unsigned c = 0; c < op_const[i]->type->vector_elements; c++) { 646 if (op_const[i]->is_zero()) 647 continue; 648 649 components[count] = c; 650 count++; 651 } 652 653 /* No channels had zero values; bail. */ 654 if (count >= op_const[i]->type->vector_elements) 655 break; 656 657 ir_expression_operation op = count == 1 ? 658 ir_binop_mul : ir_binop_dot; 659 660 /* Swizzle both operands to remove the channels that were zero. */ 661 return new(mem_ctx) 662 ir_expression(op, ir->type, 663 new(mem_ctx) ir_swizzle(ir->operands[0], 664 components, count), 665 new(mem_ctx) ir_swizzle(ir->operands[1], 666 components, count)); 667 } 668 break; 669 670 case ir_binop_less: 671 case ir_binop_lequal: 672 case ir_binop_greater: 673 case ir_binop_gequal: 674 case ir_binop_equal: 675 case ir_binop_nequal: 676 for (int add_pos = 0; add_pos < 2; add_pos++) { 677 ir_expression *add = op_expr[add_pos]; 678 679 if (!add || add->operation != ir_binop_add) 680 continue; 681 682 ir_constant *zero = op_const[1 - add_pos]; 683 if (!is_vec_zero(zero)) 684 continue; 685 686 /* Depending of the zero position we want to optimize 687 * (0 cmp x+y) into (-x cmp y) or (x+y cmp 0) into (x cmp -y) 688 */ 689 if (add_pos == 1) { 690 return new(mem_ctx) ir_expression(ir->operation, 691 neg(add->operands[0]), 692 add->operands[1]); 693 } else { 694 return new(mem_ctx) ir_expression(ir->operation, 695 add->operands[0], 696 neg(add->operands[1])); 697 } 698 } 699 break; 700 701 case ir_binop_all_equal: 702 case ir_binop_any_nequal: 703 if (ir->operands[0]->type->is_scalar() && 704 ir->operands[1]->type->is_scalar()) 705 return new(mem_ctx) ir_expression(ir->operation == ir_binop_all_equal 706 ? ir_binop_equal : ir_binop_nequal, 707 ir->operands[0], 708 ir->operands[1]); 709 break; 710 711 case ir_binop_rshift: 712 case ir_binop_lshift: 713 /* 0 >> x == 0 */ 714 if (is_vec_zero(op_const[0])) 715 return ir->operands[0]; 716 /* x >> 0 == x */ 717 if (is_vec_zero(op_const[1])) 718 return ir->operands[0]; 719 break; 720 721 case ir_binop_logic_and: 722 if (is_vec_one(op_const[0])) { 723 return ir->operands[1]; 724 } else if (is_vec_one(op_const[1])) { 725 return ir->operands[0]; 726 } else if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) { 727 return ir_constant::zero(mem_ctx, ir->type); 728 } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not && 729 op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) { 730 /* De Morgan's Law: 731 * (not A) and (not B) === not (A or B) 732 */ 733 return logic_not(logic_or(op_expr[0]->operands[0], 734 op_expr[1]->operands[0])); 735 } else if (ir->operands[0]->equals(ir->operands[1])) { 736 /* (a && a) == a */ 737 return ir->operands[0]; 738 } 739 break; 740 741 case ir_binop_logic_xor: 742 if (is_vec_zero(op_const[0])) { 743 return ir->operands[1]; 744 } else if (is_vec_zero(op_const[1])) { 745 return ir->operands[0]; 746 } else if (is_vec_one(op_const[0])) { 747 return logic_not(ir->operands[1]); 748 } else if (is_vec_one(op_const[1])) { 749 return logic_not(ir->operands[0]); 750 } else if (ir->operands[0]->equals(ir->operands[1])) { 751 /* (a ^^ a) == false */ 752 return ir_constant::zero(mem_ctx, ir->type); 753 } 754 break; 755 756 case ir_binop_logic_or: 757 if (is_vec_zero(op_const[0])) { 758 return ir->operands[1]; 759 } else if (is_vec_zero(op_const[1])) { 760 return ir->operands[0]; 761 } else if (is_vec_one(op_const[0]) || is_vec_one(op_const[1])) { 762 ir_constant_data data; 763 764 for (unsigned i = 0; i < 16; i++) 765 data.b[i] = true; 766 767 return new(mem_ctx) ir_constant(ir->type, &data); 768 } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not && 769 op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) { 770 /* De Morgan's Law: 771 * (not A) or (not B) === not (A and B) 772 */ 773 return logic_not(logic_and(op_expr[0]->operands[0], 774 op_expr[1]->operands[0])); 775 } else if (ir->operands[0]->equals(ir->operands[1])) { 776 /* (a || a) == a */ 777 return ir->operands[0]; 778 } 779 break; 780 781 case ir_binop_pow: 782 /* 1^x == 1 */ 783 if (is_vec_one(op_const[0])) 784 return op_const[0]; 785 786 /* x^1 == x */ 787 if (is_vec_one(op_const[1])) 788 return ir->operands[0]; 789 790 /* pow(2,x) == exp2(x) */ 791 if (is_vec_two(op_const[0])) 792 return expr(ir_unop_exp2, ir->operands[1]); 793 794 if (is_vec_two(op_const[1])) { 795 ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x", 796 ir_var_temporary); 797 base_ir->insert_before(x); 798 base_ir->insert_before(assign(x, ir->operands[0])); 799 return mul(x, x); 800 } 801 802 if (is_vec_four(op_const[1])) { 803 ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x", 804 ir_var_temporary); 805 base_ir->insert_before(x); 806 base_ir->insert_before(assign(x, ir->operands[0])); 807 808 ir_variable *squared = new(ir) ir_variable(ir->operands[1]->type, 809 "squared", 810 ir_var_temporary); 811 base_ir->insert_before(squared); 812 base_ir->insert_before(assign(squared, mul(x, x))); 813 return mul(squared, squared); 814 } 815 816 break; 817 818 case ir_binop_min: 819 case ir_binop_max: 820 if (ir->type->base_type != GLSL_TYPE_FLOAT || options->EmitNoSat) 821 break; 822 823 /* Replace min(max) operations and its commutative combinations with 824 * a saturate operation 825 */ 826 for (int op = 0; op < 2; op++) { 827 ir_expression *inner_expr = op_expr[op]; 828 ir_constant *outer_const = op_const[1 - op]; 829 ir_expression_operation op_cond = (ir->operation == ir_binop_max) ? 830 ir_binop_min : ir_binop_max; 831 832 if (!inner_expr || !outer_const || (inner_expr->operation != op_cond)) 833 continue; 834 835 /* One of these has to be a constant */ 836 if (!inner_expr->operands[0]->as_constant() && 837 !inner_expr->operands[1]->as_constant()) 838 break; 839 840 /* Found a min(max) combination. Now try to see if its operands 841 * meet our conditions that we can do just a single saturate operation 842 */ 843 for (int minmax_op = 0; minmax_op < 2; minmax_op++) { 844 ir_rvalue *x = inner_expr->operands[minmax_op]; 845 ir_rvalue *y = inner_expr->operands[1 - minmax_op]; 846 847 ir_constant *inner_const = y->as_constant(); 848 if (!inner_const) 849 continue; 850 851 /* min(max(x, 0.0), 1.0) is sat(x) */ 852 if (ir->operation == ir_binop_min && 853 inner_const->is_zero() && 854 outer_const->is_one()) 855 return saturate(x); 856 857 /* max(min(x, 1.0), 0.0) is sat(x) */ 858 if (ir->operation == ir_binop_max && 859 inner_const->is_one() && 860 outer_const->is_zero()) 861 return saturate(x); 862 863 /* min(max(x, 0.0), b) where b < 1.0 is sat(min(x, b)) */ 864 if (ir->operation == ir_binop_min && 865 inner_const->is_zero() && 866 is_less_than_one(outer_const)) 867 return saturate(expr(ir_binop_min, x, outer_const)); 868 869 /* max(min(x, b), 0.0) where b < 1.0 is sat(min(x, b)) */ 870 if (ir->operation == ir_binop_max && 871 is_less_than_one(inner_const) && 872 outer_const->is_zero()) 873 return saturate(expr(ir_binop_min, x, inner_const)); 874 875 /* max(min(x, 1.0), b) where b > 0.0 is sat(max(x, b)) */ 876 if (ir->operation == ir_binop_max && 877 inner_const->is_one() && 878 is_greater_than_zero(outer_const)) 879 return saturate(expr(ir_binop_max, x, outer_const)); 880 881 /* min(max(x, b), 1.0) where b > 0.0 is sat(max(x, b)) */ 882 if (ir->operation == ir_binop_min && 883 is_greater_than_zero(inner_const) && 884 outer_const->is_one()) 885 return saturate(expr(ir_binop_max, x, inner_const)); 886 } 887 } 888 889 break; 890 891 case ir_unop_rcp: 892 if (op_expr[0] && op_expr[0]->operation == ir_unop_rcp) 893 return op_expr[0]->operands[0]; 894 895 if (op_expr[0] && (op_expr[0]->operation == ir_unop_exp2 || 896 op_expr[0]->operation == ir_unop_exp)) { 897 return new(mem_ctx) ir_expression(op_expr[0]->operation, ir->type, 898 neg(op_expr[0]->operands[0])); 899 } 900 901 /* While ir_to_mesa.cpp will lower sqrt(x) to rcp(rsq(x)), it does so at 902 * its IR level, so we can always apply this transformation. 903 */ 904 if (op_expr[0] && op_expr[0]->operation == ir_unop_rsq) 905 return sqrt(op_expr[0]->operands[0]); 906 907 /* As far as we know, all backends are OK with rsq. */ 908 if (op_expr[0] && op_expr[0]->operation == ir_unop_sqrt) { 909 return rsq(op_expr[0]->operands[0]); 910 } 911 912 break; 913 914 case ir_triop_fma: 915 /* Operands are op0 * op1 + op2. */ 916 if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) { 917 return ir->operands[2]; 918 } else if (is_vec_zero(op_const[2])) { 919 return mul(ir->operands[0], ir->operands[1]); 920 } else if (is_vec_one(op_const[0])) { 921 return add(ir->operands[1], ir->operands[2]); 922 } else if (is_vec_one(op_const[1])) { 923 return add(ir->operands[0], ir->operands[2]); 924 } 925 break; 926 927 case ir_triop_lrp: 928 /* Operands are (x, y, a). */ 929 if (is_vec_zero(op_const[2])) { 930 return ir->operands[0]; 931 } else if (is_vec_one(op_const[2])) { 932 return ir->operands[1]; 933 } else if (ir->operands[0]->equals(ir->operands[1])) { 934 return ir->operands[0]; 935 } else if (is_vec_zero(op_const[0])) { 936 return mul(ir->operands[1], ir->operands[2]); 937 } else if (is_vec_zero(op_const[1])) { 938 unsigned op2_components = ir->operands[2]->type->vector_elements; 939 ir_constant *one; 940 941 switch (ir->type->base_type) { 942 case GLSL_TYPE_FLOAT: 943 one = new(mem_ctx) ir_constant(1.0f, op2_components); 944 break; 945 case GLSL_TYPE_DOUBLE: 946 one = new(mem_ctx) ir_constant(1.0, op2_components); 947 break; 948 default: 949 one = NULL; 950 unreachable("unexpected type"); 951 } 952 953 return mul(ir->operands[0], add(one, neg(ir->operands[2]))); 954 } 955 break; 956 957 case ir_triop_csel: 958 if (is_vec_one(op_const[0])) 959 return ir->operands[1]; 960 if (is_vec_zero(op_const[0])) 961 return ir->operands[2]; 962 break; 963 964 /* Remove interpolateAt* instructions for demoted inputs. They are 965 * assigned a constant expression to facilitate this. 966 */ 967 case ir_unop_interpolate_at_centroid: 968 case ir_binop_interpolate_at_offset: 969 case ir_binop_interpolate_at_sample: 970 if (op_const[0]) 971 return ir->operands[0]; 972 break; 973 974 default: 975 break; 976 } 977 978 return ir; 979} 980 981void 982ir_algebraic_visitor::handle_rvalue(ir_rvalue **rvalue) 983{ 984 if (!*rvalue) 985 return; 986 987 ir_expression *expr = (*rvalue)->as_expression(); 988 if (!expr || expr->operation == ir_quadop_vector) 989 return; 990 991 ir_rvalue *new_rvalue = handle_expression(expr); 992 if (new_rvalue == *rvalue) 993 return; 994 995 /* If the expr used to be some vec OP scalar returning a vector, and the 996 * optimization gave us back a scalar, we still need to turn it into a 997 * vector. 998 */ 999 *rvalue = swizzle_if_required(expr, new_rvalue); 1000 1001 this->progress = true; 1002} 1003 1004bool 1005do_algebraic(exec_list *instructions, bool native_integers, 1006 const struct gl_shader_compiler_options *options) 1007{ 1008 ir_algebraic_visitor v(native_integers, options); 1009 1010 visit_list_elements(&v, instructions); 1011 1012 return v.progress; 1013} 1014