11d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner/*
21d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * Copyright © 2014 Intel Corporation
31d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *
41d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * Permission is hereby granted, free of charge, to any person obtaining a
51d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * copy of this software and associated documentation files (the "Software"),
61d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * to deal in the Software without restriction, including without limitation
71d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * the rights to use, copy, modify, merge, publish, distribute, sublicense,
81d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * and/or sell copies of the Software, and to permit persons to whom the
91d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * Software is furnished to do so, subject to the following conditions:
101d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *
111d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * The above copyright notice and this permission notice (including the next
121d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * paragraph) shall be included in all copies or substantial portions of the
131d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * Software.
141d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *
151d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
161d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
171d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
181d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
191d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
201d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
211d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * DEALINGS IN THE SOFTWARE.
221d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner */
231d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
241d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner/**
251d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * \file opt_rebalance_tree.cpp
261d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *
271d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * Rebalances a reduction expression tree.
281d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *
291d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * For reduction operations (e.g., x + y + z + w) we generate an expression
301d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * tree like
311d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *
321d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *        +
331d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *       / \
341d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *      +   w
351d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *     / \
361d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *    +   z
371d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *   / \
381d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *  x   y
391d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *
401d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * which we can rebalance into
411d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *
421d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *       +
431d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *      / \
441d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *     /   \
451d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *    +     +
461d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *   / \   / \
471d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *  x   y z   w
481d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *
491d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * to get a better instruction scheduling.
501d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *
511d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * See "Tree Rebalancing in Optimal Editor Time and Space" by Quentin F. Stout
521d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * and Bette L. Warren.
531d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *
541d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * Also see http://penguin.ewu.edu/~trolfe/DSWpaper/ for a very readable
551d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * explanation of the of the tree_to_vine() (rightward rotation) and
561d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * vine_to_tree() (leftward rotation) algorithms.
571d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner */
581d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
591d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner#include "ir.h"
601d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner#include "ir_visitor.h"
611d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner#include "ir_rvalue_visitor.h"
621d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner#include "ir_optimization.h"
63103716a8629858f6af32a3a6b195a4dc78c356d2Matt Turner#include "main/macros.h" /* for MAX2 */
641d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
651d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner/* The DSW algorithm generates a degenerate tree (really, a linked list) in
661d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * tree_to_vine(). We'd rather not leave a binary expression with only one
671d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * operand, so trivial modifications (the ternary operators below) are needed
681d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * to ensure that we only rotate around the ir_expression nodes of the tree.
691d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner */
701d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turnerstatic unsigned
711d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turnertree_to_vine(ir_expression *root)
721d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner{
731d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   unsigned size = 0;
741d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   ir_rvalue *vine_tail = root;
751d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   ir_rvalue *remainder = root->operands[1];
761d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
771d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   while (remainder != NULL) {
781d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      ir_expression *remainder_temp = remainder->as_expression();
791d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      ir_expression *remainder_left = remainder_temp ?
801d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner         remainder_temp->operands[0]->as_expression() : NULL;
811d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
821d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      if (remainder_left == NULL) {
831d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner         /* move vine_tail down one */
841d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner         vine_tail = remainder;
851d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner         remainder = remainder->as_expression() ?
861d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner            ((ir_expression *)remainder)->operands[1] : NULL;
871d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner         size++;
881d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      } else {
891d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner         /* rotate */
901d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner         ir_expression *tempptr = remainder_left;
911d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner         ((ir_expression *)remainder)->operands[0] = tempptr->operands[1];
921d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner         tempptr->operands[1] = remainder;
931d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner         remainder = tempptr;
941d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner         ((ir_expression *)vine_tail)->operands[1] = tempptr;
951d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      }
961d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   }
971d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
981d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   return size;
991d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner}
1001d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
1011d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turnerstatic void
1021d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turnercompression(ir_expression *root, unsigned count)
1031d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner{
1041d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   ir_expression *scanner = root;
1051d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
1061d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   for (unsigned i = 0; i < count; i++) {
1071d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      ir_expression *child = (ir_expression *)scanner->operands[1];
1081d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      scanner->operands[1] = child->operands[1];
1091d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      scanner = (ir_expression *)scanner->operands[1];
1101d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      child->operands[1] = scanner->operands[0];
1111d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      scanner->operands[0] = child;
1121d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   }
1131d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner}
1141d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
1151d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turnerstatic void
1161d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turnervine_to_tree(ir_expression *root, unsigned size)
1171d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner{
1181d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   int n = size - 1;
1191d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   for (int m = n / 2; m > 0; m = n / 2) {
1201d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      compression(root, m);
1211d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      n -= m + 1;
1221d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   }
1231d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner}
1241d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
1251d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turnernamespace {
1261d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
1271d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turnerclass ir_rebalance_visitor : public ir_rvalue_enter_visitor {
1281d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turnerpublic:
1291d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   ir_rebalance_visitor()
1301d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   {
1311d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      progress = false;
1321d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   }
1331d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
134028d6ecfe0feecd1e543322d2953bef810f13d23Jason Ekstrand   virtual ir_visitor_status visit_enter(ir_assignment *ir);
135028d6ecfe0feecd1e543322d2953bef810f13d23Jason Ekstrand
1361d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   void handle_rvalue(ir_rvalue **rvalue);
1371d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
1381d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   bool progress;
1391d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner};
1401d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
1411d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turnerstruct is_reduction_data {
1421d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   ir_expression_operation operation;
1431d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   const glsl_type *type;
1441d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   unsigned num_expr;
1451d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   bool is_reduction;
1461d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   bool contains_constant;
1471d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner};
1481d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
1491d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner} /* anonymous namespace */
1501d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
151028d6ecfe0feecd1e543322d2953bef810f13d23Jason Ekstrandir_visitor_status
152028d6ecfe0feecd1e543322d2953bef810f13d23Jason Ekstrandir_rebalance_visitor::visit_enter(ir_assignment *ir)
153028d6ecfe0feecd1e543322d2953bef810f13d23Jason Ekstrand{
154028d6ecfe0feecd1e543322d2953bef810f13d23Jason Ekstrand   ir_variable *var = ir->lhs->variable_referenced();
155028d6ecfe0feecd1e543322d2953bef810f13d23Jason Ekstrand   if (var->data.invariant || var->data.precise) {
156028d6ecfe0feecd1e543322d2953bef810f13d23Jason Ekstrand      /* If we're assigning to an invariant variable, just bail.  Tree
157028d6ecfe0feecd1e543322d2953bef810f13d23Jason Ekstrand       * rebalancing (reassociation) isn't precision-safe.
158028d6ecfe0feecd1e543322d2953bef810f13d23Jason Ekstrand       */
159028d6ecfe0feecd1e543322d2953bef810f13d23Jason Ekstrand      return visit_continue_with_parent;
160028d6ecfe0feecd1e543322d2953bef810f13d23Jason Ekstrand   } else {
161028d6ecfe0feecd1e543322d2953bef810f13d23Jason Ekstrand      return visit_continue;
162028d6ecfe0feecd1e543322d2953bef810f13d23Jason Ekstrand   }
163028d6ecfe0feecd1e543322d2953bef810f13d23Jason Ekstrand}
164028d6ecfe0feecd1e543322d2953bef810f13d23Jason Ekstrand
1651d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turnerstatic bool
1661d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turneris_reduction_operation(ir_expression_operation operation)
1671d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner{
1681d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   switch (operation) {
1691d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   case ir_binop_add:
1701d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   case ir_binop_mul:
1711d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   case ir_binop_bit_and:
1721d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   case ir_binop_bit_xor:
1731d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   case ir_binop_bit_or:
1741d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   case ir_binop_logic_and:
1751d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   case ir_binop_logic_xor:
1761d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   case ir_binop_logic_or:
1771d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   case ir_binop_min:
1781d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   case ir_binop_max:
1791d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      return true;
1801d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   default:
1811d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      return false;
1821d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   }
1831d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner}
1841d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
1851d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner/* Note that this function does not attempt to recognize that reduction trees
1861d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * are already balanced.
1871d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *
1881d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * We return false from this function for a number of reasons other than an
1891d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner * expression tree not being a mathematical reduction. Namely,
1901d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *
1911d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *    - if the tree contains multiple constants that we may be able to combine.
1921d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *    - if the tree contains matrices:
1931d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *       - they might contain vec4's with many constant components that we can
1941d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *         simplify after splitting.
1951d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *       - applying the matrix chain ordering optimization is more than just
1961d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *         balancing an expression tree.
1971d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *    - if the tree contains operations on multiple types.
1981d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *    - if the tree contains ir_dereference_{array,record}, since foo[a+b] + c
1991d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner *      would trick the visiting pass.
2001d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner */
2011d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turnerstatic void
2021d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turneris_reduction(ir_instruction *ir, void *data)
2031d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner{
2041d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   struct is_reduction_data *ird = (struct is_reduction_data *)data;
2051d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   if (!ird->is_reduction)
2061d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      return;
2071d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
2081d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   /* We don't want to balance a tree that contains multiple constants, since
2091d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner    * we'll be able to constant fold them if they're not in separate subtrees.
2101d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner    */
2111d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   if (ir->as_constant()) {
2121d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      if (ird->contains_constant) {
2131d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner         ird->is_reduction = false;
2141d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      }
2151d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      ird->contains_constant = true;
2161d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      return;
2171d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   }
2181d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
2191d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   /* Array/record dereferences have subtrees that are not part of the expr
2201d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner    * tree we're balancing. Skip trees containing them.
2211d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner    */
2221d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   if (ir->ir_type == ir_type_dereference_array ||
2231d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner       ir->ir_type == ir_type_dereference_record) {
2241d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      ird->is_reduction = false;
2251d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      return;
2261d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   }
2271d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
2281d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   ir_expression *expr = ir->as_expression();
2291d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   if (!expr)
2301d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      return;
2311d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
2321d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   /* Non-constant matrices might still contain constant vec4 that we can
2331d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner    * constant fold once split up. Handling matrices will need some more
2341d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner    * work.
2351d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner    */
2369697f8088f9e1c1b1f1b39b57a26ac4bd21b247fKenneth Graunke   if (expr->type->is_matrix() ||
2379697f8088f9e1c1b1f1b39b57a26ac4bd21b247fKenneth Graunke       expr->operands[0]->type->is_matrix() ||
2389697f8088f9e1c1b1f1b39b57a26ac4bd21b247fKenneth Graunke       (expr->operands[1] && expr->operands[1]->type->is_matrix())) {
2391d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      ird->is_reduction = false;
2401d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      return;
2411d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   }
2421d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
2431d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   if (ird->type != NULL && ird->type != expr->type) {
2441d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      ird->is_reduction = false;
2451d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      return;
2461d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   }
2471d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   ird->type = expr->type;
2481d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
2491d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   ird->num_expr++;
2501d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   if (is_reduction_operation(expr->operation)) {
2511d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      if (ird->operation != 0 && ird->operation != expr->operation)
2521d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner         ird->is_reduction = false;
2531d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      ird->operation = expr->operation;
2541d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   } else {
2551d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      ird->is_reduction = false;
2561d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   }
2571d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner}
2581d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
2591d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turnerstatic ir_rvalue *
2601d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turnerhandle_expression(ir_expression *expr)
2611d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner{
2621d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   struct is_reduction_data ird;
2631d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   ird.operation = (ir_expression_operation)0;
2641d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   ird.type = NULL;
2651d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   ird.num_expr = 0;
2661d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   ird.is_reduction = true;
2671d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   ird.contains_constant = false;
2681d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
2691d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   visit_tree(expr, is_reduction, (void *)&ird);
2701d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
2711d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   if (ird.is_reduction && ird.num_expr > 2) {
2721d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      ir_constant z = ir_constant(0.0f);
2731d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      ir_expression pseudo_root = ir_expression(ir_binop_add, &z, expr);
2741d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
2751d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      unsigned size = tree_to_vine(&pseudo_root);
2761d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      vine_to_tree(&pseudo_root, size);
2771d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
2781d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      expr = (ir_expression *)pseudo_root.operands[1];
2791d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   }
2801d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   return expr;
2811d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner}
2821d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
283103716a8629858f6af32a3a6b195a4dc78c356d2Matt Turnerstatic void
284103716a8629858f6af32a3a6b195a4dc78c356d2Matt Turnerupdate_types(ir_instruction *ir, void *)
285103716a8629858f6af32a3a6b195a4dc78c356d2Matt Turner{
286103716a8629858f6af32a3a6b195a4dc78c356d2Matt Turner   ir_expression *expr = ir->as_expression();
287103716a8629858f6af32a3a6b195a4dc78c356d2Matt Turner   if (!expr)
288103716a8629858f6af32a3a6b195a4dc78c356d2Matt Turner      return;
289103716a8629858f6af32a3a6b195a4dc78c356d2Matt Turner
2907db75927ca9f15bcbb28d23f9cfbc34541a51938Kenneth Graunke   const glsl_type *const new_type =
291103716a8629858f6af32a3a6b195a4dc78c356d2Matt Turner      glsl_type::get_instance(expr->type->base_type,
2929e47ed2f77d8c274104cdcbc6b7c0c7334c50fdbKenneth Graunke                              MAX2(expr->operands[0]->type->vector_elements,
2939e47ed2f77d8c274104cdcbc6b7c0c7334c50fdbKenneth Graunke                                   expr->operands[1]->type->vector_elements),
294103716a8629858f6af32a3a6b195a4dc78c356d2Matt Turner                              1);
2957db75927ca9f15bcbb28d23f9cfbc34541a51938Kenneth Graunke   assert(new_type != glsl_type::error_type);
2967db75927ca9f15bcbb28d23f9cfbc34541a51938Kenneth Graunke   expr->type = new_type;
297103716a8629858f6af32a3a6b195a4dc78c356d2Matt Turner}
298103716a8629858f6af32a3a6b195a4dc78c356d2Matt Turner
2991d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turnervoid
3001d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turnerir_rebalance_visitor::handle_rvalue(ir_rvalue **rvalue)
3011d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner{
3021d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   if (!*rvalue)
3031d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      return;
3041d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
3051d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   ir_expression *expr = (*rvalue)->as_expression();
3061d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   if (!expr || !is_reduction_operation(expr->operation))
3071d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      return;
3081d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
3091d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   ir_rvalue *new_rvalue = handle_expression(expr);
3101d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
3111d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   /* If we failed to rebalance the tree (e.g., because it wasn't a reduction,
3121d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner    * or some other set of cases) new_rvalue will point to the same root as
3131d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner    * before.
3141d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner    *
3151d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner    * Similarly, if the tree rooted at *rvalue was a reduction and was already
3161d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner    * balanced, the algorithm will rearrange the tree but will ultimately
3171d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner    * return an identical tree, so this check will handle that as well and
3181d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner    * will not set progress = true.
3191d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner    */
3201d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   if (new_rvalue == *rvalue)
3211d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner      return;
3221d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
323103716a8629858f6af32a3a6b195a4dc78c356d2Matt Turner   visit_tree(new_rvalue, NULL, NULL, update_types);
324103716a8629858f6af32a3a6b195a4dc78c356d2Matt Turner
3251d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   *rvalue = new_rvalue;
3261d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   this->progress = true;
3271d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner}
3281d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
3291d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turnerbool
3301d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turnerdo_rebalance_tree(exec_list *instructions)
3311d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner{
3321d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   ir_rebalance_visitor v;
3331d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
3341d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   v.run(instructions);
3351d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner
3361d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner   return v.progress;
3371d9f74eda75da05b4d5c7df5fc1e6f5ab8d88322Matt Turner}
338