1/*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24/**
25 * \file opt_minmax.cpp
26 *
27 * Drop operands from an expression tree of only min/max operations if they
28 * can be proven to not contribute to the final result.
29 *
30 * The algorithm is similar to alpha-beta pruning on a minmax search.
31 */
32
33#include "ir.h"
34#include "ir_visitor.h"
35#include "ir_rvalue_visitor.h"
36#include "ir_optimization.h"
37#include "ir_builder.h"
38#include "program/prog_instruction.h"
39#include "compiler/glsl_types.h"
40#include "main/macros.h"
41
42using namespace ir_builder;
43
44namespace {
45
46enum compare_components_result {
47   LESS,
48   LESS_OR_EQUAL,
49   EQUAL,
50   GREATER_OR_EQUAL,
51   GREATER,
52   MIXED
53};
54
55class minmax_range {
56public:
57   minmax_range(ir_constant *low = NULL, ir_constant *high = NULL)
58   {
59      this->low = low;
60      this->high = high;
61   }
62
63   /* low is the lower limit of the range, high is the higher limit. NULL on
64    * low means negative infinity (unlimited) and on high positive infinity
65    * (unlimited). Because of the two interpretations of the value NULL,
66    * arbitrary comparison between ir_constants is impossible.
67    */
68   ir_constant *low;
69   ir_constant *high;
70};
71
72class ir_minmax_visitor : public ir_rvalue_enter_visitor {
73public:
74   ir_minmax_visitor()
75      : progress(false)
76   {
77   }
78
79   ir_rvalue *prune_expression(ir_expression *expr, minmax_range baserange);
80
81   void handle_rvalue(ir_rvalue **rvalue);
82
83   bool progress;
84};
85
86/*
87 * Returns LESS if all vector components of `a' are strictly lower than of `b',
88 * GREATER if all vector components of `a' are strictly greater than of `b',
89 * MIXED if some vector components of `a' are strictly lower than of `b' while
90 * others are strictly greater, or EQUAL otherwise.
91 */
92static enum compare_components_result
93compare_components(ir_constant *a, ir_constant *b)
94{
95   assert(a != NULL);
96   assert(b != NULL);
97
98   assert(a->type->base_type == b->type->base_type);
99
100   unsigned a_inc = a->type->is_scalar() ? 0 : 1;
101   unsigned b_inc = b->type->is_scalar() ? 0 : 1;
102   unsigned components = MAX2(a->type->components(), b->type->components());
103
104   bool foundless = false;
105   bool foundgreater = false;
106   bool foundequal = false;
107
108   for (unsigned i = 0, c0 = 0, c1 = 0;
109        i < components;
110        c0 += a_inc, c1 += b_inc, ++i) {
111      switch (a->type->base_type) {
112      case GLSL_TYPE_UINT:
113         if (a->value.u[c0] < b->value.u[c1])
114            foundless = true;
115         else if (a->value.u[c0] > b->value.u[c1])
116            foundgreater = true;
117         else
118            foundequal = true;
119         break;
120      case GLSL_TYPE_INT:
121         if (a->value.i[c0] < b->value.i[c1])
122            foundless = true;
123         else if (a->value.i[c0] > b->value.i[c1])
124            foundgreater = true;
125         else
126            foundequal = true;
127         break;
128      case GLSL_TYPE_FLOAT:
129         if (a->value.f[c0] < b->value.f[c1])
130            foundless = true;
131         else if (a->value.f[c0] > b->value.f[c1])
132            foundgreater = true;
133         else
134            foundequal = true;
135         break;
136      case GLSL_TYPE_DOUBLE:
137         if (a->value.d[c0] < b->value.d[c1])
138            foundless = true;
139         else if (a->value.d[c0] > b->value.d[c1])
140            foundgreater = true;
141         else
142            foundequal = true;
143         break;
144      default:
145         unreachable("not reached");
146      }
147   }
148
149   if (foundless && foundgreater) {
150      /* Some components are strictly lower, others are strictly greater */
151      return MIXED;
152   }
153
154   if (foundequal) {
155       /* It is not mixed, but it is not strictly lower or greater */
156      if (foundless)
157         return LESS_OR_EQUAL;
158      if (foundgreater)
159         return GREATER_OR_EQUAL;
160      return EQUAL;
161   }
162
163   /* All components are strictly lower or strictly greater */
164   return foundless ? LESS : GREATER;
165}
166
167static ir_constant *
168combine_constant(bool ismin, ir_constant *a, ir_constant *b)
169{
170   void *mem_ctx = ralloc_parent(a);
171   ir_constant *c = a->clone(mem_ctx, NULL);
172   for (unsigned i = 0; i < c->type->components(); i++) {
173      switch (c->type->base_type) {
174      case GLSL_TYPE_UINT:
175         if ((ismin && b->value.u[i] < c->value.u[i]) ||
176             (!ismin && b->value.u[i] > c->value.u[i]))
177            c->value.u[i] = b->value.u[i];
178         break;
179      case GLSL_TYPE_INT:
180         if ((ismin && b->value.i[i] < c->value.i[i]) ||
181             (!ismin && b->value.i[i] > c->value.i[i]))
182            c->value.i[i] = b->value.i[i];
183         break;
184      case GLSL_TYPE_FLOAT:
185         if ((ismin && b->value.f[i] < c->value.f[i]) ||
186             (!ismin && b->value.f[i] > c->value.f[i]))
187            c->value.f[i] = b->value.f[i];
188         break;
189      case GLSL_TYPE_DOUBLE:
190         if ((ismin && b->value.d[i] < c->value.d[i]) ||
191             (!ismin && b->value.d[i] > c->value.d[i]))
192            c->value.d[i] = b->value.d[i];
193         break;
194      default:
195         assert(!"not reached");
196      }
197   }
198   return c;
199}
200
201static ir_constant *
202smaller_constant(ir_constant *a, ir_constant *b)
203{
204   assert(a != NULL);
205   assert(b != NULL);
206
207   enum compare_components_result ret = compare_components(a, b);
208   if (ret == MIXED)
209      return combine_constant(true, a, b);
210   else if (ret < EQUAL)
211      return a;
212   else
213      return b;
214}
215
216static ir_constant *
217larger_constant(ir_constant *a, ir_constant *b)
218{
219   assert(a != NULL);
220   assert(b != NULL);
221
222   enum compare_components_result ret = compare_components(a, b);
223   if (ret == MIXED)
224      return combine_constant(false, a, b);
225   else if (ret < EQUAL)
226      return b;
227   else
228      return a;
229}
230
231/* Combines two ranges by doing an element-wise min() / max() depending on the
232 * operation.
233 */
234static minmax_range
235combine_range(minmax_range r0, minmax_range r1, bool ismin)
236{
237   minmax_range ret;
238
239   if (!r0.low) {
240      ret.low = ismin ? r0.low : r1.low;
241   } else if (!r1.low) {
242      ret.low = ismin ? r1.low : r0.low;
243   } else {
244      ret.low = ismin ? smaller_constant(r0.low, r1.low) :
245         larger_constant(r0.low, r1.low);
246   }
247
248   if (!r0.high) {
249      ret.high = ismin ? r1.high : r0.high;
250   } else if (!r1.high) {
251      ret.high = ismin ? r0.high : r1.high;
252   } else {
253      ret.high = ismin ? smaller_constant(r0.high, r1.high) :
254         larger_constant(r0.high, r1.high);
255   }
256
257   return ret;
258}
259
260/* Returns a range so that lower limit is the larger of the two lower limits,
261 * and higher limit is the smaller of the two higher limits.
262 */
263static minmax_range
264range_intersection(minmax_range r0, minmax_range r1)
265{
266   minmax_range ret;
267
268   if (!r0.low)
269      ret.low = r1.low;
270   else if (!r1.low)
271      ret.low = r0.low;
272   else
273      ret.low = larger_constant(r0.low, r1.low);
274
275   if (!r0.high)
276      ret.high = r1.high;
277   else if (!r1.high)
278      ret.high = r0.high;
279   else
280      ret.high = smaller_constant(r0.high, r1.high);
281
282   return ret;
283}
284
285static minmax_range
286get_range(ir_rvalue *rval)
287{
288   ir_expression *expr = rval->as_expression();
289   if (expr && (expr->operation == ir_binop_min ||
290                expr->operation == ir_binop_max)) {
291      minmax_range r0 = get_range(expr->operands[0]);
292      minmax_range r1 = get_range(expr->operands[1]);
293      return combine_range(r0, r1, expr->operation == ir_binop_min);
294   }
295
296   ir_constant *c = rval->as_constant();
297   if (c) {
298      return minmax_range(c, c);
299   }
300
301   return minmax_range();
302}
303
304/**
305 * Prunes a min/max expression considering the base range of the parent
306 * min/max expression.
307 *
308 * @param baserange the range that the parents of this min/max expression
309 * in the min/max tree will clamp its value to.
310 */
311ir_rvalue *
312ir_minmax_visitor::prune_expression(ir_expression *expr, minmax_range baserange)
313{
314   assert(expr->operation == ir_binop_min ||
315          expr->operation == ir_binop_max);
316
317   bool ismin = expr->operation == ir_binop_min;
318   minmax_range limits[2];
319
320   /* Recurse to get the ranges for each of the subtrees of this
321    * expression. We need to do this as a separate step because we need to
322    * know the ranges of each of the subtrees before we prune either one.
323    * Consider something like this:
324    *
325    *        max
326    *     /       \
327    *    max     max
328    *   /   \   /   \
329    *  3    a   b    2
330    *
331    * We would like to prune away the max on the bottom-right, but to do so
332    * we need to know the range of the expression on the left beforehand,
333    * and there's no guarantee that we will visit either subtree in a
334    * particular order.
335    */
336   for (unsigned i = 0; i < 2; ++i)
337      limits[i] = get_range(expr->operands[i]);
338
339   for (unsigned i = 0; i < 2; ++i) {
340      bool is_redundant = false;
341
342      enum compare_components_result cr = LESS;
343      if (ismin) {
344         /* If this operand will always be greater than the other one, it's
345          * redundant.
346          */
347         if (limits[i].low && limits[1 - i].high) {
348               cr = compare_components(limits[i].low, limits[1 - i].high);
349            if (cr >= EQUAL && cr != MIXED)
350               is_redundant = true;
351         }
352         /* If this operand is always greater than baserange, then even if
353          * it's smaller than the other one it'll get clamped, so it's
354          * redundant.
355          */
356         if (!is_redundant && limits[i].low && baserange.high) {
357            cr = compare_components(limits[i].low, baserange.high);
358            if (cr > EQUAL && cr != MIXED)
359               is_redundant = true;
360         }
361      } else {
362         /* If this operand will always be lower than the other one, it's
363          * redundant.
364          */
365         if (limits[i].high && limits[1 - i].low) {
366            cr = compare_components(limits[i].high, limits[1 - i].low);
367            if (cr <= EQUAL)
368               is_redundant = true;
369         }
370         /* If this operand is always lower than baserange, then even if
371          * it's greater than the other one it'll get clamped, so it's
372          * redundant.
373          */
374         if (!is_redundant && limits[i].high && baserange.low) {
375            cr = compare_components(limits[i].high, baserange.low);
376            if (cr < EQUAL)
377               is_redundant = true;
378         }
379      }
380
381      if (is_redundant) {
382         progress = true;
383
384         /* Recurse if necessary. */
385         ir_expression *op_expr = expr->operands[1 - i]->as_expression();
386         if (op_expr && (op_expr->operation == ir_binop_min ||
387                         op_expr->operation == ir_binop_max)) {
388            return prune_expression(op_expr, baserange);
389         }
390
391         return expr->operands[1 - i];
392      } else if (cr == MIXED) {
393         /* If we have mixed vector operands, we can try to resolve the minmax
394          * expression by doing a component-wise minmax:
395          *
396          *             min                          min
397          *           /    \                       /    \
398          *         min     a       ===>        [1,1]    a
399          *       /    \
400          *    [1,3]   [3,1]
401          *
402          */
403         ir_constant *a = expr->operands[0]->as_constant();
404         ir_constant *b = expr->operands[1]->as_constant();
405         if (a && b)
406            return combine_constant(ismin, a, b);
407      }
408   }
409
410   /* Now recurse to operands giving them the proper baserange. The baserange
411    * to pass is the intersection of our baserange and the other operand's
412    * limit with one of the ranges unlimited. If we can't compute a valid
413    * intersection, we use the current baserange.
414    */
415   for (unsigned i = 0; i < 2; ++i) {
416      ir_expression *op_expr = expr->operands[i]->as_expression();
417      if (op_expr && (op_expr->operation == ir_binop_min ||
418                      op_expr->operation == ir_binop_max)) {
419         /* We can only compute a new baserange for this operand if we managed
420          * to compute a valid range for the other operand.
421          */
422         if (ismin)
423            limits[1 - i].low = NULL;
424         else
425            limits[1 - i].high = NULL;
426         minmax_range base = range_intersection(limits[1 - i], baserange);
427         expr->operands[i] = prune_expression(op_expr, base);
428      }
429   }
430
431   /* If we got here we could not discard any of the operands of the minmax
432    * expression, but we can still try to resolve the expression if both
433    * operands are constant. We do this after the loop above, to make sure
434    * that if our operands are minmax expressions we have tried to prune them
435    * first (hopefully reducing them to constants).
436    */
437   ir_constant *a = expr->operands[0]->as_constant();
438   ir_constant *b = expr->operands[1]->as_constant();
439   if (a && b)
440      return combine_constant(ismin, a, b);
441
442   return expr;
443}
444
445static ir_rvalue *
446swizzle_if_required(ir_expression *expr, ir_rvalue *rval)
447{
448   if (expr->type->is_vector() && rval->type->is_scalar()) {
449      return swizzle(rval, SWIZZLE_XXXX, expr->type->vector_elements);
450   } else {
451      return rval;
452   }
453}
454
455void
456ir_minmax_visitor::handle_rvalue(ir_rvalue **rvalue)
457{
458   if (!*rvalue)
459      return;
460
461   ir_expression *expr = (*rvalue)->as_expression();
462   if (!expr || (expr->operation != ir_binop_min &&
463                 expr->operation != ir_binop_max))
464      return;
465
466   ir_rvalue *new_rvalue = prune_expression(expr, minmax_range());
467   if (new_rvalue == *rvalue)
468      return;
469
470   /* If the expression type is a vector and the optimization leaves a scalar
471    * as the result, we need to turn it into a vector.
472    */
473   *rvalue = swizzle_if_required(expr, new_rvalue);
474
475   progress = true;
476}
477
478}
479
480bool
481do_minmax_prune(exec_list *instructions)
482{
483   ir_minmax_visitor v;
484
485   visit_list_elements(&v, instructions);
486
487   return v.progress;
488}
489