1/*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24/**
25 * \file opt_algebraic.cpp
26 *
 * Takes advantage of association, commutativity, and other algebraic
28 * properties to simplify expressions.
29 */
30
31#include "ir.h"
32#include "ir_visitor.h"
33#include "ir_rvalue_visitor.h"
34#include "ir_optimization.h"
35#include "ir_builder.h"
36#include "compiler/glsl_types.h"
37
38using namespace ir_builder;
39
40namespace {
41
42/**
43 * Visitor class for replacing expressions with ir_constant values.
44 */
45
46class ir_algebraic_visitor : public ir_rvalue_visitor {
47public:
48   ir_algebraic_visitor(bool native_integers,
49                        const struct gl_shader_compiler_options *options)
50      : options(options)
51   {
52      this->progress = false;
53      this->mem_ctx = NULL;
54      this->native_integers = native_integers;
55   }
56
57   virtual ~ir_algebraic_visitor()
58   {
59   }
60
61   virtual ir_visitor_status visit_enter(ir_assignment *ir);
62
63   ir_rvalue *handle_expression(ir_expression *ir);
64   void handle_rvalue(ir_rvalue **rvalue);
65   bool reassociate_constant(ir_expression *ir1,
66			     int const_index,
67			     ir_constant *constant,
68			     ir_expression *ir2);
69   void reassociate_operands(ir_expression *ir1,
70			     int op1,
71			     ir_expression *ir2,
72			     int op2);
73   ir_rvalue *swizzle_if_required(ir_expression *expr,
74				  ir_rvalue *operand);
75
76   const struct gl_shader_compiler_options *options;
77   void *mem_ctx;
78
79   bool native_integers;
80   bool progress;
81};
82
83} /* unnamed namespace */
84
85ir_visitor_status
86ir_algebraic_visitor::visit_enter(ir_assignment *ir)
87{
88   ir_variable *var = ir->lhs->variable_referenced();
89   if (var->data.invariant || var->data.precise) {
90      /* If we're assigning to an invariant or precise variable, just bail.
91       * Most of the algebraic optimizations aren't precision-safe.
92       *
93       * FINISHME: Find out which optimizations are precision-safe and enable
94       * then only for invariant or precise trees.
95       */
96      return visit_continue_with_parent;
97   } else {
98      return visit_continue;
99   }
100}
101
102static inline bool
103is_vec_zero(ir_constant *ir)
104{
105   return (ir == NULL) ? false : ir->is_zero();
106}
107
108static inline bool
109is_vec_one(ir_constant *ir)
110{
111   return (ir == NULL) ? false : ir->is_one();
112}
113
114static inline bool
115is_vec_two(ir_constant *ir)
116{
117   return (ir == NULL) ? false : ir->is_value(2.0, 2);
118}
119
120static inline bool
121is_vec_four(ir_constant *ir)
122{
123   return (ir == NULL) ? false : ir->is_value(4.0, 4);
124}
125
126static inline bool
127is_vec_negative_one(ir_constant *ir)
128{
129   return (ir == NULL) ? false : ir->is_negative_one();
130}
131
132static inline bool
133is_valid_vec_const(ir_constant *ir)
134{
135   if (ir == NULL)
136      return false;
137
138   if (!ir->type->is_scalar() && !ir->type->is_vector())
139      return false;
140
141   return true;
142}
143
144static inline bool
145is_less_than_one(ir_constant *ir)
146{
147   assert(ir->type->base_type == GLSL_TYPE_FLOAT);
148
149   if (!is_valid_vec_const(ir))
150      return false;
151
152   unsigned component = 0;
153   for (int c = 0; c < ir->type->vector_elements; c++) {
154      if (ir->get_float_component(c) < 1.0f)
155         component++;
156   }
157
158   return (component == ir->type->vector_elements);
159}
160
161static inline bool
162is_greater_than_zero(ir_constant *ir)
163{
164   assert(ir->type->base_type == GLSL_TYPE_FLOAT);
165
166   if (!is_valid_vec_const(ir))
167      return false;
168
169   unsigned component = 0;
170   for (int c = 0; c < ir->type->vector_elements; c++) {
171      if (ir->get_float_component(c) > 0.0f)
172         component++;
173   }
174
175   return (component == ir->type->vector_elements);
176}
177
178static void
179update_type(ir_expression *ir)
180{
181   if (ir->operands[0]->type->is_vector())
182      ir->type = ir->operands[0]->type;
183   else
184      ir->type = ir->operands[1]->type;
185}
186
187/* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */
188static ir_expression *
189try_replace_with_dot(ir_expression *expr0, ir_expression *expr1, void *mem_ctx)
190{
191   if (expr0 && expr0->operation == ir_binop_add &&
192       expr0->type->is_float() &&
193       expr1 && expr1->operation == ir_binop_add &&
194       expr1->type->is_float()) {
195      ir_swizzle *x = expr0->operands[0]->as_swizzle();
196      ir_swizzle *y = expr0->operands[1]->as_swizzle();
197      ir_swizzle *z = expr1->operands[0]->as_swizzle();
198      ir_swizzle *w = expr1->operands[1]->as_swizzle();
199
200      if (!x || x->mask.num_components != 1 ||
201          !y || y->mask.num_components != 1 ||
202          !z || z->mask.num_components != 1 ||
203          !w || w->mask.num_components != 1) {
204         return NULL;
205      }
206
207      bool swiz_seen[4] = {false, false, false, false};
208      swiz_seen[x->mask.x] = true;
209      swiz_seen[y->mask.x] = true;
210      swiz_seen[z->mask.x] = true;
211      swiz_seen[w->mask.x] = true;
212
213      if (!swiz_seen[0] || !swiz_seen[1] ||
214          !swiz_seen[2] || !swiz_seen[3]) {
215         return NULL;
216      }
217
218      if (x->val->equals(y->val) &&
219          x->val->equals(z->val) &&
220          x->val->equals(w->val)) {
221         return dot(x->val, new(mem_ctx) ir_constant(1.0f, 4));
222      }
223   }
224   return NULL;
225}
226
227void
228ir_algebraic_visitor::reassociate_operands(ir_expression *ir1,
229					   int op1,
230					   ir_expression *ir2,
231					   int op2)
232{
233   ir_rvalue *temp = ir2->operands[op2];
234   ir2->operands[op2] = ir1->operands[op1];
235   ir1->operands[op1] = temp;
236
237   /* Update the type of ir2.  The type of ir1 won't have changed --
238    * base types matched, and at least one of the operands of the 2
239    * binops is still a vector if any of them were.
240    */
241   update_type(ir2);
242
243   this->progress = true;
244}
245
/**
 * Reassociates a constant down a tree of adds or multiplies.
 *
 * Consider (2 * (a * (b * 0.5))).  We want to end up with a * b.
 */
bool
ir_algebraic_visitor::reassociate_constant(ir_expression *ir1, int const_index,
					   ir_constant *constant,
					   ir_expression *ir2)
{
   /* Only reassociate through a chain of the same binary operation. */
   if (!ir2 || ir1->operation != ir2->operation)
      return false;

   /* Don't want to even think about matrices. */
   if (ir1->operands[0]->type->is_matrix() ||
       ir1->operands[1]->type->is_matrix() ||
       ir2->operands[0]->type->is_matrix() ||
       ir2->operands[1]->type->is_matrix())
      return false;

   ir_constant *ir2_const[2];
   ir2_const[0] = ir2->operands[0]->constant_expression_value();
   ir2_const[1] = ir2->operands[1]->constant_expression_value();

   /* Both of ir2's operands are constant: leave the tree alone and let
    * constant folding collapse ir2 instead.
    */
   if (ir2_const[0] && ir2_const[1])
      return false;

   /* Exactly one operand of ir2 is constant: swap ir1's constant with
    * ir2's NON-constant operand, so both constants end up adjacent in ir2
    * where constant folding can combine them.
    */
   if (ir2_const[0]) {
      reassociate_operands(ir1, const_index, ir2, 1);
      return true;
   } else if (ir2_const[1]) {
      reassociate_operands(ir1, const_index, ir2, 0);
      return true;
   }

   /* Neither operand is constant: recurse into ir2's sub-expressions
    * looking for a constant deeper in the tree.  On success the swap
    * happened below us, so ir2's result type may have changed.
    */
   if (reassociate_constant(ir1, const_index, constant,
			    ir2->operands[0]->as_expression())) {
      update_type(ir2);
      return true;
   }

   if (reassociate_constant(ir1, const_index, constant,
			    ir2->operands[1]->as_expression())) {
      update_type(ir2);
      return true;
   }

   return false;
}
295
296/* When eliminating an expression and just returning one of its operands,
297 * we may need to swizzle that operand out to a vector if the expression was
298 * vector type.
299 */
300ir_rvalue *
301ir_algebraic_visitor::swizzle_if_required(ir_expression *expr,
302					  ir_rvalue *operand)
303{
304   if (expr->type->is_vector() && operand->type->is_scalar()) {
305      return new(mem_ctx) ir_swizzle(operand, 0, 0, 0, 0,
306				     expr->type->vector_elements);
307   } else
308      return operand;
309}
310
311ir_rvalue *
312ir_algebraic_visitor::handle_expression(ir_expression *ir)
313{
314   ir_constant *op_const[4] = {NULL, NULL, NULL, NULL};
315   ir_expression *op_expr[4] = {NULL, NULL, NULL, NULL};
316   unsigned int i;
317
318   if (ir->operation == ir_binop_mul &&
319       ir->operands[0]->type->is_matrix() &&
320       ir->operands[1]->type->is_vector()) {
321      ir_expression *matrix_mul = ir->operands[0]->as_expression();
322
323      if (matrix_mul && matrix_mul->operation == ir_binop_mul &&
324         matrix_mul->operands[0]->type->is_matrix() &&
325         matrix_mul->operands[1]->type->is_matrix()) {
326
327         return mul(matrix_mul->operands[0],
328                    mul(matrix_mul->operands[1], ir->operands[1]));
329      }
330   }
331
332   assert(ir->get_num_operands() <= 4);
333   for (i = 0; i < ir->get_num_operands(); i++) {
334      if (ir->operands[i]->type->is_matrix())
335	 return ir;
336
337      op_const[i] = ir->operands[i]->constant_expression_value();
338      op_expr[i] = ir->operands[i]->as_expression();
339   }
340
341   if (this->mem_ctx == NULL)
342      this->mem_ctx = ralloc_parent(ir);
343
344   switch (ir->operation) {
345   case ir_unop_bit_not:
346      if (op_expr[0] && op_expr[0]->operation == ir_unop_bit_not)
347         return op_expr[0]->operands[0];
348      break;
349
350   case ir_unop_abs:
351      if (op_expr[0] == NULL)
352	 break;
353
354      switch (op_expr[0]->operation) {
355      case ir_unop_abs:
356      case ir_unop_neg:
357         return abs(op_expr[0]->operands[0]);
358      default:
359         break;
360      }
361      break;
362
363   case ir_unop_neg:
364      if (op_expr[0] == NULL)
365	 break;
366
367      if (op_expr[0]->operation == ir_unop_neg) {
368         return op_expr[0]->operands[0];
369      }
370      break;
371
372   case ir_unop_exp:
373      if (op_expr[0] == NULL)
374	 break;
375
376      if (op_expr[0]->operation == ir_unop_log) {
377         return op_expr[0]->operands[0];
378      }
379      break;
380
381   case ir_unop_log:
382      if (op_expr[0] == NULL)
383	 break;
384
385      if (op_expr[0]->operation == ir_unop_exp) {
386         return op_expr[0]->operands[0];
387      }
388      break;
389
390   case ir_unop_exp2:
391      if (op_expr[0] == NULL)
392	 break;
393
394      if (op_expr[0]->operation == ir_unop_log2) {
395         return op_expr[0]->operands[0];
396      }
397
398      if (!options->EmitNoPow && op_expr[0]->operation == ir_binop_mul) {
399         for (int log2_pos = 0; log2_pos < 2; log2_pos++) {
400            ir_expression *log2_expr =
401               op_expr[0]->operands[log2_pos]->as_expression();
402
403            if (log2_expr && log2_expr->operation == ir_unop_log2) {
404               return new(mem_ctx) ir_expression(ir_binop_pow,
405                                                 ir->type,
406                                                 log2_expr->operands[0],
407                                                 op_expr[0]->operands[1 - log2_pos]);
408            }
409         }
410      }
411      break;
412
413   case ir_unop_log2:
414      if (op_expr[0] == NULL)
415	 break;
416
417      if (op_expr[0]->operation == ir_unop_exp2) {
418         return op_expr[0]->operands[0];
419      }
420      break;
421
422   case ir_unop_f2i:
423   case ir_unop_f2u:
424      if (op_expr[0] && op_expr[0]->operation == ir_unop_trunc) {
425         return new(mem_ctx) ir_expression(ir->operation,
426                                           ir->type,
427                                           op_expr[0]->operands[0]);
428      }
429      break;
430
431   case ir_unop_logic_not: {
432      enum ir_expression_operation new_op = ir_unop_logic_not;
433
434      if (op_expr[0] == NULL)
435	 break;
436
437      switch (op_expr[0]->operation) {
438      case ir_binop_less:    new_op = ir_binop_gequal;  break;
439      case ir_binop_greater: new_op = ir_binop_lequal;  break;
440      case ir_binop_lequal:  new_op = ir_binop_greater; break;
441      case ir_binop_gequal:  new_op = ir_binop_less;    break;
442      case ir_binop_equal:   new_op = ir_binop_nequal;  break;
443      case ir_binop_nequal:  new_op = ir_binop_equal;   break;
444      case ir_binop_all_equal:   new_op = ir_binop_any_nequal;  break;
445      case ir_binop_any_nequal:  new_op = ir_binop_all_equal;   break;
446
447      default:
448	 /* The default case handler is here to silence a warning from GCC.
449	  */
450	 break;
451      }
452
453      if (new_op != ir_unop_logic_not) {
454	 return new(mem_ctx) ir_expression(new_op,
455					   ir->type,
456					   op_expr[0]->operands[0],
457					   op_expr[0]->operands[1]);
458      }
459
460      break;
461   }
462
463   case ir_unop_saturate:
464      if (op_expr[0] && op_expr[0]->operation == ir_binop_add) {
465         ir_expression *b2f_0 = op_expr[0]->operands[0]->as_expression();
466         ir_expression *b2f_1 = op_expr[0]->operands[1]->as_expression();
467
468         if (b2f_0 && b2f_0->operation == ir_unop_b2f &&
469             b2f_1 && b2f_1->operation == ir_unop_b2f) {
470            return b2f(logic_or(b2f_0->operands[0], b2f_1->operands[0]));
471         }
472      }
473      break;
474
475   case ir_binop_add:
476      if (is_vec_zero(op_const[0]))
477	 return ir->operands[1];
478      if (is_vec_zero(op_const[1]))
479	 return ir->operands[0];
480
481      /* Reassociate addition of constants so that we can do constant
482       * folding.
483       */
484      if (op_const[0] && !op_const[1])
485	 reassociate_constant(ir, 0, op_const[0], op_expr[1]);
486      if (op_const[1] && !op_const[0])
487	 reassociate_constant(ir, 1, op_const[1], op_expr[0]);
488
489      /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */
490      if (options->OptimizeForAOS) {
491         ir_expression *expr = try_replace_with_dot(op_expr[0], op_expr[1],
492                                                    mem_ctx);
493         if (expr)
494            return expr;
495      }
496
497      /* Replace (-x + y) * a + x and commutative variations with lrp(x, y, a).
498       *
499       * (-x + y) * a + x
500       * (x * -a) + (y * a) + x
501       * x + (x * -a) + (y * a)
502       * x * (1 - a) + y * a
503       * lrp(x, y, a)
504       */
505      for (int mul_pos = 0; mul_pos < 2; mul_pos++) {
506         ir_expression *mul = op_expr[mul_pos];
507
508         if (!mul || mul->operation != ir_binop_mul)
509            continue;
510
511         /* Multiply found on one of the operands. Now check for an
512          * inner addition operation.
513          */
514         for (int inner_add_pos = 0; inner_add_pos < 2; inner_add_pos++) {
515            ir_expression *inner_add =
516               mul->operands[inner_add_pos]->as_expression();
517
518            if (!inner_add || inner_add->operation != ir_binop_add)
519               continue;
520
521            /* Inner addition found on one of the operands. Now check for
522             * one of the operands of the inner addition to be the negative
523             * of x_operand.
524             */
525            for (int neg_pos = 0; neg_pos < 2; neg_pos++) {
526               ir_expression *neg =
527                  inner_add->operands[neg_pos]->as_expression();
528
529               if (!neg || neg->operation != ir_unop_neg)
530                  continue;
531
532               ir_rvalue *x_operand = ir->operands[1 - mul_pos];
533
534               if (!neg->operands[0]->equals(x_operand))
535                  continue;
536
537               ir_rvalue *y_operand = inner_add->operands[1 - neg_pos];
538               ir_rvalue *a_operand = mul->operands[1 - inner_add_pos];
539
540               if (x_operand->type != y_operand->type ||
541                   x_operand->type != a_operand->type)
542                  continue;
543
544               return lrp(x_operand, y_operand, a_operand);
545            }
546         }
547      }
548
549      break;
550
551   case ir_binop_sub:
552      if (is_vec_zero(op_const[0]))
553	 return neg(ir->operands[1]);
554      if (is_vec_zero(op_const[1]))
555	 return ir->operands[0];
556      break;
557
558   case ir_binop_mul:
559      if (is_vec_one(op_const[0]))
560	 return ir->operands[1];
561      if (is_vec_one(op_const[1]))
562	 return ir->operands[0];
563
564      if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1]))
565	 return ir_constant::zero(ir, ir->type);
566
567      if (is_vec_negative_one(op_const[0]))
568         return neg(ir->operands[1]);
569      if (is_vec_negative_one(op_const[1]))
570         return neg(ir->operands[0]);
571
572      if (op_expr[0] && op_expr[0]->operation == ir_unop_b2f &&
573          op_expr[1] && op_expr[1]->operation == ir_unop_b2f) {
574         return b2f(logic_and(op_expr[0]->operands[0], op_expr[1]->operands[0]));
575      }
576
577      /* Reassociate multiplication of constants so that we can do
578       * constant folding.
579       */
580      if (op_const[0] && !op_const[1])
581	 reassociate_constant(ir, 0, op_const[0], op_expr[1]);
582      if (op_const[1] && !op_const[0])
583	 reassociate_constant(ir, 1, op_const[1], op_expr[0]);
584
585      /* Optimizes
586       *
587       *    (mul (floor (add (abs x) 0.5) (sign x)))
588       *
589       * into
590       *
591       *    (trunc (add x (mul (sign x) 0.5)))
592       */
593      for (int i = 0; i < 2; i++) {
594         ir_expression *sign_expr = ir->operands[i]->as_expression();
595         ir_expression *floor_expr = ir->operands[1 - i]->as_expression();
596
597         if (!sign_expr || sign_expr->operation != ir_unop_sign ||
598             !floor_expr || floor_expr->operation != ir_unop_floor)
599            continue;
600
601         ir_expression *add_expr = floor_expr->operands[0]->as_expression();
602         if (!add_expr || add_expr->operation != ir_binop_add)
603            continue;
604
605         for (int j = 0; j < 2; j++) {
606            ir_expression *abs_expr = add_expr->operands[j]->as_expression();
607            if (!abs_expr || abs_expr->operation != ir_unop_abs)
608               continue;
609
610            ir_constant *point_five = add_expr->operands[1 - j]->as_constant();
611            if (!point_five || !point_five->is_value(0.5, 0))
612               continue;
613
614            if (abs_expr->operands[0]->equals(sign_expr->operands[0])) {
615               return trunc(add(abs_expr->operands[0],
616                                mul(sign_expr, point_five)));
617            }
618         }
619      }
620      break;
621
622   case ir_binop_div:
623      if (is_vec_one(op_const[0]) && (
624                ir->type->base_type == GLSL_TYPE_FLOAT ||
625                ir->type->base_type == GLSL_TYPE_DOUBLE)) {
626	 return new(mem_ctx) ir_expression(ir_unop_rcp,
627					   ir->operands[1]->type,
628					   ir->operands[1],
629					   NULL);
630      }
631      if (is_vec_one(op_const[1]))
632	 return ir->operands[0];
633      break;
634
635   case ir_binop_dot:
636      if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1]))
637	 return ir_constant::zero(mem_ctx, ir->type);
638
639      for (int i = 0; i < 2; i++) {
640         if (!op_const[i])
641            continue;
642
643         unsigned components[4] = { 0 }, count = 0;
644
645         for (unsigned c = 0; c < op_const[i]->type->vector_elements; c++) {
646            if (op_const[i]->is_zero())
647               continue;
648
649            components[count] = c;
650            count++;
651         }
652
653         /* No channels had zero values; bail. */
654         if (count >= op_const[i]->type->vector_elements)
655            break;
656
657         ir_expression_operation op = count == 1 ?
658            ir_binop_mul : ir_binop_dot;
659
660         /* Swizzle both operands to remove the channels that were zero. */
661         return new(mem_ctx)
662            ir_expression(op, ir->type,
663                          new(mem_ctx) ir_swizzle(ir->operands[0],
664                                                  components, count),
665                          new(mem_ctx) ir_swizzle(ir->operands[1],
666                                                  components, count));
667      }
668      break;
669
670   case ir_binop_less:
671   case ir_binop_lequal:
672   case ir_binop_greater:
673   case ir_binop_gequal:
674   case ir_binop_equal:
675   case ir_binop_nequal:
676      for (int add_pos = 0; add_pos < 2; add_pos++) {
677         ir_expression *add = op_expr[add_pos];
678
679         if (!add || add->operation != ir_binop_add)
680            continue;
681
682         ir_constant *zero = op_const[1 - add_pos];
683         if (!is_vec_zero(zero))
684            continue;
685
686         /* Depending of the zero position we want to optimize
687          * (0 cmp x+y) into (-x cmp y) or (x+y cmp 0) into (x cmp -y)
688          */
689         if (add_pos == 1) {
690            return new(mem_ctx) ir_expression(ir->operation,
691                                              neg(add->operands[0]),
692                                              add->operands[1]);
693         } else {
694            return new(mem_ctx) ir_expression(ir->operation,
695                                              add->operands[0],
696                                              neg(add->operands[1]));
697         }
698      }
699      break;
700
701   case ir_binop_all_equal:
702   case ir_binop_any_nequal:
703      if (ir->operands[0]->type->is_scalar() &&
704          ir->operands[1]->type->is_scalar())
705         return new(mem_ctx) ir_expression(ir->operation == ir_binop_all_equal
706                                           ? ir_binop_equal : ir_binop_nequal,
707                                           ir->operands[0],
708                                           ir->operands[1]);
709      break;
710
711   case ir_binop_rshift:
712   case ir_binop_lshift:
713      /* 0 >> x == 0 */
714      if (is_vec_zero(op_const[0]))
715         return ir->operands[0];
716      /* x >> 0 == x */
717      if (is_vec_zero(op_const[1]))
718         return ir->operands[0];
719      break;
720
721   case ir_binop_logic_and:
722      if (is_vec_one(op_const[0])) {
723	 return ir->operands[1];
724      } else if (is_vec_one(op_const[1])) {
725	 return ir->operands[0];
726      } else if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) {
727	 return ir_constant::zero(mem_ctx, ir->type);
728      } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not &&
729                 op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) {
730         /* De Morgan's Law:
731          *    (not A) and (not B) === not (A or B)
732          */
733         return logic_not(logic_or(op_expr[0]->operands[0],
734                                   op_expr[1]->operands[0]));
735      } else if (ir->operands[0]->equals(ir->operands[1])) {
736         /* (a && a) == a */
737         return ir->operands[0];
738      }
739      break;
740
741   case ir_binop_logic_xor:
742      if (is_vec_zero(op_const[0])) {
743	 return ir->operands[1];
744      } else if (is_vec_zero(op_const[1])) {
745	 return ir->operands[0];
746      } else if (is_vec_one(op_const[0])) {
747	 return logic_not(ir->operands[1]);
748      } else if (is_vec_one(op_const[1])) {
749	 return logic_not(ir->operands[0]);
750      } else if (ir->operands[0]->equals(ir->operands[1])) {
751         /* (a ^^ a) == false */
752	 return ir_constant::zero(mem_ctx, ir->type);
753      }
754      break;
755
756   case ir_binop_logic_or:
757      if (is_vec_zero(op_const[0])) {
758	 return ir->operands[1];
759      } else if (is_vec_zero(op_const[1])) {
760	 return ir->operands[0];
761      } else if (is_vec_one(op_const[0]) || is_vec_one(op_const[1])) {
762	 ir_constant_data data;
763
764	 for (unsigned i = 0; i < 16; i++)
765	    data.b[i] = true;
766
767	 return new(mem_ctx) ir_constant(ir->type, &data);
768      } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not &&
769                 op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) {
770         /* De Morgan's Law:
771          *    (not A) or (not B) === not (A and B)
772          */
773         return logic_not(logic_and(op_expr[0]->operands[0],
774                                    op_expr[1]->operands[0]));
775      } else if (ir->operands[0]->equals(ir->operands[1])) {
776         /* (a || a) == a */
777         return ir->operands[0];
778      }
779      break;
780
781   case ir_binop_pow:
782      /* 1^x == 1 */
783      if (is_vec_one(op_const[0]))
784         return op_const[0];
785
786      /* x^1 == x */
787      if (is_vec_one(op_const[1]))
788         return ir->operands[0];
789
790      /* pow(2,x) == exp2(x) */
791      if (is_vec_two(op_const[0]))
792         return expr(ir_unop_exp2, ir->operands[1]);
793
794      if (is_vec_two(op_const[1])) {
795         ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x",
796                                              ir_var_temporary);
797         base_ir->insert_before(x);
798         base_ir->insert_before(assign(x, ir->operands[0]));
799         return mul(x, x);
800      }
801
802      if (is_vec_four(op_const[1])) {
803         ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x",
804                                              ir_var_temporary);
805         base_ir->insert_before(x);
806         base_ir->insert_before(assign(x, ir->operands[0]));
807
808         ir_variable *squared = new(ir) ir_variable(ir->operands[1]->type,
809                                                    "squared",
810                                                    ir_var_temporary);
811         base_ir->insert_before(squared);
812         base_ir->insert_before(assign(squared, mul(x, x)));
813         return mul(squared, squared);
814      }
815
816      break;
817
818   case ir_binop_min:
819   case ir_binop_max:
820      if (ir->type->base_type != GLSL_TYPE_FLOAT || options->EmitNoSat)
821         break;
822
823      /* Replace min(max) operations and its commutative combinations with
824       * a saturate operation
825       */
826      for (int op = 0; op < 2; op++) {
827         ir_expression *inner_expr = op_expr[op];
828         ir_constant *outer_const = op_const[1 - op];
829         ir_expression_operation op_cond = (ir->operation == ir_binop_max) ?
830            ir_binop_min : ir_binop_max;
831
832         if (!inner_expr || !outer_const || (inner_expr->operation != op_cond))
833            continue;
834
835         /* One of these has to be a constant */
836         if (!inner_expr->operands[0]->as_constant() &&
837             !inner_expr->operands[1]->as_constant())
838            break;
839
840         /* Found a min(max) combination. Now try to see if its operands
841          * meet our conditions that we can do just a single saturate operation
842          */
843         for (int minmax_op = 0; minmax_op < 2; minmax_op++) {
844            ir_rvalue *x = inner_expr->operands[minmax_op];
845            ir_rvalue *y = inner_expr->operands[1 - minmax_op];
846
847            ir_constant *inner_const = y->as_constant();
848            if (!inner_const)
849               continue;
850
851            /* min(max(x, 0.0), 1.0) is sat(x) */
852            if (ir->operation == ir_binop_min &&
853                inner_const->is_zero() &&
854                outer_const->is_one())
855               return saturate(x);
856
857            /* max(min(x, 1.0), 0.0) is sat(x) */
858            if (ir->operation == ir_binop_max &&
859                inner_const->is_one() &&
860                outer_const->is_zero())
861               return saturate(x);
862
863            /* min(max(x, 0.0), b) where b < 1.0 is sat(min(x, b)) */
864            if (ir->operation == ir_binop_min &&
865                inner_const->is_zero() &&
866                is_less_than_one(outer_const))
867               return saturate(expr(ir_binop_min, x, outer_const));
868
869            /* max(min(x, b), 0.0) where b < 1.0 is sat(min(x, b)) */
870            if (ir->operation == ir_binop_max &&
871                is_less_than_one(inner_const) &&
872                outer_const->is_zero())
873               return saturate(expr(ir_binop_min, x, inner_const));
874
875            /* max(min(x, 1.0), b) where b > 0.0 is sat(max(x, b)) */
876            if (ir->operation == ir_binop_max &&
877                inner_const->is_one() &&
878                is_greater_than_zero(outer_const))
879               return saturate(expr(ir_binop_max, x, outer_const));
880
881            /* min(max(x, b), 1.0) where b > 0.0 is sat(max(x, b)) */
882            if (ir->operation == ir_binop_min &&
883                is_greater_than_zero(inner_const) &&
884                outer_const->is_one())
885               return saturate(expr(ir_binop_max, x, inner_const));
886         }
887      }
888
889      break;
890
891   case ir_unop_rcp:
892      if (op_expr[0] && op_expr[0]->operation == ir_unop_rcp)
893	 return op_expr[0]->operands[0];
894
895      if (op_expr[0] && (op_expr[0]->operation == ir_unop_exp2 ||
896                         op_expr[0]->operation == ir_unop_exp)) {
897         return new(mem_ctx) ir_expression(op_expr[0]->operation, ir->type,
898                                           neg(op_expr[0]->operands[0]));
899      }
900
901      /* While ir_to_mesa.cpp will lower sqrt(x) to rcp(rsq(x)), it does so at
902       * its IR level, so we can always apply this transformation.
903       */
904      if (op_expr[0] && op_expr[0]->operation == ir_unop_rsq)
905         return sqrt(op_expr[0]->operands[0]);
906
907      /* As far as we know, all backends are OK with rsq. */
908      if (op_expr[0] && op_expr[0]->operation == ir_unop_sqrt) {
909	 return rsq(op_expr[0]->operands[0]);
910      }
911
912      break;
913
914   case ir_triop_fma:
915      /* Operands are op0 * op1 + op2. */
916      if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) {
917         return ir->operands[2];
918      } else if (is_vec_zero(op_const[2])) {
919         return mul(ir->operands[0], ir->operands[1]);
920      } else if (is_vec_one(op_const[0])) {
921         return add(ir->operands[1], ir->operands[2]);
922      } else if (is_vec_one(op_const[1])) {
923         return add(ir->operands[0], ir->operands[2]);
924      }
925      break;
926
927   case ir_triop_lrp:
928      /* Operands are (x, y, a). */
929      if (is_vec_zero(op_const[2])) {
930         return ir->operands[0];
931      } else if (is_vec_one(op_const[2])) {
932         return ir->operands[1];
933      } else if (ir->operands[0]->equals(ir->operands[1])) {
934         return ir->operands[0];
935      } else if (is_vec_zero(op_const[0])) {
936         return mul(ir->operands[1], ir->operands[2]);
937      } else if (is_vec_zero(op_const[1])) {
938         unsigned op2_components = ir->operands[2]->type->vector_elements;
939         ir_constant *one;
940
941         switch (ir->type->base_type) {
942         case GLSL_TYPE_FLOAT:
943            one = new(mem_ctx) ir_constant(1.0f, op2_components);
944            break;
945         case GLSL_TYPE_DOUBLE:
946            one = new(mem_ctx) ir_constant(1.0, op2_components);
947            break;
948         default:
949            one = NULL;
950            unreachable("unexpected type");
951         }
952
953         return mul(ir->operands[0], add(one, neg(ir->operands[2])));
954      }
955      break;
956
957   case ir_triop_csel:
958      if (is_vec_one(op_const[0]))
959	 return ir->operands[1];
960      if (is_vec_zero(op_const[0]))
961	 return ir->operands[2];
962      break;
963
964   /* Remove interpolateAt* instructions for demoted inputs. They are
965    * assigned a constant expression to facilitate this.
966    */
967   case ir_unop_interpolate_at_centroid:
968   case ir_binop_interpolate_at_offset:
969   case ir_binop_interpolate_at_sample:
970      if (op_const[0])
971         return ir->operands[0];
972      break;
973
974   default:
975      break;
976   }
977
978   return ir;
979}
980
981void
982ir_algebraic_visitor::handle_rvalue(ir_rvalue **rvalue)
983{
984   if (!*rvalue)
985      return;
986
987   ir_expression *expr = (*rvalue)->as_expression();
988   if (!expr || expr->operation == ir_quadop_vector)
989      return;
990
991   ir_rvalue *new_rvalue = handle_expression(expr);
992   if (new_rvalue == *rvalue)
993      return;
994
995   /* If the expr used to be some vec OP scalar returning a vector, and the
996    * optimization gave us back a scalar, we still need to turn it into a
997    * vector.
998    */
999   *rvalue = swizzle_if_required(expr, new_rvalue);
1000
1001   this->progress = true;
1002}
1003
1004bool
1005do_algebraic(exec_list *instructions, bool native_integers,
1006             const struct gl_shader_compiler_options *options)
1007{
1008   ir_algebraic_visitor v(native_integers, options);
1009
1010   visit_list_elements(&v, instructions);
1011
1012   return v.progress;
1013}
1014