1/*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * constant of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, constant, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above constantright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR CONSTANTRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24/**
25 * \file opt_constant_propagation.cpp
26 *
27 * Tracks assignments of constants to channels of variables, and
28 * usage of those constant channels with direct usage of the constants.
29 *
30 * This can lead to constant folding and algebraic optimizations in
31 * those later expressions, while causing no increase in instruction
32 * count (due to constants being generally free to load from a
33 * constant push buffer or as instruction immediate values) and
34 * possibly reducing register pressure.
35 */
36
37#include "ir.h"
38#include "ir_visitor.h"
39#include "ir_rvalue_visitor.h"
40#include "ir_basic_block.h"
41#include "ir_optimization.h"
42#include "glsl_types.h"
43
44namespace {
45
46class acp_entry : public exec_node
47{
48public:
49   acp_entry(ir_variable *var, unsigned write_mask, ir_constant *constant)
50   {
51      assert(var);
52      assert(constant);
53      this->var = var;
54      this->write_mask = write_mask;
55      this->constant = constant;
56      this->initial_values = write_mask;
57   }
58
59   acp_entry(const acp_entry *src)
60   {
61      this->var = src->var;
62      this->write_mask = src->write_mask;
63      this->constant = src->constant;
64      this->initial_values = src->initial_values;
65   }
66
67   ir_variable *var;
68   ir_constant *constant;
69   unsigned write_mask;
70
71   /** Mask of values initially available in the constant. */
72   unsigned initial_values;
73};
74
75
76class kill_entry : public exec_node
77{
78public:
79   kill_entry(ir_variable *var, unsigned write_mask)
80   {
81      assert(var);
82      this->var = var;
83      this->write_mask = write_mask;
84   }
85
86   ir_variable *var;
87   unsigned write_mask;
88};
89
90class ir_constant_propagation_visitor : public ir_rvalue_visitor {
91public:
92   ir_constant_propagation_visitor()
93   {
94      progress = false;
95      killed_all = false;
96      mem_ctx = ralloc_context(0);
97      this->acp = new(mem_ctx) exec_list;
98      this->kills = new(mem_ctx) exec_list;
99   }
100   ~ir_constant_propagation_visitor()
101   {
102      ralloc_free(mem_ctx);
103   }
104
105   virtual ir_visitor_status visit_enter(class ir_loop *);
106   virtual ir_visitor_status visit_enter(class ir_function_signature *);
107   virtual ir_visitor_status visit_enter(class ir_function *);
108   virtual ir_visitor_status visit_leave(class ir_assignment *);
109   virtual ir_visitor_status visit_enter(class ir_call *);
110   virtual ir_visitor_status visit_enter(class ir_if *);
111
112   void add_constant(ir_assignment *ir);
113   void kill(ir_variable *ir, unsigned write_mask);
114   void handle_if_block(exec_list *instructions);
115   void handle_rvalue(ir_rvalue **rvalue);
116
117   /** List of acp_entry: The available constants to propagate */
118   exec_list *acp;
119
120   /**
121    * List of kill_entry: The masks of variables whose values were
122    * killed in this block.
123    */
124   exec_list *kills;
125
126   bool progress;
127
128   bool killed_all;
129
130   void *mem_ctx;
131};
132
133
134void
135ir_constant_propagation_visitor::handle_rvalue(ir_rvalue **rvalue)
136{
137   if (this->in_assignee || !*rvalue)
138      return;
139
140   const glsl_type *type = (*rvalue)->type;
141   if (!type->is_scalar() && !type->is_vector())
142      return;
143
144   ir_swizzle *swiz = NULL;
145   ir_dereference_variable *deref = (*rvalue)->as_dereference_variable();
146   if (!deref) {
147      swiz = (*rvalue)->as_swizzle();
148      if (!swiz)
149	 return;
150
151      deref = swiz->val->as_dereference_variable();
152      if (!deref)
153	 return;
154   }
155
156   ir_constant_data data;
157   memset(&data, 0, sizeof(data));
158
159   for (unsigned int i = 0; i < type->components(); i++) {
160      int channel;
161      acp_entry *found = NULL;
162
163      if (swiz) {
164	 switch (i) {
165	 case 0: channel = swiz->mask.x; break;
166	 case 1: channel = swiz->mask.y; break;
167	 case 2: channel = swiz->mask.z; break;
168	 case 3: channel = swiz->mask.w; break;
169	 default: assert(!"shouldn't be reached"); channel = 0; break;
170	 }
171      } else {
172	 channel = i;
173      }
174
175      foreach_iter(exec_list_iterator, iter, *this->acp) {
176	 acp_entry *entry = (acp_entry *)iter.get();
177	 if (entry->var == deref->var && entry->write_mask & (1 << channel)) {
178	    found = entry;
179	    break;
180	 }
181      }
182
183      if (!found)
184	 return;
185
186      int rhs_channel = 0;
187      for (int j = 0; j < 4; j++) {
188	 if (j == channel)
189	    break;
190	 if (found->initial_values & (1 << j))
191	    rhs_channel++;
192      }
193
194      switch (type->base_type) {
195      case GLSL_TYPE_FLOAT:
196	 data.f[i] = found->constant->value.f[rhs_channel];
197	 break;
198      case GLSL_TYPE_INT:
199	 data.i[i] = found->constant->value.i[rhs_channel];
200	 break;
201      case GLSL_TYPE_UINT:
202	 data.u[i] = found->constant->value.u[rhs_channel];
203	 break;
204      case GLSL_TYPE_BOOL:
205	 data.b[i] = found->constant->value.b[rhs_channel];
206	 break;
207      default:
208	 assert(!"not reached");
209	 break;
210      }
211   }
212
213   *rvalue = new(ralloc_parent(deref)) ir_constant(type, &data);
214   this->progress = true;
215}
216
217ir_visitor_status
218ir_constant_propagation_visitor::visit_enter(ir_function_signature *ir)
219{
220   /* Treat entry into a function signature as a completely separate
221    * block.  Any instructions at global scope will be shuffled into
222    * main() at link time, so they're irrelevant to us.
223    */
224   exec_list *orig_acp = this->acp;
225   exec_list *orig_kills = this->kills;
226   bool orig_killed_all = this->killed_all;
227
228   this->acp = new(mem_ctx) exec_list;
229   this->kills = new(mem_ctx) exec_list;
230   this->killed_all = false;
231
232   visit_list_elements(this, &ir->body);
233
234   this->kills = orig_kills;
235   this->acp = orig_acp;
236   this->killed_all = orig_killed_all;
237
238   return visit_continue_with_parent;
239}
240
241ir_visitor_status
242ir_constant_propagation_visitor::visit_leave(ir_assignment *ir)
243{
244   if (this->in_assignee)
245      return visit_continue;
246
247   unsigned kill_mask = ir->write_mask;
248   if (ir->lhs->as_dereference_array()) {
249      /* The LHS of the assignment uses an array indexing operator (e.g. v[i]
250       * = ...;).  Since we only try to constant propagate vectors and
251       * scalars, this means that either (a) array indexing is being used to
252       * select a vector component, or (b) the variable in question is neither
253       * a scalar or a vector, so we don't care about it.  In the former case,
254       * we want to kill the whole vector, since in general we can't predict
255       * which vector component will be selected by array indexing.  In the
256       * latter case, it doesn't matter what we do, so go ahead and kill the
257       * whole variable anyway.
258       *
259       * Note that if the array index is constant (e.g. v[2] = ...;), we could
260       * in principle be smarter, but we don't need to, because a future
261       * optimization pass will convert it to a simple assignment with the
262       * correct mask.
263       */
264      kill_mask = ~0;
265   }
266   kill(ir->lhs->variable_referenced(), kill_mask);
267
268   add_constant(ir);
269
270   return visit_continue;
271}
272
273ir_visitor_status
274ir_constant_propagation_visitor::visit_enter(ir_function *ir)
275{
276   (void) ir;
277   return visit_continue;
278}
279
280ir_visitor_status
281ir_constant_propagation_visitor::visit_enter(ir_call *ir)
282{
283   /* Do constant propagation on call parameters, but skip any out params */
284   exec_list_iterator sig_param_iter = ir->callee->parameters.iterator();
285   foreach_iter(exec_list_iterator, iter, ir->actual_parameters) {
286      ir_variable *sig_param = (ir_variable *)sig_param_iter.get();
287      ir_rvalue *param = (ir_rvalue *)iter.get();
288      if (sig_param->mode != ir_var_out && sig_param->mode != ir_var_inout) {
289	 ir_rvalue *new_param = param;
290	 handle_rvalue(&new_param);
291         if (new_param != param)
292	    param->replace_with(new_param);
293	 else
294	    param->accept(this);
295      }
296      sig_param_iter.next();
297   }
298
299   /* Since we're unlinked, we don't (necssarily) know the side effects of
300    * this call.  So kill all copies.
301    */
302   acp->make_empty();
303   this->killed_all = true;
304
305   return visit_continue_with_parent;
306}
307
308void
309ir_constant_propagation_visitor::handle_if_block(exec_list *instructions)
310{
311   exec_list *orig_acp = this->acp;
312   exec_list *orig_kills = this->kills;
313   bool orig_killed_all = this->killed_all;
314
315   this->acp = new(mem_ctx) exec_list;
316   this->kills = new(mem_ctx) exec_list;
317   this->killed_all = false;
318
319   /* Populate the initial acp with a constant of the original */
320   foreach_iter(exec_list_iterator, iter, *orig_acp) {
321      acp_entry *a = (acp_entry *)iter.get();
322      this->acp->push_tail(new(this->mem_ctx) acp_entry(a));
323   }
324
325   visit_list_elements(this, instructions);
326
327   if (this->killed_all) {
328      orig_acp->make_empty();
329   }
330
331   exec_list *new_kills = this->kills;
332   this->kills = orig_kills;
333   this->acp = orig_acp;
334   this->killed_all = this->killed_all || orig_killed_all;
335
336   foreach_iter(exec_list_iterator, iter, *new_kills) {
337      kill_entry *k = (kill_entry *)iter.get();
338      kill(k->var, k->write_mask);
339   }
340}
341
342ir_visitor_status
343ir_constant_propagation_visitor::visit_enter(ir_if *ir)
344{
345   ir->condition->accept(this);
346   handle_rvalue(&ir->condition);
347
348   handle_if_block(&ir->then_instructions);
349   handle_if_block(&ir->else_instructions);
350
351   /* handle_if_block() already descended into the children. */
352   return visit_continue_with_parent;
353}
354
355ir_visitor_status
356ir_constant_propagation_visitor::visit_enter(ir_loop *ir)
357{
358   exec_list *orig_acp = this->acp;
359   exec_list *orig_kills = this->kills;
360   bool orig_killed_all = this->killed_all;
361
362   /* FINISHME: For now, the initial acp for loops is totally empty.
363    * We could go through once, then go through again with the acp
364    * cloned minus the killed entries after the first run through.
365    */
366   this->acp = new(mem_ctx) exec_list;
367   this->kills = new(mem_ctx) exec_list;
368   this->killed_all = false;
369
370   visit_list_elements(this, &ir->body_instructions);
371
372   if (this->killed_all) {
373      orig_acp->make_empty();
374   }
375
376   exec_list *new_kills = this->kills;
377   this->kills = orig_kills;
378   this->acp = orig_acp;
379   this->killed_all = this->killed_all || orig_killed_all;
380
381   foreach_iter(exec_list_iterator, iter, *new_kills) {
382      kill_entry *k = (kill_entry *)iter.get();
383      kill(k->var, k->write_mask);
384   }
385
386   /* already descended into the children. */
387   return visit_continue_with_parent;
388}
389
390void
391ir_constant_propagation_visitor::kill(ir_variable *var, unsigned write_mask)
392{
393   assert(var != NULL);
394
395   /* We don't track non-vectors. */
396   if (!var->type->is_vector() && !var->type->is_scalar())
397      return;
398
399   /* Remove any entries currently in the ACP for this kill. */
400   foreach_iter(exec_list_iterator, iter, *this->acp) {
401      acp_entry *entry = (acp_entry *)iter.get();
402
403      if (entry->var == var) {
404	 entry->write_mask &= ~write_mask;
405	 if (entry->write_mask == 0)
406	    entry->remove();
407      }
408   }
409
410   /* Add this writemask of the variable to the list of killed
411    * variables in this block.
412    */
413   foreach_iter(exec_list_iterator, iter, *this->kills) {
414      kill_entry *entry = (kill_entry *)iter.get();
415
416      if (entry->var == var) {
417	 entry->write_mask |= write_mask;
418	 return;
419      }
420   }
421   /* Not already in the list.  Make new entry. */
422   this->kills->push_tail(new(this->mem_ctx) kill_entry(var, write_mask));
423}
424
425/**
426 * Adds an entry to the available constant list if it's a plain assignment
427 * of a variable to a variable.
428 */
429void
430ir_constant_propagation_visitor::add_constant(ir_assignment *ir)
431{
432   acp_entry *entry;
433
434   if (ir->condition)
435      return;
436
437   if (!ir->write_mask)
438      return;
439
440   ir_dereference_variable *deref = ir->lhs->as_dereference_variable();
441   ir_constant *constant = ir->rhs->as_constant();
442
443   if (!deref || !constant)
444      return;
445
446   /* Only do constant propagation on vectors.  Constant matrices,
447    * arrays, or structures would require more work elsewhere.
448    */
449   if (!deref->var->type->is_vector() && !deref->var->type->is_scalar())
450      return;
451
452   entry = new(this->mem_ctx) acp_entry(deref->var, ir->write_mask, constant);
453   this->acp->push_tail(entry);
454}
455
456} /* unnamed namespace */
457
458/**
459 * Does a constant propagation pass on the code present in the instruction stream.
460 */
461bool
462do_constant_propagation(exec_list *instructions)
463{
464   ir_constant_propagation_visitor v;
465
466   visit_list_elements(&v, instructions);
467
468   return v.progress;
469}
470