1/*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24/**
25 * \file opt_copy_propagation_elements.cpp
26 *
27 * Replaces usage of recently-copied components of variables with the
28 * previous copy of the variable.
29 *
30 * This pass can be compared with opt_copy_propagation, which operands
31 * on arbitrary whole-variable copies.  However, in order to handle
32 * the copy propagation of swizzled variables or writemasked writes,
33 * we want to track things on a channel-wise basis.  I found that
34 * trying to mix the swizzled/writemasked support here with the
35 * whole-variable stuff in opt_copy_propagation.cpp just made a mess,
36 * so this is separate despite the ACP handling being somewhat
37 * similar.
38 *
39 * This should reduce the number of MOV instructions in the generated
40 * programs unless copy propagation is also done on the LIR, and may
41 * help anyway by triggering other optimizations that live in the HIR.
42 */
43
44#include "ir.h"
45#include "ir_rvalue_visitor.h"
46#include "ir_basic_block.h"
47#include "ir_optimization.h"
48#include "compiler/glsl_types.h"
49#include "util/hash_table.h"
50
51static bool debug = false;
52
53namespace {
54
55class acp_entry;
56
57/* Class that refers to acp_entry in another exec_list. Used
58 * when making removals based on rhs.
59 */
60class acp_ref : public exec_node
61{
62public:
63   acp_ref(acp_entry *e)
64   {
65      entry = e;
66   }
67   acp_entry *entry;
68};
69
70class acp_entry : public exec_node
71{
72public:
73   /* override operator new from exec_node */
74   DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(acp_entry)
75
76   acp_entry(ir_variable *lhs, ir_variable *rhs, int write_mask, int swizzle[4])
77      : rhs_node(this)
78   {
79      this->lhs = lhs;
80      this->rhs = rhs;
81      this->write_mask = write_mask;
82      memcpy(this->swizzle, swizzle, sizeof(this->swizzle));
83   }
84
85   ir_variable *lhs;
86   ir_variable *rhs;
87   unsigned int write_mask;
88   int swizzle[4];
89   acp_ref rhs_node;
90};
91
92
93class kill_entry : public exec_node
94{
95public:
96   /* override operator new from exec_node */
97   DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(kill_entry)
98
99   kill_entry(ir_variable *var, int write_mask)
100   {
101      this->var = var;
102      this->write_mask = write_mask;
103   }
104
105   ir_variable *var;
106   unsigned int write_mask;
107};
108
109class ir_copy_propagation_elements_visitor : public ir_rvalue_visitor {
110public:
111   ir_copy_propagation_elements_visitor()
112   {
113      this->progress = false;
114      this->killed_all = false;
115      this->mem_ctx = ralloc_context(NULL);
116      this->lin_ctx = linear_alloc_parent(this->mem_ctx, 0);
117      this->shader_mem_ctx = NULL;
118      this->kills = new(mem_ctx) exec_list;
119
120      create_acp();
121   }
122   ~ir_copy_propagation_elements_visitor()
123   {
124      ralloc_free(mem_ctx);
125   }
126
127   void create_acp()
128   {
129      lhs_ht = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
130                                       _mesa_key_pointer_equal);
131      rhs_ht = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
132                                       _mesa_key_pointer_equal);
133   }
134
135   void destroy_acp()
136   {
137      _mesa_hash_table_destroy(lhs_ht, NULL);
138      _mesa_hash_table_destroy(rhs_ht, NULL);
139   }
140
141   void populate_acp(hash_table *lhs, hash_table *rhs)
142   {
143      struct hash_entry *entry;
144
145      hash_table_foreach(lhs, entry) {
146         _mesa_hash_table_insert(lhs_ht, entry->key, entry->data);
147      }
148
149      hash_table_foreach(rhs, entry) {
150         _mesa_hash_table_insert(rhs_ht, entry->key, entry->data);
151      }
152   }
153
154   void handle_loop(ir_loop *, bool keep_acp);
155   virtual ir_visitor_status visit_enter(class ir_loop *);
156   virtual ir_visitor_status visit_enter(class ir_function_signature *);
157   virtual ir_visitor_status visit_leave(class ir_assignment *);
158   virtual ir_visitor_status visit_enter(class ir_call *);
159   virtual ir_visitor_status visit_enter(class ir_if *);
160   virtual ir_visitor_status visit_leave(class ir_swizzle *);
161
162   void handle_rvalue(ir_rvalue **rvalue);
163
164   void add_copy(ir_assignment *ir);
165   void kill(kill_entry *k);
166   void handle_if_block(exec_list *instructions);
167
168   /** Hash of acp_entry: The available copies to propagate */
169   hash_table *lhs_ht;
170   hash_table *rhs_ht;
171
172   /**
173    * List of kill_entry: The variables whose values were killed in this
174    * block.
175    */
176   exec_list *kills;
177
178   bool progress;
179
180   bool killed_all;
181
182   /* Context for our local data structures. */
183   void *mem_ctx;
184   void *lin_ctx;
185   /* Context for allocating new shader nodes. */
186   void *shader_mem_ctx;
187};
188
189} /* unnamed namespace */
190
191ir_visitor_status
192ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir)
193{
194   /* Treat entry into a function signature as a completely separate
195    * block.  Any instructions at global scope will be shuffled into
196    * main() at link time, so they're irrelevant to us.
197    */
198   exec_list *orig_kills = this->kills;
199   bool orig_killed_all = this->killed_all;
200
201   hash_table *orig_lhs_ht = lhs_ht;
202   hash_table *orig_rhs_ht = rhs_ht;
203
204   this->kills = new(mem_ctx) exec_list;
205   this->killed_all = false;
206
207   create_acp();
208
209   visit_list_elements(this, &ir->body);
210
211   ralloc_free(this->kills);
212
213   destroy_acp();
214
215   this->kills = orig_kills;
216   this->killed_all = orig_killed_all;
217
218   lhs_ht = orig_lhs_ht;
219   rhs_ht = orig_rhs_ht;
220
221   return visit_continue_with_parent;
222}
223
224ir_visitor_status
225ir_copy_propagation_elements_visitor::visit_leave(ir_assignment *ir)
226{
227   ir_dereference_variable *lhs = ir->lhs->as_dereference_variable();
228   ir_variable *var = ir->lhs->variable_referenced();
229
230   if (var->type->is_scalar() || var->type->is_vector()) {
231      kill_entry *k;
232
233      if (lhs)
234	 k = new(this->lin_ctx) kill_entry(var, ir->write_mask);
235      else
236	 k = new(this->lin_ctx) kill_entry(var, ~0);
237
238      kill(k);
239   }
240
241   add_copy(ir);
242
243   return visit_continue;
244}
245
246ir_visitor_status
247ir_copy_propagation_elements_visitor::visit_leave(ir_swizzle *)
248{
249   /* Don't visit the values of swizzles since they are handled while
250    * visiting the swizzle itself.
251    */
252   return visit_continue;
253}
254
255/**
256 * Replaces dereferences of ACP RHS variables with ACP LHS variables.
257 *
258 * This is where the actual copy propagation occurs.  Note that the
259 * rewriting of ir_dereference means that the ir_dereference instance
260 * must not be shared by multiple IR operations!
261 */
262void
263ir_copy_propagation_elements_visitor::handle_rvalue(ir_rvalue **ir)
264{
265   int swizzle_chan[4];
266   ir_dereference_variable *deref_var;
267   ir_variable *source[4] = {NULL, NULL, NULL, NULL};
268   int source_chan[4] = {0, 0, 0, 0};
269   int chans;
270   bool noop_swizzle = true;
271
272   if (!*ir)
273      return;
274
275   ir_swizzle *swizzle = (*ir)->as_swizzle();
276   if (swizzle) {
277      deref_var = swizzle->val->as_dereference_variable();
278      if (!deref_var)
279	 return;
280
281      swizzle_chan[0] = swizzle->mask.x;
282      swizzle_chan[1] = swizzle->mask.y;
283      swizzle_chan[2] = swizzle->mask.z;
284      swizzle_chan[3] = swizzle->mask.w;
285      chans = swizzle->type->vector_elements;
286   } else {
287      deref_var = (*ir)->as_dereference_variable();
288      if (!deref_var)
289	 return;
290
291      swizzle_chan[0] = 0;
292      swizzle_chan[1] = 1;
293      swizzle_chan[2] = 2;
294      swizzle_chan[3] = 3;
295      chans = deref_var->type->vector_elements;
296   }
297
298   if (this->in_assignee)
299      return;
300
301   ir_variable *var = deref_var->var;
302
303   /* Try to find ACP entries covering swizzle_chan[], hoping they're
304    * the same source variable.
305    */
306   hash_entry *ht_entry = _mesa_hash_table_search(lhs_ht, var);
307   if (ht_entry) {
308      exec_list *ht_list = (exec_list *) ht_entry->data;
309      foreach_in_list(acp_entry, entry, ht_list) {
310         for (int c = 0; c < chans; c++) {
311            if (entry->write_mask & (1 << swizzle_chan[c])) {
312               source[c] = entry->rhs;
313               source_chan[c] = entry->swizzle[swizzle_chan[c]];
314
315               if (source_chan[c] != swizzle_chan[c])
316                  noop_swizzle = false;
317            }
318         }
319      }
320   }
321
322   /* Make sure all channels are copying from the same source variable. */
323   if (!source[0])
324      return;
325   for (int c = 1; c < chans; c++) {
326      if (source[c] != source[0])
327	 return;
328   }
329
330   if (!shader_mem_ctx)
331      shader_mem_ctx = ralloc_parent(deref_var);
332
333   /* Don't pointlessly replace the rvalue with itself (or a noop swizzle
334    * of itself, which would just be deleted by opt_noop_swizzle).
335    */
336   if (source[0] == var && noop_swizzle)
337      return;
338
339   if (debug) {
340      printf("Copy propagation from:\n");
341      (*ir)->print();
342   }
343
344   deref_var = new(shader_mem_ctx) ir_dereference_variable(source[0]);
345   *ir = new(shader_mem_ctx) ir_swizzle(deref_var,
346					source_chan[0],
347					source_chan[1],
348					source_chan[2],
349					source_chan[3],
350					chans);
351   progress = true;
352
353   if (debug) {
354      printf("to:\n");
355      (*ir)->print();
356      printf("\n");
357   }
358}
359
360
361ir_visitor_status
362ir_copy_propagation_elements_visitor::visit_enter(ir_call *ir)
363{
364   /* Do copy propagation on call parameters, but skip any out params */
365   foreach_two_lists(formal_node, &ir->callee->parameters,
366                     actual_node, &ir->actual_parameters) {
367      ir_variable *sig_param = (ir_variable *) formal_node;
368      ir_rvalue *ir = (ir_rvalue *) actual_node;
369      if (sig_param->data.mode != ir_var_function_out
370          && sig_param->data.mode != ir_var_function_inout) {
371         ir->accept(this);
372      }
373   }
374
375   /* Since we're unlinked, we don't (necessarily) know the side effects of
376    * this call.  So kill all copies.
377    */
378   _mesa_hash_table_clear(lhs_ht, NULL);
379   _mesa_hash_table_clear(rhs_ht, NULL);
380
381   this->killed_all = true;
382
383   return visit_continue_with_parent;
384}
385
386void
387ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions)
388{
389   exec_list *orig_kills = this->kills;
390   bool orig_killed_all = this->killed_all;
391
392   hash_table *orig_lhs_ht = lhs_ht;
393   hash_table *orig_rhs_ht = rhs_ht;
394
395   this->kills = new(mem_ctx) exec_list;
396   this->killed_all = false;
397
398   create_acp();
399
400   /* Populate the initial acp with a copy of the original */
401   populate_acp(orig_lhs_ht, orig_rhs_ht);
402
403   visit_list_elements(this, instructions);
404
405   if (this->killed_all) {
406      _mesa_hash_table_clear(orig_lhs_ht, NULL);
407      _mesa_hash_table_clear(orig_rhs_ht, NULL);
408   }
409
410   exec_list *new_kills = this->kills;
411   this->kills = orig_kills;
412   this->killed_all = this->killed_all || orig_killed_all;
413
414   destroy_acp();
415
416   lhs_ht = orig_lhs_ht;
417   rhs_ht = orig_rhs_ht;
418
419   /* Move the new kills into the parent block's list, removing them
420    * from the parent's ACP list in the process.
421    */
422   foreach_in_list_safe(kill_entry, k, new_kills) {
423      kill(k);
424   }
425
426   ralloc_free(new_kills);
427}
428
429ir_visitor_status
430ir_copy_propagation_elements_visitor::visit_enter(ir_if *ir)
431{
432   ir->condition->accept(this);
433
434   handle_if_block(&ir->then_instructions);
435   handle_if_block(&ir->else_instructions);
436
437   /* handle_if_block() already descended into the children. */
438   return visit_continue_with_parent;
439}
440
441void
442ir_copy_propagation_elements_visitor::handle_loop(ir_loop *ir, bool keep_acp)
443{
444   exec_list *orig_kills = this->kills;
445   bool orig_killed_all = this->killed_all;
446
447   hash_table *orig_lhs_ht = lhs_ht;
448   hash_table *orig_rhs_ht = rhs_ht;
449
450   /* FINISHME: For now, the initial acp for loops is totally empty.
451    * We could go through once, then go through again with the acp
452    * cloned minus the killed entries after the first run through.
453    */
454   this->kills = new(mem_ctx) exec_list;
455   this->killed_all = false;
456
457   create_acp();
458
459   if (keep_acp) {
460      /* Populate the initial acp with a copy of the original */
461      populate_acp(orig_lhs_ht, orig_rhs_ht);
462   }
463
464   visit_list_elements(this, &ir->body_instructions);
465
466   if (this->killed_all) {
467      _mesa_hash_table_clear(orig_lhs_ht, NULL);
468      _mesa_hash_table_clear(orig_rhs_ht, NULL);
469   }
470
471   exec_list *new_kills = this->kills;
472   this->kills = orig_kills;
473   this->killed_all = this->killed_all || orig_killed_all;
474
475   destroy_acp();
476
477   lhs_ht = orig_lhs_ht;
478   rhs_ht = orig_rhs_ht;
479
480   foreach_in_list_safe(kill_entry, k, new_kills) {
481      kill(k);
482   }
483
484   ralloc_free(new_kills);
485}
486
487ir_visitor_status
488ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir)
489{
490   handle_loop(ir, false);
491   handle_loop(ir, true);
492
493   /* already descended into the children. */
494   return visit_continue_with_parent;
495}
496
497/* Remove any entries currently in the ACP for this kill. */
498void
499ir_copy_propagation_elements_visitor::kill(kill_entry *k)
500{
501   /* removal of lhs entries */
502   hash_entry *ht_entry = _mesa_hash_table_search(lhs_ht, k->var);
503   if (ht_entry) {
504      exec_list *lhs_list = (exec_list *) ht_entry->data;
505      foreach_in_list_safe(acp_entry, entry, lhs_list) {
506         entry->write_mask = entry->write_mask & ~k->write_mask;
507         if (entry->write_mask == 0) {
508            entry->remove();
509            continue;
510         }
511      }
512   }
513
514   /* removal of rhs entries */
515   ht_entry = _mesa_hash_table_search(rhs_ht, k->var);
516   if (ht_entry) {
517      exec_list *rhs_list = (exec_list *) ht_entry->data;
518      acp_ref *ref;
519
520      while ((ref = (acp_ref *) rhs_list->pop_head()) != NULL) {
521         acp_entry *entry = ref->entry;
522
523         /* If entry is still in a list (not already removed by lhs entry
524          * removal above), remove it.
525          */
526         if (entry->prev || entry->next)
527            entry->remove();
528      }
529   }
530
531   /* If we were on a list, remove ourselves before inserting */
532   if (k->next)
533      k->remove();
534
535   this->kills->push_tail(k);
536}
537
538/**
539 * Adds directly-copied channels between vector variables to the available
540 * copy propagation list.
541 */
542void
543ir_copy_propagation_elements_visitor::add_copy(ir_assignment *ir)
544{
545   acp_entry *entry;
546   int orig_swizzle[4] = {0, 1, 2, 3};
547   int swizzle[4];
548
549   if (ir->condition)
550      return;
551
552   ir_dereference_variable *lhs = ir->lhs->as_dereference_variable();
553   if (!lhs || !(lhs->type->is_scalar() || lhs->type->is_vector()))
554      return;
555
556   ir_dereference_variable *rhs = ir->rhs->as_dereference_variable();
557   if (!rhs) {
558      ir_swizzle *swiz = ir->rhs->as_swizzle();
559      if (!swiz)
560	 return;
561
562      rhs = swiz->val->as_dereference_variable();
563      if (!rhs)
564	 return;
565
566      orig_swizzle[0] = swiz->mask.x;
567      orig_swizzle[1] = swiz->mask.y;
568      orig_swizzle[2] = swiz->mask.z;
569      orig_swizzle[3] = swiz->mask.w;
570   }
571
572   /* Move the swizzle channels out to the positions they match in the
573    * destination.  We don't want to have to rewrite the swizzle[]
574    * array every time we clear a bit of the write_mask.
575    */
576   int j = 0;
577   for (int i = 0; i < 4; i++) {
578      if (ir->write_mask & (1 << i))
579	 swizzle[i] = orig_swizzle[j++];
580   }
581
582   int write_mask = ir->write_mask;
583   if (lhs->var == rhs->var) {
584      /* If this is a copy from the variable to itself, then we need
585       * to be sure not to include the updated channels from this
586       * instruction in the set of new source channels to be
587       * copy-propagated from.
588       */
589      for (int i = 0; i < 4; i++) {
590	 if (ir->write_mask & (1 << orig_swizzle[i]))
591	    write_mask &= ~(1 << i);
592      }
593   }
594
595   if (lhs->var->data.precise != rhs->var->data.precise)
596      return;
597
598   entry = new(this->lin_ctx) acp_entry(lhs->var, rhs->var, write_mask,
599					swizzle);
600
601   /* lhs hash, hash of lhs -> acp_entry lists */
602   hash_entry *ht_entry = _mesa_hash_table_search(lhs_ht, lhs->var);
603   if (ht_entry) {
604      exec_list *lhs_list = (exec_list *) ht_entry->data;
605      lhs_list->push_tail(entry);
606   } else {
607      exec_list *lhs_list = new(mem_ctx) exec_list;
608      lhs_list->push_tail(entry);
609      _mesa_hash_table_insert(lhs_ht, lhs->var, lhs_list);
610   }
611
612   /* rhs hash, hash of rhs -> acp_entry pointers to lhs lists */
613   ht_entry = _mesa_hash_table_search(rhs_ht, rhs->var);
614   if (ht_entry) {
615      exec_list *rhs_list = (exec_list *) ht_entry->data;
616      rhs_list->push_tail(&entry->rhs_node);
617   } else {
618      exec_list *rhs_list = new(mem_ctx) exec_list;
619      rhs_list->push_tail(&entry->rhs_node);
620      _mesa_hash_table_insert(rhs_ht, rhs->var, rhs_list);
621   }
622}
623
624bool
625do_copy_propagation_elements(exec_list *instructions)
626{
627   ir_copy_propagation_elements_visitor v;
628
629   visit_list_elements(&v, instructions);
630
631   return v.progress;
632}
633