1/*
2 * Copyright © 2010 Luca Barbieri
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24/**
25 * \file lower_variable_index_to_cond_assign.cpp
26 *
27 * Turns non-constant indexing into array types to a series of
28 * conditional moves of each element into a temporary.
29 *
30 * Pre-DX10 GPUs often don't have a native way to do this operation,
31 * and this works around that.
32 *
33 * The lowering process proceeds as follows.  Each non-constant index
34 * found in an r-value is converted to a canonical form \c array[i].  Each
35 * element of the array is conditionally assigned to a temporary by comparing
36 * \c i to a constant index.  This is done by cloning the canonical form and
37 * replacing all occurances of \c i with a constant.  Each remaining occurance
38 * of the canonical form in the IR is replaced with a dereference of the
39 * temporary variable.
40 *
41 * L-values with non-constant indices are handled similarly.  In this case,
42 * the RHS of the assignment is assigned to a temporary.  The non-constant
43 * index is replace with the canonical form (just like for r-values).  The
44 * temporary is conditionally assigned to each element of the canonical form
45 * by comparing \c i with each index.  The same clone-and-replace scheme is
46 * used.
47 */
48
49#include "ir.h"
50#include "ir_rvalue_visitor.h"
51#include "ir_optimization.h"
52#include "compiler/glsl_types.h"
53#include "main/macros.h"
54
55/**
56 * Generate a comparison value for a block of indices
57 *
58 * Lowering passes for non-constant indexing of arrays, matrices, or vectors
59 * can use this to generate blocks of index comparison values.
60 *
61 * \param instructions  List where new instructions will be appended
62 * \param index         \c ir_variable containing the desired index
63 * \param base          Base value for this block of comparisons
64 * \param components    Number of unique index values to compare.  This must
65 *                      be on the range [1, 4].
66 * \param mem_ctx       ralloc memory context to be used for all allocations.
67 *
68 * \returns
69 * An \c ir_rvalue that \b must be cloned for each use in conditional
70 * assignments, etc.
71 */
72ir_rvalue *
73compare_index_block(exec_list *instructions, ir_variable *index,
74		    unsigned base, unsigned components, void *mem_ctx)
75{
76   ir_rvalue *broadcast_index = new(mem_ctx) ir_dereference_variable(index);
77
78   assert(index->type->is_scalar());
79   assert(index->type->base_type == GLSL_TYPE_INT || index->type->base_type == GLSL_TYPE_UINT);
80   assert(components >= 1 && components <= 4);
81
82   if (components > 1) {
83      const ir_swizzle_mask m = { 0, 0, 0, 0, components, false };
84      broadcast_index = new(mem_ctx) ir_swizzle(broadcast_index, m);
85   }
86
87   /* Compare the desired index value with the next block of four indices.
88    */
89   ir_constant_data test_indices_data;
90   memset(&test_indices_data, 0, sizeof(test_indices_data));
91   test_indices_data.i[0] = base;
92   test_indices_data.i[1] = base + 1;
93   test_indices_data.i[2] = base + 2;
94   test_indices_data.i[3] = base + 3;
95
96   ir_constant *const test_indices =
97      new(mem_ctx) ir_constant(broadcast_index->type,
98			       &test_indices_data);
99
100   ir_rvalue *const condition_val =
101      new(mem_ctx) ir_expression(ir_binop_equal,
102				 glsl_type::bvec(components),
103				 broadcast_index,
104				 test_indices);
105
106   ir_variable *const condition =
107      new(mem_ctx) ir_variable(condition_val->type,
108			       "dereference_condition",
109			       ir_var_temporary);
110   instructions->push_tail(condition);
111
112   ir_rvalue *const cond_deref =
113      new(mem_ctx) ir_dereference_variable(condition);
114   instructions->push_tail(new(mem_ctx) ir_assignment(cond_deref, condition_val, 0));
115
116   return cond_deref;
117}
118
119static inline bool
120is_array_or_matrix(const ir_rvalue *ir)
121{
122   return (ir->type->is_array() || ir->type->is_matrix());
123}
124
125namespace {
126/**
127 * Replace a dereference of a variable with a specified r-value
128 *
129 * Each time a dereference of the specified value is replaced, the r-value
130 * tree is cloned.
131 */
132class deref_replacer : public ir_rvalue_visitor {
133public:
134   deref_replacer(const ir_variable *variable_to_replace, ir_rvalue *value)
135      : variable_to_replace(variable_to_replace), value(value),
136	progress(false)
137   {
138      assert(this->variable_to_replace != NULL);
139      assert(this->value != NULL);
140   }
141
142   virtual void handle_rvalue(ir_rvalue **rvalue)
143   {
144      ir_dereference_variable *const dv = (*rvalue)->as_dereference_variable();
145
146      if ((dv != NULL) && (dv->var == this->variable_to_replace)) {
147	 this->progress = true;
148	 *rvalue = this->value->clone(ralloc_parent(*rvalue), NULL);
149      }
150   }
151
152   const ir_variable *variable_to_replace;
153   ir_rvalue *value;
154   bool progress;
155};
156
157/**
158 * Find a variable index dereference of an array in an rvalue tree
159 */
160class find_variable_index : public ir_hierarchical_visitor {
161public:
162   find_variable_index()
163      : deref(NULL)
164   {
165      /* empty */
166   }
167
168   virtual ir_visitor_status visit_enter(ir_dereference_array *ir)
169   {
170      if (is_array_or_matrix(ir->array)
171	  && (ir->array_index->as_constant() == NULL)) {
172	 this->deref = ir;
173	 return visit_stop;
174      }
175
176      return visit_continue;
177   }
178
179   /**
180    * First array dereference found in the tree that has a non-constant index.
181    */
182   ir_dereference_array *deref;
183};
184
185struct assignment_generator
186{
187   ir_instruction* base_ir;
188   ir_dereference *rvalue;
189   ir_variable *old_index;
190   bool is_write;
191   unsigned int write_mask;
192   ir_variable* var;
193
194   assignment_generator()
195      : base_ir(NULL),
196        rvalue(NULL),
197        old_index(NULL),
198        is_write(false),
199        write_mask(0),
200        var(NULL)
201   {
202   }
203
204   void generate(unsigned i, ir_rvalue* condition, exec_list *list) const
205   {
206      /* Just clone the rest of the deref chain when trying to get at the
207       * underlying variable.
208       */
209      void *mem_ctx = ralloc_parent(base_ir);
210
211      /* Clone the old r-value in its entirety.  Then replace any occurances of
212       * the old variable index with the new constant index.
213       */
214      ir_dereference *element = this->rvalue->clone(mem_ctx, NULL);
215      ir_constant *const index = new(mem_ctx) ir_constant(i);
216      deref_replacer r(this->old_index, index);
217      element->accept(&r);
218      assert(r.progress);
219
220      /* Generate a conditional assignment to (or from) the constant indexed
221       * array dereference.
222       */
223      ir_rvalue *variable = new(mem_ctx) ir_dereference_variable(this->var);
224      ir_assignment *const assignment = (is_write)
225	 ? new(mem_ctx) ir_assignment(element, variable, condition, write_mask)
226	 : new(mem_ctx) ir_assignment(variable, element, condition);
227
228      list->push_tail(assignment);
229   }
230};
231
232struct switch_generator
233{
234   /* make TFunction a template parameter if you need to use other generators */
235   typedef assignment_generator TFunction;
236   const TFunction& generator;
237
238   ir_variable* index;
239   unsigned linear_sequence_max_length;
240   unsigned condition_components;
241
242   void *mem_ctx;
243
244   switch_generator(const TFunction& generator, ir_variable *index,
245		    unsigned linear_sequence_max_length,
246		    unsigned condition_components)
247      : generator(generator), index(index),
248	linear_sequence_max_length(linear_sequence_max_length),
249	condition_components(condition_components)
250   {
251      this->mem_ctx = ralloc_parent(index);
252   }
253
254   void linear_sequence(unsigned begin, unsigned end, exec_list *list)
255   {
256      if (begin == end)
257         return;
258
259      /* If the array access is a read, read the first element of this subregion
260       * unconditionally.  The remaining tests will possibly overwrite this
261       * value with one of the other array elements.
262       *
263       * This optimization cannot be done for writes because it will cause the
264       * first element of the subregion to be written possibly *in addition* to
265       * one of the other elements.
266       */
267      unsigned first;
268      if (!this->generator.is_write) {
269	 this->generator.generate(begin, 0, list);
270	 first = begin + 1;
271      } else {
272	 first = begin;
273      }
274
275      for (unsigned i = first; i < end; i += 4) {
276         const unsigned comps = MIN2(condition_components, end - i);
277
278	 ir_rvalue *const cond_deref =
279	    compare_index_block(list, index, i, comps, this->mem_ctx);
280
281         if (comps == 1) {
282            this->generator.generate(i, cond_deref->clone(this->mem_ctx, NULL),
283				     list);
284         } else {
285            for (unsigned j = 0; j < comps; j++) {
286	       ir_rvalue *const cond_swiz =
287		  new(this->mem_ctx) ir_swizzle(cond_deref->clone(this->mem_ctx, NULL),
288						j, 0, 0, 0, 1);
289
290               this->generator.generate(i + j, cond_swiz, list);
291            }
292         }
293      }
294   }
295
296   void bisect(unsigned begin, unsigned end, exec_list *list)
297   {
298      unsigned middle = (begin + end) >> 1;
299
300      assert(index->type->is_integer());
301
302      ir_constant *const middle_c = (index->type->base_type == GLSL_TYPE_UINT)
303	 ? new(this->mem_ctx) ir_constant((unsigned)middle)
304         : new(this->mem_ctx) ir_constant((int)middle);
305
306
307      ir_dereference_variable *deref =
308	 new(this->mem_ctx) ir_dereference_variable(this->index);
309
310      ir_expression *less =
311	 new(this->mem_ctx) ir_expression(ir_binop_less, glsl_type::bool_type,
312					  deref, middle_c);
313
314      ir_if *if_less = new(this->mem_ctx) ir_if(less);
315
316      generate(begin, middle, &if_less->then_instructions);
317      generate(middle, end, &if_less->else_instructions);
318
319      list->push_tail(if_less);
320   }
321
322   void generate(unsigned begin, unsigned end, exec_list *list)
323   {
324      unsigned length = end - begin;
325      if (length <= this->linear_sequence_max_length)
326         return linear_sequence(begin, end, list);
327      else
328         return bisect(begin, end, list);
329   }
330};
331
332/**
333 * Visitor class for replacing expressions with ir_constant values.
334 */
335
336class variable_index_to_cond_assign_visitor : public ir_rvalue_visitor {
337public:
338   variable_index_to_cond_assign_visitor(gl_shader_stage stage,
339                                         bool lower_input,
340                                         bool lower_output,
341                                         bool lower_temp,
342                                         bool lower_uniform)
343   {
344      this->progress = false;
345      this->stage = stage;
346      this->lower_inputs = lower_input;
347      this->lower_outputs = lower_output;
348      this->lower_temps = lower_temp;
349      this->lower_uniforms = lower_uniform;
350   }
351
352   bool progress;
353
354   gl_shader_stage stage;
355   bool lower_inputs;
356   bool lower_outputs;
357   bool lower_temps;
358   bool lower_uniforms;
359
360   bool storage_type_needs_lowering(ir_dereference_array *deref) const
361   {
362      /* If a variable isn't eventually the target of this dereference, then
363       * it must be a constant or some sort of anonymous temporary storage.
364       *
365       * FINISHME: Is this correct?  Most drivers treat arrays of constants as
366       * FINISHME: uniforms.  It seems like this should do the same.
367       */
368      const ir_variable *const var = deref->array->variable_referenced();
369      if (var == NULL)
370	 return this->lower_temps;
371
372      switch (var->data.mode) {
373      case ir_var_auto:
374      case ir_var_temporary:
375	 return this->lower_temps;
376
377      case ir_var_uniform:
378      case ir_var_shader_storage:
379	 return this->lower_uniforms;
380
381      case ir_var_shader_shared:
382	 return false;
383
384      case ir_var_function_in:
385      case ir_var_const_in:
386         return this->lower_temps;
387
388      case ir_var_system_value:
389         /* There are only a few system values that have array types:
390          *
391          *    gl_TessLevelInner[]
392          *    gl_TessLevelOuter[]
393          *    gl_SampleMaskIn[]
394          *
395          * The tessellation factor arrays are lowered to vec4/vec2s
396          * by lower_tess_level() before this pass occurs, so we'll
397          * never see them here.
398          *
399          * The only remaining case is gl_SampleMaskIn[], which has
400          * a length of ceil(ctx->Const.MaxSamples / 32).  Most hardware
401          * supports no more than 32 samples, in which case our lowering
402          * produces a single read of gl_SampleMaskIn[0].  Even with 64x
403          * MSAA, the array length is only 2, so the lowering is fairly
404          * efficient.  Therefore, lower unconditionally.
405          */
406         return true;
407
408      case ir_var_shader_in:
409         /* The input array size is unknown at compiler time for non-patch
410          * inputs in TCS and TES. The arrays are sized to
411          * the implementation-dependent limit "gl_MaxPatchVertices", but
412          * the real size is stored in the "gl_PatchVerticesIn" built-in
413          * uniform.
414          *
415          * The TCS input array size is specified by
416          * glPatchParameteri(GL_PATCH_VERTICES).
417          *
418          * The TES input array size is specified by the "vertices" output
419          * layout qualifier in TCS.
420          */
421         if ((stage == MESA_SHADER_TESS_CTRL ||
422              stage == MESA_SHADER_TESS_EVAL) && !var->data.patch)
423            return false;
424         return this->lower_inputs;
425
426      case ir_var_function_out:
427         /* TCS non-patch outputs can only be indexed with "gl_InvocationID".
428          * Other expressions are not allowed.
429          */
430         if (stage == MESA_SHADER_TESS_CTRL && !var->data.patch)
431            return false;
432         return this->lower_temps;
433
434      case ir_var_shader_out:
435         return this->lower_outputs;
436
437      case ir_var_function_inout:
438	 return this->lower_temps;
439      }
440
441      assert(!"Should not get here.");
442      return false;
443   }
444
445   bool needs_lowering(ir_dereference_array *deref) const
446   {
447      if (deref == NULL || deref->array_index->as_constant()
448	  || !is_array_or_matrix(deref->array))
449	 return false;
450
451      return this->storage_type_needs_lowering(deref);
452   }
453
454   ir_variable *convert_dereference_array(ir_dereference_array *orig_deref,
455					  ir_assignment* orig_assign,
456					  ir_dereference *orig_base)
457   {
458      assert(is_array_or_matrix(orig_deref->array));
459
460      const unsigned length = (orig_deref->array->type->is_array())
461         ? orig_deref->array->type->length
462         : orig_deref->array->type->matrix_columns;
463
464      void *const mem_ctx = ralloc_parent(base_ir);
465
466      /* Temporary storage for either the result of the dereference of
467       * the array, or the RHS that's being assigned into the
468       * dereference of the array.
469       */
470      ir_variable *var;
471
472      if (orig_assign) {
473	 var = new(mem_ctx) ir_variable(orig_assign->rhs->type,
474					"dereference_array_value",
475					ir_var_temporary);
476	 base_ir->insert_before(var);
477
478	 ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(var);
479	 ir_assignment *assign = new(mem_ctx) ir_assignment(lhs,
480							    orig_assign->rhs,
481							    NULL);
482
483         base_ir->insert_before(assign);
484      } else {
485	 var = new(mem_ctx) ir_variable(orig_deref->type,
486					"dereference_array_value",
487					ir_var_temporary);
488	 base_ir->insert_before(var);
489      }
490
491      /* Store the index to a temporary to avoid reusing its tree. */
492      ir_variable *index =
493	 new(mem_ctx) ir_variable(orig_deref->array_index->type,
494				  "dereference_array_index", ir_var_temporary);
495      base_ir->insert_before(index);
496
497      ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(index);
498      ir_assignment *assign =
499	 new(mem_ctx) ir_assignment(lhs, orig_deref->array_index, NULL);
500      base_ir->insert_before(assign);
501
502      orig_deref->array_index = lhs->clone(mem_ctx, NULL);
503
504      assignment_generator ag;
505      ag.rvalue = orig_base;
506      ag.base_ir = base_ir;
507      ag.old_index = index;
508      ag.var = var;
509      if (orig_assign) {
510	 ag.is_write = true;
511	 ag.write_mask = orig_assign->write_mask;
512      } else {
513	 ag.is_write = false;
514      }
515
516      switch_generator sg(ag, index, 4, 4);
517
518      /* If the original assignment has a condition, respect that original
519       * condition!  This is acomplished by wrapping the new conditional
520       * assignments in an if-statement that uses the original condition.
521       */
522      if ((orig_assign != NULL) && (orig_assign->condition != NULL)) {
523	 /* No need to clone the condition because the IR that it hangs on is
524	  * going to be removed from the instruction sequence.
525	  */
526	 ir_if *if_stmt = new(mem_ctx) ir_if(orig_assign->condition);
527
528	 sg.generate(0, length, &if_stmt->then_instructions);
529	 base_ir->insert_before(if_stmt);
530      } else {
531	 exec_list list;
532
533	 sg.generate(0, length, &list);
534	 base_ir->insert_before(&list);
535      }
536
537      return var;
538   }
539
540   virtual void handle_rvalue(ir_rvalue **pir)
541   {
542      if (this->in_assignee)
543	 return;
544
545      if (!*pir)
546         return;
547
548      ir_dereference_array* orig_deref = (*pir)->as_dereference_array();
549      if (needs_lowering(orig_deref)) {
550         ir_variable *var =
551	    convert_dereference_array(orig_deref, NULL, orig_deref);
552         assert(var);
553         *pir = new(ralloc_parent(base_ir)) ir_dereference_variable(var);
554         this->progress = true;
555      }
556   }
557
558   ir_visitor_status
559   visit_leave(ir_assignment *ir)
560   {
561      ir_rvalue_visitor::visit_leave(ir);
562
563      find_variable_index f;
564      ir->lhs->accept(&f);
565
566      if ((f.deref != NULL) && storage_type_needs_lowering(f.deref)) {
567         convert_dereference_array(f.deref, ir, ir->lhs);
568         ir->remove();
569         this->progress = true;
570      }
571
572      return visit_continue;
573   }
574};
575
576} /* anonymous namespace */
577
578bool
579lower_variable_index_to_cond_assign(gl_shader_stage stage,
580                                    exec_list *instructions,
581                                    bool lower_input,
582                                    bool lower_output,
583                                    bool lower_temp,
584                                    bool lower_uniform)
585{
586   variable_index_to_cond_assign_visitor v(stage,
587                                           lower_input,
588                                           lower_output,
589                                           lower_temp,
590                                           lower_uniform);
591
592   /* Continue lowering until no progress is made.  If there are multiple
593    * levels of indirection (e.g., non-constant indexing of array elements and
594    * matrix columns of an array of matrix), each pass will only lower one
595    * level of indirection.
596    */
597   bool progress_ever = false;
598   do {
599      v.progress = false;
600      visit_list_elements(&v, instructions);
601      progress_ever = v.progress || progress_ever;
602   } while (v.progress);
603
604   return progress_ever;
605}
606