1f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/*
2f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Copyright © 2010 Intel Corporation
3f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
4f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Permission is hereby granted, free of charge, to any person obtaining a
5f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * copy of this software and associated documentation files (the "Software"),
6f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * to deal in the Software without restriction, including without limitation
7f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * and/or sell copies of the Software, and to permit persons to whom the
9f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Software is furnished to do so, subject to the following conditions:
10f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
11f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The above copyright notice and this permission notice (including the next
12f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * paragraph) shall be included in all copies or substantial portions of the
13f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Software.
14f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
15f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * DEALINGS IN THE SOFTWARE.
22f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
23f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
24f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/**
25f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * \file lower_vec_index_to_swizzle.cpp
26f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
27f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Turns constant indexing into vector types to swizzles.  This will
28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * let other swizzle-aware optimization passes catch these constructs,
29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * and codegen backends not have to worry about this case.
30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "ir.h"
33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "ir_visitor.h"
34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "ir_optimization.h"
35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "glsl_types.h"
36f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "main/macros.h"
37f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
/**
 * Visitor class for replacing constant-index vector dereferences with
 * single-component swizzles.
 */
41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass ir_vec_index_to_swizzle_visitor : public ir_hierarchical_visitor {
43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgpublic:
44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ir_vec_index_to_swizzle_visitor()
45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   {
46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      progress = false;
47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ir_rvalue *convert_vec_index_to_swizzle(ir_rvalue *val);
50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   virtual ir_visitor_status visit_enter(ir_expression *);
52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   virtual ir_visitor_status visit_enter(ir_swizzle *);
53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   virtual ir_visitor_status visit_enter(ir_assignment *);
54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   virtual ir_visitor_status visit_enter(ir_return *);
55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   virtual ir_visitor_status visit_enter(ir_call *);
56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   virtual ir_visitor_status visit_enter(ir_if *);
57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bool progress;
59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org};
60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_rvalue *
62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_vec_index_to_swizzle_visitor::convert_vec_index_to_swizzle(ir_rvalue *ir)
63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ir_dereference_array *deref = ir->as_dereference_array();
65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ir_constant *ir_constant;
66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!deref)
68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return ir;
69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (deref->array->type->is_matrix() || deref->array->type->is_array())
71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return ir;
72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   assert(deref->array_index->type->base_type == GLSL_TYPE_INT);
74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ir_constant = deref->array_index->constant_expression_value();
75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!ir_constant)
76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return ir;
77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   void *ctx = ralloc_parent(ir);
79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   this->progress = true;
80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* Page 40 of the GLSL 1.20 spec says:
82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    *
83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    *     "When indexing with non-constant expressions, behavior is undefined
84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    *     if the index is negative, or greater than or equal to the size of
85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    *     the vector."
86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    *
87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * The quoted spec text mentions non-constant expressions, but this code
88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * operates on constants.  These constants are the result of non-constant
89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * expressions that have been optimized to constants.  The common case here
90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * is a loop counter from an unrolled loop that is used to index a vector.
91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    *
92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * The ir_swizzle constructor gets angry if the index is negative or too
93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * large.  For simplicity sake, just clamp the index to [0, size-1].
94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    */
95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   const int i = MIN2(MAX2(ir_constant->value.i[0], 0),
96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		      (deref->array->type->vector_elements - 1));
97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return new(ctx) ir_swizzle(deref->array, i, 0, 0, 0, 1);
99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_visitor_status
102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_vec_index_to_swizzle_visitor::visit_enter(ir_expression *ir)
103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int i;
105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (i = 0; i < ir->get_num_operands(); i++) {
107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ir->operands[i] = convert_vec_index_to_swizzle(ir->operands[i]);
108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return visit_continue;
111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_visitor_status
114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_vec_index_to_swizzle_visitor::visit_enter(ir_swizzle *ir)
115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* Can't be hit from normal GLSL, since you can't swizzle a scalar (which
117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * the result of indexing a vector is.  But maybe at some point we'll end up
118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * using swizzling of scalars for vector construction.
119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    */
120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ir->val = convert_vec_index_to_swizzle(ir->val);
121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return visit_continue;
123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_visitor_status
126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_vec_index_to_swizzle_visitor::visit_enter(ir_assignment *ir)
127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ir->set_lhs(convert_vec_index_to_swizzle(ir->lhs));
129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ir->rhs = convert_vec_index_to_swizzle(ir->rhs);
130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return visit_continue;
132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_visitor_status
135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_vec_index_to_swizzle_visitor::visit_enter(ir_call *ir)
136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   foreach_iter(exec_list_iterator, iter, *ir) {
138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ir_rvalue *param = (ir_rvalue *)iter.get();
139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ir_rvalue *new_param = convert_vec_index_to_swizzle(param);
140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (new_param != param) {
142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 param->replace_with(new_param);
143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return visit_continue;
147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_visitor_status
150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_vec_index_to_swizzle_visitor::visit_enter(ir_return *ir)
151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (ir->value) {
153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ir->value = convert_vec_index_to_swizzle(ir->value);
154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return visit_continue;
157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_visitor_status
160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_vec_index_to_swizzle_visitor::visit_enter(ir_if *ir)
161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ir->condition = convert_vec_index_to_swizzle(ir->condition);
163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return visit_continue;
165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgdo_vec_index_to_swizzle(exec_list *instructions)
169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ir_vec_index_to_swizzle_visitor v;
171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   v.run(instructions);
173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return v.progress;
175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
176