1f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/*
2f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Copyright © 2010 Intel Corporation
3f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
4f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Permission is hereby granted, free of charge, to any person obtaining a
5f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * copy of this software and associated documentation files (the "Software"),
6f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * to deal in the Software without restriction, including without limitation
7f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * and/or sell copies of the Software, and to permit persons to whom the
9f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Software is furnished to do so, subject to the following conditions:
10f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
11f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The above copyright notice and this permission notice (including the next
12f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * paragraph) shall be included in all copies or substantial portions of the
13f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Software.
14f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
15f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * DEALINGS IN THE SOFTWARE.
22f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
23f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
24f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/**
25f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * \file brw_wm_vector_splitting.cpp
26f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
27f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * If a vector is only ever referenced by its components, then
28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * split those components out to individual variables so they can be
29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * handled normally by other optimization passes.
30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * This skips vectors in uniforms and varyings, which need to be
32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * accessible as vectors for their access by the GL.  Also, vector
33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * results of non-variable-derefs in assignments aren't handled
34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * because to do so we would have to store the vector result to a
35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * temporary in order to unload each channel, and to do so would just
 * loop us back to where we started.  For the 965, this is exactly the
 * behavior we want for the results of texture lookups, though probably not
 * for every other kind of vector-valued expression.
 */
39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgextern "C" {
41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "main/core.h"
42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "intel_context.h"
43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "glsl/ir.h"
45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "glsl/ir_visitor.h"
46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "glsl/ir_print_visitor.h"
47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "glsl/ir_rvalue_visitor.h"
48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "glsl/glsl_types.h"
49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
namespace {
/* Set to true to print per-variable reference statistics while the pass runs. */
bool debug = false;
}
51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass variable_entry : public exec_node
53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgpublic:
55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   variable_entry(ir_variable *var)
56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   {
57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      this->var = var;
58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      this->whole_vector_access = 0;
59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      this->declaration = false;
60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      this->mem_ctx = NULL;
61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ir_variable *var; /* The key: the variable's pointer. */
64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /** Number of times the variable is referenced, including assignments. */
66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned whole_vector_access;
67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   bool declaration; /* If the variable had a decl in the instruction stream */
69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ir_variable *components[4];
71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /** ralloc_parent(this->var) -- the shader's ralloc context. */
73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   void *mem_ctx;
74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org};
75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass ir_vector_reference_visitor : public ir_hierarchical_visitor {
77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgpublic:
78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ir_vector_reference_visitor(void)
79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   {
80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      this->mem_ctx = ralloc_context(NULL);
81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      this->variable_list.make_empty();
82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ~ir_vector_reference_visitor(void)
85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   {
86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ralloc_free(mem_ctx);
87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   virtual ir_visitor_status visit(ir_variable *);
90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   virtual ir_visitor_status visit(ir_dereference_variable *);
91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   virtual ir_visitor_status visit_enter(ir_swizzle *);
92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   virtual ir_visitor_status visit_enter(ir_assignment *);
93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   virtual ir_visitor_status visit_enter(ir_function_signature *);
94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   variable_entry *get_variable_entry(ir_variable *var);
96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* List of variable_entry */
98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   exec_list variable_list;
99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   void *mem_ctx;
101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org};
102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvariable_entry *
104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_vector_reference_visitor::get_variable_entry(ir_variable *var)
105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   assert(var);
107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!var->type->is_vector())
109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return NULL;
110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   switch (var->mode) {
112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case ir_var_uniform:
113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case ir_var_in:
114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case ir_var_out:
115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case ir_var_inout:
116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      /* Can't split varyings or uniforms.  Function in/outs won't get split
117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org       * either, so don't care about the ambiguity.
118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org       */
119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return NULL;
120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case ir_var_auto:
121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   case ir_var_temporary:
122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      break;
123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   foreach_list(node, &this->variable_list) {
126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      variable_entry *entry = (variable_entry *)node;
127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (entry->var == var)
128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 return entry;
129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   variable_entry *entry = new(mem_ctx) variable_entry(var);
132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   this->variable_list.push_tail(entry);
133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return entry;
134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_visitor_status
138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_vector_reference_visitor::visit(ir_variable *ir)
139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   variable_entry *entry = this->get_variable_entry(ir);
141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (entry)
143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      entry->declaration = true;
144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return visit_continue;
146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_visitor_status
149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_vector_reference_visitor::visit(ir_dereference_variable *ir)
150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ir_variable *const var = ir->var;
152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   variable_entry *entry = this->get_variable_entry(var);
153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (entry)
155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      entry->whole_vector_access++;
156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return visit_continue;
158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_visitor_status
161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_vector_reference_visitor::visit_enter(ir_swizzle *ir)
162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* Don't descend into a vector ir_dereference_variable below. */
164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (ir->val->as_dereference_variable() && ir->type->is_scalar())
165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return visit_continue_with_parent;
166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return visit_continue;
168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_visitor_status
171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_vector_reference_visitor::visit_enter(ir_assignment *ir)
172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (ir->lhs->as_dereference_variable() &&
174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org       ir->rhs->as_dereference_variable() &&
175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org       !ir->condition) {
176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      /* We'll split copies of a vector to copies of channels, so don't
177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org       * descend to the ir_dereference_variables.
178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org       */
179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return visit_continue_with_parent;
180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (ir->lhs->as_dereference_variable() &&
182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org       is_power_of_two(ir->write_mask) &&
183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org       !ir->condition) {
184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      /* If we're writing just a channel, then channel-splitting the LHS is OK.
185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org       */
186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ir->rhs->accept(this);
187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return visit_continue_with_parent;
188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return visit_continue;
190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_visitor_status
193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_vector_reference_visitor::visit_enter(ir_function_signature *ir)
194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* We don't want to descend into the function parameters and
196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * split them, so just accept the body here.
197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    */
198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   visit_list_elements(this, &ir->body);
199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return visit_continue_with_parent;
200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass ir_vector_splitting_visitor : public ir_rvalue_visitor {
203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgpublic:
204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ir_vector_splitting_visitor(exec_list *vars)
205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   {
206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      this->variable_list = vars;
207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   virtual ir_visitor_status visit_leave(ir_assignment *);
210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   void handle_rvalue(ir_rvalue **rvalue);
212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   variable_entry *get_splitting_entry(ir_variable *var);
213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   exec_list *variable_list;
215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org};
216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvariable_entry *
218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_vector_splitting_visitor::get_splitting_entry(ir_variable *var)
219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   assert(var);
221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!var->type->is_vector())
223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return NULL;
224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   foreach_list(node, &*this->variable_list) {
226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      variable_entry *entry = (variable_entry *)node;
227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (entry->var == var) {
228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 return entry;
229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return NULL;
233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_vector_splitting_visitor::handle_rvalue(ir_rvalue **rvalue)
237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!*rvalue)
239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return;
240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ir_swizzle *swiz = (*rvalue)->as_swizzle();
242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!swiz || !swiz->type->is_scalar())
243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return;
244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ir_dereference_variable *deref_var = swiz->val->as_dereference_variable();
246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!deref_var)
247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return;
248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   variable_entry *entry = get_splitting_entry(deref_var->var);
250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!entry)
251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return;
252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ir_variable *var = entry->components[swiz->mask.x];
254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   *rvalue = new(entry->mem_ctx) ir_dereference_variable(var);
255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_visitor_status
258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgir_vector_splitting_visitor::visit_leave(ir_assignment *ir)
259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ir_dereference_variable *lhs_deref = ir->lhs->as_dereference_variable();
261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ir_dereference_variable *rhs_deref = ir->rhs->as_dereference_variable();
262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   variable_entry *lhs = lhs_deref ? get_splitting_entry(lhs_deref->var) : NULL;
263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   variable_entry *rhs = rhs_deref ? get_splitting_entry(rhs_deref->var) : NULL;
264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (lhs_deref && rhs_deref && (lhs || rhs) && !ir->condition) {
266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      unsigned int rhs_chan = 0;
267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      /* Straight assignment of vector variables. */
269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (unsigned int i = 0; i < ir->lhs->type->vector_elements; i++) {
270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 ir_dereference *new_lhs;
271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 ir_rvalue *new_rhs;
272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 void *mem_ctx = lhs ? lhs->mem_ctx : rhs->mem_ctx;
273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 unsigned int writemask;
274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 if (!(ir->write_mask & (1 << i)))
276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    continue;
277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 if (lhs) {
279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    new_lhs = new(mem_ctx) ir_dereference_variable(lhs->components[i]);
280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    writemask = 1;
281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 } else {
282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    new_lhs = ir->lhs->clone(mem_ctx, NULL);
283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    writemask = 1 << i;
284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 }
285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 if (rhs) {
287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    new_rhs =
288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	       new(mem_ctx) ir_dereference_variable(rhs->components[rhs_chan]);
289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 } else {
290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    new_rhs = new(mem_ctx) ir_swizzle(ir->rhs->clone(mem_ctx, NULL),
291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					      rhs_chan, 0, 0, 0, 1);
292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 }
293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 ir->insert_before(new(mem_ctx) ir_assignment(new_lhs,
295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org						      new_rhs,
296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org						      NULL, writemask));
297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 rhs_chan++;
299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ir->remove();
301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   } else if (lhs) {
302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      void *mem_ctx = lhs->mem_ctx;
303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      int elem = -1;
304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      switch (ir->write_mask) {
306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case (1 << 0):
307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 elem = 0;
308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 break;
309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case (1 << 1):
310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 elem = 1;
311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 break;
312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case (1 << 2):
313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 elem = 2;
314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 break;
315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      case (1 << 3):
316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 elem = 3;
317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 break;
318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      default:
319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 ir->print();
320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 assert(!"not reached: non-channelwise dereference of LHS.");
321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ir->lhs = new(mem_ctx) ir_dereference_variable(lhs->components[elem]);
324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ir->write_mask = (1 << 0);
325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      handle_rvalue(&ir->rhs);
327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   } else {
328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      handle_rvalue(&ir->rhs);
329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   handle_rvalue(&ir->condition);
332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return visit_continue;
334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool
337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbrw_do_vector_splitting(exec_list *instructions)
338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ir_vector_reference_visitor refs;
340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   visit_list_elements(&refs, instructions);
342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* Trim out variables we can't split. */
344f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   foreach_list_safe(node, &refs.variable_list) {
345f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      variable_entry *entry = (variable_entry *)node;
346f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
347f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (debug) {
348f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 printf("vector %s@%p: decl %d, whole_access %d\n",
349f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		entry->var->name, (void *) entry->var, entry->declaration,
350f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		entry->whole_vector_access);
351f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
352f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
353f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!entry->declaration || entry->whole_vector_access) {
354f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 entry->remove();
355f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
356f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
357f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
358f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (refs.variable_list.is_empty())
359f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      return false;
360f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
361f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   void *mem_ctx = ralloc_context(NULL);
362f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
363f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* Replace the decls of the vectors to be split with their split
364f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * components.
365f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    */
366f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   foreach_list(node, &refs.variable_list) {
367f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      variable_entry *entry = (variable_entry *)node;
368f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      const struct glsl_type *type;
369f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      type = glsl_type::get_instance(entry->var->type->base_type, 1, 1);
370f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
371f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      entry->mem_ctx = ralloc_parent(entry->var);
372f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
373f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (unsigned int i = 0; i < entry->var->type->vector_elements; i++) {
374f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 const char *name = ralloc_asprintf(mem_ctx, "%s_%c",
375f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					    entry->var->name,
376f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org					    "xyzw"[i]);
377f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
378f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 entry->components[i] = new(entry->mem_ctx) ir_variable(type, name,
379f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org								ir_var_temporary);
380f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 entry->var->insert_before(entry->components[i]);
381f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
382f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
383f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      entry->var->remove();
384f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
385f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
386f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ir_vector_splitting_visitor split(&refs.variable_list);
387f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   visit_list_elements(&split, instructions);
388f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
389f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ralloc_free(mem_ctx);
390f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
391f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return true;
392f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
393