brw_fs_channel_expressions.cpp revision 2ea3ab14f2182978f471674c9dfce029d37f70a7
11e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)/*
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Copyright © 2010 Intel Corporation
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Permission is hereby granted, free of charge, to any person obtaining a
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * copy of this software and associated documentation files (the "Software"),
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * to deal in the Software without restriction, including without limitation
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * the rights to use, copy, modify, merge, publish, distribute, sublicense,
81e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) * and/or sell copies of the Software, and to permit persons to whom the
91e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) * Software is furnished to do so, subject to the following conditions:
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The above copyright notice and this permission notice (including the next
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * paragraph) shall be included in all copies or substantial portions of the
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Software.
141e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) *
152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * DEALINGS IN THE SOFTWARE.
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/**
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * \file brw_fs_channel_expressions.cpp
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Breaks vector operations down into operations on each component.
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The 965 fragment shader receives 8 or 16 pixels at a time, so each
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * channel of a vector is laid out as 1 or 2 8-float registers.  Each
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * ALU operation operates on one of those channel registers.  As a
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * result, there is no value to the 965 fragment shader in tracking
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * "vector" expressions in the sense of GLSL fragment shaders, when
341e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) * doing a channel at a time may help in constant folding, algebraic
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * simplification, and reducing the liveness of channel registers.
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The exception to the desire to break everything down to floats is
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * texturing.  The texture sampler returns a writemasked
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 4/8-register sequence containing the texture values.  We don't want
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * to dispatch to the sampler separately for each channel we need, so
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * we do retain the vector types in that case.
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)extern "C" {
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "main/core.h"
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "brw_wm.h"
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "glsl/ir.h"
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "glsl/ir_expression_flattening.h"
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "glsl/glsl_types.h"
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class ir_channel_expressions_visitor : public ir_hierarchical_visitor {
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)public:
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   ir_channel_expressions_visitor()
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   {
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      this->progress = false;
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      this->mem_ctx = NULL;
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   }
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   ir_visitor_status visit_leave(ir_assignment *);
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   ir_rvalue *get_element(ir_variable *var, unsigned int element);
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   void assign(ir_assignment *ir, int elem, ir_rvalue *val);
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   bool progress;
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   void *mem_ctx;
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static bool
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)channel_expressions_predicate(ir_instruction *ir)
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   ir_expression *expr = ir->as_expression();
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   unsigned int i;
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   if (!expr)
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return false;
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   for (i = 0; i < expr->get_num_operands(); i++) {
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (expr->operands[i]->type->is_vector())
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)	 return true;
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   }
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   return false;
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)brw_do_channel_expressions(exec_list *instructions)
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   ir_channel_expressions_visitor v;
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   /* Pull out any matrix expression to a separate assignment to a
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    * temp.  This will make our handling of the breakdown to
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    * operations on the matrix's vector components much easier.
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    */
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   do_expression_flattening(instructions, channel_expressions_predicate);
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   visit_list_elements(&v, instructions);
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   return v.progress;
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
102eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdochir_rvalue *
103eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdochir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem)
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
105eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch   ir_dereference *deref;
106eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   if (var->type->is_scalar())
1081e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)      return new(mem_ctx) ir_dereference_variable(var);
109
110   assert(elem < var->type->components());
111   deref = new(mem_ctx) ir_dereference_variable(var);
112   return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1);
113}
114
115void
116ir_channel_expressions_visitor::assign(ir_assignment *ir, int elem, ir_rvalue *val)
117{
118   ir_dereference *lhs = ir->lhs->clone(mem_ctx, NULL);
119   ir_assignment *assign;
120
121   /* This assign-of-expression should have been generated by the
122    * expression flattening visitor (since we never short circit to
123    * not flatten, even for plain assignments of variables), so the
124    * writemask is always full.
125    */
126   assert(ir->write_mask == (1 << ir->lhs->type->components()) - 1);
127
128   assign = new(mem_ctx) ir_assignment(lhs, val, NULL, (1 << elem));
129   ir->insert_before(assign);
130}
131
132ir_visitor_status
133ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
134{
135   ir_expression *expr = ir->rhs->as_expression();
136   bool found_vector = false;
137   unsigned int i, vector_elements = 1;
138   ir_variable *op_var[2];
139
140   if (!expr)
141      return visit_continue;
142
143   if (!this->mem_ctx)
144      this->mem_ctx = ralloc_parent(ir);
145
146   for (i = 0; i < expr->get_num_operands(); i++) {
147      if (expr->operands[i]->type->is_vector()) {
148	 found_vector = true;
149	 vector_elements = expr->operands[i]->type->vector_elements;
150	 break;
151      }
152   }
153   if (!found_vector)
154      return visit_continue;
155
156   /* Store the expression operands in temps so we can use them
157    * multiple times.
158    */
159   for (i = 0; i < expr->get_num_operands(); i++) {
160      ir_assignment *assign;
161      ir_dereference *deref;
162
163      assert(!expr->operands[i]->type->is_matrix());
164
165      op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type,
166					   "channel_expressions",
167					   ir_var_temporary);
168      ir->insert_before(op_var[i]);
169
170      deref = new(mem_ctx) ir_dereference_variable(op_var[i]);
171      assign = new(mem_ctx) ir_assignment(deref,
172					  expr->operands[i],
173					  NULL);
174      ir->insert_before(assign);
175   }
176
177   const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type,
178							   1, 1);
179
180   /* OK, time to break down this vector operation. */
181   switch (expr->operation) {
182   case ir_unop_bit_not:
183   case ir_unop_logic_not:
184   case ir_unop_neg:
185   case ir_unop_abs:
186   case ir_unop_sign:
187   case ir_unop_rcp:
188   case ir_unop_rsq:
189   case ir_unop_sqrt:
190   case ir_unop_exp:
191   case ir_unop_log:
192   case ir_unop_exp2:
193   case ir_unop_log2:
194   case ir_unop_bitcast_i2f:
195   case ir_unop_bitcast_f2i:
196   case ir_unop_bitcast_f2u:
197   case ir_unop_bitcast_u2f:
198   case ir_unop_i2u:
199   case ir_unop_u2i:
200   case ir_unop_f2i:
201   case ir_unop_f2u:
202   case ir_unop_i2f:
203   case ir_unop_f2b:
204   case ir_unop_b2f:
205   case ir_unop_i2b:
206   case ir_unop_b2i:
207   case ir_unop_u2f:
208   case ir_unop_trunc:
209   case ir_unop_ceil:
210   case ir_unop_floor:
211   case ir_unop_fract:
212   case ir_unop_round_even:
213   case ir_unop_sin:
214   case ir_unop_cos:
215   case ir_unop_sin_reduced:
216   case ir_unop_cos_reduced:
217   case ir_unop_dFdx:
218   case ir_unop_dFdy:
219      for (i = 0; i < vector_elements; i++) {
220	 ir_rvalue *op0 = get_element(op_var[0], i);
221
222	 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
223						  element_type,
224						  op0,
225						  NULL));
226      }
227      break;
228
229   case ir_binop_add:
230   case ir_binop_sub:
231   case ir_binop_mul:
232   case ir_binop_div:
233   case ir_binop_mod:
234   case ir_binop_min:
235   case ir_binop_max:
236   case ir_binop_pow:
237   case ir_binop_lshift:
238   case ir_binop_rshift:
239   case ir_binop_bit_and:
240   case ir_binop_bit_xor:
241   case ir_binop_bit_or:
242   case ir_binop_less:
243   case ir_binop_greater:
244   case ir_binop_lequal:
245   case ir_binop_gequal:
246   case ir_binop_equal:
247   case ir_binop_nequal:
248      for (i = 0; i < vector_elements; i++) {
249	 ir_rvalue *op0 = get_element(op_var[0], i);
250	 ir_rvalue *op1 = get_element(op_var[1], i);
251
252	 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
253						  element_type,
254						  op0,
255						  op1));
256      }
257      break;
258
259   case ir_unop_any: {
260      ir_expression *temp;
261      temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
262					element_type,
263					get_element(op_var[0], 0),
264					get_element(op_var[0], 1));
265
266      for (i = 2; i < vector_elements; i++) {
267	 temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
268					   element_type,
269					   get_element(op_var[0], i),
270					   temp);
271      }
272      assign(ir, 0, temp);
273      break;
274   }
275
276   case ir_binop_dot: {
277      ir_expression *last = NULL;
278      for (i = 0; i < vector_elements; i++) {
279	 ir_rvalue *op0 = get_element(op_var[0], i);
280	 ir_rvalue *op1 = get_element(op_var[1], i);
281	 ir_expression *temp;
282
283	 temp = new(mem_ctx) ir_expression(ir_binop_mul,
284					   element_type,
285					   op0,
286					   op1);
287	 if (last) {
288	    last = new(mem_ctx) ir_expression(ir_binop_add,
289					      element_type,
290					      temp,
291					      last);
292	 } else {
293	    last = temp;
294	 }
295      }
296      assign(ir, 0, last);
297      break;
298   }
299
300   case ir_binop_logic_and:
301   case ir_binop_logic_xor:
302   case ir_binop_logic_or:
303      ir->print();
304      printf("\n");
305      assert(!"not reached: expression operates on scalars only");
306      break;
307   case ir_binop_all_equal:
308   case ir_binop_any_nequal: {
309      ir_expression *last = NULL;
310      for (i = 0; i < vector_elements; i++) {
311	 ir_rvalue *op0 = get_element(op_var[0], i);
312	 ir_rvalue *op1 = get_element(op_var[1], i);
313	 ir_expression *temp;
314	 ir_expression_operation join;
315
316	 if (expr->operation == ir_binop_all_equal)
317	    join = ir_binop_logic_and;
318	 else
319	    join = ir_binop_logic_or;
320
321	 temp = new(mem_ctx) ir_expression(expr->operation,
322					   element_type,
323					   op0,
324					   op1);
325	 if (last) {
326	    last = new(mem_ctx) ir_expression(join,
327					      element_type,
328					      temp,
329					      last);
330	 } else {
331	    last = temp;
332	 }
333      }
334      assign(ir, 0, last);
335      break;
336   }
337   case ir_unop_noise:
338      assert(!"noise should have been broken down to function call");
339      break;
340
341   case ir_binop_ubo_load:
342      assert(!"not yet supported");
343      break;
344
345   case ir_quadop_vector:
346      assert(!"should have been lowered");
347      break;
348   }
349
350   ir->remove();
351   this->progress = true;
352
353   return visit_continue;
354}
355