brw_fs_channel_expressions.cpp revision e1e887679786cf3882e83b2a194f046b34a71f05
1/*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24/**
 * \file brw_fs_channel_expressions.cpp
26 *
27 * Breaks vector operations down into operations on each component.
28 *
29 * The 965 fragment shader receives 8 or 16 pixels at a time, so each
30 * channel of a vector is laid out as 1 or 2 8-float registers.  Each
31 * ALU operation operates on one of those channel registers.  As a
32 * result, there is no value to the 965 fragment shader in tracking
33 * "vector" expressions in the sense of GLSL fragment shaders, when
34 * doing a channel at a time may help in constant folding, algebraic
35 * simplification, and reducing the liveness of channel registers.
36 *
37 * The exception to the desire to break everything down to floats is
 * texturing.  The texture sampler returns a writemasked
39 * 4/8-register sequence containing the texture values.  We don't want
40 * to dispatch to the sampler separately for each channel we need, so
41 * we do retain the vector types in that case.
42 */
43
44extern "C" {
45#include "main/core.h"
46#include "brw_wm.h"
47}
48#include "glsl/ir.h"
49#include "glsl/ir_expression_flattening.h"
50#include "glsl/glsl_types.h"
51
52class ir_channel_expressions_visitor : public ir_hierarchical_visitor {
53public:
54   ir_channel_expressions_visitor()
55   {
56      this->progress = false;
57      this->mem_ctx = NULL;
58   }
59
60   ir_visitor_status visit_leave(ir_assignment *);
61
62   ir_rvalue *get_element(ir_variable *var, unsigned int element);
63   void assign(ir_assignment *ir, int elem, ir_rvalue *val);
64
65   bool progress;
66   void *mem_ctx;
67};
68
69static bool
70channel_expressions_predicate(ir_instruction *ir)
71{
72   ir_expression *expr = ir->as_expression();
73   unsigned int i;
74
75   if (!expr)
76      return false;
77
78   for (i = 0; i < expr->get_num_operands(); i++) {
79      if (expr->operands[i]->type->is_vector())
80	 return true;
81   }
82
83   return false;
84}
85
86bool
87brw_do_channel_expressions(exec_list *instructions)
88{
89   ir_channel_expressions_visitor v;
90
91   /* Pull out any matrix expression to a separate assignment to a
92    * temp.  This will make our handling of the breakdown to
93    * operations on the matrix's vector components much easier.
94    */
95   do_expression_flattening(instructions, channel_expressions_predicate);
96
97   visit_list_elements(&v, instructions);
98
99   return v.progress;
100}
101
102ir_rvalue *
103ir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem)
104{
105   ir_dereference *deref;
106
107   if (var->type->is_scalar())
108      return new(mem_ctx) ir_dereference_variable(var);
109
110   assert(elem < var->type->components());
111   deref = new(mem_ctx) ir_dereference_variable(var);
112   return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1);
113}
114
115void
116ir_channel_expressions_visitor::assign(ir_assignment *ir, int elem, ir_rvalue *val)
117{
118   ir_dereference *lhs = ir->lhs->clone(mem_ctx, NULL);
119   ir_assignment *assign;
120
121   /* This assign-of-expression should have been generated by the
122    * expression flattening visitor (since we never short circit to
123    * not flatten, even for plain assignments of variables), so the
124    * writemask is always full.
125    */
126   assert(ir->write_mask == (1 << ir->lhs->type->components()) - 1);
127
128   assign = new(mem_ctx) ir_assignment(lhs, val, NULL, (1 << elem));
129   ir->insert_before(assign);
130}
131
132ir_visitor_status
133ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
134{
135   ir_expression *expr = ir->rhs->as_expression();
136   bool found_vector = false;
137   unsigned int i, vector_elements = 1;
138   ir_variable *op_var[3];
139
140   if (!expr)
141      return visit_continue;
142
143   if (!this->mem_ctx)
144      this->mem_ctx = ralloc_parent(ir);
145
146   for (i = 0; i < expr->get_num_operands(); i++) {
147      if (expr->operands[i]->type->is_vector()) {
148	 found_vector = true;
149	 vector_elements = expr->operands[i]->type->vector_elements;
150	 break;
151      }
152   }
153   if (!found_vector)
154      return visit_continue;
155
156   /* Store the expression operands in temps so we can use them
157    * multiple times.
158    */
159   for (i = 0; i < expr->get_num_operands(); i++) {
160      ir_assignment *assign;
161      ir_dereference *deref;
162
163      assert(!expr->operands[i]->type->is_matrix());
164
165      op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type,
166					   "channel_expressions",
167					   ir_var_temporary);
168      ir->insert_before(op_var[i]);
169
170      deref = new(mem_ctx) ir_dereference_variable(op_var[i]);
171      assign = new(mem_ctx) ir_assignment(deref,
172					  expr->operands[i],
173					  NULL);
174      ir->insert_before(assign);
175   }
176
177   const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type,
178							   1, 1);
179
180   /* OK, time to break down this vector operation. */
181   switch (expr->operation) {
182   case ir_unop_bit_not:
183   case ir_unop_logic_not:
184   case ir_unop_neg:
185   case ir_unop_abs:
186   case ir_unop_sign:
187   case ir_unop_rcp:
188   case ir_unop_rsq:
189   case ir_unop_sqrt:
190   case ir_unop_exp:
191   case ir_unop_log:
192   case ir_unop_exp2:
193   case ir_unop_log2:
194   case ir_unop_bitcast_i2f:
195   case ir_unop_bitcast_f2i:
196   case ir_unop_bitcast_f2u:
197   case ir_unop_bitcast_u2f:
198   case ir_unop_i2u:
199   case ir_unop_u2i:
200   case ir_unop_f2i:
201   case ir_unop_f2u:
202   case ir_unop_i2f:
203   case ir_unop_f2b:
204   case ir_unop_b2f:
205   case ir_unop_i2b:
206   case ir_unop_b2i:
207   case ir_unop_u2f:
208   case ir_unop_trunc:
209   case ir_unop_ceil:
210   case ir_unop_floor:
211   case ir_unop_fract:
212   case ir_unop_round_even:
213   case ir_unop_sin:
214   case ir_unop_cos:
215   case ir_unop_sin_reduced:
216   case ir_unop_cos_reduced:
217   case ir_unop_dFdx:
218   case ir_unop_dFdy:
219   case ir_unop_bitfield_reverse:
220   case ir_unop_bit_count:
221   case ir_unop_find_msb:
222   case ir_unop_find_lsb:
223      for (i = 0; i < vector_elements; i++) {
224	 ir_rvalue *op0 = get_element(op_var[0], i);
225
226	 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
227						  element_type,
228						  op0,
229						  NULL));
230      }
231      break;
232
233   case ir_binop_add:
234   case ir_binop_sub:
235   case ir_binop_mul:
236   case ir_binop_div:
237   case ir_binop_mod:
238   case ir_binop_min:
239   case ir_binop_max:
240   case ir_binop_pow:
241   case ir_binop_lshift:
242   case ir_binop_rshift:
243   case ir_binop_bit_and:
244   case ir_binop_bit_xor:
245   case ir_binop_bit_or:
246   case ir_binop_less:
247   case ir_binop_greater:
248   case ir_binop_lequal:
249   case ir_binop_gequal:
250   case ir_binop_equal:
251   case ir_binop_nequal:
252      for (i = 0; i < vector_elements; i++) {
253	 ir_rvalue *op0 = get_element(op_var[0], i);
254	 ir_rvalue *op1 = get_element(op_var[1], i);
255
256	 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
257						  element_type,
258						  op0,
259						  op1));
260      }
261      break;
262
263   case ir_unop_any: {
264      ir_expression *temp;
265      temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
266					element_type,
267					get_element(op_var[0], 0),
268					get_element(op_var[0], 1));
269
270      for (i = 2; i < vector_elements; i++) {
271	 temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
272					   element_type,
273					   get_element(op_var[0], i),
274					   temp);
275      }
276      assign(ir, 0, temp);
277      break;
278   }
279
280   case ir_binop_dot: {
281      ir_expression *last = NULL;
282      for (i = 0; i < vector_elements; i++) {
283	 ir_rvalue *op0 = get_element(op_var[0], i);
284	 ir_rvalue *op1 = get_element(op_var[1], i);
285	 ir_expression *temp;
286
287	 temp = new(mem_ctx) ir_expression(ir_binop_mul,
288					   element_type,
289					   op0,
290					   op1);
291	 if (last) {
292	    last = new(mem_ctx) ir_expression(ir_binop_add,
293					      element_type,
294					      temp,
295					      last);
296	 } else {
297	    last = temp;
298	 }
299      }
300      assign(ir, 0, last);
301      break;
302   }
303
304   case ir_binop_logic_and:
305   case ir_binop_logic_xor:
306   case ir_binop_logic_or:
307      ir->print();
308      printf("\n");
309      assert(!"not reached: expression operates on scalars only");
310      break;
311   case ir_binop_all_equal:
312   case ir_binop_any_nequal: {
313      ir_expression *last = NULL;
314      for (i = 0; i < vector_elements; i++) {
315	 ir_rvalue *op0 = get_element(op_var[0], i);
316	 ir_rvalue *op1 = get_element(op_var[1], i);
317	 ir_expression *temp;
318	 ir_expression_operation join;
319
320	 if (expr->operation == ir_binop_all_equal)
321	    join = ir_binop_logic_and;
322	 else
323	    join = ir_binop_logic_or;
324
325	 temp = new(mem_ctx) ir_expression(expr->operation,
326					   element_type,
327					   op0,
328					   op1);
329	 if (last) {
330	    last = new(mem_ctx) ir_expression(join,
331					      element_type,
332					      temp,
333					      last);
334	 } else {
335	    last = temp;
336	 }
337      }
338      assign(ir, 0, last);
339      break;
340   }
341   case ir_unop_noise:
342      assert(!"noise should have been broken down to function call");
343      break;
344
345   case ir_binop_bfm: {
346      /* Does not need to be scalarized, since its result will be identical
347       * for all channels.
348       */
349      ir_rvalue *op0 = get_element(op_var[0], 0);
350      ir_rvalue *op1 = get_element(op_var[1], 0);
351
352      assign(ir, 0, new(mem_ctx) ir_expression(expr->operation,
353                                               element_type,
354                                               op0,
355                                               op1));
356      break;
357   }
358
359   case ir_binop_ubo_load:
360      assert(!"not yet supported");
361      break;
362
363   case ir_triop_lrp:
364   case ir_triop_bitfield_extract:
365      for (i = 0; i < vector_elements; i++) {
366	 ir_rvalue *op0 = get_element(op_var[0], i);
367	 ir_rvalue *op1 = get_element(op_var[1], i);
368	 ir_rvalue *op2 = get_element(op_var[2], i);
369
370	 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
371						  element_type,
372						  op0,
373						  op1,
374						  op2));
375      }
376      break;
377
378   case ir_triop_bfi: {
379      /* Only a single BFM is needed for multiple BFIs. */
380      ir_rvalue *op0 = get_element(op_var[0], 0);
381
382      for (i = 0; i < vector_elements; i++) {
383         ir_rvalue *op1 = get_element(op_var[1], i);
384         ir_rvalue *op2 = get_element(op_var[2], i);
385
386         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
387                                                  element_type,
388                                                  op0->clone(mem_ctx, NULL),
389                                                  op1,
390                                                  op2));
391      }
392      break;
393   }
394
395   case ir_unop_pack_snorm_2x16:
396   case ir_unop_pack_snorm_4x8:
397   case ir_unop_pack_unorm_2x16:
398   case ir_unop_pack_unorm_4x8:
399   case ir_unop_pack_half_2x16:
400   case ir_unop_unpack_snorm_2x16:
401   case ir_unop_unpack_snorm_4x8:
402   case ir_unop_unpack_unorm_2x16:
403   case ir_unop_unpack_unorm_4x8:
404   case ir_unop_unpack_half_2x16:
405   case ir_binop_vector_extract:
406   case ir_quadop_bitfield_insert:
407   case ir_quadop_vector:
408      assert(!"should have been lowered");
409      break;
410
411   case ir_unop_unpack_half_2x16_split_x:
412   case ir_unop_unpack_half_2x16_split_y:
413   case ir_binop_pack_half_2x16_split:
414      assert("!not reached: expression operates on scalars only");
415      break;
416   }
417
418   ir->remove();
419   this->progress = true;
420
421   return visit_continue;
422}
423