/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file brw_wm_channel_expressions.cpp
 *
 * Breaks vector operations down into operations on each component.
 *
 * The 965 fragment shader receives 8 or 16 pixels at a time, so each
 * channel of a vector is laid out as 1 or 2 8-float registers. Each
 * ALU operation operates on one of those channel registers. As a
 * result, there is no value to the 965 fragment shader in tracking
 * "vector" expressions in the sense of GLSL fragment shaders, when
 * doing a channel at a time may help in constant folding, algebraic
 * simplification, and reducing the liveness of channel registers.
 *
 * The exception to the desire to break everything down to floats is
 * texturing. The texture sampler returns a writemasked 4/8-register
 * sequence containing the texture values. We don't want to dispatch
 * to the sampler separately for each channel we need, so we do retain
 * the vector types in that case.
 */

extern "C" {
#include "main/core.h"
#include "brw_wm.h"
}
#include "glsl/ir.h"
#include "glsl/ir_expression_flattening.h"
#include "glsl/glsl_types.h"

class ir_channel_expressions_visitor : public ir_hierarchical_visitor {
public:
   ir_channel_expressions_visitor()
   {
      this->progress = false;
      this->mem_ctx = NULL;
   }

   ir_visitor_status visit_leave(ir_assignment *);

   ir_rvalue *get_element(ir_variable *var, unsigned int element);
   void assign(ir_assignment *ir, int elem, ir_rvalue *val);

   bool progress;
   void *mem_ctx;
};

static bool
channel_expressions_predicate(ir_instruction *ir)
{
   ir_expression *expr = ir->as_expression();
   unsigned int i;

   if (!expr)
      return false;

   for (i = 0; i < expr->get_num_operands(); i++) {
      if (expr->operands[i]->type->is_vector())
         return true;
   }

   return false;
}

bool
brw_do_channel_expressions(exec_list *instructions)
{
   ir_channel_expressions_visitor v;

   /* Pull out any matrix expression to a separate assignment to a
    * temp. This will make our handling of the breakdown to
    * operations on the matrix's vector components much easier.
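    *
    * (A rough sketch of the combined effect, in hypothetical GLSL rather
    * than the exact IR: for a statement like
    *
    *    gl_FragColor = color * vec4(gain);
    *
    * flattening first gives each vector-valued subexpression its own
    * assignment to a temporary, and visit_leave() below then rewrites
    * each such assignment as one scalar assignment per writemask
    * channel.)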
    */
   do_expression_flattening(instructions, channel_expressions_predicate);

   visit_list_elements(&v, instructions);

   return v.progress;
}

ir_rvalue *
ir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem)
{
   ir_dereference *deref;

   if (var->type->is_scalar())
      return new(mem_ctx) ir_dereference_variable(var);

   assert(elem < var->type->components());
   deref = new(mem_ctx) ir_dereference_variable(var);
   return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1);
}

void
ir_channel_expressions_visitor::assign(ir_assignment *ir, int elem, ir_rvalue *val)
{
   ir_dereference *lhs = ir->lhs->clone(mem_ctx, NULL);
   ir_assignment *assign;

   /* This assign-of-expression should have been generated by the
    * expression flattening visitor (since we never short circuit to
    * not flatten, even for plain assignments of variables), so the
    * writemask is always full.
    */
   assert(ir->write_mask == (1 << ir->lhs->type->components()) - 1);

   assign = new(mem_ctx) ir_assignment(lhs, val, NULL, (1 << elem));
   ir->insert_before(assign);
}

ir_visitor_status
ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
{
   ir_expression *expr = ir->rhs->as_expression();
   bool found_vector = false;
   unsigned int i, vector_elements = 1;
   ir_variable *op_var[2];

   if (!expr)
      return visit_continue;

   if (!this->mem_ctx)
      this->mem_ctx = ralloc_parent(ir);

   for (i = 0; i < expr->get_num_operands(); i++) {
      if (expr->operands[i]->type->is_vector()) {
         found_vector = true;
         vector_elements = expr->operands[i]->type->vector_elements;
         break;
      }
   }
   if (!found_vector)
      return visit_continue;

   /* Store the expression operands in temps so we can use them
    * multiple times.
    */
   for (i = 0; i < expr->get_num_operands(); i++) {
      ir_assignment *assign;
      ir_dereference *deref;

      assert(!expr->operands[i]->type->is_matrix());

      op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type,
                                           "channel_expressions",
                                           ir_var_temporary);
      ir->insert_before(op_var[i]);

      deref = new(mem_ctx) ir_dereference_variable(op_var[i]);
      assign = new(mem_ctx) ir_assignment(deref,
                                          expr->operands[i],
                                          NULL);
      ir->insert_before(assign);
   }

   const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type,
                                                           1, 1);

   /* OK, time to break down this vector operation.
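    *
    * Each case below builds scalar ir_expressions from the per-channel
    * swizzles returned by get_element() and writes them back through
    * assign(), which sets a single writemask bit per channel. The
    * reduction operations (any, dot, all_equal/any_nequal) instead fold
    * the channels together and assign one scalar result to channel 0.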
    */
   switch (expr->operation) {
   case ir_unop_bit_not:
   case ir_unop_logic_not:
   case ir_unop_neg:
   case ir_unop_abs:
   case ir_unop_sign:
   case ir_unop_rcp:
   case ir_unop_rsq:
   case ir_unop_sqrt:
   case ir_unop_exp:
   case ir_unop_log:
   case ir_unop_exp2:
   case ir_unop_log2:
   case ir_unop_bitcast_i2f:
   case ir_unop_bitcast_f2i:
   case ir_unop_bitcast_f2u:
   case ir_unop_bitcast_u2f:
   case ir_unop_i2u:
   case ir_unop_u2i:
   case ir_unop_f2i:
   case ir_unop_f2u:
   case ir_unop_i2f:
   case ir_unop_f2b:
   case ir_unop_b2f:
   case ir_unop_i2b:
   case ir_unop_b2i:
   case ir_unop_u2f:
   case ir_unop_trunc:
   case ir_unop_ceil:
   case ir_unop_floor:
   case ir_unop_fract:
   case ir_unop_round_even:
   case ir_unop_sin:
   case ir_unop_cos:
   case ir_unop_sin_reduced:
   case ir_unop_cos_reduced:
   case ir_unop_dFdx:
   case ir_unop_dFdy:
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0,
                                                  NULL));
      }
      break;

   case ir_binop_add:
   case ir_binop_sub:
   case ir_binop_mul:
   case ir_binop_div:
   case ir_binop_mod:
   case ir_binop_min:
   case ir_binop_max:
   case ir_binop_pow:
   case ir_binop_lshift:
   case ir_binop_rshift:
   case ir_binop_bit_and:
   case ir_binop_bit_xor:
   case ir_binop_bit_or:
   case ir_binop_less:
   case ir_binop_greater:
   case ir_binop_lequal:
   case ir_binop_gequal:
   case ir_binop_equal:
   case ir_binop_nequal:
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0,
                                                  op1));
      }
      break;

   case ir_unop_any: {
      ir_expression *temp;
      temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
                                        element_type,
                                        get_element(op_var[0], 0),
                                        get_element(op_var[0], 1));

      for (i = 2; i < vector_elements; i++) {
         temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
                                           element_type,
                                           get_element(op_var[0], i),
                                           temp);
      }
      assign(ir, 0, temp);
      break;
   }

   case ir_binop_dot: {
      ir_expression *last = NULL;
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_expression *temp;

         temp = new(mem_ctx) ir_expression(ir_binop_mul,
                                           element_type,
                                           op0,
                                           op1);
         if (last) {
            last = new(mem_ctx) ir_expression(ir_binop_add,
                                              element_type,
                                              temp,
                                              last);
         } else {
            last = temp;
         }
      }
      assign(ir, 0, last);
      break;
   }

   case ir_binop_logic_and:
   case ir_binop_logic_xor:
   case ir_binop_logic_or:
      ir->print();
      printf("\n");
      assert(!"not reached: expression operates on scalars only");
      break;
   case ir_binop_all_equal:
   case ir_binop_any_nequal: {
      ir_expression *last = NULL;
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_expression *temp;
         ir_expression_operation join;

         if (expr->operation == ir_binop_all_equal)
            join = ir_binop_logic_and;
         else
            join = ir_binop_logic_or;

         temp = new(mem_ctx) ir_expression(expr->operation,
                                           element_type,
                                           op0,
                                           op1);
         if (last) {
            last = new(mem_ctx) ir_expression(join,
                                              element_type,
                                              temp,
                                              last);
         } else {
            last = temp;
         }
      }
      assign(ir, 0, last);
      break;
   }
   case ir_unop_noise:
      assert(!"noise should have been broken down to function call");
      break;

   case ir_binop_ubo_load:
      assert(!"not yet supported");
      break;

   case ir_quadop_vector:
      assert(!"should have been lowered");
      break;
   }

   ir->remove();
   this->progress = true;

   return visit_continue;
}