// brw_fs_channel_expressions.cpp revision e1e887679786cf3882e83b2a194f046b34a71f05
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24/** 25 * \file brw_wm_channel_expressions.cpp 26 * 27 * Breaks vector operations down into operations on each component. 28 * 29 * The 965 fragment shader receives 8 or 16 pixels at a time, so each 30 * channel of a vector is laid out as 1 or 2 8-float registers. Each 31 * ALU operation operates on one of those channel registers. As a 32 * result, there is no value to the 965 fragment shader in tracking 33 * "vector" expressions in the sense of GLSL fragment shaders, when 34 * doing a channel at a time may help in constant folding, algebraic 35 * simplification, and reducing the liveness of channel registers. 36 * 37 * The exception to the desire to break everything down to floats is 38 * texturing. 
The texture sampler returns a writemasked masked 39 * 4/8-register sequence containing the texture values. We don't want 40 * to dispatch to the sampler separately for each channel we need, so 41 * we do retain the vector types in that case. 42 */ 43 44extern "C" { 45#include "main/core.h" 46#include "brw_wm.h" 47} 48#include "glsl/ir.h" 49#include "glsl/ir_expression_flattening.h" 50#include "glsl/glsl_types.h" 51 52class ir_channel_expressions_visitor : public ir_hierarchical_visitor { 53public: 54 ir_channel_expressions_visitor() 55 { 56 this->progress = false; 57 this->mem_ctx = NULL; 58 } 59 60 ir_visitor_status visit_leave(ir_assignment *); 61 62 ir_rvalue *get_element(ir_variable *var, unsigned int element); 63 void assign(ir_assignment *ir, int elem, ir_rvalue *val); 64 65 bool progress; 66 void *mem_ctx; 67}; 68 69static bool 70channel_expressions_predicate(ir_instruction *ir) 71{ 72 ir_expression *expr = ir->as_expression(); 73 unsigned int i; 74 75 if (!expr) 76 return false; 77 78 for (i = 0; i < expr->get_num_operands(); i++) { 79 if (expr->operands[i]->type->is_vector()) 80 return true; 81 } 82 83 return false; 84} 85 86bool 87brw_do_channel_expressions(exec_list *instructions) 88{ 89 ir_channel_expressions_visitor v; 90 91 /* Pull out any matrix expression to a separate assignment to a 92 * temp. This will make our handling of the breakdown to 93 * operations on the matrix's vector components much easier. 
94 */ 95 do_expression_flattening(instructions, channel_expressions_predicate); 96 97 visit_list_elements(&v, instructions); 98 99 return v.progress; 100} 101 102ir_rvalue * 103ir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem) 104{ 105 ir_dereference *deref; 106 107 if (var->type->is_scalar()) 108 return new(mem_ctx) ir_dereference_variable(var); 109 110 assert(elem < var->type->components()); 111 deref = new(mem_ctx) ir_dereference_variable(var); 112 return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1); 113} 114 115void 116ir_channel_expressions_visitor::assign(ir_assignment *ir, int elem, ir_rvalue *val) 117{ 118 ir_dereference *lhs = ir->lhs->clone(mem_ctx, NULL); 119 ir_assignment *assign; 120 121 /* This assign-of-expression should have been generated by the 122 * expression flattening visitor (since we never short circit to 123 * not flatten, even for plain assignments of variables), so the 124 * writemask is always full. 125 */ 126 assert(ir->write_mask == (1 << ir->lhs->type->components()) - 1); 127 128 assign = new(mem_ctx) ir_assignment(lhs, val, NULL, (1 << elem)); 129 ir->insert_before(assign); 130} 131 132ir_visitor_status 133ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) 134{ 135 ir_expression *expr = ir->rhs->as_expression(); 136 bool found_vector = false; 137 unsigned int i, vector_elements = 1; 138 ir_variable *op_var[3]; 139 140 if (!expr) 141 return visit_continue; 142 143 if (!this->mem_ctx) 144 this->mem_ctx = ralloc_parent(ir); 145 146 for (i = 0; i < expr->get_num_operands(); i++) { 147 if (expr->operands[i]->type->is_vector()) { 148 found_vector = true; 149 vector_elements = expr->operands[i]->type->vector_elements; 150 break; 151 } 152 } 153 if (!found_vector) 154 return visit_continue; 155 156 /* Store the expression operands in temps so we can use them 157 * multiple times. 
158 */ 159 for (i = 0; i < expr->get_num_operands(); i++) { 160 ir_assignment *assign; 161 ir_dereference *deref; 162 163 assert(!expr->operands[i]->type->is_matrix()); 164 165 op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type, 166 "channel_expressions", 167 ir_var_temporary); 168 ir->insert_before(op_var[i]); 169 170 deref = new(mem_ctx) ir_dereference_variable(op_var[i]); 171 assign = new(mem_ctx) ir_assignment(deref, 172 expr->operands[i], 173 NULL); 174 ir->insert_before(assign); 175 } 176 177 const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type, 178 1, 1); 179 180 /* OK, time to break down this vector operation. */ 181 switch (expr->operation) { 182 case ir_unop_bit_not: 183 case ir_unop_logic_not: 184 case ir_unop_neg: 185 case ir_unop_abs: 186 case ir_unop_sign: 187 case ir_unop_rcp: 188 case ir_unop_rsq: 189 case ir_unop_sqrt: 190 case ir_unop_exp: 191 case ir_unop_log: 192 case ir_unop_exp2: 193 case ir_unop_log2: 194 case ir_unop_bitcast_i2f: 195 case ir_unop_bitcast_f2i: 196 case ir_unop_bitcast_f2u: 197 case ir_unop_bitcast_u2f: 198 case ir_unop_i2u: 199 case ir_unop_u2i: 200 case ir_unop_f2i: 201 case ir_unop_f2u: 202 case ir_unop_i2f: 203 case ir_unop_f2b: 204 case ir_unop_b2f: 205 case ir_unop_i2b: 206 case ir_unop_b2i: 207 case ir_unop_u2f: 208 case ir_unop_trunc: 209 case ir_unop_ceil: 210 case ir_unop_floor: 211 case ir_unop_fract: 212 case ir_unop_round_even: 213 case ir_unop_sin: 214 case ir_unop_cos: 215 case ir_unop_sin_reduced: 216 case ir_unop_cos_reduced: 217 case ir_unop_dFdx: 218 case ir_unop_dFdy: 219 case ir_unop_bitfield_reverse: 220 case ir_unop_bit_count: 221 case ir_unop_find_msb: 222 case ir_unop_find_lsb: 223 for (i = 0; i < vector_elements; i++) { 224 ir_rvalue *op0 = get_element(op_var[0], i); 225 226 assign(ir, i, new(mem_ctx) ir_expression(expr->operation, 227 element_type, 228 op0, 229 NULL)); 230 } 231 break; 232 233 case ir_binop_add: 234 case ir_binop_sub: 235 case ir_binop_mul: 236 
case ir_binop_div: 237 case ir_binop_mod: 238 case ir_binop_min: 239 case ir_binop_max: 240 case ir_binop_pow: 241 case ir_binop_lshift: 242 case ir_binop_rshift: 243 case ir_binop_bit_and: 244 case ir_binop_bit_xor: 245 case ir_binop_bit_or: 246 case ir_binop_less: 247 case ir_binop_greater: 248 case ir_binop_lequal: 249 case ir_binop_gequal: 250 case ir_binop_equal: 251 case ir_binop_nequal: 252 for (i = 0; i < vector_elements; i++) { 253 ir_rvalue *op0 = get_element(op_var[0], i); 254 ir_rvalue *op1 = get_element(op_var[1], i); 255 256 assign(ir, i, new(mem_ctx) ir_expression(expr->operation, 257 element_type, 258 op0, 259 op1)); 260 } 261 break; 262 263 case ir_unop_any: { 264 ir_expression *temp; 265 temp = new(mem_ctx) ir_expression(ir_binop_logic_or, 266 element_type, 267 get_element(op_var[0], 0), 268 get_element(op_var[0], 1)); 269 270 for (i = 2; i < vector_elements; i++) { 271 temp = new(mem_ctx) ir_expression(ir_binop_logic_or, 272 element_type, 273 get_element(op_var[0], i), 274 temp); 275 } 276 assign(ir, 0, temp); 277 break; 278 } 279 280 case ir_binop_dot: { 281 ir_expression *last = NULL; 282 for (i = 0; i < vector_elements; i++) { 283 ir_rvalue *op0 = get_element(op_var[0], i); 284 ir_rvalue *op1 = get_element(op_var[1], i); 285 ir_expression *temp; 286 287 temp = new(mem_ctx) ir_expression(ir_binop_mul, 288 element_type, 289 op0, 290 op1); 291 if (last) { 292 last = new(mem_ctx) ir_expression(ir_binop_add, 293 element_type, 294 temp, 295 last); 296 } else { 297 last = temp; 298 } 299 } 300 assign(ir, 0, last); 301 break; 302 } 303 304 case ir_binop_logic_and: 305 case ir_binop_logic_xor: 306 case ir_binop_logic_or: 307 ir->print(); 308 printf("\n"); 309 assert(!"not reached: expression operates on scalars only"); 310 break; 311 case ir_binop_all_equal: 312 case ir_binop_any_nequal: { 313 ir_expression *last = NULL; 314 for (i = 0; i < vector_elements; i++) { 315 ir_rvalue *op0 = get_element(op_var[0], i); 316 ir_rvalue *op1 = 
get_element(op_var[1], i); 317 ir_expression *temp; 318 ir_expression_operation join; 319 320 if (expr->operation == ir_binop_all_equal) 321 join = ir_binop_logic_and; 322 else 323 join = ir_binop_logic_or; 324 325 temp = new(mem_ctx) ir_expression(expr->operation, 326 element_type, 327 op0, 328 op1); 329 if (last) { 330 last = new(mem_ctx) ir_expression(join, 331 element_type, 332 temp, 333 last); 334 } else { 335 last = temp; 336 } 337 } 338 assign(ir, 0, last); 339 break; 340 } 341 case ir_unop_noise: 342 assert(!"noise should have been broken down to function call"); 343 break; 344 345 case ir_binop_bfm: { 346 /* Does not need to be scalarized, since its result will be identical 347 * for all channels. 348 */ 349 ir_rvalue *op0 = get_element(op_var[0], 0); 350 ir_rvalue *op1 = get_element(op_var[1], 0); 351 352 assign(ir, 0, new(mem_ctx) ir_expression(expr->operation, 353 element_type, 354 op0, 355 op1)); 356 break; 357 } 358 359 case ir_binop_ubo_load: 360 assert(!"not yet supported"); 361 break; 362 363 case ir_triop_lrp: 364 case ir_triop_bitfield_extract: 365 for (i = 0; i < vector_elements; i++) { 366 ir_rvalue *op0 = get_element(op_var[0], i); 367 ir_rvalue *op1 = get_element(op_var[1], i); 368 ir_rvalue *op2 = get_element(op_var[2], i); 369 370 assign(ir, i, new(mem_ctx) ir_expression(expr->operation, 371 element_type, 372 op0, 373 op1, 374 op2)); 375 } 376 break; 377 378 case ir_triop_bfi: { 379 /* Only a single BFM is needed for multiple BFIs. 
*/ 380 ir_rvalue *op0 = get_element(op_var[0], 0); 381 382 for (i = 0; i < vector_elements; i++) { 383 ir_rvalue *op1 = get_element(op_var[1], i); 384 ir_rvalue *op2 = get_element(op_var[2], i); 385 386 assign(ir, i, new(mem_ctx) ir_expression(expr->operation, 387 element_type, 388 op0->clone(mem_ctx, NULL), 389 op1, 390 op2)); 391 } 392 break; 393 } 394 395 case ir_unop_pack_snorm_2x16: 396 case ir_unop_pack_snorm_4x8: 397 case ir_unop_pack_unorm_2x16: 398 case ir_unop_pack_unorm_4x8: 399 case ir_unop_pack_half_2x16: 400 case ir_unop_unpack_snorm_2x16: 401 case ir_unop_unpack_snorm_4x8: 402 case ir_unop_unpack_unorm_2x16: 403 case ir_unop_unpack_unorm_4x8: 404 case ir_unop_unpack_half_2x16: 405 case ir_binop_vector_extract: 406 case ir_quadop_bitfield_insert: 407 case ir_quadop_vector: 408 assert(!"should have been lowered"); 409 break; 410 411 case ir_unop_unpack_half_2x16_split_x: 412 case ir_unop_unpack_half_2x16_split_y: 413 case ir_binop_pack_half_2x16_split: 414 assert("!not reached: expression operates on scalars only"); 415 break; 416 } 417 418 ir->remove(); 419 this->progress = true; 420 421 return visit_continue; 422} 423