/* brw_fs_channel_expressions.cpp revision 2ea3ab14f2182978f471674c9dfce029d37f70a7 */
11e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles)/* 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Copyright © 2010 Intel Corporation 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Permission is hereby granted, free of charge, to any person obtaining a 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * copy of this software and associated documentation files (the "Software"), 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * to deal in the Software without restriction, including without limitation 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * the rights to use, copy, modify, merge, publish, distribute, sublicense, 81e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) * and/or sell copies of the Software, and to permit persons to whom the 91e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) * Software is furnished to do so, subject to the following conditions: 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The above copyright notice and this permission notice (including the next 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * paragraph) shall be included in all copies or substantial portions of the 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Software. 141e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) * 152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * DEALINGS IN THE SOFTWARE. 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/** 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * \file brw_wm_channel_expressions.cpp 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * Breaks vector operations down into operations on each component. 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The 965 fragment shader receives 8 or 16 pixels at a time, so each 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * channel of a vector is laid out as 1 or 2 8-float registers. Each 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * ALU operation operates on one of those channel registers. 
As a 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * result, there is no value to the 965 fragment shader in tracking 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * "vector" expressions in the sense of GLSL fragment shaders, when 341e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) * doing a channel at a time may help in constant folding, algebraic 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * simplification, and reducing the liveness of channel registers. 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * The exception to the desire to break everything down to floats is 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * texturing. The texture sampler returns a writemasked masked 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * 4/8-register sequence containing the texture values. We don't want 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * to dispatch to the sampler separately for each channel we need, so 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * we do retain the vector types in that case. 
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)extern "C" { 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "main/core.h" 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "brw_wm.h" 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "glsl/ir.h" 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "glsl/ir_expression_flattening.h" 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "glsl/glsl_types.h" 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class ir_channel_expressions_visitor : public ir_hierarchical_visitor { 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)public: 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ir_channel_expressions_visitor() 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) { 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) this->progress = false; 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) this->mem_ctx = NULL; 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ir_visitor_status visit_leave(ir_assignment *); 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ir_rvalue *get_element(ir_variable *var, unsigned int element); 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void assign(ir_assignment *ir, int elem, ir_rvalue *val); 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 655821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles) bool progress; 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void *mem_ctx; 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static bool 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)channel_expressions_predicate(ir_instruction *ir) 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ir_expression *expr = ir->as_expression(); 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned int i; 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!expr) 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (i = 0; i < expr->get_num_operands(); i++) { 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (expr->operands[i]->type->is_vector()) 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)brw_do_channel_expressions(exec_list *instructions) 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ir_channel_expressions_visitor v; 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard 
Coles) 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* Pull out any matrix expression to a separate assignment to a 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * temp. This will make our handling of the breakdown to 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) * operations on the matrix's vector components much easier. 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */ 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) do_expression_flattening(instructions, channel_expressions_predicate); 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) visit_list_elements(&v, instructions); 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return v.progress; 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 102eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdochir_rvalue * 103eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdochir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem) 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 105eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch ir_dereference *deref; 106eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (var->type->is_scalar()) 1081e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) return new(mem_ctx) ir_dereference_variable(var); 109 110 assert(elem < var->type->components()); 111 deref = new(mem_ctx) ir_dereference_variable(var); 112 return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1); 113} 114 115void 116ir_channel_expressions_visitor::assign(ir_assignment *ir, int elem, ir_rvalue *val) 117{ 118 ir_dereference *lhs = ir->lhs->clone(mem_ctx, NULL); 119 ir_assignment *assign; 120 121 
/* This assign-of-expression should have been generated by the 122 * expression flattening visitor (since we never short circit to 123 * not flatten, even for plain assignments of variables), so the 124 * writemask is always full. 125 */ 126 assert(ir->write_mask == (1 << ir->lhs->type->components()) - 1); 127 128 assign = new(mem_ctx) ir_assignment(lhs, val, NULL, (1 << elem)); 129 ir->insert_before(assign); 130} 131 132ir_visitor_status 133ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) 134{ 135 ir_expression *expr = ir->rhs->as_expression(); 136 bool found_vector = false; 137 unsigned int i, vector_elements = 1; 138 ir_variable *op_var[2]; 139 140 if (!expr) 141 return visit_continue; 142 143 if (!this->mem_ctx) 144 this->mem_ctx = ralloc_parent(ir); 145 146 for (i = 0; i < expr->get_num_operands(); i++) { 147 if (expr->operands[i]->type->is_vector()) { 148 found_vector = true; 149 vector_elements = expr->operands[i]->type->vector_elements; 150 break; 151 } 152 } 153 if (!found_vector) 154 return visit_continue; 155 156 /* Store the expression operands in temps so we can use them 157 * multiple times. 158 */ 159 for (i = 0; i < expr->get_num_operands(); i++) { 160 ir_assignment *assign; 161 ir_dereference *deref; 162 163 assert(!expr->operands[i]->type->is_matrix()); 164 165 op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type, 166 "channel_expressions", 167 ir_var_temporary); 168 ir->insert_before(op_var[i]); 169 170 deref = new(mem_ctx) ir_dereference_variable(op_var[i]); 171 assign = new(mem_ctx) ir_assignment(deref, 172 expr->operands[i], 173 NULL); 174 ir->insert_before(assign); 175 } 176 177 const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type, 178 1, 1); 179 180 /* OK, time to break down this vector operation. 
*/ 181 switch (expr->operation) { 182 case ir_unop_bit_not: 183 case ir_unop_logic_not: 184 case ir_unop_neg: 185 case ir_unop_abs: 186 case ir_unop_sign: 187 case ir_unop_rcp: 188 case ir_unop_rsq: 189 case ir_unop_sqrt: 190 case ir_unop_exp: 191 case ir_unop_log: 192 case ir_unop_exp2: 193 case ir_unop_log2: 194 case ir_unop_bitcast_i2f: 195 case ir_unop_bitcast_f2i: 196 case ir_unop_bitcast_f2u: 197 case ir_unop_bitcast_u2f: 198 case ir_unop_i2u: 199 case ir_unop_u2i: 200 case ir_unop_f2i: 201 case ir_unop_f2u: 202 case ir_unop_i2f: 203 case ir_unop_f2b: 204 case ir_unop_b2f: 205 case ir_unop_i2b: 206 case ir_unop_b2i: 207 case ir_unop_u2f: 208 case ir_unop_trunc: 209 case ir_unop_ceil: 210 case ir_unop_floor: 211 case ir_unop_fract: 212 case ir_unop_round_even: 213 case ir_unop_sin: 214 case ir_unop_cos: 215 case ir_unop_sin_reduced: 216 case ir_unop_cos_reduced: 217 case ir_unop_dFdx: 218 case ir_unop_dFdy: 219 for (i = 0; i < vector_elements; i++) { 220 ir_rvalue *op0 = get_element(op_var[0], i); 221 222 assign(ir, i, new(mem_ctx) ir_expression(expr->operation, 223 element_type, 224 op0, 225 NULL)); 226 } 227 break; 228 229 case ir_binop_add: 230 case ir_binop_sub: 231 case ir_binop_mul: 232 case ir_binop_div: 233 case ir_binop_mod: 234 case ir_binop_min: 235 case ir_binop_max: 236 case ir_binop_pow: 237 case ir_binop_lshift: 238 case ir_binop_rshift: 239 case ir_binop_bit_and: 240 case ir_binop_bit_xor: 241 case ir_binop_bit_or: 242 case ir_binop_less: 243 case ir_binop_greater: 244 case ir_binop_lequal: 245 case ir_binop_gequal: 246 case ir_binop_equal: 247 case ir_binop_nequal: 248 for (i = 0; i < vector_elements; i++) { 249 ir_rvalue *op0 = get_element(op_var[0], i); 250 ir_rvalue *op1 = get_element(op_var[1], i); 251 252 assign(ir, i, new(mem_ctx) ir_expression(expr->operation, 253 element_type, 254 op0, 255 op1)); 256 } 257 break; 258 259 case ir_unop_any: { 260 ir_expression *temp; 261 temp = new(mem_ctx) ir_expression(ir_binop_logic_or, 262 
element_type, 263 get_element(op_var[0], 0), 264 get_element(op_var[0], 1)); 265 266 for (i = 2; i < vector_elements; i++) { 267 temp = new(mem_ctx) ir_expression(ir_binop_logic_or, 268 element_type, 269 get_element(op_var[0], i), 270 temp); 271 } 272 assign(ir, 0, temp); 273 break; 274 } 275 276 case ir_binop_dot: { 277 ir_expression *last = NULL; 278 for (i = 0; i < vector_elements; i++) { 279 ir_rvalue *op0 = get_element(op_var[0], i); 280 ir_rvalue *op1 = get_element(op_var[1], i); 281 ir_expression *temp; 282 283 temp = new(mem_ctx) ir_expression(ir_binop_mul, 284 element_type, 285 op0, 286 op1); 287 if (last) { 288 last = new(mem_ctx) ir_expression(ir_binop_add, 289 element_type, 290 temp, 291 last); 292 } else { 293 last = temp; 294 } 295 } 296 assign(ir, 0, last); 297 break; 298 } 299 300 case ir_binop_logic_and: 301 case ir_binop_logic_xor: 302 case ir_binop_logic_or: 303 ir->print(); 304 printf("\n"); 305 assert(!"not reached: expression operates on scalars only"); 306 break; 307 case ir_binop_all_equal: 308 case ir_binop_any_nequal: { 309 ir_expression *last = NULL; 310 for (i = 0; i < vector_elements; i++) { 311 ir_rvalue *op0 = get_element(op_var[0], i); 312 ir_rvalue *op1 = get_element(op_var[1], i); 313 ir_expression *temp; 314 ir_expression_operation join; 315 316 if (expr->operation == ir_binop_all_equal) 317 join = ir_binop_logic_and; 318 else 319 join = ir_binop_logic_or; 320 321 temp = new(mem_ctx) ir_expression(expr->operation, 322 element_type, 323 op0, 324 op1); 325 if (last) { 326 last = new(mem_ctx) ir_expression(join, 327 element_type, 328 temp, 329 last); 330 } else { 331 last = temp; 332 } 333 } 334 assign(ir, 0, last); 335 break; 336 } 337 case ir_unop_noise: 338 assert(!"noise should have been broken down to function call"); 339 break; 340 341 case ir_binop_ubo_load: 342 assert(!"not yet supported"); 343 break; 344 345 case ir_quadop_vector: 346 assert(!"should have been lowered"); 347 break; 348 } 349 350 ir->remove(); 351 
this->progress = true; 352 353 return visit_continue; 354} 355