1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24/** 25 * \file brw_wm_vector_splitting.cpp 26 * 27 * If a vector is only ever referenced by its components, then 28 * split those components out to individual variables so they can be 29 * handled normally by other optimization passes. 30 * 31 * This skips vectors in uniforms and varyings, which need to be 32 * accessible as vectors for their access by the GL. Also, vector 33 * results of non-variable-derefs in assignments aren't handled 34 * because to do so we would have to store the vector result to a 35 * temporary in order to unload each channel, and to do so would just 36 * loop us back to where we started. For the 965, this is exactly the 37 * behavior we want for the results of texture lookups, but probably not for 38 */ 39 40extern "C" { 41#include "main/core.h" 42#include "intel_context.h" 43} 44#include "glsl/ir.h" 45#include "glsl/ir_visitor.h" 46#include "glsl/ir_print_visitor.h" 47#include "glsl/ir_rvalue_visitor.h" 48#include "glsl/glsl_types.h" 49 50static bool debug = false; 51 52class variable_entry : public exec_node 53{ 54public: 55 variable_entry(ir_variable *var) 56 { 57 this->var = var; 58 this->whole_vector_access = 0; 59 this->declaration = false; 60 this->mem_ctx = NULL; 61 } 62 63 ir_variable *var; /* The key: the variable's pointer. */ 64 65 /** Number of times the variable is referenced, including assignments. */ 66 unsigned whole_vector_access; 67 68 bool declaration; /* If the variable had a decl in the instruction stream */ 69 70 ir_variable *components[4]; 71 72 /** ralloc_parent(this->var) -- the shader's ralloc context. */ 73 void *mem_ctx; 74}; 75 76class ir_vector_reference_visitor : public ir_hierarchical_visitor { 77public: 78 ir_vector_reference_visitor(void) 79 { 80 this->mem_ctx = ralloc_context(NULL); 81 this->variable_list.make_empty(); 82 } 83 84 ~ir_vector_reference_visitor(void) 85 { 86 ralloc_free(mem_ctx); 87 } 88 89 virtual ir_visitor_status visit(ir_variable *); 90 virtual ir_visitor_status visit(ir_dereference_variable *); 91 virtual ir_visitor_status visit_enter(ir_swizzle *); 92 virtual ir_visitor_status visit_enter(ir_assignment *); 93 virtual ir_visitor_status visit_enter(ir_function_signature *); 94 95 variable_entry *get_variable_entry(ir_variable *var); 96 97 /* List of variable_entry */ 98 exec_list variable_list; 99 100 void *mem_ctx; 101}; 102 103variable_entry * 104ir_vector_reference_visitor::get_variable_entry(ir_variable *var) 105{ 106 assert(var); 107 108 if (!var->type->is_vector()) 109 return NULL; 110 111 switch (var->mode) { 112 case ir_var_uniform: 113 case ir_var_in: 114 case ir_var_out: 115 case ir_var_inout: 116 /* Can't split varyings or uniforms. Function in/outs won't get split 117 * either, so don't care about the ambiguity. 118 */ 119 return NULL; 120 case ir_var_auto: 121 case ir_var_temporary: 122 break; 123 } 124 125 foreach_list(node, &this->variable_list) { 126 variable_entry *entry = (variable_entry *)node; 127 if (entry->var == var) 128 return entry; 129 } 130 131 variable_entry *entry = new(mem_ctx) variable_entry(var); 132 this->variable_list.push_tail(entry); 133 return entry; 134} 135 136 137ir_visitor_status 138ir_vector_reference_visitor::visit(ir_variable *ir) 139{ 140 variable_entry *entry = this->get_variable_entry(ir); 141 142 if (entry) 143 entry->declaration = true; 144 145 return visit_continue; 146} 147 148ir_visitor_status 149ir_vector_reference_visitor::visit(ir_dereference_variable *ir) 150{ 151 ir_variable *const var = ir->var; 152 variable_entry *entry = this->get_variable_entry(var); 153 154 if (entry) 155 entry->whole_vector_access++; 156 157 return visit_continue; 158} 159 160ir_visitor_status 161ir_vector_reference_visitor::visit_enter(ir_swizzle *ir) 162{ 163 /* Don't descend into a vector ir_dereference_variable below. */ 164 if (ir->val->as_dereference_variable() && ir->type->is_scalar()) 165 return visit_continue_with_parent; 166 167 return visit_continue; 168} 169 170ir_visitor_status 171ir_vector_reference_visitor::visit_enter(ir_assignment *ir) 172{ 173 if (ir->lhs->as_dereference_variable() && 174 ir->rhs->as_dereference_variable() && 175 !ir->condition) { 176 /* We'll split copies of a vector to copies of channels, so don't 177 * descend to the ir_dereference_variables. 178 */ 179 return visit_continue_with_parent; 180 } 181 if (ir->lhs->as_dereference_variable() && 182 is_power_of_two(ir->write_mask) && 183 !ir->condition) { 184 /* If we're writing just a channel, then channel-splitting the LHS is OK. 185 */ 186 ir->rhs->accept(this); 187 return visit_continue_with_parent; 188 } 189 return visit_continue; 190} 191 192ir_visitor_status 193ir_vector_reference_visitor::visit_enter(ir_function_signature *ir) 194{ 195 /* We don't want to descend into the function parameters and 196 * split them, so just accept the body here. 197 */ 198 visit_list_elements(this, &ir->body); 199 return visit_continue_with_parent; 200} 201 202class ir_vector_splitting_visitor : public ir_rvalue_visitor { 203public: 204 ir_vector_splitting_visitor(exec_list *vars) 205 { 206 this->variable_list = vars; 207 } 208 209 virtual ir_visitor_status visit_leave(ir_assignment *); 210 211 void handle_rvalue(ir_rvalue **rvalue); 212 variable_entry *get_splitting_entry(ir_variable *var); 213 214 exec_list *variable_list; 215}; 216 217variable_entry * 218ir_vector_splitting_visitor::get_splitting_entry(ir_variable *var) 219{ 220 assert(var); 221 222 if (!var->type->is_vector()) 223 return NULL; 224 225 foreach_list(node, &*this->variable_list) { 226 variable_entry *entry = (variable_entry *)node; 227 if (entry->var == var) { 228 return entry; 229 } 230 } 231 232 return NULL; 233} 234 235void 236ir_vector_splitting_visitor::handle_rvalue(ir_rvalue **rvalue) 237{ 238 if (!*rvalue) 239 return; 240 241 ir_swizzle *swiz = (*rvalue)->as_swizzle(); 242 if (!swiz || !swiz->type->is_scalar()) 243 return; 244 245 ir_dereference_variable *deref_var = swiz->val->as_dereference_variable(); 246 if (!deref_var) 247 return; 248 249 variable_entry *entry = get_splitting_entry(deref_var->var); 250 if (!entry) 251 return; 252 253 ir_variable *var = entry->components[swiz->mask.x]; 254 *rvalue = new(entry->mem_ctx) ir_dereference_variable(var); 255} 256 257ir_visitor_status 258ir_vector_splitting_visitor::visit_leave(ir_assignment *ir) 259{ 260 ir_dereference_variable *lhs_deref = ir->lhs->as_dereference_variable(); 261 ir_dereference_variable *rhs_deref = ir->rhs->as_dereference_variable(); 262 variable_entry *lhs = lhs_deref ? get_splitting_entry(lhs_deref->var) : NULL; 263 variable_entry *rhs = rhs_deref ? get_splitting_entry(rhs_deref->var) : NULL; 264 265 if (lhs_deref && rhs_deref && (lhs || rhs) && !ir->condition) { 266 unsigned int rhs_chan = 0; 267 268 /* Straight assignment of vector variables. */ 269 for (unsigned int i = 0; i < ir->lhs->type->vector_elements; i++) { 270 ir_dereference *new_lhs; 271 ir_rvalue *new_rhs; 272 void *mem_ctx = lhs ? lhs->mem_ctx : rhs->mem_ctx; 273 unsigned int writemask; 274 275 if (!(ir->write_mask & (1 << i))) 276 continue; 277 278 if (lhs) { 279 new_lhs = new(mem_ctx) ir_dereference_variable(lhs->components[i]); 280 writemask = 1; 281 } else { 282 new_lhs = ir->lhs->clone(mem_ctx, NULL); 283 writemask = 1 << i; 284 } 285 286 if (rhs) { 287 new_rhs = 288 new(mem_ctx) ir_dereference_variable(rhs->components[rhs_chan]); 289 } else { 290 new_rhs = new(mem_ctx) ir_swizzle(ir->rhs->clone(mem_ctx, NULL), 291 rhs_chan, 0, 0, 0, 1); 292 } 293 294 ir->insert_before(new(mem_ctx) ir_assignment(new_lhs, 295 new_rhs, 296 NULL, writemask)); 297 298 rhs_chan++; 299 } 300 ir->remove(); 301 } else if (lhs) { 302 void *mem_ctx = lhs->mem_ctx; 303 int elem = -1; 304 305 switch (ir->write_mask) { 306 case (1 << 0): 307 elem = 0; 308 break; 309 case (1 << 1): 310 elem = 1; 311 break; 312 case (1 << 2): 313 elem = 2; 314 break; 315 case (1 << 3): 316 elem = 3; 317 break; 318 default: 319 ir->print(); 320 assert(!"not reached: non-channelwise dereference of LHS."); 321 } 322 323 ir->lhs = new(mem_ctx) ir_dereference_variable(lhs->components[elem]); 324 ir->write_mask = (1 << 0); 325 326 handle_rvalue(&ir->rhs); 327 } else { 328 handle_rvalue(&ir->rhs); 329 } 330 331 handle_rvalue(&ir->condition); 332 333 return visit_continue; 334} 335 336bool 337brw_do_vector_splitting(exec_list *instructions) 338{ 339 ir_vector_reference_visitor refs; 340 341 visit_list_elements(&refs, instructions); 342 343 /* Trim out variables we can't split. */ 344 foreach_list_safe(node, &refs.variable_list) { 345 variable_entry *entry = (variable_entry *)node; 346 347 if (debug) { 348 printf("vector %s@%p: decl %d, whole_access %d\n", 349 entry->var->name, (void *) entry->var, entry->declaration, 350 entry->whole_vector_access); 351 } 352 353 if (!entry->declaration || entry->whole_vector_access) { 354 entry->remove(); 355 } 356 } 357 358 if (refs.variable_list.is_empty()) 359 return false; 360 361 void *mem_ctx = ralloc_context(NULL); 362 363 /* Replace the decls of the vectors to be split with their split 364 * components. 365 */ 366 foreach_list(node, &refs.variable_list) { 367 variable_entry *entry = (variable_entry *)node; 368 const struct glsl_type *type; 369 type = glsl_type::get_instance(entry->var->type->base_type, 1, 1); 370 371 entry->mem_ctx = ralloc_parent(entry->var); 372 373 for (unsigned int i = 0; i < entry->var->type->vector_elements; i++) { 374 const char *name = ralloc_asprintf(mem_ctx, "%s_%c", 375 entry->var->name, 376 "xyzw"[i]); 377 378 entry->components[i] = new(entry->mem_ctx) ir_variable(type, name, 379 ir_var_temporary); 380 entry->var->insert_before(entry->components[i]); 381 } 382 383 entry->var->remove(); 384 } 385 386 ir_vector_splitting_visitor split(&refs.variable_list); 387 visit_list_elements(&split, instructions); 388 389 ralloc_free(mem_ctx); 390 391 return true; 392} 393