/* brw_nir.c — revision 2d8a3fa7ea994ad02a40ff497109f966e3fcbeec */
1/* 2 * Copyright © 2014 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "brw_nir.h" 25#include "brw_shader.h" 26#include "compiler/glsl_types.h" 27#include "compiler/nir/nir_builder.h" 28 29static bool 30is_input(nir_intrinsic_instr *intrin) 31{ 32 return intrin->intrinsic == nir_intrinsic_load_input || 33 intrin->intrinsic == nir_intrinsic_load_per_vertex_input || 34 intrin->intrinsic == nir_intrinsic_load_interpolated_input; 35} 36 37static bool 38is_output(nir_intrinsic_instr *intrin) 39{ 40 return intrin->intrinsic == nir_intrinsic_load_output || 41 intrin->intrinsic == nir_intrinsic_load_per_vertex_output || 42 intrin->intrinsic == nir_intrinsic_store_output || 43 intrin->intrinsic == nir_intrinsic_store_per_vertex_output; 44} 45 46/** 47 * In many cases, we just add the base and offset together, so there's no 48 * reason to keep them separate. 
Sometimes, combining them is essential: 49 * if a shader only accesses part of a compound variable (such as a matrix 50 * or array), the variable's base may not actually exist in the VUE map. 51 * 52 * This pass adds constant offsets to instr->const_index[0], and resets 53 * the offset source to 0. Non-constant offsets remain unchanged - since 54 * we don't know what part of a compound variable is accessed, we allocate 55 * storage for the entire thing. 56 */ 57 58static bool 59add_const_offset_to_base_block(nir_block *block, nir_builder *b, 60 nir_variable_mode mode) 61{ 62 nir_foreach_instr_safe(instr, block) { 63 if (instr->type != nir_instr_type_intrinsic) 64 continue; 65 66 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 67 68 if ((mode == nir_var_shader_in && is_input(intrin)) || 69 (mode == nir_var_shader_out && is_output(intrin))) { 70 nir_src *offset = nir_get_io_offset_src(intrin); 71 nir_const_value *const_offset = nir_src_as_const_value(*offset); 72 73 if (const_offset) { 74 intrin->const_index[0] += const_offset->u32[0]; 75 b->cursor = nir_before_instr(&intrin->instr); 76 nir_instr_rewrite_src(&intrin->instr, offset, 77 nir_src_for_ssa(nir_imm_int(b, 0))); 78 } 79 } 80 } 81 return true; 82} 83 84static void 85add_const_offset_to_base(nir_shader *nir, nir_variable_mode mode) 86{ 87 nir_foreach_function(f, nir) { 88 if (f->impl) { 89 nir_builder b; 90 nir_builder_init(&b, f->impl); 91 nir_foreach_block(block, f->impl) { 92 add_const_offset_to_base_block(block, &b, mode); 93 } 94 } 95 } 96} 97 98static bool 99remap_vs_attrs(nir_block *block, struct nir_shader_info *nir_info) 100{ 101 nir_foreach_instr(instr, block) { 102 if (instr->type != nir_instr_type_intrinsic) 103 continue; 104 105 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 106 107 if (intrin->intrinsic == nir_intrinsic_load_input) { 108 /* Attributes come in a contiguous block, ordered by their 109 * gl_vert_attrib value. 
That means we can compute the slot 110 * number for an attribute by masking out the enabled attributes 111 * before it and counting the bits. 112 */ 113 int attr = intrin->const_index[0]; 114 int slot = _mesa_bitcount_64(nir_info->inputs_read & 115 BITFIELD64_MASK(attr)); 116 int dslot = _mesa_bitcount_64(nir_info->double_inputs_read & 117 BITFIELD64_MASK(attr)); 118 intrin->const_index[0] = 4 * (slot + dslot); 119 } 120 } 121 return true; 122} 123 124static bool 125remap_inputs_with_vue_map(nir_block *block, const struct brw_vue_map *vue_map) 126{ 127 nir_foreach_instr(instr, block) { 128 if (instr->type != nir_instr_type_intrinsic) 129 continue; 130 131 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 132 133 if (intrin->intrinsic == nir_intrinsic_load_input || 134 intrin->intrinsic == nir_intrinsic_load_per_vertex_input) { 135 int vue_slot = vue_map->varying_to_slot[intrin->const_index[0]]; 136 assert(vue_slot != -1); 137 intrin->const_index[0] = vue_slot; 138 } 139 } 140 return true; 141} 142 143static bool 144remap_patch_urb_offsets(nir_block *block, nir_builder *b, 145 const struct brw_vue_map *vue_map) 146{ 147 nir_foreach_instr_safe(instr, block) { 148 if (instr->type != nir_instr_type_intrinsic) 149 continue; 150 151 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 152 153 gl_shader_stage stage = b->shader->stage; 154 155 if ((stage == MESA_SHADER_TESS_CTRL && is_output(intrin)) || 156 (stage == MESA_SHADER_TESS_EVAL && is_input(intrin))) { 157 int vue_slot = vue_map->varying_to_slot[intrin->const_index[0]]; 158 assert(vue_slot != -1); 159 intrin->const_index[0] = vue_slot; 160 161 nir_src *vertex = nir_get_io_vertex_index_src(intrin); 162 if (vertex) { 163 nir_const_value *const_vertex = nir_src_as_const_value(*vertex); 164 if (const_vertex) { 165 intrin->const_index[0] += const_vertex->u32[0] * 166 vue_map->num_per_vertex_slots; 167 } else { 168 b->cursor = nir_before_instr(&intrin->instr); 169 170 /* Multiply by the number of 
per-vertex slots. */ 171 nir_ssa_def *vertex_offset = 172 nir_imul(b, 173 nir_ssa_for_src(b, *vertex, 1), 174 nir_imm_int(b, 175 vue_map->num_per_vertex_slots)); 176 177 /* Add it to the existing offset */ 178 nir_src *offset = nir_get_io_offset_src(intrin); 179 nir_ssa_def *total_offset = 180 nir_iadd(b, vertex_offset, 181 nir_ssa_for_src(b, *offset, 1)); 182 183 nir_instr_rewrite_src(&intrin->instr, offset, 184 nir_src_for_ssa(total_offset)); 185 } 186 } 187 } 188 } 189 return true; 190} 191 192void 193brw_nir_lower_vs_inputs(nir_shader *nir, 194 const struct gen_device_info *devinfo, 195 bool is_scalar, 196 bool use_legacy_snorm_formula, 197 const uint8_t *vs_attrib_wa_flags) 198{ 199 /* Start with the location of the variable's base. */ 200 foreach_list_typed(nir_variable, var, node, &nir->inputs) { 201 var->data.driver_location = var->data.location; 202 } 203 204 /* Now use nir_lower_io to walk dereference chains. Attribute arrays are 205 * loaded as one vec4 or dvec4 per element (or matrix column), depending on 206 * whether it is a double-precision type or not. 207 */ 208 nir_lower_io(nir, nir_var_shader_in, type_size_vs_input); 209 210 /* This pass needs actual constants */ 211 nir_opt_constant_folding(nir); 212 213 add_const_offset_to_base(nir, nir_var_shader_in); 214 215 brw_nir_apply_attribute_workarounds(nir, use_legacy_snorm_formula, 216 vs_attrib_wa_flags); 217 218 if (is_scalar) { 219 /* Finally, translate VERT_ATTRIB_* values into the actual registers. */ 220 221 nir_foreach_function(function, nir) { 222 if (function->impl) { 223 nir_foreach_block(block, function->impl) { 224 remap_vs_attrs(block, &nir->info); 225 } 226 } 227 } 228 } 229} 230 231void 232brw_nir_lower_vue_inputs(nir_shader *nir, bool is_scalar, 233 const struct brw_vue_map *vue_map) 234{ 235 foreach_list_typed(nir_variable, var, node, &nir->inputs) { 236 var->data.driver_location = var->data.location; 237 } 238 239 /* Inputs are stored in vec4 slots, so use type_size_vec4(). 
*/ 240 nir_lower_io(nir, nir_var_shader_in, type_size_vec4); 241 242 if (is_scalar || nir->stage != MESA_SHADER_GEOMETRY) { 243 /* This pass needs actual constants */ 244 nir_opt_constant_folding(nir); 245 246 add_const_offset_to_base(nir, nir_var_shader_in); 247 248 nir_foreach_function(function, nir) { 249 if (function->impl) { 250 nir_foreach_block(block, function->impl) { 251 remap_inputs_with_vue_map(block, vue_map); 252 } 253 } 254 } 255 } 256} 257 258void 259brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue_map) 260{ 261 foreach_list_typed(nir_variable, var, node, &nir->inputs) { 262 var->data.driver_location = var->data.location; 263 } 264 265 nir_lower_io(nir, nir_var_shader_in, type_size_vec4); 266 267 /* This pass needs actual constants */ 268 nir_opt_constant_folding(nir); 269 270 add_const_offset_to_base(nir, nir_var_shader_in); 271 272 nir_foreach_function(function, nir) { 273 if (function->impl) { 274 nir_builder b; 275 nir_builder_init(&b, function->impl); 276 nir_foreach_block(block, function->impl) { 277 remap_patch_urb_offsets(block, &b, vue_map); 278 } 279 } 280 } 281} 282 283void 284brw_nir_lower_fs_inputs(nir_shader *nir) 285{ 286 foreach_list_typed(nir_variable, var, node, &nir->inputs) { 287 var->data.driver_location = var->data.location; 288 } 289 290 nir_lower_io(nir, nir_var_shader_in, type_size_vec4); 291 292 /* This pass needs actual constants */ 293 nir_opt_constant_folding(nir); 294 295 add_const_offset_to_base(nir, nir_var_shader_in); 296} 297 298void 299brw_nir_lower_vue_outputs(nir_shader *nir, 300 bool is_scalar) 301{ 302 if (is_scalar) { 303 nir_assign_var_locations(&nir->outputs, &nir->num_outputs, 304 VARYING_SLOT_VAR0, 305 type_size_vec4_times_4); 306 nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4); 307 } else { 308 nir_foreach_variable(var, &nir->outputs) 309 var->data.driver_location = var->data.location; 310 nir_lower_io(nir, nir_var_shader_out, type_size_vec4); 311 } 312} 313 314void 
315brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map *vue_map) 316{ 317 nir_foreach_variable(var, &nir->outputs) { 318 var->data.driver_location = var->data.location; 319 } 320 321 nir_lower_io(nir, nir_var_shader_out, type_size_vec4); 322 323 /* This pass needs actual constants */ 324 nir_opt_constant_folding(nir); 325 326 add_const_offset_to_base(nir, nir_var_shader_out); 327 328 nir_foreach_function(function, nir) { 329 if (function->impl) { 330 nir_builder b; 331 nir_builder_init(&b, function->impl); 332 nir_foreach_block(block, function->impl) { 333 remap_patch_urb_offsets(block, &b, vue_map); 334 } 335 } 336 } 337} 338 339void 340brw_nir_lower_fs_outputs(nir_shader *nir) 341{ 342 nir_foreach_variable(var, &nir->outputs) { 343 var->data.driver_location = 344 SET_FIELD(var->data.index, BRW_NIR_FRAG_OUTPUT_INDEX) | 345 SET_FIELD(var->data.location, BRW_NIR_FRAG_OUTPUT_LOCATION); 346 } 347 348 nir_lower_io(nir, nir_var_shader_out, type_size_dvec4); 349} 350 351void 352brw_nir_lower_cs_shared(nir_shader *nir) 353{ 354 nir_assign_var_locations(&nir->shared, &nir->num_shared, 0, 355 type_size_scalar_bytes); 356 nir_lower_io(nir, nir_var_shared, type_size_scalar_bytes); 357} 358 359#define OPT(pass, ...) ({ \ 360 bool this_progress = false; \ 361 NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ 362 if (this_progress) \ 363 progress = true; \ 364 this_progress; \ 365}) 366 367#define OPT_V(pass, ...) 
NIR_PASS_V(nir, pass, ##__VA_ARGS__) 368 369static nir_shader * 370nir_optimize(nir_shader *nir, bool is_scalar) 371{ 372 bool progress; 373 do { 374 progress = false; 375 OPT_V(nir_lower_vars_to_ssa); 376 377 if (is_scalar) { 378 OPT(nir_lower_alu_to_scalar); 379 } 380 381 OPT(nir_copy_prop); 382 383 if (is_scalar) { 384 OPT(nir_lower_phis_to_scalar); 385 } 386 387 OPT(nir_copy_prop); 388 OPT(nir_opt_dce); 389 OPT(nir_opt_cse); 390 OPT(nir_opt_peephole_select); 391 OPT(nir_opt_algebraic); 392 OPT(nir_opt_constant_folding); 393 OPT(nir_opt_dead_cf); 394 OPT(nir_opt_remove_phis); 395 OPT(nir_opt_undef); 396 OPT_V(nir_lower_doubles, nir_lower_drcp | 397 nir_lower_dsqrt | 398 nir_lower_drsq | 399 nir_lower_dtrunc | 400 nir_lower_dfloor | 401 nir_lower_dceil | 402 nir_lower_dfract | 403 nir_lower_dround_even | 404 nir_lower_dmod); 405 OPT_V(nir_lower_double_pack); 406 } while (progress); 407 408 return nir; 409} 410 411/* Does some simple lowering and runs the standard suite of optimizations 412 * 413 * This is intended to be called more-or-less directly after you get the 414 * shader out of GLSL or some other source. While it is geared towards i965, 415 * it is not at all generator-specific except for the is_scalar flag. Even 416 * there, it is safe to call with is_scalar = false for a shader that is 417 * intended for the FS backend as long as nir_optimize is called again with 418 * is_scalar = true to scalarize everything prior to code gen. 
419 */ 420nir_shader * 421brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir) 422{ 423 bool progress; /* Written by OPT and OPT_V */ 424 (void)progress; 425 426 const bool is_scalar = compiler->scalar_stage[nir->stage]; 427 428 if (nir->stage == MESA_SHADER_GEOMETRY) 429 OPT(nir_lower_gs_intrinsics); 430 431 if (compiler->precise_trig) 432 OPT(brw_nir_apply_trig_workarounds); 433 434 static const nir_lower_tex_options tex_options = { 435 .lower_txp = ~0, 436 .lower_txf_offset = true, 437 .lower_rect_offset = true, 438 }; 439 440 OPT(nir_lower_tex, &tex_options); 441 OPT(nir_normalize_cubemap_coords); 442 443 OPT(nir_lower_global_vars_to_local); 444 445 OPT(nir_split_var_copies); 446 447 nir = nir_optimize(nir, is_scalar); 448 449 if (is_scalar) { 450 OPT_V(nir_lower_load_const_to_scalar); 451 } 452 453 /* Lower a bunch of stuff */ 454 OPT_V(nir_lower_var_copies); 455 456 /* Get rid of split copies */ 457 nir = nir_optimize(nir, is_scalar); 458 459 OPT(nir_remove_dead_variables, nir_var_local); 460 461 return nir; 462} 463 464/* Prepare the given shader for codegen 465 * 466 * This function is intended to be called right before going into the actual 467 * backend and is highly backend-specific. Also, once this function has been 468 * called on a shader, it will no longer be in SSA form so most optimizations 469 * will not work. 
470 */ 471nir_shader * 472brw_postprocess_nir(nir_shader *nir, 473 const struct gen_device_info *devinfo, 474 bool is_scalar) 475{ 476 bool debug_enabled = 477 (INTEL_DEBUG & intel_debug_flag_for_shader_stage(nir->stage)); 478 479 bool progress; /* Written by OPT and OPT_V */ 480 (void)progress; 481 482 nir = nir_optimize(nir, is_scalar); 483 484 if (devinfo->gen >= 6) { 485 /* Try and fuse multiply-adds */ 486 OPT(brw_nir_opt_peephole_ffma); 487 } 488 489 OPT(nir_opt_algebraic_late); 490 491 OPT(nir_lower_locals_to_regs); 492 493 OPT_V(nir_lower_to_source_mods); 494 OPT(nir_copy_prop); 495 OPT(nir_opt_dce); 496 497 if (unlikely(debug_enabled)) { 498 /* Re-index SSA defs so we print more sensible numbers. */ 499 nir_foreach_function(function, nir) { 500 if (function->impl) 501 nir_index_ssa_defs(function->impl); 502 } 503 504 fprintf(stderr, "NIR (SSA form) for %s shader:\n", 505 _mesa_shader_stage_to_string(nir->stage)); 506 nir_print_shader(nir, stderr); 507 } 508 509 OPT_V(nir_convert_from_ssa, true); 510 511 if (!is_scalar) { 512 OPT_V(nir_move_vec_src_uses_to_dest); 513 OPT(nir_lower_vec_to_movs); 514 } 515 516 /* This is the last pass we run before we start emitting stuff. It 517 * determines when we need to insert boolean resolves on Gen <= 5. We 518 * run it last because it stashes data in instr->pass_flags and we don't 519 * want that to be squashed by other NIR passes. 
520 */ 521 if (devinfo->gen <= 5) 522 brw_nir_analyze_boolean_resolves(nir); 523 524 nir_sweep(nir); 525 526 if (unlikely(debug_enabled)) { 527 fprintf(stderr, "NIR (final form) for %s shader:\n", 528 _mesa_shader_stage_to_string(nir->stage)); 529 nir_print_shader(nir, stderr); 530 } 531 532 return nir; 533} 534 535nir_shader * 536brw_nir_apply_sampler_key(nir_shader *nir, 537 const struct gen_device_info *devinfo, 538 const struct brw_sampler_prog_key_data *key_tex, 539 bool is_scalar) 540{ 541 nir_lower_tex_options tex_options = { 0 }; 542 543 /* Iron Lake and prior require lowering of all rectangle textures */ 544 if (devinfo->gen < 6) 545 tex_options.lower_rect = true; 546 547 /* Prior to Broadwell, our hardware can't actually do GL_CLAMP */ 548 if (devinfo->gen < 8) { 549 tex_options.saturate_s = key_tex->gl_clamp_mask[0]; 550 tex_options.saturate_t = key_tex->gl_clamp_mask[1]; 551 tex_options.saturate_r = key_tex->gl_clamp_mask[2]; 552 } 553 554 /* Prior to Haswell, we have to fake texture swizzle */ 555 for (unsigned s = 0; s < MAX_SAMPLERS; s++) { 556 if (key_tex->swizzles[s] == SWIZZLE_NOOP) 557 continue; 558 559 tex_options.swizzle_result |= (1 << s); 560 for (unsigned c = 0; c < 4; c++) 561 tex_options.swizzles[s][c] = GET_SWZ(key_tex->swizzles[s], c); 562 } 563 564 tex_options.lower_y_uv_external = key_tex->y_uv_image_mask; 565 tex_options.lower_y_u_v_external = key_tex->y_u_v_image_mask; 566 tex_options.lower_yx_xuxv_external = key_tex->yx_xuxv_image_mask; 567 568 if (nir_lower_tex(nir, &tex_options)) { 569 nir_validate_shader(nir); 570 nir = nir_optimize(nir, is_scalar); 571 } 572 573 return nir; 574} 575 576enum brw_reg_type 577brw_type_for_nir_type(nir_alu_type type) 578{ 579 switch (type) { 580 case nir_type_uint: 581 case nir_type_uint32: 582 return BRW_REGISTER_TYPE_UD; 583 case nir_type_bool: 584 case nir_type_int: 585 case nir_type_bool32: 586 case nir_type_int32: 587 return BRW_REGISTER_TYPE_D; 588 case nir_type_float: 589 case 
nir_type_float32: 590 return BRW_REGISTER_TYPE_F; 591 case nir_type_float64: 592 return BRW_REGISTER_TYPE_DF; 593 case nir_type_int64: 594 case nir_type_uint64: 595 /* TODO we should only see these in moves, so for now it's ok, but when 596 * we add actual 64-bit integer support we should fix this. 597 */ 598 return BRW_REGISTER_TYPE_DF; 599 default: 600 unreachable("unknown type"); 601 } 602 603 return BRW_REGISTER_TYPE_F; 604} 605 606/* Returns the glsl_base_type corresponding to a nir_alu_type. 607 * This is used by both brw_vec4_nir and brw_fs_nir. 608 */ 609enum glsl_base_type 610brw_glsl_base_type_for_nir_type(nir_alu_type type) 611{ 612 switch (type) { 613 case nir_type_float: 614 case nir_type_float32: 615 return GLSL_TYPE_FLOAT; 616 617 case nir_type_float64: 618 return GLSL_TYPE_DOUBLE; 619 620 case nir_type_int: 621 case nir_type_int32: 622 return GLSL_TYPE_INT; 623 624 case nir_type_uint: 625 case nir_type_uint32: 626 return GLSL_TYPE_UINT; 627 628 default: 629 unreachable("bad type"); 630 } 631} 632