brw_nir.c revision b0fb08e179d784ca319c3c547a874fd24ce93c3f
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_nir.h"
#include "brw_shader.h"
#include "compiler/nir/glsl_to_nir.h"
#include "compiler/nir/nir_builder.h"
#include "program/prog_to_nir.h"

static bool
is_input(nir_intrinsic_instr *intrin)
{
   return intrin->intrinsic == nir_intrinsic_load_input ||
          intrin->intrinsic == nir_intrinsic_load_per_vertex_input;
}

static bool
is_output(nir_intrinsic_instr *intrin)
{
   return intrin->intrinsic == nir_intrinsic_load_output ||
          intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
          intrin->intrinsic == nir_intrinsic_store_output ||
          intrin->intrinsic == nir_intrinsic_store_per_vertex_output;
}

/**
 * In many cases, we just add the base and offset together, so there's no
 * reason to keep them separate.  Sometimes, combining them is essential:
 * if a shader only accesses part of a compound variable (such as a matrix
 * or array), the variable's base may not actually exist in the VUE map.
 *
 * This pass adds constant offsets to instr->const_index[0], and resets
 * the offset source to 0.  Non-constant offsets remain unchanged - since
 * we don't know what part of a compound variable is accessed, we allocate
 * storage for the entire thing.
 */
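/* Illustrative example (values chosen for explanation, not taken from the
 * original source): a load_input with const_index[0] == 16 and a constant
 * offset source of 2 ends up with const_index[0] == 18 and its offset
 * source rewritten to an immediate 0.
 */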

static bool
add_const_offset_to_base_block(nir_block *block, nir_builder *b,
                               nir_variable_mode mode)
{
   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      if ((mode == nir_var_shader_in && is_input(intrin)) ||
          (mode == nir_var_shader_out && is_output(intrin))) {
         nir_src *offset = nir_get_io_offset_src(intrin);
         nir_const_value *const_offset = nir_src_as_const_value(*offset);

         if (const_offset) {
            intrin->const_index[0] += const_offset->u32[0];
            b->cursor = nir_before_instr(&intrin->instr);
            nir_instr_rewrite_src(&intrin->instr, offset,
                                  nir_src_for_ssa(nir_imm_int(b, 0)));
         }
      }
   }
   return true;
}

static void
add_const_offset_to_base(nir_shader *nir, nir_variable_mode mode)
{
   nir_foreach_function(f, nir) {
      if (f->impl) {
         nir_builder b;
         nir_builder_init(&b, f->impl);
         nir_foreach_block(block, f->impl) {
            add_const_offset_to_base_block(block, &b, mode);
         }
      }
   }
}

static bool
remap_vs_attrs(nir_block *block, struct nir_shader_info *nir_info)
{
   nir_foreach_instr(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      if (intrin->intrinsic == nir_intrinsic_load_input) {
         /* Attributes come in a contiguous block, ordered by their
          * gl_vert_attrib value.  That means we can compute the slot
          * number for an attribute by masking out the enabled attributes
          * before it and counting the bits.
          */
         int attr = intrin->const_index[0];
         int slot = _mesa_bitcount_64(nir_info->inputs_read &
                                      BITFIELD64_MASK(attr));
         int dslot = _mesa_bitcount_64(nir_info->double_inputs_read &
                                       BITFIELD64_MASK(attr));
         intrin->const_index[0] = 4 * (slot + dslot);
      }
   }
   return true;
}

static bool
remap_inputs_with_vue_map(nir_block *block, const struct brw_vue_map *vue_map)
{
   nir_foreach_instr(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      if (intrin->intrinsic == nir_intrinsic_load_input ||
          intrin->intrinsic == nir_intrinsic_load_per_vertex_input) {
         int vue_slot = vue_map->varying_to_slot[intrin->const_index[0]];
         assert(vue_slot != -1);
         intrin->const_index[0] = vue_slot;
      }
   }
   return true;
}

static bool
remap_patch_urb_offsets(nir_block *block, nir_builder *b,
                        const struct brw_vue_map *vue_map)
{
   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      gl_shader_stage stage = b->shader->stage;

      if ((stage == MESA_SHADER_TESS_CTRL && is_output(intrin)) ||
          (stage == MESA_SHADER_TESS_EVAL && is_input(intrin))) {
         int vue_slot = vue_map->varying_to_slot[intrin->const_index[0]];
         assert(vue_slot != -1);
         intrin->const_index[0] = vue_slot;

         nir_src *vertex = nir_get_io_vertex_index_src(intrin);
         if (vertex) {
            nir_const_value *const_vertex = nir_src_as_const_value(*vertex);
            if (const_vertex) {
               intrin->const_index[0] += const_vertex->u32[0] *
                                         vue_map->num_per_vertex_slots;
            } else {
               b->cursor = nir_before_instr(&intrin->instr);

               /* Multiply by the number of per-vertex slots. */
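               /* For example (illustrative numbers): with 8 per-vertex
                * slots, vertex index v and within-vertex offset o become
                * v * 8 + o slots, added on top of the remapped base slot
                * stored in const_index[0].
                */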
               nir_ssa_def *vertex_offset =
                  nir_imul(b,
                           nir_ssa_for_src(b, *vertex, 1),
                           nir_imm_int(b,
                                       vue_map->num_per_vertex_slots));

               /* Add it to the existing offset */
               nir_src *offset = nir_get_io_offset_src(intrin);
               nir_ssa_def *total_offset =
                  nir_iadd(b, vertex_offset,
                           nir_ssa_for_src(b, *offset, 1));

               nir_instr_rewrite_src(&intrin->instr, offset,
                                     nir_src_for_ssa(total_offset));
            }
         }
      }
   }
   return true;
}

void
brw_nir_lower_vs_inputs(nir_shader *nir,
                        const struct brw_device_info *devinfo,
                        bool is_scalar,
                        bool use_legacy_snorm_formula,
                        const uint8_t *vs_attrib_wa_flags)
{
   /* Start with the location of the variable's base. */
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location = var->data.location;
   }

   /* Now use nir_lower_io to walk dereference chains.  Attribute arrays are
    * loaded as one vec4 or dvec4 per element (or matrix column), depending on
    * whether it is a double-precision type or not.
    */
   nir_lower_io(nir, nir_var_shader_in, type_size_vec4);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_in);

   brw_nir_apply_attribute_workarounds(nir, use_legacy_snorm_formula,
                                       vs_attrib_wa_flags);

   if (is_scalar) {
      /* Finally, translate VERT_ATTRIB_* values into the actual registers. */

      nir_foreach_function(function, nir) {
         if (function->impl) {
            nir_foreach_block(block, function->impl) {
               remap_vs_attrs(block, &nir->info);
            }
         }
      }
   }
}

void
brw_nir_lower_vue_inputs(nir_shader *nir, bool is_scalar,
                         const struct brw_vue_map *vue_map)
{
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location = var->data.location;
   }

   /* Inputs are stored in vec4 slots, so use type_size_vec4(). */
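   /* (type_size_vec4() reports the number of vec4 slots a variable of the
    * given type occupies.)
    */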
   nir_lower_io(nir, nir_var_shader_in, type_size_vec4);

   if (is_scalar || nir->stage != MESA_SHADER_GEOMETRY) {
      /* This pass needs actual constants */
      nir_opt_constant_folding(nir);

      add_const_offset_to_base(nir, nir_var_shader_in);

      nir_foreach_function(function, nir) {
         if (function->impl) {
            nir_foreach_block(block, function->impl) {
               remap_inputs_with_vue_map(block, vue_map);
            }
         }
      }
   }
}

void
brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue_map)
{
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location = var->data.location;
   }

   nir_lower_io(nir, nir_var_shader_in, type_size_vec4);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_in);

   nir_foreach_function(function, nir) {
      if (function->impl) {
         nir_builder b;
         nir_builder_init(&b, function->impl);
         nir_foreach_block(block, function->impl) {
            remap_patch_urb_offsets(block, &b, vue_map);
         }
      }
   }
}

void
brw_nir_lower_fs_inputs(nir_shader *nir)
{
   nir_assign_var_locations(&nir->inputs, &nir->num_inputs, type_size_scalar);
   nir_lower_io(nir, nir_var_shader_in, type_size_scalar);
}

void
brw_nir_lower_vue_outputs(nir_shader *nir,
                          bool is_scalar)
{
   if (is_scalar) {
      nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
                               type_size_vec4_times_4);
      nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4);
   } else {
      nir_foreach_variable(var, &nir->outputs)
         var->data.driver_location = var->data.location;
      nir_lower_io(nir, nir_var_shader_out, type_size_vec4);
   }
}

void
brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map *vue_map)
{
   nir_foreach_variable(var, &nir->outputs) {
      var->data.driver_location = var->data.location;
   }

   nir_lower_io(nir, nir_var_shader_out, type_size_vec4);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_out);

   nir_foreach_function(function, nir) {
      if (function->impl) {
         nir_builder b;
         nir_builder_init(&b, function->impl);
         nir_foreach_block(block, function->impl) {
            remap_patch_urb_offsets(block, &b, vue_map);
         }
      }
   }
}

void
brw_nir_lower_fs_outputs(nir_shader *nir)
{
   nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
                            type_size_scalar);
   nir_lower_io(nir, nir_var_shader_out, type_size_scalar);
}

static int
type_size_scalar_bytes(const struct glsl_type *type)
{
   return type_size_scalar(type) * 4;
}

static int
type_size_vec4_bytes(const struct glsl_type *type)
{
   return type_size_vec4(type) * 16;
}

static void
brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
{
   if (is_scalar) {
      nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
                               type_size_scalar_bytes);
      nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes);
   } else {
      nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
                               type_size_vec4_bytes);
      nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes);
   }
}

void
brw_nir_lower_cs_shared(nir_shader *nir)
{
   nir_assign_var_locations(&nir->shared, &nir->num_shared,
                            type_size_scalar_bytes);
   nir_lower_io(nir, nir_var_shared, type_size_scalar_bytes);
}
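
/* Helpers around NIR_PASS()/NIR_PASS_V(): OPT() runs a pass that reports
 * whether it made progress and folds that into the enclosing `progress`
 * flag, while OPT_V() runs a pass with no progress return value.  Both
 * expect a `nir` variable (and, for OPT, `progress`) to be in scope at the
 * call site.
 */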
#define OPT(pass, ...) ({                                  \
   bool this_progress = false;                             \
   NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);      \
   if (this_progress)                                      \
      progress = true;                                     \
   this_progress;                                          \
})

#define OPT_V(pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)

static nir_shader *
nir_optimize(nir_shader *nir, bool is_scalar)
{
   bool progress;
   do {
      progress = false;
      OPT_V(nir_lower_vars_to_ssa);

      if (is_scalar) {
         OPT_V(nir_lower_alu_to_scalar);
      }

      OPT(nir_copy_prop);

      if (is_scalar) {
         OPT_V(nir_lower_phis_to_scalar);
      }

      OPT(nir_copy_prop);
      OPT(nir_opt_dce);
      OPT(nir_opt_cse);
      OPT(nir_opt_peephole_select);
      OPT(nir_opt_algebraic);
      OPT(nir_opt_constant_folding);
      OPT(nir_opt_dead_cf);
      OPT(nir_opt_remove_phis);
      OPT(nir_opt_undef);
      OPT_V(nir_lower_doubles, nir_lower_drcp |
                               nir_lower_dsqrt |
                               nir_lower_drsq |
                               nir_lower_dtrunc |
                               nir_lower_dfloor |
                               nir_lower_dceil |
                               nir_lower_dfract |
                               nir_lower_dround_even |
                               nir_lower_dmod);
      OPT_V(nir_lower_double_pack);
   } while (progress);

   return nir;
}

/* Does some simple lowering and runs the standard suite of optimizations
 *
 * This is intended to be called more-or-less directly after you get the
 * shader out of GLSL or some other source.  While it is geared towards i965,
 * it is not at all generator-specific except for the is_scalar flag.  Even
 * there, it is safe to call with is_scalar = false for a shader that is
 * intended for the FS backend as long as nir_optimize is called again with
 * is_scalar = true to scalarize everything prior to code gen.
 */
nir_shader *
brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
{
   bool progress; /* Written by OPT and OPT_V */
   (void)progress;

   const bool is_scalar = compiler->scalar_stage[nir->stage];

   if (nir->stage == MESA_SHADER_GEOMETRY)
      OPT(nir_lower_gs_intrinsics);

   if (compiler->precise_trig)
      OPT(brw_nir_apply_trig_workarounds);

   static const nir_lower_tex_options tex_options = {
      .lower_txp = ~0,
   };

   OPT(nir_lower_tex, &tex_options);
   OPT(nir_normalize_cubemap_coords);

   OPT(nir_lower_global_vars_to_local);

   OPT(nir_split_var_copies);

   nir = nir_optimize(nir, is_scalar);

   if (is_scalar) {
      OPT_V(nir_lower_load_const_to_scalar);
   }

   /* Lower a bunch of stuff */
   OPT_V(nir_lower_var_copies);

   /* Get rid of split copies */
   nir = nir_optimize(nir, is_scalar);

   OPT(nir_remove_dead_variables, nir_var_local);

   return nir;
}

/* Prepare the given shader for codegen
 *
 * This function is intended to be called right before going into the actual
 * backend and is highly backend-specific.  Also, once this function has been
 * called on a shader, it will no longer be in SSA form so most optimizations
 * will not work.
 */
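/* (It is the nir_convert_from_ssa() call below that takes the shader out of
 * SSA form.)
 */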
nir_shader *
brw_postprocess_nir(nir_shader *nir,
                    const struct brw_device_info *devinfo,
                    bool is_scalar)
{
   bool debug_enabled =
      (INTEL_DEBUG & intel_debug_flag_for_shader_stage(nir->stage));

   bool progress; /* Written by OPT and OPT_V */
   (void)progress;

   nir = nir_optimize(nir, is_scalar);

   if (devinfo->gen >= 6) {
      /* Try and fuse multiply-adds */
      OPT(brw_nir_opt_peephole_ffma);
   }

   OPT(nir_opt_algebraic_late);

   OPT(nir_lower_locals_to_regs);

   OPT_V(nir_lower_to_source_mods);
   OPT(nir_copy_prop);
   OPT(nir_opt_dce);

   if (unlikely(debug_enabled)) {
      /* Re-index SSA defs so we print more sensible numbers. */
      nir_foreach_function(function, nir) {
         if (function->impl)
            nir_index_ssa_defs(function->impl);
      }

      fprintf(stderr, "NIR (SSA form) for %s shader:\n",
              _mesa_shader_stage_to_string(nir->stage));
      nir_print_shader(nir, stderr);
   }

   OPT_V(nir_convert_from_ssa, true);

   if (!is_scalar) {
      OPT_V(nir_move_vec_src_uses_to_dest);
      OPT(nir_lower_vec_to_movs);
   }

   /* This is the last pass we run before we start emitting stuff.  It
    * determines when we need to insert boolean resolves on Gen <= 5.  We
    * run it last because it stashes data in instr->pass_flags and we don't
    * want that to be squashed by other NIR passes.
    */
   if (devinfo->gen <= 5)
      brw_nir_analyze_boolean_resolves(nir);

   nir_sweep(nir);

   if (unlikely(debug_enabled)) {
      fprintf(stderr, "NIR (final form) for %s shader:\n",
              _mesa_shader_stage_to_string(nir->stage));
      nir_print_shader(nir, stderr);
   }

   return nir;
}

nir_shader *
brw_create_nir(struct brw_context *brw,
               const struct gl_shader_program *shader_prog,
               const struct gl_program *prog,
               gl_shader_stage stage,
               bool is_scalar)
{
   struct gl_context *ctx = &brw->ctx;
   const nir_shader_compiler_options *options =
      ctx->Const.ShaderCompilerOptions[stage].NirOptions;
   bool progress;
   nir_shader *nir;

   /* First, lower the GLSL IR or Mesa IR to NIR */
   if (shader_prog) {
      nir = glsl_to_nir(shader_prog, stage, options);
      OPT_V(nir_lower_io_to_temporaries,
            nir_shader_get_entrypoint(nir),
            true, false);
   } else {
      nir = prog_to_nir(prog, options);
      OPT_V(nir_convert_to_ssa); /* turn registers into SSA */
   }
   nir_validate_shader(nir);

   (void)progress;

   nir = brw_preprocess_nir(brw->intelScreen->compiler, nir);

   OPT(nir_lower_system_values);
   OPT_V(brw_nir_lower_uniforms, is_scalar);

   if (shader_prog) {
      OPT_V(nir_lower_samplers, shader_prog);
      OPT_V(nir_lower_atomics, shader_prog);
   }

   return nir;
}

nir_shader *
brw_nir_apply_sampler_key(nir_shader *nir,
                          const struct brw_device_info *devinfo,
                          const struct brw_sampler_prog_key_data *key_tex,
                          bool is_scalar)
{
   nir_lower_tex_options tex_options = { 0 };

   /* Iron Lake and prior require lowering of all rectangle textures */
   if (devinfo->gen < 6)
      tex_options.lower_rect = true;

   /* Prior to Broadwell, our hardware can't actually do GL_CLAMP */
   if (devinfo->gen < 8) {
      tex_options.saturate_s = key_tex->gl_clamp_mask[0];
      tex_options.saturate_t = key_tex->gl_clamp_mask[1];
      tex_options.saturate_r = key_tex->gl_clamp_mask[2];
   }

   /* Prior to Haswell, we have to fake texture swizzle */
   for (unsigned s = 0; s < MAX_SAMPLERS; s++) {
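      /* Skip samplers whose swizzle is the identity; otherwise record that
       * this sampler's result needs swizzling and copy the per-channel
       * swizzle from the program key.
       */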
      if (key_tex->swizzles[s] == SWIZZLE_NOOP)
         continue;

      tex_options.swizzle_result |= (1 << s);
      for (unsigned c = 0; c < 4; c++)
         tex_options.swizzles[s][c] = GET_SWZ(key_tex->swizzles[s], c);
   }

   if (nir_lower_tex(nir, &tex_options)) {
      nir_validate_shader(nir);
      nir = nir_optimize(nir, is_scalar);
   }

   return nir;
}

enum brw_reg_type
brw_type_for_nir_type(nir_alu_type type)
{
   switch (type) {
   case nir_type_uint:
   case nir_type_uint32:
      return BRW_REGISTER_TYPE_UD;
   case nir_type_bool:
   case nir_type_int:
   case nir_type_bool32:
   case nir_type_int32:
      return BRW_REGISTER_TYPE_D;
   case nir_type_float:
   case nir_type_float32:
      return BRW_REGISTER_TYPE_F;
   case nir_type_float64:
      return BRW_REGISTER_TYPE_DF;
   case nir_type_int64:
   case nir_type_uint64:
      /* TODO we should only see these in moves, so for now it's ok, but when
       * we add actual 64-bit integer support we should fix this.
       */
      return BRW_REGISTER_TYPE_DF;
   default:
      unreachable("unknown type");
   }

   return BRW_REGISTER_TYPE_F;
}

/* Returns the glsl_base_type corresponding to a nir_alu_type.
 * This is used by both brw_vec4_nir and brw_fs_nir.
 */
enum glsl_base_type
brw_glsl_base_type_for_nir_type(nir_alu_type type)
{
   switch (type) {
   case nir_type_float:
   case nir_type_float32:
      return GLSL_TYPE_FLOAT;

   case nir_type_float64:
      return GLSL_TYPE_DOUBLE;

   case nir_type_int:
   case nir_type_int32:
      return GLSL_TYPE_INT;

   case nir_type_uint:
   case nir_type_uint32:
      return GLSL_TYPE_UINT;

   default:
      unreachable("bad type");
   }
}