brw_nir.c revision 9f32721f8695f3e55849dce015da3b53d1af5d57
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_nir.h"
#include "brw_shader.h"
#include "compiler/glsl_types.h"
#include "compiler/nir/nir_builder.h"

static bool
is_input(nir_intrinsic_instr *intrin)
{
   return intrin->intrinsic == nir_intrinsic_load_input ||
          intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
          intrin->intrinsic == nir_intrinsic_load_interpolated_input;
}

static bool
is_output(nir_intrinsic_instr *intrin)
{
   return intrin->intrinsic == nir_intrinsic_load_output ||
          intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
          intrin->intrinsic == nir_intrinsic_store_output ||
          intrin->intrinsic == nir_intrinsic_store_per_vertex_output;
}

/**
 * In many cases, we just add the base and offset together, so there's no
 * reason to keep them separate.  Sometimes, combining them is essential:
 * if a shader only accesses part of a compound variable (such as a matrix
 * or array), the variable's base may not actually exist in the VUE map.
 *
 * This pass adds constant offsets to instr->const_index[0], and resets
 * the offset source to 0.  Non-constant offsets remain unchanged; since
 * we don't know what part of a compound variable is accessed, we allocate
 * storage for the entire thing.
 */
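/* Example (hypothetical values): a load_input with const_index[0] == 7 and
 * a constant offset source of 2 is rewritten by this pass to have
 * const_index[0] == 9 and a constant offset source of 0.
 */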
static bool
add_const_offset_to_base_block(nir_block *block, nir_builder *b,
                               nir_variable_mode mode)
{
   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      if ((mode == nir_var_shader_in && is_input(intrin)) ||
          (mode == nir_var_shader_out && is_output(intrin))) {
         nir_src *offset = nir_get_io_offset_src(intrin);
         nir_const_value *const_offset = nir_src_as_const_value(*offset);

         if (const_offset) {
            intrin->const_index[0] += const_offset->u32[0];
            b->cursor = nir_before_instr(&intrin->instr);
            nir_instr_rewrite_src(&intrin->instr, offset,
                                  nir_src_for_ssa(nir_imm_int(b, 0)));
         }
      }
   }
   return true;
}

static void
add_const_offset_to_base(nir_shader *nir, nir_variable_mode mode)
{
   nir_foreach_function(f, nir) {
      if (f->impl) {
         nir_builder b;
         nir_builder_init(&b, f->impl);
         nir_foreach_block(block, f->impl) {
            add_const_offset_to_base_block(block, &b, mode);
         }
      }
   }
}

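/* Worked example for the remapping below (hypothetical masks): if
 * inputs_read has bits 0, 3, and 7 set and double_inputs_read is 0, then
 * for attr == 7 there are two enabled attributes below it, so slot == 2,
 * dslot == 0, and const_index[0] becomes 4 * (2 + 0) == 8.
 */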
static bool
remap_vs_attrs(nir_block *block, struct nir_shader_info *nir_info)
{
   nir_foreach_instr(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      if (intrin->intrinsic == nir_intrinsic_load_input) {
         /* Attributes come in a contiguous block, ordered by their
          * gl_vert_attrib value.  That means we can compute the slot
          * number for an attribute by masking out the enabled attributes
          * before it and counting the bits.
          */
         int attr = intrin->const_index[0];
         int slot = _mesa_bitcount_64(nir_info->inputs_read &
                                      BITFIELD64_MASK(attr));
         int dslot = _mesa_bitcount_64(nir_info->double_inputs_read &
                                       BITFIELD64_MASK(attr));
         intrin->const_index[0] = 4 * (slot + dslot);
      }
   }
   return true;
}

static bool
remap_inputs_with_vue_map(nir_block *block, const struct brw_vue_map *vue_map)
{
   nir_foreach_instr(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      if (intrin->intrinsic == nir_intrinsic_load_input ||
          intrin->intrinsic == nir_intrinsic_load_per_vertex_input) {
         int vue_slot = vue_map->varying_to_slot[intrin->const_index[0]];
         assert(vue_slot != -1);
         intrin->const_index[0] = vue_slot;
      }
   }
   return true;
}

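/* Remaps TCS outputs and TES inputs into the patch URB layout.  Each
 * varying is moved to its VUE map slot; for per-vertex accesses, the slot
 * is additionally offset by vertex_index * num_per_vertex_slots.  A
 * constant vertex index is folded into const_index[0], while a dynamic
 * one is multiplied and added to the indirect offset source.
 */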
static bool
remap_patch_urb_offsets(nir_block *block, nir_builder *b,
                        const struct brw_vue_map *vue_map)
{
   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      gl_shader_stage stage = b->shader->stage;

      if ((stage == MESA_SHADER_TESS_CTRL && is_output(intrin)) ||
          (stage == MESA_SHADER_TESS_EVAL && is_input(intrin))) {
         int vue_slot = vue_map->varying_to_slot[intrin->const_index[0]];
         assert(vue_slot != -1);
         intrin->const_index[0] = vue_slot;

         nir_src *vertex = nir_get_io_vertex_index_src(intrin);
         if (vertex) {
            nir_const_value *const_vertex = nir_src_as_const_value(*vertex);
            if (const_vertex) {
               intrin->const_index[0] += const_vertex->u32[0] *
                                         vue_map->num_per_vertex_slots;
            } else {
               b->cursor = nir_before_instr(&intrin->instr);

               /* Multiply by the number of per-vertex slots. */
               nir_ssa_def *vertex_offset =
                  nir_imul(b,
                           nir_ssa_for_src(b, *vertex, 1),
                           nir_imm_int(b,
                                       vue_map->num_per_vertex_slots));

               /* Add it to the existing offset */
               nir_src *offset = nir_get_io_offset_src(intrin);
               nir_ssa_def *total_offset =
                  nir_iadd(b, vertex_offset,
                           nir_ssa_for_src(b, *offset, 1));

               nir_instr_rewrite_src(&intrin->instr, offset,
                                     nir_src_for_ssa(total_offset));
            }
         }
      }
   }
   return true;
}

void
brw_nir_lower_vs_inputs(nir_shader *nir,
                        const struct brw_device_info *devinfo,
                        bool is_scalar,
                        bool use_legacy_snorm_formula,
                        const uint8_t *vs_attrib_wa_flags)
{
   /* Start with the location of the variable's base. */
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location = var->data.location;
   }

   /* Now use nir_lower_io to walk dereference chains.  Attribute arrays
    * are loaded as one vec4 or dvec4 per element (or matrix column),
    * depending on whether it is a double-precision type or not.
    */
   nir_lower_io(nir, nir_var_shader_in, type_size_vs_input);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_in);

   brw_nir_apply_attribute_workarounds(nir, use_legacy_snorm_formula,
                                       vs_attrib_wa_flags);

   if (is_scalar) {
      /* Finally, translate VERT_ATTRIB_* values into the actual registers. */

      nir_foreach_function(function, nir) {
         if (function->impl) {
            nir_foreach_block(block, function->impl) {
               remap_vs_attrs(block, &nir->info);
            }
         }
      }
   }
}

void
brw_nir_lower_vue_inputs(nir_shader *nir, bool is_scalar,
                         const struct brw_vue_map *vue_map)
{
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location = var->data.location;
   }

   /* Inputs are stored in vec4 slots, so use type_size_vec4(). */
   nir_lower_io(nir, nir_var_shader_in, type_size_vec4);

   if (is_scalar || nir->stage != MESA_SHADER_GEOMETRY) {
      /* This pass needs actual constants */
      nir_opt_constant_folding(nir);

      add_const_offset_to_base(nir, nir_var_shader_in);

      nir_foreach_function(function, nir) {
         if (function->impl) {
            nir_foreach_block(block, function->impl) {
               remap_inputs_with_vue_map(block, vue_map);
            }
         }
      }
   }
}

void
brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue_map)
{
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location = var->data.location;
   }

   nir_lower_io(nir, nir_var_shader_in, type_size_vec4);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_in);

   nir_foreach_function(function, nir) {
      if (function->impl) {
         nir_builder b;
         nir_builder_init(&b, function->impl);
         nir_foreach_block(block, function->impl) {
            remap_patch_urb_offsets(block, &b, vue_map);
         }
      }
   }
}

void
brw_nir_lower_fs_inputs(nir_shader *nir)
{
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location = var->data.location;
   }

   nir_lower_io(nir, nir_var_shader_in, type_size_vec4);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_in);
}

void
brw_nir_lower_vue_outputs(nir_shader *nir,
                          bool is_scalar)
{
   if (is_scalar) {
      nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
                               VARYING_SLOT_VAR0,
                               type_size_vec4_times_4);
      nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4);
   } else {
      nir_foreach_variable(var, &nir->outputs)
         var->data.driver_location = var->data.location;
      nir_lower_io(nir, nir_var_shader_out, type_size_vec4);
   }
}

void
brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map *vue_map)
{
   nir_foreach_variable(var, &nir->outputs) {
      var->data.driver_location = var->data.location;
   }

   nir_lower_io(nir, nir_var_shader_out, type_size_vec4);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_out);

   nir_foreach_function(function, nir) {
      if (function->impl) {
         nir_builder b;
         nir_builder_init(&b, function->impl);
         nir_foreach_block(block, function->impl) {
            remap_patch_urb_offsets(block, &b, vue_map);
         }
      }
   }
}

void
brw_nir_lower_fs_outputs(nir_shader *nir)
{
   nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
                            FRAG_RESULT_DATA0, type_size_vec4_times_4);
   nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4);
}

void
brw_nir_lower_cs_shared(nir_shader *nir)
{
   nir_assign_var_locations(&nir->shared, &nir->num_shared, 0,
                            type_size_scalar_bytes);
   nir_lower_io(nir, nir_var_shared, type_size_scalar_bytes);
}

#define OPT(pass, ...) ({                                  \
   bool this_progress = false;                             \
   NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);      \
   if (this_progress)                                      \
      progress = true;                                     \
   this_progress;                                          \
})

#define OPT_V(pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)

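/* OPT() runs a pass via NIR_PASS and accumulates its result into a local
 * `progress` flag, which must be declared at the call site; OPT_V() runs
 * a pass with no progress return via NIR_PASS_V.  Both expect the shader
 * to be named `nir` in the enclosing scope.
 */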
static nir_shader *
nir_optimize(nir_shader *nir, bool is_scalar)
{
   bool progress;
   do {
      progress = false;
      OPT_V(nir_lower_vars_to_ssa);

      if (is_scalar) {
         OPT_V(nir_lower_alu_to_scalar);
      }

      OPT(nir_copy_prop);

      if (is_scalar) {
         OPT_V(nir_lower_phis_to_scalar);
      }

      OPT(nir_copy_prop);
      OPT(nir_opt_dce);
      OPT(nir_opt_cse);
      OPT(nir_opt_peephole_select);
      OPT(nir_opt_algebraic);
      OPT(nir_opt_constant_folding);
      OPT(nir_opt_dead_cf);
      OPT(nir_opt_remove_phis);
      OPT(nir_opt_undef);
      OPT_V(nir_lower_doubles, nir_lower_drcp |
                               nir_lower_dsqrt |
                               nir_lower_drsq |
                               nir_lower_dtrunc |
                               nir_lower_dfloor |
                               nir_lower_dceil |
                               nir_lower_dfract |
                               nir_lower_dround_even |
                               nir_lower_dmod);
      OPT_V(nir_lower_double_pack);
   } while (progress);

   return nir;
}

/* Does some simple lowering and runs the standard suite of optimizations
 *
 * This is intended to be called more-or-less directly after you get the
 * shader out of GLSL or some other source.  While it is geared towards
 * i965, it is not at all generator-specific except for the is_scalar flag.
 * Even there, it is safe to call with is_scalar = false for a shader that
 * is intended for the FS backend as long as nir_optimize is called again
 * with is_scalar = true to scalarize everything prior to code gen.
 */
nir_shader *
brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
{
   bool progress; /* Written by OPT and OPT_V */
   (void)progress;

   const bool is_scalar = compiler->scalar_stage[nir->stage];

   if (nir->stage == MESA_SHADER_GEOMETRY)
      OPT(nir_lower_gs_intrinsics);

   if (compiler->precise_trig)
      OPT(brw_nir_apply_trig_workarounds);

   static const nir_lower_tex_options tex_options = {
      .lower_txp = ~0,
      .lower_txf_offset = true,
      .lower_rect_offset = true,
   };

   OPT(nir_lower_tex, &tex_options);
   OPT(nir_normalize_cubemap_coords);

   OPT(nir_lower_global_vars_to_local);

   OPT(nir_split_var_copies);

   nir = nir_optimize(nir, is_scalar);

   if (is_scalar) {
      OPT_V(nir_lower_load_const_to_scalar);
   }

   /* Lower a bunch of stuff */
   OPT_V(nir_lower_var_copies);

   /* Get rid of split copies */
   nir = nir_optimize(nir, is_scalar);

   OPT(nir_remove_dead_variables, nir_var_local);

   return nir;
}

/* Prepare the given shader for codegen
 *
 * This function is intended to be called right before going into the
 * actual backend and is highly backend-specific.  Also, once this function
 * has been called on a shader, it will no longer be in SSA form so most
 * optimizations will not work.
 */
nir_shader *
brw_postprocess_nir(nir_shader *nir,
                    const struct brw_device_info *devinfo,
                    bool is_scalar)
{
   bool debug_enabled =
      (INTEL_DEBUG & intel_debug_flag_for_shader_stage(nir->stage));

   bool progress; /* Written by OPT and OPT_V */
   (void)progress;

   nir = nir_optimize(nir, is_scalar);

   if (devinfo->gen >= 6) {
      /* Try to fuse multiply-adds */
      OPT(brw_nir_opt_peephole_ffma);
   }

   OPT(nir_opt_algebraic_late);

   OPT(nir_lower_locals_to_regs);

   OPT_V(nir_lower_to_source_mods);
   OPT(nir_copy_prop);
   OPT(nir_opt_dce);

   if (unlikely(debug_enabled)) {
      /* Re-index SSA defs so we print more sensible numbers. */
      nir_foreach_function(function, nir) {
         if (function->impl)
            nir_index_ssa_defs(function->impl);
      }

      fprintf(stderr, "NIR (SSA form) for %s shader:\n",
              _mesa_shader_stage_to_string(nir->stage));
      nir_print_shader(nir, stderr);
   }

   OPT_V(nir_convert_from_ssa, true);

   if (!is_scalar) {
      OPT_V(nir_move_vec_src_uses_to_dest);
      OPT(nir_lower_vec_to_movs);
   }

   /* This is the last pass we run before we start emitting stuff.  It
    * determines when we need to insert boolean resolves on Gen <= 5.  We
    * run it last because it stashes data in instr->pass_flags and we don't
    * want that to be squashed by other NIR passes.
    */
   if (devinfo->gen <= 5)
      brw_nir_analyze_boolean_resolves(nir);

   nir_sweep(nir);

   if (unlikely(debug_enabled)) {
      fprintf(stderr, "NIR (final form) for %s shader:\n",
              _mesa_shader_stage_to_string(nir->stage));
      nir_print_shader(nir, stderr);
   }

   return nir;
}

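/* Applies the sampler program key: lowers rectangle textures on Gen < 6,
 * saturates coordinates for GL_CLAMP on Gen < 8, fakes texture swizzles
 * where the key requests them, lowers YUV external images, and then
 * re-runs the optimization loop if nir_lower_tex made progress.
 */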
nir_shader *
brw_nir_apply_sampler_key(nir_shader *nir,
                          const struct brw_device_info *devinfo,
                          const struct brw_sampler_prog_key_data *key_tex,
                          bool is_scalar)
{
   nir_lower_tex_options tex_options = { 0 };

   /* Iron Lake and prior require lowering of all rectangle textures */
   if (devinfo->gen < 6)
      tex_options.lower_rect = true;

   /* Prior to Broadwell, our hardware can't actually do GL_CLAMP */
   if (devinfo->gen < 8) {
      tex_options.saturate_s = key_tex->gl_clamp_mask[0];
      tex_options.saturate_t = key_tex->gl_clamp_mask[1];
      tex_options.saturate_r = key_tex->gl_clamp_mask[2];
   }

   /* Prior to Haswell, we have to fake texture swizzle */
   for (unsigned s = 0; s < MAX_SAMPLERS; s++) {
      if (key_tex->swizzles[s] == SWIZZLE_NOOP)
         continue;

      tex_options.swizzle_result |= (1 << s);
      for (unsigned c = 0; c < 4; c++)
         tex_options.swizzles[s][c] = GET_SWZ(key_tex->swizzles[s], c);
   }

   tex_options.lower_y_uv_external = key_tex->y_uv_image_mask;
   tex_options.lower_y_u_v_external = key_tex->y_u_v_image_mask;
   tex_options.lower_yx_xuxv_external = key_tex->yx_xuxv_image_mask;

   if (nir_lower_tex(nir, &tex_options)) {
      nir_validate_shader(nir);
      nir = nir_optimize(nir, is_scalar);
   }

   return nir;
}

enum brw_reg_type
brw_type_for_nir_type(nir_alu_type type)
{
   switch (type) {
   case nir_type_uint:
   case nir_type_uint32:
      return BRW_REGISTER_TYPE_UD;
   case nir_type_bool:
   case nir_type_int:
   case nir_type_bool32:
   case nir_type_int32:
      return BRW_REGISTER_TYPE_D;
   case nir_type_float:
   case nir_type_float32:
      return BRW_REGISTER_TYPE_F;
   case nir_type_float64:
      return BRW_REGISTER_TYPE_DF;
   case nir_type_int64:
   case nir_type_uint64:
      /* TODO we should only see these in moves, so for now it's ok, but
       * when we add actual 64-bit integer support we should fix this.
       */
      return BRW_REGISTER_TYPE_DF;
   default:
      unreachable("unknown type");
   }

   return BRW_REGISTER_TYPE_F;
}

/* Returns the glsl_base_type corresponding to a nir_alu_type.
 * This is used by both brw_vec4_nir and brw_fs_nir.
 */
enum glsl_base_type
brw_glsl_base_type_for_nir_type(nir_alu_type type)
{
   switch (type) {
   case nir_type_float:
   case nir_type_float32:
      return GLSL_TYPE_FLOAT;

   case nir_type_float64:
      return GLSL_TYPE_DOUBLE;

   case nir_type_int:
   case nir_type_int32:
      return GLSL_TYPE_INT;

   case nir_type_uint:
   case nir_type_uint32:
      return GLSL_TYPE_UINT;

   default:
      unreachable("bad type");
   }
}