brw_nir.c revision aa35b0c2c71f054f72df5a85779d0862fa7d6e4a
1/* 2 * Copyright © 2014 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "brw_nir.h" 25#include "brw_shader.h" 26#include "glsl/nir/glsl_to_nir.h" 27#include "program/prog_to_nir.h" 28 29static bool 30remap_vs_attrs(nir_block *block, void *closure) 31{ 32 GLbitfield64 inputs_read = *((GLbitfield64 *) closure); 33 34 nir_foreach_instr(block, instr) { 35 if (instr->type != nir_instr_type_intrinsic) 36 continue; 37 38 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 39 40 /* We set EmitNoIndirect for VS inputs, so there are no indirects. */ 41 assert(intrin->intrinsic != nir_intrinsic_load_input_indirect); 42 43 if (intrin->intrinsic == nir_intrinsic_load_input) { 44 /* Attributes come in a contiguous block, ordered by their 45 * gl_vert_attrib value. That means we can compute the slot 46 * number for an attribute by masking out the enabled attributes 47 * before it and counting the bits. 48 */ 49 int attr = intrin->const_index[0]; 50 int slot = _mesa_bitcount_64(inputs_read & BITFIELD64_MASK(attr)); 51 intrin->const_index[0] = 4 * slot; 52 } 53 } 54 return true; 55} 56 57static void 58brw_nir_lower_inputs(nir_shader *nir, 59 const struct brw_device_info *devinfo, 60 bool is_scalar) 61{ 62 switch (nir->stage) { 63 case MESA_SHADER_VERTEX: 64 /* Start with the location of the variable's base. */ 65 foreach_list_typed(nir_variable, var, node, &nir->inputs) { 66 var->data.driver_location = var->data.location; 67 } 68 69 /* Now use nir_lower_io to walk dereference chains. Attribute arrays 70 * are loaded as one vec4 per element (or matrix column), so we use 71 * type_size_vec4 here. 72 */ 73 nir_lower_io(nir, nir_var_shader_in, type_size_vec4); 74 75 if (is_scalar) { 76 /* Finally, translate VERT_ATTRIB_* values into the actual registers. 77 * 78 * Note that we can use nir->info.inputs_read instead of 79 * key->inputs_read since the two are identical aside from Gen4-5 80 * edge flag differences. 81 */ 82 GLbitfield64 inputs_read = nir->info.inputs_read; 83 nir_foreach_overload(nir, overload) { 84 if (overload->impl) { 85 nir_foreach_block(overload->impl, remap_vs_attrs, &inputs_read); 86 } 87 } 88 } 89 break; 90 case MESA_SHADER_GEOMETRY: { 91 if (!is_scalar) { 92 foreach_list_typed(nir_variable, var, node, &nir->inputs) { 93 var->data.driver_location = var->data.location; 94 } 95 } else { 96 /* The GLSL linker will have already matched up GS inputs and 97 * the outputs of prior stages. The driver does extend VS outputs 98 * in some cases, but only for legacy OpenGL or Gen4-5 hardware, 99 * neither of which offer geometry shader support. So we can 100 * safely ignore that. 101 * 102 * For SSO pipelines, we use a fixed VUE map layout based on variable 103 * locations, so we can rely on rendezvous-by-location to make this 104 * work. 105 * 106 * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not 107 * written by previous stages and shows up via payload magic. 108 */ 109 struct brw_vue_map input_vue_map; 110 GLbitfield64 inputs_read = 111 nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID; 112 brw_compute_vue_map(devinfo, &input_vue_map, inputs_read, 113 nir->info.separate_shader); 114 115 /* Start with the slot for the variable's base. */ 116 foreach_list_typed(nir_variable, var, node, &nir->inputs) { 117 assert(input_vue_map.varying_to_slot[var->data.location] != -1); 118 var->data.driver_location = 119 input_vue_map.varying_to_slot[var->data.location]; 120 } 121 122 /* Inputs are stored in vec4 slots, so use type_size_vec4(). */ 123 nir_lower_io(nir, nir_var_shader_in, type_size_vec4); 124 } 125 break; 126 } 127 case MESA_SHADER_FRAGMENT: 128 assert(is_scalar); 129 nir_assign_var_locations(&nir->inputs, &nir->num_inputs, 130 type_size_scalar); 131 break; 132 case MESA_SHADER_COMPUTE: 133 /* Compute shaders have no inputs. */ 134 assert(exec_list_is_empty(&nir->inputs)); 135 break; 136 default: 137 unreachable("unsupported shader stage"); 138 } 139} 140 141static void 142brw_nir_lower_outputs(nir_shader *nir, bool is_scalar) 143{ 144 switch (nir->stage) { 145 case MESA_SHADER_VERTEX: 146 case MESA_SHADER_GEOMETRY: 147 if (is_scalar) { 148 nir_assign_var_locations(&nir->outputs, &nir->num_outputs, 149 type_size_vec4_times_4); 150 nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4); 151 } else { 152 nir_foreach_variable(var, &nir->outputs) 153 var->data.driver_location = var->data.location; 154 } 155 break; 156 case MESA_SHADER_FRAGMENT: 157 nir_assign_var_locations(&nir->outputs, &nir->num_outputs, 158 type_size_scalar); 159 break; 160 case MESA_SHADER_COMPUTE: 161 /* Compute shaders have no outputs. */ 162 assert(exec_list_is_empty(&nir->outputs)); 163 break; 164 default: 165 unreachable("unsupported shader stage"); 166 } 167} 168 169#include "util/debug.h" 170 171static bool 172should_clone_nir() 173{ 174 static int should_clone = -1; 175 if (should_clone < 1) 176 should_clone = env_var_as_boolean("NIR_TEST_CLONE", false); 177 178 return should_clone; 179} 180 181#define _OPT(do_pass) (({ \ 182 bool this_progress = true; \ 183 do_pass \ 184 nir_validate_shader(nir); \ 185 if (should_clone_nir()) { \ 186 nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \ 187 ralloc_free(nir); \ 188 nir = clone; \ 189 } \ 190 this_progress; \ 191})) 192 193#define OPT(pass, ...) _OPT( \ 194 nir_metadata_set_validation_flag(nir); \ 195 this_progress = pass(nir ,##__VA_ARGS__); \ 196 if (this_progress) { \ 197 progress = true; \ 198 nir_metadata_check_validation_flag(nir); \ 199 } \ 200) 201 202#define OPT_V(pass, ...) _OPT( \ 203 pass(nir, ##__VA_ARGS__); \ 204) 205 206static nir_shader * 207nir_optimize(nir_shader *nir, bool is_scalar) 208{ 209 bool progress; 210 do { 211 progress = false; 212 OPT_V(nir_lower_vars_to_ssa); 213 214 if (is_scalar) { 215 OPT_V(nir_lower_alu_to_scalar); 216 } 217 218 OPT(nir_copy_prop); 219 220 if (is_scalar) { 221 OPT_V(nir_lower_phis_to_scalar); 222 } 223 224 OPT(nir_copy_prop); 225 OPT(nir_opt_dce); 226 OPT(nir_opt_cse); 227 OPT(nir_opt_peephole_select); 228 OPT(nir_opt_algebraic); 229 OPT(nir_opt_constant_folding); 230 OPT(nir_opt_dead_cf); 231 OPT(nir_opt_remove_phis); 232 OPT(nir_opt_undef); 233 } while (progress); 234 235 return nir; 236} 237 238/* Does some simple lowering and runs the standard suite of optimizations 239 * 240 * This is intended to be called more-or-less directly after you get the 241 * shader out of GLSL or some other source. While it is geared towards i965, 242 * it is not at all generator-specific except for the is_scalar flag. Even 243 * there, it is safe to call with is_scalar = false for a shader that is 244 * intended for the FS backend as long as nir_optimize is called again with 245 * is_scalar = true to scalarize everything prior to code gen. 246 */ 247nir_shader * 248brw_preprocess_nir(nir_shader *nir, bool is_scalar) 249{ 250 bool progress; /* Written by OPT and OPT_V */ 251 (void)progress; 252 253 if (nir->stage == MESA_SHADER_GEOMETRY) 254 OPT(nir_lower_gs_intrinsics); 255 256 static const nir_lower_tex_options tex_options = { 257 .lower_txp = ~0, 258 }; 259 260 OPT(nir_lower_tex, &tex_options); 261 OPT(nir_normalize_cubemap_coords); 262 263 OPT(nir_lower_global_vars_to_local); 264 265 OPT(nir_split_var_copies); 266 267 nir = nir_optimize(nir, is_scalar); 268 269 /* Lower a bunch of stuff */ 270 OPT_V(nir_lower_var_copies); 271 272 /* Get rid of split copies */ 273 nir = nir_optimize(nir, is_scalar); 274 275 OPT(nir_remove_dead_variables); 276 277 return nir; 278} 279 280/* Lowers inputs, outputs, uniforms, and samplers for i965 281 * 282 * This function does all of the standard lowering prior to post-processing. 283 * The lowering done is highly gen, stage, and backend-specific. The 284 * shader_prog parameter is optional and is used only for lowering sampler 285 * derefs and atomics for GLSL shaders. 286 */ 287nir_shader * 288brw_lower_nir(nir_shader *nir, 289 const struct brw_device_info *devinfo, 290 const struct gl_shader_program *shader_prog, 291 bool is_scalar) 292{ 293 bool progress; /* Written by OPT and OPT_V */ 294 (void)progress; 295 296 OPT_V(brw_nir_lower_inputs, devinfo, is_scalar); 297 OPT_V(brw_nir_lower_outputs, is_scalar); 298 nir_assign_var_locations(&nir->uniforms, 299 &nir->num_uniforms, 300 is_scalar ? type_size_scalar : type_size_vec4); 301 OPT_V(nir_lower_io, nir_var_all, is_scalar ? type_size_scalar : type_size_vec4); 302 303 if (shader_prog) { 304 OPT_V(nir_lower_samplers, shader_prog); 305 } 306 307 OPT(nir_lower_system_values); 308 309 if (shader_prog) { 310 OPT_V(nir_lower_atomics, shader_prog); 311 } 312 313 return nir_optimize(nir, is_scalar); 314} 315 316/* Prepare the given shader for codegen 317 * 318 * This function is intended to be called right before going into the actual 319 * backend and is highly backend-specific. Also, once this function has been 320 * called on a shader, it will no longer be in SSA form so most optimizations 321 * will not work. 322 */ 323nir_shader * 324brw_postprocess_nir(nir_shader *nir, 325 const struct brw_device_info *devinfo, 326 bool is_scalar) 327{ 328 bool debug_enabled = 329 (INTEL_DEBUG & intel_debug_flag_for_shader_stage(nir->stage)); 330 331 bool progress; /* Written by OPT and OPT_V */ 332 (void)progress; 333 334 if (devinfo->gen >= 6) { 335 /* Try and fuse multiply-adds */ 336 OPT(brw_nir_opt_peephole_ffma); 337 } 338 339 OPT(nir_opt_algebraic_late); 340 341 OPT(nir_lower_locals_to_regs); 342 343 OPT_V(nir_lower_to_source_mods); 344 OPT(nir_copy_prop); 345 OPT(nir_opt_dce); 346 347 if (unlikely(debug_enabled)) { 348 /* Re-index SSA defs so we print more sensible numbers. */ 349 nir_foreach_overload(nir, overload) { 350 if (overload->impl) 351 nir_index_ssa_defs(overload->impl); 352 } 353 354 fprintf(stderr, "NIR (SSA form) for %s shader:\n", 355 _mesa_shader_stage_to_string(nir->stage)); 356 nir_print_shader(nir, stderr); 357 } 358 359 OPT_V(nir_convert_from_ssa, true); 360 361 if (!is_scalar) { 362 OPT_V(nir_move_vec_src_uses_to_dest); 363 OPT(nir_lower_vec_to_movs); 364 } 365 366 /* This is the last pass we run before we start emitting stuff. It 367 * determines when we need to insert boolean resolves on Gen <= 5. We 368 * run it last because it stashes data in instr->pass_flags and we don't 369 * want that to be squashed by other NIR passes. 370 */ 371 if (devinfo->gen <= 5) 372 brw_nir_analyze_boolean_resolves(nir); 373 374 nir_sweep(nir); 375 376 if (unlikely(debug_enabled)) { 377 fprintf(stderr, "NIR (final form) for %s shader:\n", 378 _mesa_shader_stage_to_string(nir->stage)); 379 nir_print_shader(nir, stderr); 380 } 381 382 return nir; 383} 384 385nir_shader * 386brw_create_nir(struct brw_context *brw, 387 const struct gl_shader_program *shader_prog, 388 const struct gl_program *prog, 389 gl_shader_stage stage, 390 bool is_scalar) 391{ 392 struct gl_context *ctx = &brw->ctx; 393 const struct brw_device_info *devinfo = brw->intelScreen->devinfo; 394 const nir_shader_compiler_options *options = 395 ctx->Const.ShaderCompilerOptions[stage].NirOptions; 396 bool progress; 397 nir_shader *nir; 398 399 /* First, lower the GLSL IR or Mesa IR to NIR */ 400 if (shader_prog) { 401 nir = glsl_to_nir(shader_prog, stage, options); 402 } else { 403 nir = prog_to_nir(prog, options); 404 OPT_V(nir_convert_to_ssa); /* turn registers into SSA */ 405 } 406 nir_validate_shader(nir); 407 408 (void)progress; 409 410 nir = brw_preprocess_nir(nir, is_scalar); 411 nir = brw_lower_nir(nir, devinfo, shader_prog, is_scalar); 412 413 return nir; 414} 415 416nir_shader * 417brw_nir_apply_sampler_key(nir_shader *nir, 418 const struct brw_device_info *devinfo, 419 const struct brw_sampler_prog_key_data *key_tex, 420 bool is_scalar) 421{ 422 nir_lower_tex_options tex_options = { 0 }; 423 424 /* Iron Lake and prior require lowering of all rectangle textures */ 425 if (devinfo->gen < 6) 426 tex_options.lower_rect = true; 427 428 /* Prior to Broadwell, our hardware can't actually do GL_CLAMP */ 429 if (devinfo->gen < 8) { 430 tex_options.saturate_s = key_tex->gl_clamp_mask[0]; 431 tex_options.saturate_t = key_tex->gl_clamp_mask[1]; 432 tex_options.saturate_r = key_tex->gl_clamp_mask[2]; 433 } 434 435 /* Prior to Haswell, we have to fake texture swizzle */ 436 for (unsigned s = 0; s < MAX_SAMPLERS; s++) { 437 if (key_tex->swizzles[s] == SWIZZLE_NOOP) 438 continue; 439 440 tex_options.swizzle_result |= (1 << s); 441 for (unsigned c = 0; c < 4; c++) 442 tex_options.swizzles[s][c] = GET_SWZ(key_tex->swizzles[s], c); 443 } 444 445 if (nir_lower_tex(nir, &tex_options)) { 446 nir_validate_shader(nir); 447 nir = nir_optimize(nir, is_scalar); 448 } 449 450 return nir; 451} 452 453enum brw_reg_type 454brw_type_for_nir_type(nir_alu_type type) 455{ 456 switch (type) { 457 case nir_type_uint: 458 return BRW_REGISTER_TYPE_UD; 459 case nir_type_bool: 460 case nir_type_int: 461 return BRW_REGISTER_TYPE_D; 462 case nir_type_float: 463 return BRW_REGISTER_TYPE_F; 464 default: 465 unreachable("unknown type"); 466 } 467 468 return BRW_REGISTER_TYPE_F; 469} 470 471/* Returns the glsl_base_type corresponding to a nir_alu_type. 472 * This is used by both brw_vec4_nir and brw_fs_nir. 473 */ 474enum glsl_base_type 475brw_glsl_base_type_for_nir_type(nir_alu_type type) 476{ 477 switch (type) { 478 case nir_type_float: 479 return GLSL_TYPE_FLOAT; 480 481 case nir_type_int: 482 return GLSL_TYPE_INT; 483 484 case nir_type_uint: 485 return GLSL_TYPE_UINT; 486 487 default: 488 unreachable("bad type"); 489 } 490} 491