brw_nir.c revision ce767bbdfff7c2a7829b652c111a11eb9ddba026
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_nir.h"
#include "brw_shader.h"
#include "glsl/glsl_parser_extras.h"
#include "glsl/nir/glsl_to_nir.h"
#include "program/prog_to_nir.h"

static bool
remap_vs_attrs(nir_block *block, void *closure)
{
   GLbitfield64 inputs_read = *((GLbitfield64 *) closure);

   nir_foreach_instr(block, instr) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      /* We set EmitNoIndirect for VS inputs, so there are no indirects. */
      assert(intrin->intrinsic != nir_intrinsic_load_input_indirect);

      if (intrin->intrinsic == nir_intrinsic_load_input) {
         /* Attributes come in a contiguous block, ordered by their
          * gl_vert_attrib value.  That means we can compute the slot
          * number for an attribute by masking out the enabled attributes
          * before it and counting the bits.
          */
         int attr = intrin->const_index[0];
         int slot = _mesa_bitcount_64(inputs_read & BITFIELD64_MASK(attr));
         intrin->const_index[0] = 4 * slot;
      }
   }
   return true;
}
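
/* A worked example of the remap above (the attribute values here are
 * chosen for illustration, not taken from any particular shader): if
 * inputs_read has only VERT_ATTRIB_POS, VERT_ATTRIB_NORMAL, and
 * VERT_ATTRIB_TEX0 set, then for attr == VERT_ATTRIB_TEX0,
 * BITFIELD64_MASK(attr) keeps just the enabled bits below TEX0 (POS and
 * NORMAL), so _mesa_bitcount_64() returns 2 and const_index[0] becomes
 * 4 * 2 = 8.  The factor of 4 converts a vec4 slot index into the scalar
 * backend's four-components-per-slot addressing.
 */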

static void
brw_nir_lower_inputs(nir_shader *nir,
                     const struct brw_device_info *devinfo,
                     bool is_scalar)
{
   switch (nir->stage) {
   case MESA_SHADER_VERTEX:
      /* For now, leave the vec4 backend doing the old method. */
      if (!is_scalar) {
         nir_assign_var_locations(&nir->inputs, &nir->num_inputs,
                                  type_size_vec4);
         break;
      }

      /* Start with the location of the variable's base. */
      foreach_list_typed(nir_variable, var, node, &nir->inputs) {
         var->data.driver_location = var->data.location;
      }

      /* Now use nir_lower_io to walk dereference chains.  Attribute arrays
       * are loaded as one vec4 per element (or matrix column), so we use
       * type_size_vec4 here.
       */
      nir_lower_io(nir, nir_var_shader_in, type_size_vec4);

      /* Finally, translate VERT_ATTRIB_* values into the actual registers.
       *
       * Note that we can use nir->info.inputs_read instead of
       * key->inputs_read since the two are identical aside from Gen4-5
       * edge flag differences.
       */
      GLbitfield64 inputs_read = nir->info.inputs_read;
      nir_foreach_overload(nir, overload) {
         if (overload->impl) {
            nir_foreach_block(overload->impl, remap_vs_attrs, &inputs_read);
         }
      }
      break;
   case MESA_SHADER_GEOMETRY: {
      if (!is_scalar) {
         foreach_list_typed(nir_variable, var, node, &nir->inputs) {
            var->data.driver_location = var->data.location;
         }
      } else {
         /* The GLSL linker will have already matched up GS inputs and
          * the outputs of prior stages.  The driver does extend VS outputs
          * in some cases, but only for legacy OpenGL or Gen4-5 hardware,
          * neither of which offer geometry shader support.  So we can
          * safely ignore that.
          *
          * For SSO pipelines, we use a fixed VUE map layout based on
          * variable locations, so we can rely on rendezvous-by-location
          * to make this work.
          *
          * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's
          * not written by previous stages and shows up via payload magic.
          */
         struct brw_vue_map input_vue_map;
         GLbitfield64 inputs_read =
            nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID;
         brw_compute_vue_map(devinfo, &input_vue_map, inputs_read,
                             nir->info.separate_shader);

         /* Start with the slot for the variable's base. */
         foreach_list_typed(nir_variable, var, node, &nir->inputs) {
            assert(input_vue_map.varying_to_slot[var->data.location] != -1);
            var->data.driver_location =
               input_vue_map.varying_to_slot[var->data.location];
         }

         /* Inputs are stored in vec4 slots, so use type_size_vec4(). */
         nir_lower_io(nir, nir_var_shader_in, type_size_vec4);
      }
      break;
   }
   case MESA_SHADER_FRAGMENT:
      assert(is_scalar);
      nir_assign_var_locations(&nir->inputs, &nir->num_inputs,
                               type_size_scalar);
      break;
   case MESA_SHADER_COMPUTE:
      /* Compute shaders have no inputs. */
      assert(exec_list_is_empty(&nir->inputs));
      break;
   default:
      unreachable("unsupported shader stage");
   }
}

static void
brw_nir_lower_outputs(nir_shader *nir, bool is_scalar)
{
   switch (nir->stage) {
   case MESA_SHADER_VERTEX:
   case MESA_SHADER_GEOMETRY:
      if (is_scalar) {
         nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
                                  type_size_vec4_times_4);
         nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4);
      } else {
         nir_foreach_variable(var, &nir->outputs)
            var->data.driver_location = var->data.location;
      }
      break;
   case MESA_SHADER_FRAGMENT:
      nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
                               type_size_scalar);
      break;
   case MESA_SHADER_COMPUTE:
      /* Compute shaders have no outputs. */
      assert(exec_list_is_empty(&nir->outputs));
      break;
   default:
      unreachable("unsupported shader stage");
   }
}

static bool
should_clone_nir()
{
   static int should_clone = -1;
   if (should_clone < 0)
      should_clone = brw_env_var_as_boolean("NIR_TEST_CLONE", false);

   return should_clone;
}

#define _OPT(do_pass) (({                                            \
   bool this_progress = true;                                        \
   do_pass                                                           \
   nir_validate_shader(nir);                                         \
   if (should_clone_nir()) {                                         \
      nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \
      ralloc_free(nir);                                              \
      nir = clone;                                                   \
   }                                                                 \
   this_progress;                                                    \
}))

#define OPT(pass, ...) _OPT(                   \
   nir_metadata_set_validation_flag(nir);      \
   this_progress = pass(nir, ##__VA_ARGS__);   \
   if (this_progress) {                        \
      progress = true;                         \
      nir_metadata_check_validation_flag(nir); \
   }                                           \
)

#define OPT_V(pass, ...) _OPT( \
   pass(nir, ##__VA_ARGS__);   \
)
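
/* Example: OPT(nir_copy_prop) expands to a statement expression that
 * marks the shader's metadata, runs nir_copy_prop(nir), and -- if the
 * pass reports progress -- sets the enclosing 'progress' variable and
 * checks that the pass invalidated its metadata correctly.  Every pass
 * is followed by nir_validate_shader(), and, when the NIR_TEST_CLONE
 * environment variable is set, by replacing the shader with a
 * nir_shader_clone() copy to exercise the cloning code.  OPT_V is the
 * variant for passes that return void and so cannot report progress.
 */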

static nir_shader *
nir_optimize(nir_shader *nir, bool is_scalar)
{
   bool progress;
   do {
      progress = false;
      OPT_V(nir_lower_vars_to_ssa);

      if (is_scalar) {
         OPT_V(nir_lower_alu_to_scalar);
      }

      OPT(nir_copy_prop);

      if (is_scalar) {
         OPT_V(nir_lower_phis_to_scalar);
      }

      OPT(nir_copy_prop);
      OPT(nir_opt_dce);
      OPT(nir_opt_cse);
      OPT(nir_opt_peephole_select);
      OPT(nir_opt_algebraic);
      OPT(nir_opt_constant_folding);
      OPT(nir_opt_dead_cf);
      OPT(nir_opt_remove_phis);
      OPT(nir_opt_undef);
   } while (progress);

   return nir;
}

/* Does some simple lowering and runs the standard suite of optimizations.
 *
 * This is intended to be called more-or-less directly after you get the
 * shader out of GLSL or some other source.  While it is geared towards
 * i965, it is not at all generator-specific except for the is_scalar flag.
 * Even there, it is safe to call with is_scalar = false for a shader that
 * is intended for the FS backend as long as nir_optimize is called again
 * with is_scalar = true to scalarize everything prior to code gen.
 */
nir_shader *
brw_preprocess_nir(nir_shader *nir, bool is_scalar)
{
   bool progress; /* Written by OPT and OPT_V */
   (void)progress;

   if (nir->stage == MESA_SHADER_GEOMETRY)
      OPT(nir_lower_gs_intrinsics);

   static const nir_lower_tex_options tex_options = {
      .lower_txp = ~0,
   };

   OPT_V(nir_lower_tex, &tex_options);
   OPT(nir_normalize_cubemap_coords);

   OPT(nir_lower_global_vars_to_local);

   OPT(nir_split_var_copies);

   nir = nir_optimize(nir, is_scalar);

   /* Lower a bunch of stuff */
   OPT_V(nir_lower_var_copies);

   /* Get rid of split copies */
   nir = nir_optimize(nir, is_scalar);

   OPT(nir_remove_dead_variables);

   return nir;
}

/* Lowers inputs, outputs, uniforms, and samplers for i965.
 *
 * This function does all of the standard lowering prior to post-processing.
 * The lowering done is highly gen, stage, and backend-specific.  The
 * shader_prog parameter is optional and is used only for lowering sampler
 * derefs and atomics for GLSL shaders.
 */
nir_shader *
brw_lower_nir(nir_shader *nir,
              const struct brw_device_info *devinfo,
              const struct gl_shader_program *shader_prog,
              bool is_scalar)
{
   bool progress; /* Written by OPT and OPT_V */
   (void)progress;

   OPT_V(brw_nir_lower_inputs, devinfo, is_scalar);
   OPT_V(brw_nir_lower_outputs, is_scalar);
   nir_assign_var_locations(&nir->uniforms,
                            &nir->num_uniforms,
                            is_scalar ? type_size_scalar : type_size_vec4);
   OPT_V(nir_lower_io, nir_var_all,
         is_scalar ? type_size_scalar : type_size_vec4);

   if (shader_prog) {
      OPT_V(nir_lower_samplers, shader_prog);
   }

   OPT(nir_lower_system_values);

   if (shader_prog) {
      OPT_V(nir_lower_atomics, shader_prog);
   }

   return nir_optimize(nir, is_scalar);
}
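
/* Taken together, the entry points above and below form a pipeline; a
 * sketch of the expected calling sequence (brw_create_nir() below runs
 * the first two steps, and the backends are expected to run the third
 * right before code generation):
 *
 *    nir = brw_preprocess_nir(nir, is_scalar);
 *    nir = brw_lower_nir(nir, devinfo, shader_prog, is_scalar);
 *    ... backend-specific passes ...
 *    nir = brw_postprocess_nir(nir, devinfo, is_scalar);
 *
 * brw_postprocess_nir() must come last, since it takes the shader out of
 * SSA form.
 */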

/* Prepare the given shader for codegen.
 *
 * This function is intended to be called right before going into the actual
 * backend and is highly backend-specific.  Also, once this function has been
 * called on a shader, it will no longer be in SSA form so most optimizations
 * will not work.
 */
nir_shader *
brw_postprocess_nir(nir_shader *nir,
                    const struct brw_device_info *devinfo,
                    bool is_scalar)
{
   bool debug_enabled =
      (INTEL_DEBUG & intel_debug_flag_for_shader_stage(nir->stage));

   bool progress; /* Written by OPT and OPT_V */
   (void)progress;

   if (devinfo->gen >= 6) {
      /* Try and fuse multiply-adds */
      OPT(brw_nir_opt_peephole_ffma);
   }

   OPT(nir_opt_algebraic_late);

   OPT(nir_lower_locals_to_regs);

   OPT_V(nir_lower_to_source_mods);
   OPT(nir_copy_prop);
   OPT(nir_opt_dce);

   if (unlikely(debug_enabled)) {
      /* Re-index SSA defs so we print more sensible numbers. */
      nir_foreach_overload(nir, overload) {
         if (overload->impl)
            nir_index_ssa_defs(overload->impl);
      }

      fprintf(stderr, "NIR (SSA form) for %s shader:\n",
              _mesa_shader_stage_to_string(nir->stage));
      nir_print_shader(nir, stderr);
   }

   OPT_V(nir_convert_from_ssa, true);

   if (!is_scalar) {
      OPT_V(nir_move_vec_src_uses_to_dest);
      OPT(nir_lower_vec_to_movs);
   }

   /* This is the last pass we run before we start emitting stuff.  It
    * determines when we need to insert boolean resolves on Gen <= 5.  We
    * run it last because it stashes data in instr->pass_flags and we don't
    * want that to be squashed by other NIR passes.
    */
   if (devinfo->gen <= 5)
      brw_nir_analyze_boolean_resolves(nir);

   nir_sweep(nir);

   if (unlikely(debug_enabled)) {
      fprintf(stderr, "NIR (final form) for %s shader:\n",
              _mesa_shader_stage_to_string(nir->stage));
      nir_print_shader(nir, stderr);
   }

   return nir;
}

nir_shader *
brw_create_nir(struct brw_context *brw,
               const struct gl_shader_program *shader_prog,
               const struct gl_program *prog,
               gl_shader_stage stage,
               bool is_scalar)
{
   struct gl_context *ctx = &brw->ctx;
   const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
   const nir_shader_compiler_options *options =
      ctx->Const.ShaderCompilerOptions[stage].NirOptions;
   bool progress;
   nir_shader *nir;

   /* First, lower the GLSL IR or Mesa IR to NIR */
   if (shader_prog) {
      nir = glsl_to_nir(shader_prog, stage, options);
   } else {
      nir = prog_to_nir(prog, options);
      OPT_V(nir_convert_to_ssa); /* turn registers into SSA */
   }
   nir_validate_shader(nir);

   (void)progress;

   nir = brw_preprocess_nir(nir, is_scalar);
   nir = brw_lower_nir(nir, devinfo, shader_prog, is_scalar);

   return nir;
}

enum brw_reg_type
brw_type_for_nir_type(nir_alu_type type)
{
   switch (type) {
   case nir_type_uint:
      return BRW_REGISTER_TYPE_UD;
   case nir_type_bool:
   case nir_type_int:
      return BRW_REGISTER_TYPE_D;
   case nir_type_float:
      return BRW_REGISTER_TYPE_F;
   default:
      unreachable("unknown type");
   }

   return BRW_REGISTER_TYPE_F;
}

/* Returns the glsl_base_type corresponding to a nir_alu_type.
 * This is used by both brw_vec4_nir and brw_fs_nir.
 */
enum glsl_base_type
brw_glsl_base_type_for_nir_type(nir_alu_type type)
{
   switch (type) {
   case nir_type_float:
      return GLSL_TYPE_FLOAT;

   case nir_type_int:
      return GLSL_TYPE_INT;

   case nir_type_uint:
      return GLSL_TYPE_UINT;

   default:
      unreachable("bad type");
   }
}
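
/* A hypothetical use of the two helpers above (the real call sites are in
 * brw_fs_nir and brw_vec4_nir, per the comment on
 * brw_glsl_base_type_for_nir_type): an ALU instruction whose NIR
 * destination type is nir_type_int would be given a BRW_REGISTER_TYPE_D
 * destination register via brw_type_for_nir_type(nir_type_int).
 */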