/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

/*
 * This lowering pass converts load/store references to input/output
 * variables into the corresponding input/output intrinsics.
 */

#include "nir.h"
#include "nir_builder.h"

struct lower_io_state {
   nir_builder builder;
   int (*type_size)(const struct glsl_type *type);
   nir_variable_mode modes;
   nir_lower_io_options options;
};

void
nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
                         int (*type_size)(const struct glsl_type *))
{
   unsigned location = 0;

   nir_foreach_variable(var, var_list) {
      /*
       * UBOs have their own address spaces, so don't count them towards the
       * number of global uniforms.
       */
      if ((var->data.mode == nir_var_uniform ||
           var->data.mode == nir_var_shader_storage) &&
          var->interface_type != NULL)
         continue;

      var->data.driver_location = location;
      location += type_size(var->type);
   }

   *size = location;
}

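/* A minimal sketch (not part of this pass) of how a driver might provide the
 * type_size callback and assign driver locations before lowering.  The
 * callback name is hypothetical, and the use of glsl_count_attribute_slots()
 * as a vec4-slot counter is an assumption about the driver's slot layout:
 *
 *    static int
 *    example_type_size_vec4(const struct glsl_type *type)
 *    {
 *       return glsl_count_attribute_slots(type, false);
 *    }
 *
 *    nir_assign_var_locations(&shader->outputs, &shader->num_outputs,
 *                             example_type_size_vec4);
 *
 * Every variable then gets a driver_location in whatever units the callback
 * counts (vec4 slots here), and *size receives the total.
 */
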
/**
 * Return true if the given variable is a per-vertex input/output array
 * (such as geometry shader inputs).
 */
bool
nir_is_per_vertex_io(nir_variable *var, gl_shader_stage stage)
{
   if (var->data.patch || !glsl_type_is_array(var->type))
      return false;

   if (var->data.mode == nir_var_shader_in)
      return stage == MESA_SHADER_GEOMETRY ||
             stage == MESA_SHADER_TESS_CTRL ||
             stage == MESA_SHADER_TESS_EVAL;

   if (var->data.mode == nir_var_shader_out)
      return stage == MESA_SHADER_TESS_CTRL;

   return false;
}

static nir_ssa_def *
get_io_offset(nir_builder *b, nir_deref_var *deref,
              nir_ssa_def **vertex_index,
              int (*type_size)(const struct glsl_type *),
              unsigned *component)
{
   nir_deref *tail = &deref->deref;

   /* For per-vertex input arrays (i.e. geometry shader inputs), keep the
    * outermost array index separate.  Process the rest normally.
    */
   if (vertex_index != NULL) {
      tail = tail->child;
      nir_deref_array *deref_array = nir_deref_as_array(tail);

      nir_ssa_def *vtx = nir_imm_int(b, deref_array->base_offset);
      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
         vtx = nir_iadd(b, vtx, nir_ssa_for_src(b, deref_array->indirect, 1));
      }
      *vertex_index = vtx;
   }

   if (deref->var->data.compact) {
      assert(tail->child->deref_type == nir_deref_type_array);
      assert(glsl_type_is_scalar(glsl_without_array(deref->var->type)));
      nir_deref_array *deref_array = nir_deref_as_array(tail->child);
      /* We always lower indirect dereferences for "compact" array vars. */
      assert(deref_array->deref_array_type == nir_deref_array_type_direct);

      const unsigned total_offset = *component + deref_array->base_offset;
      const unsigned slot_offset = total_offset / 4;
      *component = total_offset % 4;
      return nir_imm_int(b, type_size(glsl_vec4_type()) * slot_offset);
   }

   /* Just emit code and let constant-folding go to town */
   nir_ssa_def *offset = nir_imm_int(b, 0);

   while (tail->child != NULL) {
      const struct glsl_type *parent_type = tail->type;
      tail = tail->child;

      if (tail->deref_type == nir_deref_type_array) {
         nir_deref_array *deref_array = nir_deref_as_array(tail);
         unsigned size = type_size(tail->type);

         offset = nir_iadd(b, offset,
                           nir_imm_int(b, size * deref_array->base_offset));

         if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
            nir_ssa_def *mul =
               nir_imul(b, nir_imm_int(b, size),
                        nir_ssa_for_src(b, deref_array->indirect, 1));

            offset = nir_iadd(b, offset, mul);
         }
      } else if (tail->deref_type == nir_deref_type_struct) {
         nir_deref_struct *deref_struct = nir_deref_as_struct(tail);

         unsigned field_offset = 0;
         for (unsigned i = 0; i < deref_struct->index; i++) {
            field_offset += type_size(glsl_get_struct_field(parent_type, i));
         }
         offset = nir_iadd(b, offset, nir_imm_int(b, field_offset));
      }
   }

   return offset;
}

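/* An illustrative example (a sketch, not literal NIR): for a geometry shader
 * input dereference like "gs_in[vertex][i].field", get_io_offset() produces
 *
 *    *vertex_index = vertex                       (constant + any indirect)
 *    offset        = i * type_size(element_type)
 *                    + sum of sizes of fields preceding "field"
 *
 * with all constant parts emitted as immediates for later constant folding.
 * *component is only adjusted for "compact" variables, whose array index is
 * folded into a vec4 slot plus component pair.
 */
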
static nir_intrinsic_instr *
lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
           nir_ssa_def *vertex_index, nir_ssa_def *offset,
           unsigned component)
{
   const nir_shader *nir = state->builder.shader;
   nir_variable *var = intrin->variables[0]->var;
   nir_variable_mode mode = var->data.mode;
   nir_ssa_def *barycentric = NULL;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_shader_in:
      if (nir->stage == MESA_SHADER_FRAGMENT &&
          nir->options->use_interpolated_input_intrinsics &&
          var->data.interpolation != INTERP_MODE_FLAT) {
         assert(vertex_index == NULL);

         nir_intrinsic_op bary_op;
         if (var->data.sample ||
             (state->options & nir_lower_io_force_sample_interpolation))
            bary_op = nir_intrinsic_load_barycentric_sample;
         else if (var->data.centroid)
            bary_op = nir_intrinsic_load_barycentric_centroid;
         else
            bary_op = nir_intrinsic_load_barycentric_pixel;

         barycentric = nir_load_barycentric(&state->builder, bary_op,
                                            var->data.interpolation);
         op = nir_intrinsic_load_interpolated_input;
      } else {
         op = vertex_index ? nir_intrinsic_load_per_vertex_input :
                             nir_intrinsic_load_input;
      }
      break;
   case nir_var_shader_out:
      op = vertex_index ? nir_intrinsic_load_per_vertex_output :
                          nir_intrinsic_load_output;
      break;
   case nir_var_uniform:
      op = nir_intrinsic_load_uniform;
      break;
   case nir_var_shared:
      op = nir_intrinsic_load_shared;
      break;
   default:
      unreachable("Unknown variable mode");
   }

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->builder.shader, op);
   load->num_components = intrin->num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   if (mode == nir_var_shader_in || mode == nir_var_shader_out)
      nir_intrinsic_set_component(load, component);

   if (load->intrinsic == nir_intrinsic_load_uniform)
      nir_intrinsic_set_range(load, state->type_size(var->type));

   if (vertex_index) {
      load->src[0] = nir_src_for_ssa(vertex_index);
      load->src[1] = nir_src_for_ssa(offset);
   } else if (barycentric) {
      load->src[0] = nir_src_for_ssa(barycentric);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }

   return load;
}

static nir_intrinsic_instr *
lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
            nir_ssa_def *vertex_index, nir_ssa_def *offset,
            unsigned component)
{
   nir_variable *var = intrin->variables[0]->var;
   nir_variable_mode mode = var->data.mode;

   nir_intrinsic_op op;
   if (mode == nir_var_shared) {
      op = nir_intrinsic_store_shared;
   } else {
      assert(mode == nir_var_shader_out);
      op = vertex_index ? nir_intrinsic_store_per_vertex_output :
                          nir_intrinsic_store_output;
   }

   nir_intrinsic_instr *store =
      nir_intrinsic_instr_create(state->builder.shader, op);
   store->num_components = intrin->num_components;

   nir_src_copy(&store->src[0], &intrin->src[0], store);

   nir_intrinsic_set_base(store, var->data.driver_location);

   if (mode == nir_var_shader_out)
      nir_intrinsic_set_component(store, component);

   nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin));

   if (vertex_index)
      store->src[1] = nir_src_for_ssa(vertex_index);

   store->src[vertex_index ? 2 : 1] = nir_src_for_ssa(offset);

   return store;
}

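/* Summary of the source layouts produced by the lowering above, matching what
 * nir_get_io_offset_src()/nir_get_io_vertex_index_src() report at the bottom
 * of this file:
 *
 *    load_input(offset)                               (base, component)
 *    load_per_vertex_input(vertex, offset)            (base, component)
 *    load_interpolated_input(barycentric, offset)     (base, component)
 *    load_uniform(offset)                             (base, range)
 *    store_output(value, offset)                      (base, component, wrmask)
 *    store_per_vertex_output(value, vertex, offset)   (base, component, wrmask)
 */
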
static nir_intrinsic_instr *
lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state,
             nir_ssa_def *offset)
{
   nir_variable *var = intrin->variables[0]->var;

   assert(var->data.mode == nir_var_shared);

   nir_intrinsic_op op;
   switch (intrin->intrinsic) {
#define OP(O) case nir_intrinsic_var_##O: op = nir_intrinsic_shared_##O; break;
   OP(atomic_exchange)
   OP(atomic_comp_swap)
   OP(atomic_add)
   OP(atomic_imin)
   OP(atomic_umin)
   OP(atomic_imax)
   OP(atomic_umax)
   OP(atomic_and)
   OP(atomic_or)
   OP(atomic_xor)
#undef OP
   default:
      unreachable("Invalid atomic");
   }

   nir_intrinsic_instr *atomic =
      nir_intrinsic_instr_create(state->builder.shader, op);

   nir_intrinsic_set_base(atomic, var->data.driver_location);

   atomic->src[0] = nir_src_for_ssa(offset);
   for (unsigned i = 0; i < nir_intrinsic_infos[intrin->intrinsic].num_srcs; i++) {
      nir_src_copy(&atomic->src[i + 1], &intrin->src[i], atomic);
   }

   return atomic;
}

static nir_intrinsic_instr *
lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
                     nir_ssa_def *offset, unsigned component)
{
   nir_variable *var = intrin->variables[0]->var;

   assert(var->data.mode == nir_var_shader_in);

   /* Ignore interpolateAt() for flat variables - flat is flat. */
   if (var->data.interpolation == INTERP_MODE_FLAT)
      return lower_load(intrin, state, NULL, offset, component);

   nir_intrinsic_op bary_op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_interp_var_at_centroid:
      bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
                nir_intrinsic_load_barycentric_sample :
                nir_intrinsic_load_barycentric_centroid;
      break;
   case nir_intrinsic_interp_var_at_sample:
      bary_op = nir_intrinsic_load_barycentric_at_sample;
      break;
   case nir_intrinsic_interp_var_at_offset:
      bary_op = nir_intrinsic_load_barycentric_at_offset;
      break;
   default:
      unreachable("Bogus interpolateAt() intrinsic.");
   }

   nir_intrinsic_instr *bary_setup =
      nir_intrinsic_instr_create(state->builder.shader, bary_op);

   nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
   nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);

   if (intrin->intrinsic != nir_intrinsic_interp_var_at_centroid)
      nir_src_copy(&bary_setup->src[0], &intrin->src[0], bary_setup);

   nir_builder_instr_insert(&state->builder, &bary_setup->instr);

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->builder.shader,
                                 nir_intrinsic_load_interpolated_input);
   load->num_components = intrin->num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   nir_intrinsic_set_component(load, component);

   load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa);
   load->src[1] = nir_src_for_ssa(offset);

   return load;
}

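/* For example (a sketch of the resulting NIR, not literal printer output), a
 * GLSL call such as interpolateAtSample(color, s) arrives here as
 * interp_var_at_sample and is rewritten roughly as:
 *
 *    vec2 bary = load_barycentric_at_sample(s)          (interp_mode=...)
 *    vec4 val  = load_interpolated_input(bary, offset)  (base, component)
 *
 * Flat inputs skip the barycentric setup entirely and fall back to a plain
 * load, as handled at the top of lower_interpolate_at().
 */
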
static bool
nir_lower_io_block(nir_block *block,
                   struct lower_io_state *state)
{
   nir_builder *b = &state->builder;
   const nir_shader_compiler_options *options = b->shader->options;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_var:
      case nir_intrinsic_store_var:
      case nir_intrinsic_var_atomic_add:
      case nir_intrinsic_var_atomic_imin:
      case nir_intrinsic_var_atomic_umin:
      case nir_intrinsic_var_atomic_imax:
      case nir_intrinsic_var_atomic_umax:
      case nir_intrinsic_var_atomic_and:
      case nir_intrinsic_var_atomic_or:
      case nir_intrinsic_var_atomic_xor:
      case nir_intrinsic_var_atomic_exchange:
      case nir_intrinsic_var_atomic_comp_swap:
         /* We can lower the io for this nir intrinsic */
         break;
      case nir_intrinsic_interp_var_at_centroid:
      case nir_intrinsic_interp_var_at_sample:
      case nir_intrinsic_interp_var_at_offset:
         /* We can optionally lower these to load_interpolated_input */
         if (options->use_interpolated_input_intrinsics)
            break;
         /* fallthrough */
      default:
         /* We can't lower the io for this nir intrinsic, so skip it */
         continue;
      }

      nir_variable *var = intrin->variables[0]->var;
      nir_variable_mode mode = var->data.mode;

      if ((state->modes & mode) == 0)
         continue;

      if (mode != nir_var_shader_in &&
          mode != nir_var_shader_out &&
          mode != nir_var_shared &&
          mode != nir_var_uniform)
         continue;

      b->cursor = nir_before_instr(instr);

      const bool per_vertex = nir_is_per_vertex_io(var, b->shader->stage);

      nir_ssa_def *offset;
      nir_ssa_def *vertex_index = NULL;
      unsigned component_offset = var->data.location_frac;

      offset = get_io_offset(b, intrin->variables[0],
                             per_vertex ? &vertex_index : NULL,
                             state->type_size, &component_offset);

      nir_intrinsic_instr *replacement;

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_var:
         replacement = lower_load(intrin, state, vertex_index, offset,
                                  component_offset);
         break;

      case nir_intrinsic_store_var:
         replacement = lower_store(intrin, state, vertex_index, offset,
                                   component_offset);
         break;

      case nir_intrinsic_var_atomic_add:
      case nir_intrinsic_var_atomic_imin:
      case nir_intrinsic_var_atomic_umin:
      case nir_intrinsic_var_atomic_imax:
      case nir_intrinsic_var_atomic_umax:
      case nir_intrinsic_var_atomic_and:
      case nir_intrinsic_var_atomic_or:
      case nir_intrinsic_var_atomic_xor:
      case nir_intrinsic_var_atomic_exchange:
      case nir_intrinsic_var_atomic_comp_swap:
         assert(vertex_index == NULL);
         replacement = lower_atomic(intrin, state, offset);
         break;

      case nir_intrinsic_interp_var_at_centroid:
      case nir_intrinsic_interp_var_at_sample:
      case nir_intrinsic_interp_var_at_offset:
         assert(vertex_index == NULL);
         replacement = lower_interpolate_at(intrin, state, offset,
                                            component_offset);
         break;

      default:
         continue;
      }

      if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
         if (intrin->dest.is_ssa) {
            nir_ssa_dest_init(&replacement->instr, &replacement->dest,
                              intrin->dest.ssa.num_components,
                              intrin->dest.ssa.bit_size, NULL);
            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                     nir_src_for_ssa(&replacement->dest.ssa));
         } else {
            nir_dest_copy(&replacement->dest, &intrin->dest, &intrin->instr);
         }
      }

      nir_instr_insert_before(&intrin->instr, &replacement->instr);
      nir_instr_remove(&intrin->instr);
   }

   return true;
}

static void
nir_lower_io_impl(nir_function_impl *impl,
                  nir_variable_mode modes,
                  int (*type_size)(const struct glsl_type *),
                  nir_lower_io_options options)
{
   struct lower_io_state state;

   nir_builder_init(&state.builder, impl);
   state.modes = modes;
   state.type_size = type_size;
   state.options = options;

   nir_foreach_block(block, impl) {
      nir_lower_io_block(block, &state);
   }

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);
}

void
nir_lower_io(nir_shader *shader, nir_variable_mode modes,
             int (*type_size)(const struct glsl_type *),
             nir_lower_io_options options)
{
   nir_foreach_function(function, shader) {
      if (function->impl) {
         nir_lower_io_impl(function->impl, modes, type_size, options);
      }
   }
}

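/* A minimal sketch of how a backend might invoke this pass; the mode mask and
 * the example_type_size_vec4 callback (see the sketch near the top of this
 * file) are illustrative assumptions, not requirements:
 *
 *    nir_lower_io(shader, nir_var_shader_in | nir_var_shader_out,
 *                 example_type_size_vec4, (nir_lower_io_options)0);
 *
 * Afterwards, load_var/store_var on the selected modes are gone and the
 * backend consumes load_input/store_output/etc. intrinsics whose base,
 * component and offset were computed with the same callback.
 */
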
/**
 * Return the offset source for a load/store intrinsic.
 */
nir_src *
nir_get_io_offset_src(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_uniform:
      return &instr->src[0];
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_store_output:
      return &instr->src[1];
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_store_per_vertex_output:
      return &instr->src[2];
   default:
      return NULL;
   }
}

/**
 * Return the vertex index source for a load/store per_vertex intrinsic.
 */
nir_src *
nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
      return &instr->src[0];
   case nir_intrinsic_store_per_vertex_output:
      return &instr->src[1];
   default:
      return NULL;
   }
}

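/* Example (a sketch, assuming constant folding has already run on the offset)
 * of backend-side use of these helpers when walking the lowered intrinsics:
 *
 *    nir_src *off = nir_get_io_offset_src(intrin);
 *    nir_const_value *c = off ? nir_src_as_const_value(*off) : NULL;
 *    if (c)
 *       ... use nir_intrinsic_base(intrin) + c->u32[0] as a constant slot ...
 *    else
 *       ... fall back to an indirect addressing path ...
 */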