nir_lower_io.c revision cba6657d8ba57fddf72bbe3c96e8aee997a1527d
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

/*
 * This lowering pass converts references to input/output variables with
 * loads/stores to actual input/output intrinsics.
 */

#include "nir.h"
#include "nir_builder.h"

struct lower_io_state {
   nir_builder builder;
   void *mem_ctx;
   int (*type_size)(const struct glsl_type *type);
   nir_variable_mode modes;
};

void
nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
                         unsigned base_offset,
                         int (*type_size)(const struct glsl_type *))
{
   unsigned location = 0;

   /* There are 32 regular and 32 patch varyings allowed */
   int locations[64][2];
   for (unsigned i = 0; i < 64; i++) {
      for (unsigned j = 0; j < 2; j++)
         locations[i][j] = -1;
   }

   nir_foreach_variable(var, var_list) {
      /*
       * UBOs have their own address spaces, so don't count them towards the
       * number of global uniforms.
       */
      if ((var->data.mode == nir_var_uniform ||
           var->data.mode == nir_var_shader_storage) &&
          var->interface_type != NULL)
         continue;

      /* Make sure we give the same location to varyings packed with
       * ARB_enhanced_layouts.
       */
      int idx = var->data.location - base_offset;
      if (base_offset && idx >= 0) {
         assert(idx < ARRAY_SIZE(locations));

         if (locations[idx][var->data.index] == -1) {
            var->data.driver_location = location;
            locations[idx][var->data.index] = location;

            /* A dvec3 can be packed with a double; we need special handling
             * for this as we are packing across two locations.
             */
            if (glsl_get_base_type(var->type) == GLSL_TYPE_DOUBLE &&
                glsl_get_vector_elements(var->type) == 3) {
               /* Hack around type_size functions that expect vectors to be
                * padded out to vec4.  If a float type is the same size as a
                * double then the type size is padded to vec4; otherwise
                * set the offset to two doubles, which offsets the location
                * past the first two components of the dvec3 that were stored
                * at the previous location.
                */
               unsigned dsize = type_size(glsl_double_type());
               unsigned offset =
                  dsize == type_size(glsl_float_type()) ? dsize : dsize * 2;

               locations[idx + 1][var->data.index] = location + offset;
            }

            location += type_size(var->type);
         } else {
            var->data.driver_location = locations[idx][var->data.index];
         }
      } else {
         var->data.driver_location = location;
         location += type_size(var->type);
      }
   }

   *size = location;
}
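/*
 * Minimal usage sketch (not part of this pass): a driver typically runs
 * nir_assign_var_locations once per variable list before lowering I/O.
 * The type_size_vec4() callback below is hypothetical; it stands in for
 * whatever driver-specific slot-counting function is appropriate, and here
 * just defers to glsl_count_attribute_slots().
 *
 *    static int
 *    type_size_vec4(const struct glsl_type *type)
 *    {
 *       return glsl_count_attribute_slots(type, false);
 *    }
 *
 *    nir_assign_var_locations(&shader->inputs, &shader->num_inputs, 0,
 *                             type_size_vec4);
 *    nir_assign_var_locations(&shader->outputs, &shader->num_outputs, 0,
 *                             type_size_vec4);
 *
 * Passing a non-zero base_offset enables the ARB_enhanced_layouts packing
 * handling above; 0 disables it.
 */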
/**
 * Returns true if we're processing a stage whose inputs are arrays indexed
 * by a vertex number (such as geometry shader inputs).
 */
static bool
is_per_vertex_input(struct lower_io_state *state, nir_variable *var)
{
   gl_shader_stage stage = state->builder.shader->stage;

   return var->data.mode == nir_var_shader_in && !var->data.patch &&
          (stage == MESA_SHADER_TESS_CTRL ||
           stage == MESA_SHADER_TESS_EVAL ||
           stage == MESA_SHADER_GEOMETRY);
}

static bool
is_per_vertex_output(struct lower_io_state *state, nir_variable *var)
{
   gl_shader_stage stage = state->builder.shader->stage;
   return var->data.mode == nir_var_shader_out && !var->data.patch &&
          stage == MESA_SHADER_TESS_CTRL;
}

static nir_ssa_def *
get_io_offset(nir_builder *b, nir_deref_var *deref,
              nir_ssa_def **vertex_index,
              int (*type_size)(const struct glsl_type *))
{
   nir_deref *tail = &deref->deref;

   /* For per-vertex input arrays (i.e. geometry shader inputs), keep the
    * outermost array index separate.  Process the rest normally.
    */
   if (vertex_index != NULL) {
      tail = tail->child;
      assert(tail->deref_type == nir_deref_type_array);
      nir_deref_array *deref_array = nir_deref_as_array(tail);

      nir_ssa_def *vtx = nir_imm_int(b, deref_array->base_offset);
      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
         vtx = nir_iadd(b, vtx, nir_ssa_for_src(b, deref_array->indirect, 1));
      }
      *vertex_index = vtx;
   }

   /* Just emit code and let constant-folding go to town */
   nir_ssa_def *offset = nir_imm_int(b, 0);

   while (tail->child != NULL) {
      const struct glsl_type *parent_type = tail->type;
      tail = tail->child;

      if (tail->deref_type == nir_deref_type_array) {
         nir_deref_array *deref_array = nir_deref_as_array(tail);
         unsigned size = type_size(tail->type);

         offset = nir_iadd(b, offset,
                           nir_imm_int(b, size * deref_array->base_offset));

         if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
            nir_ssa_def *mul =
               nir_imul(b, nir_imm_int(b, size),
                        nir_ssa_for_src(b, deref_array->indirect, 1));

            offset = nir_iadd(b, offset, mul);
         }
      } else if (tail->deref_type == nir_deref_type_struct) {
         nir_deref_struct *deref_struct = nir_deref_as_struct(tail);

         unsigned field_offset = 0;
         for (unsigned i = 0; i < deref_struct->index; i++) {
            field_offset += type_size(glsl_get_struct_field(parent_type, i));
         }
         offset = nir_iadd(b, offset, nir_imm_int(b, field_offset));
      }
   }

   return offset;
}
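/*
 * Worked example (a sketch, assuming a type_size callback that counts one
 * slot per vec4): for a deref chain like
 *
 *    a[i][2]        where 'a' is declared as   out vec4 a[4][3];
 *
 * get_io_offset() emits roughly
 *
 *    offset = 0 + 3 * 0 + 3 * i    (outer array: type_size(vec4[3]) == 3)
 *    offset = offset + 1 * 2       (inner array: type_size(vec4) == 1)
 *
 * i.e. offset == 3 * i + 2 once constant folding collapses the immediates.
 */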
static nir_intrinsic_instr *
lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
           nir_ssa_def *vertex_index, nir_ssa_def *offset)
{
   const nir_shader *nir = state->builder.shader;
   nir_variable *var = intrin->variables[0]->var;
   nir_variable_mode mode = var->data.mode;
   nir_ssa_def *barycentric = NULL;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_shader_in:
      if (nir->stage == MESA_SHADER_FRAGMENT &&
          nir->options->use_interpolated_input_intrinsics &&
          var->data.interpolation != INTERP_MODE_FLAT) {
         assert(vertex_index == NULL);

         nir_intrinsic_op bary_op;
         if (var->data.sample)
            bary_op = nir_intrinsic_load_barycentric_sample;
         else if (var->data.centroid)
            bary_op = nir_intrinsic_load_barycentric_centroid;
         else
            bary_op = nir_intrinsic_load_barycentric_pixel;

         barycentric = nir_load_barycentric(&state->builder, bary_op,
                                            var->data.interpolation);
         op = nir_intrinsic_load_interpolated_input;
      } else {
         op = vertex_index ? nir_intrinsic_load_per_vertex_input :
                             nir_intrinsic_load_input;
      }
      break;
   case nir_var_shader_out:
      op = vertex_index ? nir_intrinsic_load_per_vertex_output :
                          nir_intrinsic_load_output;
      break;
   case nir_var_uniform:
      op = nir_intrinsic_load_uniform;
      break;
   case nir_var_shared:
      op = nir_intrinsic_load_shared;
      break;
   default:
      unreachable("Unknown variable mode");
   }

   nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx, op);
   load->num_components = intrin->num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   if (mode == nir_var_shader_in || mode == nir_var_shader_out)
      nir_intrinsic_set_component(load, var->data.location_frac);

   if (load->intrinsic == nir_intrinsic_load_uniform)
      nir_intrinsic_set_range(load, state->type_size(var->type));

   if (vertex_index) {
      load->src[0] = nir_src_for_ssa(vertex_index);
      load->src[1] = nir_src_for_ssa(offset);
   } else if (barycentric) {
      load->src[0] = nir_src_for_ssa(barycentric);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }

   return load;
}
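/*
 * Illustrative before/after sketch (approximate nir_print output; SSA names
 * and the printed constant-index labels are made up for the example):
 *
 *    vec4 ssa_2 = intrinsic load_var (color) ()
 *
 * becomes, for a smoothly interpolated fragment shader input when
 * use_interpolated_input_intrinsics is set,
 *
 *    vec2 ssa_0 = intrinsic load_barycentric_pixel () (interp_mode)
 *    vec1 ssa_1 = load_const (0x00000000)
 *    vec4 ssa_2 = intrinsic load_interpolated_input (ssa_0, ssa_1) (base, component)
 *
 * whereas a vertex shader input or a flat input becomes a plain load_input
 * with only the offset source.
 */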
static nir_intrinsic_instr *
lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
            nir_ssa_def *vertex_index, nir_ssa_def *offset)
{
   nir_variable *var = intrin->variables[0]->var;
   nir_variable_mode mode = var->data.mode;

   nir_intrinsic_op op;
   if (mode == nir_var_shared) {
      op = nir_intrinsic_store_shared;
   } else {
      assert(mode == nir_var_shader_out);
      op = vertex_index ? nir_intrinsic_store_per_vertex_output :
                          nir_intrinsic_store_output;
   }

   nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx, op);
   store->num_components = intrin->num_components;

   nir_src_copy(&store->src[0], &intrin->src[0], store);

   nir_intrinsic_set_base(store, var->data.driver_location);

   if (mode == nir_var_shader_out)
      nir_intrinsic_set_component(store, var->data.location_frac);

   nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin));

   if (vertex_index)
      store->src[1] = nir_src_for_ssa(vertex_index);

   store->src[vertex_index ? 2 : 1] = nir_src_for_ssa(offset);

   return store;
}

static nir_intrinsic_instr *
lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state,
             nir_ssa_def *offset)
{
   nir_variable *var = intrin->variables[0]->var;

   assert(var->data.mode == nir_var_shared);

   nir_intrinsic_op op;
   switch (intrin->intrinsic) {
#define OP(O) case nir_intrinsic_var_##O: op = nir_intrinsic_shared_##O; break;
   OP(atomic_exchange)
   OP(atomic_comp_swap)
   OP(atomic_add)
   OP(atomic_imin)
   OP(atomic_umin)
   OP(atomic_imax)
   OP(atomic_umax)
   OP(atomic_and)
   OP(atomic_or)
   OP(atomic_xor)
#undef OP
   default:
      unreachable("Invalid atomic");
   }

   nir_intrinsic_instr *atomic =
      nir_intrinsic_instr_create(state->mem_ctx, op);

   nir_intrinsic_set_base(atomic, var->data.driver_location);

   atomic->src[0] = nir_src_for_ssa(offset);
   for (unsigned i = 0;
        i < nir_intrinsic_infos[intrin->intrinsic].num_srcs; i++) {
      nir_src_copy(&atomic->src[i + 1], &intrin->src[i], atomic);
   }

   return atomic;
}

static nir_intrinsic_instr *
lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
                     nir_ssa_def *offset)
{
   nir_variable *var = intrin->variables[0]->var;

   assert(var->data.mode == nir_var_shader_in);

   nir_intrinsic_op bary_op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_interp_var_at_centroid:
      bary_op = nir_intrinsic_load_barycentric_centroid;
      break;
   case nir_intrinsic_interp_var_at_sample:
      bary_op = nir_intrinsic_load_barycentric_at_sample;
      break;
   case nir_intrinsic_interp_var_at_offset:
      bary_op = nir_intrinsic_load_barycentric_at_offset;
      break;
   default:
      unreachable("Bogus interpolateAt() intrinsic.");
   }

   nir_intrinsic_instr *bary_setup =
      nir_intrinsic_instr_create(state->mem_ctx, bary_op);

   nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
   nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);

   if (intrin->intrinsic != nir_intrinsic_interp_var_at_centroid)
      nir_src_copy(&bary_setup->src[0], &intrin->src[0], bary_setup);

   nir_builder_instr_insert(&state->builder, &bary_setup->instr);

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->mem_ctx,
                                 nir_intrinsic_load_interpolated_input);
   load->num_components = intrin->num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   nir_intrinsic_set_component(load, var->data.location_frac);

   load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa);
   load->src[1] = nir_src_for_ssa(offset);

   return load;
}
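/*
 * Illustrative sketch: a GLSL interpolateAtSample(color, s) on the input
 * variable 'color' is lowered to roughly
 *
 *    vec2 ssa_1 = intrinsic load_barycentric_at_sample (ssa_0) (interp_mode)
 *    vec4 ssa_2 = intrinsic load_interpolated_input (ssa_1, ssa_off) (base, component)
 *
 * where ssa_0 is the sample index and ssa_off is the offset computed by
 * get_io_offset().  SSA names and the printed constant-index labels are
 * invented for the example.
 */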
static bool
nir_lower_io_block(nir_block *block,
                   struct lower_io_state *state)
{
   nir_builder *b = &state->builder;
   const nir_shader_compiler_options *options = b->shader->options;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_var:
      case nir_intrinsic_store_var:
      case nir_intrinsic_var_atomic_add:
      case nir_intrinsic_var_atomic_imin:
      case nir_intrinsic_var_atomic_umin:
      case nir_intrinsic_var_atomic_imax:
      case nir_intrinsic_var_atomic_umax:
      case nir_intrinsic_var_atomic_and:
      case nir_intrinsic_var_atomic_or:
      case nir_intrinsic_var_atomic_xor:
      case nir_intrinsic_var_atomic_exchange:
      case nir_intrinsic_var_atomic_comp_swap:
         /* We can lower the io for this nir intrinsic */
         break;
      case nir_intrinsic_interp_var_at_centroid:
      case nir_intrinsic_interp_var_at_sample:
      case nir_intrinsic_interp_var_at_offset:
         /* We can optionally lower these to load_interpolated_input */
         if (options->use_interpolated_input_intrinsics)
            break;
      default:
         /* We can't lower the io for this nir intrinsic, so skip it */
         continue;
      }

      nir_variable *var = intrin->variables[0]->var;
      nir_variable_mode mode = var->data.mode;

      if ((state->modes & mode) == 0)
         continue;

      if (mode != nir_var_shader_in &&
          mode != nir_var_shader_out &&
          mode != nir_var_shared &&
          mode != nir_var_uniform)
         continue;

      b->cursor = nir_before_instr(instr);

      const bool per_vertex =
         is_per_vertex_input(state, var) || is_per_vertex_output(state, var);

      nir_ssa_def *offset;
      nir_ssa_def *vertex_index = NULL;

      offset = get_io_offset(b, intrin->variables[0],
                             per_vertex ? &vertex_index : NULL,
                             state->type_size);

      nir_intrinsic_instr *replacement;

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_var:
         replacement = lower_load(intrin, state, vertex_index, offset);
         break;

      case nir_intrinsic_store_var:
         replacement = lower_store(intrin, state, vertex_index, offset);
         break;

      case nir_intrinsic_var_atomic_add:
      case nir_intrinsic_var_atomic_imin:
      case nir_intrinsic_var_atomic_umin:
      case nir_intrinsic_var_atomic_imax:
      case nir_intrinsic_var_atomic_umax:
      case nir_intrinsic_var_atomic_and:
      case nir_intrinsic_var_atomic_or:
      case nir_intrinsic_var_atomic_xor:
      case nir_intrinsic_var_atomic_exchange:
      case nir_intrinsic_var_atomic_comp_swap:
         assert(vertex_index == NULL);
         replacement = lower_atomic(intrin, state, offset);
         break;

      case nir_intrinsic_interp_var_at_centroid:
      case nir_intrinsic_interp_var_at_sample:
      case nir_intrinsic_interp_var_at_offset:
         assert(vertex_index == NULL);
         replacement = lower_interpolate_at(intrin, state, offset);
         break;

      default:
         continue;
      }

      if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
         if (intrin->dest.is_ssa) {
            nir_ssa_dest_init(&replacement->instr, &replacement->dest,
                              intrin->dest.ssa.num_components,
                              intrin->dest.ssa.bit_size, NULL);
            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                     nir_src_for_ssa(&replacement->dest.ssa));
         } else {
            nir_dest_copy(&replacement->dest, &intrin->dest, state->mem_ctx);
         }
      }

      nir_instr_insert_before(&intrin->instr, &replacement->instr);
      nir_instr_remove(&intrin->instr);
   }

   return true;
}

static void
nir_lower_io_impl(nir_function_impl *impl,
                  nir_variable_mode modes,
                  int (*type_size)(const struct glsl_type *))
{
   struct lower_io_state state;

   nir_builder_init(&state.builder, impl);
   state.mem_ctx = ralloc_parent(impl);
   state.modes = modes;
   state.type_size = type_size;

   nir_foreach_block(block, impl) {
      nir_lower_io_block(block, &state);
   }

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);
}

void
nir_lower_io(nir_shader *shader, nir_variable_mode modes,
             int (*type_size)(const struct glsl_type *))
{
   nir_foreach_function(function, shader) {
      if (function->impl)
         nir_lower_io_impl(function->impl, modes, type_size);
   }
}
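/*
 * Minimal calling sketch (hypothetical driver code, not part of this file):
 * lower inputs, outputs, and uniforms in one pass with a driver-provided
 * type_size callback; type_size_scalar here is a made-up name for that
 * callback.
 *
 *    nir_lower_io(shader,
 *                 nir_var_shader_in | nir_var_shader_out | nir_var_uniform,
 *                 type_size_scalar);
 *
 * Modes not included in the mask are left untouched, so shared-variable
 * loads, stores, and atomics are only lowered when nir_var_shared is
 * requested.
 */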
/**
 * Return the offset source for a load/store intrinsic.
 */
nir_src *
nir_get_io_offset_src(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_uniform:
      return &instr->src[0];
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_store_output:
      return &instr->src[1];
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_store_per_vertex_output:
      return &instr->src[2];
   default:
      return NULL;
   }
}

/**
 * Return the vertex index source for a load/store per_vertex intrinsic.
 */
nir_src *
nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
      return &instr->src[0];
   case nir_intrinsic_store_per_vertex_output:
      return &instr->src[1];
   default:
      return NULL;
   }
}
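/*
 * Usage sketch (hypothetical backend code): a pass that wants the constant
 * offset of an already-lowered I/O intrinsic, when there is one, might do
 *
 *    nir_src *offset_src = nir_get_io_offset_src(intrin);
 *    nir_const_value *const_offset =
 *       offset_src ? nir_src_as_const_value(*offset_src) : NULL;
 *    if (const_offset) {
 *       unsigned slot = nir_intrinsic_base(intrin) + const_offset->u32[0];
 *       ...
 *    }
 *
 * falling back to indirect addressing when the offset source is not a
 * constant.
 */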