si_shader.h revision 4636d9be4a40138d0a10cadcb1b63eea89d95e34
1/* 2 * Copyright 2012 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Tom Stellard <thomas.stellard@amd.com> 25 * Michel Dänzer <michel.daenzer@amd.com> 26 * Christian König <christian.koenig@amd.com> 27 */ 28 29/* How linking shader inputs and outputs between vertex, tessellation, and 30 * geometry shaders works. 31 * 32 * Inputs and outputs between shaders are stored in a buffer. This buffer 33 * lives in LDS (typical case for tessellation), but it can also live 34 * in memory (ESGS). Each input or output has a fixed location within a vertex. 35 * The highest used input or output determines the stride between vertices. 36 * 37 * Since GS and tessellation are only possible in the OpenGL core profile, 38 * only these semantics are valid for per-vertex data: 39 * 40 * Name Location 41 * 42 * POSITION 0 43 * PSIZE 1 44 * CLIPDIST0..1 2..3 45 * CULLDIST0..1 (not implemented) 46 * GENERIC0..31 4..35 47 * 48 * For example, a shader only writing GENERIC0 has the output stride of 5. 49 * 50 * Only these semantics are valid for per-patch data: 51 * 52 * Name Location 53 * 54 * TESSOUTER 0 55 * TESSINNER 1 56 * PATCH0..29 2..31 57 * 58 * That's how independent shaders agree on input and output locations. 59 * The si_shader_io_get_unique_index function assigns the locations. 60 * 61 * For tessellation, other required information for calculating the input and 62 * output addresses like the vertex stride, the patch stride, and the offsets 63 * where per-vertex and per-patch data start, is passed to the shader via 64 * user data SGPRs. The offsets and strides are calculated at draw time and 65 * aren't available at compile time. 66 */ 67 68#ifndef SI_SHADER_H 69#define SI_SHADER_H 70 71#include <llvm-c/Core.h> /* LLVMModuleRef */ 72#include "tgsi/tgsi_scan.h" 73#include "si_state.h" 74 75struct radeon_shader_binary; 76struct radeon_shader_reloc; 77 78#define SI_SGPR_RW_BUFFERS 0 /* rings (& stream-out, VS only) */ 79#define SI_SGPR_CONST_BUFFERS 2 80#define SI_SGPR_SAMPLERS 4 /* images & sampler states interleaved */ 81/* TODO: gap */ 82#define SI_SGPR_VERTEX_BUFFERS 8 /* VS only */ 83#define SI_SGPR_BASE_VERTEX 10 /* VS only */ 84#define SI_SGPR_START_INSTANCE 11 /* VS only */ 85#define SI_SGPR_VS_STATE_BITS 12 /* VS(VS) only */ 86#define SI_SGPR_LS_OUT_LAYOUT 12 /* VS(LS) only */ 87#define SI_SGPR_TCS_OUT_OFFSETS 8 /* TCS & TES only */ 88#define SI_SGPR_TCS_OUT_LAYOUT 9 /* TCS & TES only */ 89#define SI_SGPR_TCS_IN_LAYOUT 10 /* TCS only */ 90#define SI_SGPR_ALPHA_REF 8 /* PS only */ 91 92#define SI_VS_NUM_USER_SGPR 13 /* API VS */ 93#define SI_ES_NUM_USER_SGPR 12 /* API VS */ 94#define SI_LS_NUM_USER_SGPR 13 /* API VS */ 95#define SI_TCS_NUM_USER_SGPR 11 96#define SI_TES_NUM_USER_SGPR 10 97#define SI_GS_NUM_USER_SGPR 8 98#define SI_GSCOPY_NUM_USER_SGPR 4 99#define SI_PS_NUM_USER_SGPR 9 100 101/* LLVM function parameter indices */ 102#define SI_PARAM_RW_BUFFERS 0 103#define SI_PARAM_CONST_BUFFERS 1 104#define SI_PARAM_SAMPLERS 2 105#define SI_PARAM_UNUSED 3 /* TODO: use */ 106 107/* VS only parameters */ 108#define SI_PARAM_VERTEX_BUFFERS 4 109#define SI_PARAM_BASE_VERTEX 5 110#define SI_PARAM_START_INSTANCE 6 111/* [0] = clamp vertex color */ 112#define SI_PARAM_VS_STATE_BITS 7 113/* the other VS parameters are assigned dynamically */ 114 115/* Offsets where TCS outputs and TCS patch outputs live in LDS: 116 * [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32 117 * [16:31] = TCS output patch0 offset for per-patch / 16, max = NUM_PATCHES*32*32* + 32*32 118 */ 119#define SI_PARAM_TCS_OUT_OFFSETS 4 /* for TCS & TES */ 120 121/* Layout of TCS outputs / TES inputs: 122 * [0:12] = stride between output patches in dwords, num_outputs * num_vertices * 4, max = 32*32*4 123 * [13:20] = stride between output vertices in dwords = num_inputs * 4, max = 32*4 124 * [26:31] = gl_PatchVerticesIn, max = 32 125 */ 126#define SI_PARAM_TCS_OUT_LAYOUT 5 /* for TCS & TES */ 127 128/* Layout of LS outputs / TCS inputs 129 * [0:12] = stride between patches in dwords = num_inputs * num_vertices * 4, max = 32*32*4 130 * [13:20] = stride between vertices in dwords = num_inputs * 4, max = 32*4 131 */ 132#define SI_PARAM_TCS_IN_LAYOUT 6 /* TCS only */ 133#define SI_PARAM_LS_OUT_LAYOUT 7 /* same value as TCS_IN_LAYOUT, LS only */ 134 135/* TCS only parameters. */ 136#define SI_PARAM_TESS_FACTOR_OFFSET 7 137#define SI_PARAM_PATCH_ID 8 138#define SI_PARAM_REL_IDS 9 139 140/* GS only parameters */ 141#define SI_PARAM_GS2VS_OFFSET 4 142#define SI_PARAM_GS_WAVE_ID 5 143#define SI_PARAM_VTX0_OFFSET 6 144#define SI_PARAM_VTX1_OFFSET 7 145#define SI_PARAM_PRIMITIVE_ID 8 146#define SI_PARAM_VTX2_OFFSET 9 147#define SI_PARAM_VTX3_OFFSET 10 148#define SI_PARAM_VTX4_OFFSET 11 149#define SI_PARAM_VTX5_OFFSET 12 150#define SI_PARAM_GS_INSTANCE_ID 13 151 152/* PS only parameters */ 153#define SI_PARAM_ALPHA_REF 4 154#define SI_PARAM_PRIM_MASK 5 155#define SI_PARAM_PERSP_SAMPLE 6 156#define SI_PARAM_PERSP_CENTER 7 157#define SI_PARAM_PERSP_CENTROID 8 158#define SI_PARAM_PERSP_PULL_MODEL 9 159#define SI_PARAM_LINEAR_SAMPLE 10 160#define SI_PARAM_LINEAR_CENTER 11 161#define SI_PARAM_LINEAR_CENTROID 12 162#define SI_PARAM_LINE_STIPPLE_TEX 13 163#define SI_PARAM_POS_X_FLOAT 14 164#define SI_PARAM_POS_Y_FLOAT 15 165#define SI_PARAM_POS_Z_FLOAT 16 166#define SI_PARAM_POS_W_FLOAT 17 167#define SI_PARAM_FRONT_FACE 18 168#define SI_PARAM_ANCILLARY 19 169#define SI_PARAM_SAMPLE_COVERAGE 20 170#define SI_PARAM_POS_FIXED_PT 21 171 172#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 9) /* +8 for COLOR[0..1] */ 173 174struct si_shader; 175 176/* A shader selector is a gallium CSO and contains shader variants and 177 * binaries for one TGSI program. This can be shared by multiple contexts. 178 */ 179struct si_shader_selector { 180 pipe_mutex mutex; 181 struct si_shader *first_variant; /* immutable after the first variant */ 182 struct si_shader *last_variant; /* mutable */ 183 184 struct tgsi_token *tokens; 185 struct pipe_stream_output_info so; 186 struct tgsi_shader_info info; 187 188 /* PIPE_SHADER_[VERTEX|FRAGMENT|...] */ 189 unsigned type; 190 191 /* GS parameters. */ 192 unsigned esgs_itemsize; 193 unsigned gs_input_verts_per_prim; 194 unsigned gs_output_prim; 195 unsigned gs_max_out_vertices; 196 unsigned gs_num_invocations; 197 unsigned max_gs_stream; /* count - 1 */ 198 unsigned gsvs_vertex_size; 199 unsigned max_gsvs_emit_size; 200 201 /* PS parameters. */ 202 unsigned color_attr_index[2]; 203 unsigned db_shader_control; 204 /* Set 0xf or 0x0 (4 bits) per each written output. 205 * ANDed with spi_shader_col_format. 206 */ 207 unsigned colors_written_4bit; 208 209 /* masks of "get_unique_index" bits */ 210 uint64_t outputs_written; 211 uint32_t patch_outputs_written; 212}; 213 214/* Valid shader configurations: 215 * 216 * API shaders VS | TCS | TES | GS |pass| PS 217 * are compiled as: | | | |thru| 218 * | | | | | 219 * Only VS & PS: VS | -- | -- | -- | -- | PS 220 * With GS: ES | -- | -- | GS | VS | PS 221 * With Tessel.: LS | HS | VS | -- | -- | PS 222 * With both: LS | HS | ES | GS | VS | PS 223 */ 224 225/* Common VS bits between the shader key and the prolog key. */ 226struct si_vs_prolog_bits { 227 unsigned instance_divisors[SI_NUM_VERTEX_BUFFERS]; 228}; 229 230/* Common VS bits between the shader key and the epilog key. */ 231struct si_vs_epilog_bits { 232 unsigned export_prim_id:1; /* when PS needs it and GS is disabled */ 233 /* TODO: 234 * - skip clipdist, culldist (including clipvertex code) exports based 235 * on which clip_plane_enable bits are set 236 * - skip layer, viewport, clipdist, and culldist parameter exports 237 * if PS doesn't read them 238 */ 239}; 240 241/* Common TCS bits between the shader key and the epilog key. */ 242struct si_tcs_epilog_bits { 243 unsigned prim_mode:3; 244}; 245 246/* Common PS bits between the shader key and the prolog key. */ 247struct si_ps_prolog_bits { 248 unsigned color_two_side:1; 249 /* TODO: add a flatshade bit that skips interpolation for colors */ 250 unsigned poly_stipple:1; 251 unsigned force_persample_interp:1; 252 /* TODO: 253 * - add force_center_interp if MSAA is disabled and centroid or 254 * sample are present 255 * - add force_center_interp_bc_optimize to force center interpolation 256 * based on the bc_optimize SGPR bit if MSAA is enabled, centroid is 257 * present and sample isn't present. 258 */ 259}; 260 261/* Common PS bits between the shader key and the epilog key. */ 262struct si_ps_epilog_bits { 263 unsigned spi_shader_col_format; 264 unsigned color_is_int8:8; 265 unsigned last_cbuf:3; 266 unsigned alpha_func:3; 267 unsigned alpha_to_one:1; 268 unsigned poly_line_smoothing:1; 269 unsigned clamp_color:1; 270}; 271 272union si_shader_part_key { 273 struct { 274 struct si_vs_prolog_bits states; 275 unsigned num_input_sgprs:5; 276 unsigned last_input:4; 277 } vs_prolog; 278 struct { 279 struct si_vs_epilog_bits states; 280 unsigned prim_id_param_offset:5; 281 } vs_epilog; 282 struct { 283 struct si_tcs_epilog_bits states; 284 } tcs_epilog; 285 struct { 286 struct si_ps_prolog_bits states; 287 unsigned num_input_sgprs:5; 288 unsigned num_input_vgprs:5; 289 /* Color interpolation and two-side color selection. */ 290 unsigned colors_read:8; /* color input components read */ 291 unsigned num_interp_inputs:5; /* BCOLOR is at this location */ 292 unsigned face_vgpr_index:5; 293 char color_attr_index[2]; 294 char color_interp_vgpr_index[2]; /* -1 == constant */ 295 } ps_prolog; 296 struct { 297 struct si_ps_epilog_bits states; 298 unsigned colors_written:8; 299 unsigned writes_z:1; 300 unsigned writes_stencil:1; 301 unsigned writes_samplemask:1; 302 } ps_epilog; 303}; 304 305union si_shader_key { 306 struct { 307 struct si_ps_prolog_bits prolog; 308 struct si_ps_epilog_bits epilog; 309 } ps; 310 struct { 311 struct si_vs_prolog_bits prolog; 312 struct si_vs_epilog_bits epilog; 313 unsigned as_es:1; /* export shader */ 314 unsigned as_ls:1; /* local shader */ 315 } vs; 316 struct { 317 struct si_tcs_epilog_bits epilog; 318 } tcs; /* tessellation control shader */ 319 struct { 320 struct si_vs_epilog_bits epilog; /* same as VS */ 321 unsigned as_es:1; /* export shader */ 322 } tes; /* tessellation evaluation shader */ 323}; 324 325struct si_shader_config { 326 unsigned num_sgprs; 327 unsigned num_vgprs; 328 unsigned lds_size; 329 unsigned spi_ps_input_ena; 330 unsigned spi_ps_input_addr; 331 unsigned float_mode; 332 unsigned scratch_bytes_per_wave; 333 unsigned rsrc1; 334 unsigned rsrc2; 335}; 336 337struct si_shader { 338 struct si_shader_selector *selector; 339 struct si_shader *next_variant; 340 341 struct si_shader_part *prolog; 342 struct si_shader_part *epilog; 343 344 struct si_shader *gs_copy_shader; 345 struct si_pm4_state *pm4; 346 struct r600_resource *bo; 347 struct r600_resource *scratch_bo; 348 union si_shader_key key; 349 struct radeon_shader_binary binary; 350 struct si_shader_config config; 351 352 ubyte num_input_sgprs; 353 ubyte num_input_vgprs; 354 char face_vgpr_index; 355 356 unsigned vs_output_param_offset[PIPE_MAX_SHADER_OUTPUTS]; 357 bool uses_instanceid; 358 unsigned nr_pos_exports; 359 unsigned nr_param_exports; 360}; 361 362struct si_shader_part { 363 struct si_shader_part *next; 364 union si_shader_part_key key; 365 struct radeon_shader_binary binary; 366 struct si_shader_config config; 367}; 368 369static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx) 370{ 371 if (sctx->gs_shader.cso) 372 return &sctx->gs_shader.cso->info; 373 else if (sctx->tes_shader.cso) 374 return &sctx->tes_shader.cso->info; 375 else if (sctx->vs_shader.cso) 376 return &sctx->vs_shader.cso->info; 377 else 378 return NULL; 379} 380 381static inline struct si_shader* si_get_vs_state(struct si_context *sctx) 382{ 383 if (sctx->gs_shader.current) 384 return sctx->gs_shader.current->gs_copy_shader; 385 else if (sctx->tes_shader.current) 386 return sctx->tes_shader.current; 387 else 388 return sctx->vs_shader.current; 389} 390 391static inline bool si_vs_exports_prim_id(struct si_shader *shader) 392{ 393 if (shader->selector->type == PIPE_SHADER_VERTEX) 394 return shader->key.vs.epilog.export_prim_id; 395 else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) 396 return shader->key.tes.epilog.export_prim_id; 397 else 398 return false; 399} 400 401/* si_shader.c */ 402int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, 403 struct si_shader *shader, 404 struct pipe_debug_callback *debug); 405void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f); 406int si_compile_llvm(struct si_screen *sscreen, 407 struct radeon_shader_binary *binary, 408 struct si_shader_config *conf, 409 LLVMTargetMachineRef tm, 410 LLVMModuleRef mod, 411 struct pipe_debug_callback *debug, 412 unsigned processor, 413 const char *name); 414void si_shader_destroy(struct si_shader *shader); 415unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index); 416int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader); 417void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, 418 struct pipe_debug_callback *debug, unsigned processor); 419void si_shader_apply_scratch_relocs(struct si_context *sctx, 420 struct si_shader *shader, 421 uint64_t scratch_va); 422void si_shader_binary_read_config(struct radeon_shader_binary *binary, 423 struct si_shader_config *conf, 424 unsigned symbol_offset); 425 426#endif 427