/* si_shader.h revision 5bc871a4caf97f4e07830ea463f445994c8d13b5 */
1/* 2 * Copyright 2012 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Tom Stellard <thomas.stellard@amd.com> 25 * Michel Dänzer <michel.daenzer@amd.com> 26 * Christian König <christian.koenig@amd.com> 27 */ 28 29/* How linking tessellation shader inputs and outputs works. 30 * 31 * Inputs and outputs between shaders are stored in a buffer. This buffer 32 * lives in LDS (typical case for tessellation), but it can also live 33 * in memory. Each input or output has a fixed location within a vertex. 34 * The highest used input or output determines the stride between vertices. 
35 * 36 * Since tessellation is only enabled in the OpenGL core profile, 37 * only these semantics are valid for per-vertex data: 38 * 39 * Name Location 40 * 41 * POSITION 0 42 * PSIZE 1 43 * CLIPDIST0..1 2..3 44 * CULLDIST0..1 (not implemented) 45 * GENERIC0..31 4..35 46 * 47 * For example, a shader only writing GENERIC0 has the output stride of 5. 48 * 49 * Only these semantics are valid for per-patch data: 50 * 51 * Name Location 52 * 53 * TESSOUTER 0 54 * TESSINNER 1 55 * PATCH0..29 2..31 56 * 57 * That's how independent shaders agree on input and output locations. 58 * The si_shader_io_get_unique_index function assigns the locations. 59 * 60 * Other required information for calculating the input and output addresses 61 * like the vertex stride, the patch stride, and the offsets where per-vertex 62 * and per-patch data start, is passed to the shader via user data SGPRs. 63 * The offsets and strides are calculated at draw time and aren't available 64 * at compile time. 65 * 66 * The same approach should be used for linking ES->GS in the future. 
67 */ 68 69#ifndef SI_SHADER_H 70#define SI_SHADER_H 71 72#include <llvm-c/Core.h> /* LLVMModuleRef */ 73#include "tgsi/tgsi_scan.h" 74#include "si_state.h" 75 76struct radeon_shader_binary; 77struct radeon_shader_reloc; 78 79#define SI_SGPR_RW_BUFFERS 0 /* rings (& stream-out, VS only) */ 80#define SI_SGPR_CONST 2 81#define SI_SGPR_SAMPLER 4 82#define SI_SGPR_RESOURCE 6 83#define SI_SGPR_VERTEX_BUFFER 8 /* VS only */ 84#define SI_SGPR_BASE_VERTEX 10 /* VS only */ 85#define SI_SGPR_START_INSTANCE 11 /* VS only */ 86#define SI_SGPR_VS_STATE_BITS 12 /* VS(VS) only */ 87#define SI_SGPR_LS_OUT_LAYOUT 12 /* VS(LS) only */ 88#define SI_SGPR_TCS_OUT_OFFSETS 8 /* TCS & TES only */ 89#define SI_SGPR_TCS_OUT_LAYOUT 9 /* TCS & TES only */ 90#define SI_SGPR_TCS_IN_LAYOUT 10 /* TCS only */ 91#define SI_SGPR_ALPHA_REF 8 /* PS only */ 92#define SI_SGPR_PS_STATE_BITS 9 /* PS only */ 93 94#define SI_VS_NUM_USER_SGPR 13 /* API VS */ 95#define SI_ES_NUM_USER_SGPR 12 /* API VS */ 96#define SI_LS_NUM_USER_SGPR 13 /* API VS */ 97#define SI_TCS_NUM_USER_SGPR 11 98#define SI_TES_NUM_USER_SGPR 10 99#define SI_GS_NUM_USER_SGPR 8 100#define SI_GSCOPY_NUM_USER_SGPR 4 101#define SI_PS_NUM_USER_SGPR 10 102 103/* LLVM function parameter indices */ 104#define SI_PARAM_RW_BUFFERS 0 105#define SI_PARAM_CONST 1 106#define SI_PARAM_SAMPLER 2 107#define SI_PARAM_RESOURCE 3 108 109/* VS only parameters */ 110#define SI_PARAM_VERTEX_BUFFER 4 111#define SI_PARAM_BASE_VERTEX 5 112#define SI_PARAM_START_INSTANCE 6 113/* [0] = clamp vertex color */ 114#define SI_PARAM_VS_STATE_BITS 7 115/* the other VS parameters are assigned dynamically */ 116 117/* Offsets where TCS outputs and TCS patch outputs live in LDS: 118 * [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32 119 * [16:31] = TCS output patch0 offset for per-patch / 16, max = NUM_PATCHES*32*32* + 32*32 120 */ 121#define SI_PARAM_TCS_OUT_OFFSETS 4 /* for TCS & TES */ 122 123/* Layout of TCS outputs / TES inputs: 124 * [0:12] = stride 
between output patches in dwords, num_outputs * num_vertices * 4, max = 32*32*4 125 * [13:20] = stride between output vertices in dwords = num_inputs * 4, max = 32*4 126 * [26:31] = gl_PatchVerticesIn, max = 32 127 */ 128#define SI_PARAM_TCS_OUT_LAYOUT 5 /* for TCS & TES */ 129 130/* Layout of LS outputs / TCS inputs 131 * [0:12] = stride between patches in dwords = num_inputs * num_vertices * 4, max = 32*32*4 132 * [13:20] = stride between vertices in dwords = num_inputs * 4, max = 32*4 133 */ 134#define SI_PARAM_TCS_IN_LAYOUT 6 /* TCS only */ 135#define SI_PARAM_LS_OUT_LAYOUT 7 /* same value as TCS_IN_LAYOUT, LS only */ 136 137/* TCS only parameters. */ 138#define SI_PARAM_TESS_FACTOR_OFFSET 7 139#define SI_PARAM_PATCH_ID 8 140#define SI_PARAM_REL_IDS 9 141 142/* GS only parameters */ 143#define SI_PARAM_GS2VS_OFFSET 4 144#define SI_PARAM_GS_WAVE_ID 5 145#define SI_PARAM_VTX0_OFFSET 6 146#define SI_PARAM_VTX1_OFFSET 7 147#define SI_PARAM_PRIMITIVE_ID 8 148#define SI_PARAM_VTX2_OFFSET 9 149#define SI_PARAM_VTX3_OFFSET 10 150#define SI_PARAM_VTX4_OFFSET 11 151#define SI_PARAM_VTX5_OFFSET 12 152#define SI_PARAM_GS_INSTANCE_ID 13 153 154/* PS only parameters */ 155#define SI_PARAM_ALPHA_REF 4 156/* Bits: 157 * 0: force_persample_interp 158 */ 159#define SI_PARAM_PS_STATE_BITS 5 160#define SI_PARAM_PRIM_MASK 6 161#define SI_PARAM_PERSP_SAMPLE 7 162#define SI_PARAM_PERSP_CENTER 8 163#define SI_PARAM_PERSP_CENTROID 9 164#define SI_PARAM_PERSP_PULL_MODEL 10 165#define SI_PARAM_LINEAR_SAMPLE 11 166#define SI_PARAM_LINEAR_CENTER 12 167#define SI_PARAM_LINEAR_CENTROID 13 168#define SI_PARAM_LINE_STIPPLE_TEX 14 169#define SI_PARAM_POS_X_FLOAT 15 170#define SI_PARAM_POS_Y_FLOAT 16 171#define SI_PARAM_POS_Z_FLOAT 17 172#define SI_PARAM_POS_W_FLOAT 18 173#define SI_PARAM_FRONT_FACE 19 174#define SI_PARAM_ANCILLARY 20 175#define SI_PARAM_SAMPLE_COVERAGE 21 176#define SI_PARAM_POS_FIXED_PT 22 177 178#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1) 179 180struct si_shader; 181 
182struct si_shader_selector { 183 struct si_shader *current; 184 185 struct tgsi_token *tokens; 186 struct pipe_stream_output_info so; 187 struct tgsi_shader_info info; 188 189 unsigned num_shaders; 190 191 /* PIPE_SHADER_[VERTEX|FRAGMENT|...] */ 192 unsigned type; 193 194 /* Whether the shader has to use a conditional assignment to 195 * choose between weights when emulating 196 * pipe_rasterizer_state::force_persample_interp. 197 * If false, "si_emit_spi_ps_input" will take care of it instead. 198 */ 199 bool forces_persample_interp_for_persp; 200 bool forces_persample_interp_for_linear; 201 202 unsigned gs_output_prim; 203 unsigned gs_max_out_vertices; 204 unsigned gs_num_invocations; 205 unsigned gsvs_itemsize; 206 207 /* masks of "get_unique_index" bits */ 208 uint64_t inputs_read; 209 uint64_t outputs_written; 210 uint32_t patch_outputs_written; 211 uint32_t ps_colors_written; 212}; 213 214/* Valid shader configurations: 215 * 216 * API shaders VS | TCS | TES | GS |pass| PS 217 * are compiled as: | | | |thru| 218 * | | | | | 219 * Only VS & PS: VS | -- | -- | -- | -- | PS 220 * With GS: ES | -- | -- | GS | VS | PS 221 * With Tessel.: LS | HS | VS | -- | -- | PS 222 * With both: LS | HS | ES | GS | VS | PS 223 */ 224 225union si_shader_key { 226 struct { 227 unsigned export_16bpc:8; 228 unsigned last_cbuf:3; 229 unsigned color_two_side:1; 230 unsigned alpha_func:3; 231 unsigned alpha_to_one:1; 232 unsigned poly_stipple:1; 233 unsigned poly_line_smoothing:1; 234 unsigned clamp_color:1; 235 } ps; 236 struct { 237 unsigned instance_divisors[SI_NUM_VERTEX_BUFFERS]; 238 /* Mask of "get_unique_index" bits - which outputs are read 239 * by the next stage (needed by ES). 240 * This describes how outputs are laid out in memory. 
*/ 241 uint64_t es_enabled_outputs; 242 unsigned as_es:1; /* export shader */ 243 unsigned as_ls:1; /* local shader */ 244 unsigned export_prim_id; /* when PS needs it and GS is disabled */ 245 } vs; 246 struct { 247 unsigned prim_mode:3; 248 } tcs; /* tessellation control shader */ 249 struct { 250 /* Mask of "get_unique_index" bits - which outputs are read 251 * by the next stage (needed by ES). 252 * This describes how outputs are laid out in memory. */ 253 uint64_t es_enabled_outputs; 254 unsigned as_es:1; /* export shader */ 255 unsigned export_prim_id; /* when PS needs it and GS is disabled */ 256 } tes; /* tessellation evaluation shader */ 257}; 258 259struct si_shader { 260 struct si_shader_selector *selector; 261 struct si_shader *next_variant; 262 263 struct si_shader *gs_copy_shader; 264 struct si_pm4_state *pm4; 265 struct r600_resource *bo; 266 struct r600_resource *scratch_bo; 267 struct radeon_shader_binary binary; 268 unsigned num_sgprs; 269 unsigned num_vgprs; 270 unsigned lds_size; 271 unsigned spi_ps_input_ena; 272 unsigned float_mode; 273 unsigned scratch_bytes_per_wave; 274 unsigned spi_shader_col_format; 275 unsigned spi_shader_z_format; 276 unsigned db_shader_control; 277 unsigned cb_shader_mask; 278 union si_shader_key key; 279 280 unsigned nparam; 281 unsigned vs_output_param_offset[PIPE_MAX_SHADER_OUTPUTS]; 282 unsigned ps_input_param_offset[PIPE_MAX_SHADER_INPUTS]; 283 unsigned ps_input_interpolate[PIPE_MAX_SHADER_INPUTS]; 284 bool uses_instanceid; 285 unsigned nr_pos_exports; 286 unsigned nr_param_exports; 287 bool is_gs_copy_shader; 288 bool dx10_clamp_mode; /* convert NaNs to 0 */ 289 290 unsigned ls_rsrc1; 291 unsigned ls_rsrc2; 292}; 293 294static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx) 295{ 296 if (sctx->gs_shader) 297 return &sctx->gs_shader->info; 298 else if (sctx->tes_shader) 299 return &sctx->tes_shader->info; 300 else if (sctx->vs_shader) 301 return &sctx->vs_shader->info; 302 else 303 return 
NULL; 304} 305 306static inline struct si_shader* si_get_vs_state(struct si_context *sctx) 307{ 308 if (sctx->gs_shader) 309 return sctx->gs_shader->current->gs_copy_shader; 310 else if (sctx->tes_shader) 311 return sctx->tes_shader->current; 312 else 313 return sctx->vs_shader->current; 314} 315 316static inline bool si_vs_exports_prim_id(struct si_shader *shader) 317{ 318 if (shader->selector->type == PIPE_SHADER_VERTEX) 319 return shader->key.vs.export_prim_id; 320 else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) 321 return shader->key.tes.export_prim_id; 322 else 323 return false; 324} 325 326/* radeonsi_shader.c */ 327int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, 328 struct si_shader *shader); 329void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f); 330int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader, 331 LLVMTargetMachineRef tm, LLVMModuleRef mod); 332void si_shader_destroy(struct si_shader *shader); 333unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index); 334int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader); 335int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader); 336void si_shader_apply_scratch_relocs(struct si_context *sctx, 337 struct si_shader *shader, 338 uint64_t scratch_va); 339void si_shader_binary_read_config(const struct si_screen *sscreen, 340 struct si_shader *shader, 341 unsigned symbol_offset); 342 343#endif 344