si_state.c revision eca57f85ee1f47b32daa641a19d8d386c58eb1de
1/* 2 * Copyright 2012 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Christian König <christian.koenig@amd.com> 25 */ 26 27#include "si_pipe.h" 28#include "sid.h" 29#include "radeon/r600_cs.h" 30#include "radeon/r600_query.h" 31 32#include "util/u_dual_blend.h" 33#include "util/u_format.h" 34#include "util/u_format_s3tc.h" 35#include "util/u_memory.h" 36#include "util/u_resource.h" 37 38/* Initialize an external atom (owned by ../radeon). */ 39static void 40si_init_external_atom(struct si_context *sctx, struct r600_atom *atom, 41 struct r600_atom **list_elem) 42{ 43 atom->id = list_elem - sctx->atoms.array + 1; 44 *list_elem = atom; 45} 46 47/* Initialize an atom owned by radeonsi. */ 48void si_init_atom(struct si_context *sctx, struct r600_atom *atom, 49 struct r600_atom **list_elem, 50 void (*emit_func)(struct si_context *ctx, struct r600_atom *state)) 51{ 52 atom->emit = (void*)emit_func; 53 atom->id = list_elem - sctx->atoms.array + 1; /* index+1 in the atom array */ 54 *list_elem = atom; 55} 56 57static unsigned si_map_swizzle(unsigned swizzle) 58{ 59 switch (swizzle) { 60 case PIPE_SWIZZLE_Y: 61 return V_008F0C_SQ_SEL_Y; 62 case PIPE_SWIZZLE_Z: 63 return V_008F0C_SQ_SEL_Z; 64 case PIPE_SWIZZLE_W: 65 return V_008F0C_SQ_SEL_W; 66 case PIPE_SWIZZLE_0: 67 return V_008F0C_SQ_SEL_0; 68 case PIPE_SWIZZLE_1: 69 return V_008F0C_SQ_SEL_1; 70 default: /* PIPE_SWIZZLE_X */ 71 return V_008F0C_SQ_SEL_X; 72 } 73} 74 75static uint32_t S_FIXED(float value, uint32_t frac_bits) 76{ 77 return value * (1 << frac_bits); 78} 79 80/* 12.4 fixed-point */ 81static unsigned si_pack_float_12p4(float x) 82{ 83 return x <= 0 ? 0 : 84 x >= 4096 ? 0xffff : x * 16; 85} 86 87/* 88 * Inferred framebuffer and blender state. 89 * 90 * CB_TARGET_MASK is emitted here to avoid a hang with dual source blending 91 * if there is not enough PS outputs. 92 */ 93static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *atom) 94{ 95 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 96 struct si_state_blend *blend = sctx->queued.named.blend; 97 /* CB_COLORn_INFO.FORMAT=INVALID should disable unbound colorbuffers, 98 * but you never know. */ 99 uint32_t cb_target_mask = sctx->framebuffer.colorbuf_enabled_4bit; 100 unsigned i; 101 102 if (blend) 103 cb_target_mask &= blend->cb_target_mask; 104 105 /* Avoid a hang that happens when dual source blending is enabled 106 * but there is not enough color outputs. This is undefined behavior, 107 * so disable color writes completely. 108 * 109 * Reproducible with Unigine Heaven 4.0 and drirc missing. 110 */ 111 if (blend && blend->dual_src_blend && 112 sctx->ps_shader.cso && 113 (sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3) 114 cb_target_mask = 0; 115 116 radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, cb_target_mask); 117 118 /* STONEY-specific register settings. */ 119 if (sctx->b.family == CHIP_STONEY) { 120 unsigned spi_shader_col_format = 121 sctx->ps_shader.cso ? 122 sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format : 0; 123 unsigned sx_ps_downconvert = 0; 124 unsigned sx_blend_opt_epsilon = 0; 125 unsigned sx_blend_opt_control = 0; 126 127 for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 128 struct r600_surface *surf = 129 (struct r600_surface*)sctx->framebuffer.state.cbufs[i]; 130 unsigned format, swap, spi_format, colormask; 131 bool has_alpha, has_rgb; 132 133 if (!surf) 134 continue; 135 136 format = G_028C70_FORMAT(surf->cb_color_info); 137 swap = G_028C70_COMP_SWAP(surf->cb_color_info); 138 spi_format = (spi_shader_col_format >> (i * 4)) & 0xf; 139 colormask = (cb_target_mask >> (i * 4)) & 0xf; 140 141 /* Set if RGB and A are present. */ 142 has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib); 143 144 if (format == V_028C70_COLOR_8 || 145 format == V_028C70_COLOR_16 || 146 format == V_028C70_COLOR_32) 147 has_rgb = !has_alpha; 148 else 149 has_rgb = true; 150 151 /* Check the colormask and export format. */ 152 if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A))) 153 has_rgb = false; 154 if (!(colormask & PIPE_MASK_A)) 155 has_alpha = false; 156 157 if (spi_format == V_028714_SPI_SHADER_ZERO) { 158 has_rgb = false; 159 has_alpha = false; 160 } 161 162 /* Disable value checking for disabled channels. */ 163 if (!has_rgb) 164 sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4); 165 if (!has_alpha) 166 sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4); 167 168 /* Enable down-conversion for 32bpp and smaller formats. */ 169 switch (format) { 170 case V_028C70_COLOR_8: 171 case V_028C70_COLOR_8_8: 172 case V_028C70_COLOR_8_8_8_8: 173 /* For 1 and 2-channel formats, use the superset thereof. */ 174 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR || 175 spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 176 spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 177 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4); 178 sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4); 179 } 180 break; 181 182 case V_028C70_COLOR_5_6_5: 183 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 184 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4); 185 sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4); 186 } 187 break; 188 189 case V_028C70_COLOR_1_5_5_5: 190 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 191 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4); 192 sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4); 193 } 194 break; 195 196 case V_028C70_COLOR_4_4_4_4: 197 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 198 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4); 199 sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4); 200 } 201 break; 202 203 case V_028C70_COLOR_32: 204 if (swap == V_0280A0_SWAP_STD && 205 spi_format == V_028714_SPI_SHADER_32_R) 206 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); 207 else if (swap == V_0280A0_SWAP_ALT_REV && 208 spi_format == V_028714_SPI_SHADER_32_AR) 209 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4); 210 break; 211 212 case V_028C70_COLOR_16: 213 case V_028C70_COLOR_16_16: 214 /* For 1-channel formats, use the superset thereof. */ 215 if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR || 216 spi_format == V_028714_SPI_SHADER_SNORM16_ABGR || 217 spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 218 spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 219 if (swap == V_0280A0_SWAP_STD || 220 swap == V_0280A0_SWAP_STD_REV) 221 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4); 222 else 223 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4); 224 } 225 break; 226 227 case V_028C70_COLOR_10_11_11: 228 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 229 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4); 230 sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4); 231 } 232 break; 233 234 case V_028C70_COLOR_2_10_10_10: 235 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 236 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4); 237 sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4); 238 } 239 break; 240 } 241 } 242 243 if (sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) { 244 sx_ps_downconvert = 0; 245 sx_blend_opt_epsilon = 0; 246 sx_blend_opt_control = 0; 247 } 248 249 radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3); 250 radeon_emit(cs, sx_ps_downconvert); /* R_028754_SX_PS_DOWNCONVERT */ 251 radeon_emit(cs, sx_blend_opt_epsilon); /* R_028758_SX_BLEND_OPT_EPSILON */ 252 radeon_emit(cs, sx_blend_opt_control); /* R_02875C_SX_BLEND_OPT_CONTROL */ 253 } 254} 255 256/* 257 * Blender functions 258 */ 259 260static uint32_t si_translate_blend_function(int blend_func) 261{ 262 switch (blend_func) { 263 case PIPE_BLEND_ADD: 264 return V_028780_COMB_DST_PLUS_SRC; 265 case PIPE_BLEND_SUBTRACT: 266 return V_028780_COMB_SRC_MINUS_DST; 267 case PIPE_BLEND_REVERSE_SUBTRACT: 268 return V_028780_COMB_DST_MINUS_SRC; 269 case PIPE_BLEND_MIN: 270 return V_028780_COMB_MIN_DST_SRC; 271 case PIPE_BLEND_MAX: 272 return V_028780_COMB_MAX_DST_SRC; 273 default: 274 R600_ERR("Unknown blend function %d\n", blend_func); 275 assert(0); 276 break; 277 } 278 return 0; 279} 280 281static uint32_t si_translate_blend_factor(int blend_fact) 282{ 283 switch (blend_fact) { 284 case PIPE_BLENDFACTOR_ONE: 285 return V_028780_BLEND_ONE; 286 case PIPE_BLENDFACTOR_SRC_COLOR: 287 return V_028780_BLEND_SRC_COLOR; 288 case PIPE_BLENDFACTOR_SRC_ALPHA: 289 return V_028780_BLEND_SRC_ALPHA; 290 case PIPE_BLENDFACTOR_DST_ALPHA: 291 return V_028780_BLEND_DST_ALPHA; 292 case PIPE_BLENDFACTOR_DST_COLOR: 293 return V_028780_BLEND_DST_COLOR; 294 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 295 return V_028780_BLEND_SRC_ALPHA_SATURATE; 296 case PIPE_BLENDFACTOR_CONST_COLOR: 297 return V_028780_BLEND_CONSTANT_COLOR; 298 case PIPE_BLENDFACTOR_CONST_ALPHA: 299 return V_028780_BLEND_CONSTANT_ALPHA; 300 case PIPE_BLENDFACTOR_ZERO: 301 return V_028780_BLEND_ZERO; 302 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 303 return V_028780_BLEND_ONE_MINUS_SRC_COLOR; 304 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 305 return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; 306 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 307 return V_028780_BLEND_ONE_MINUS_DST_ALPHA; 308 case PIPE_BLENDFACTOR_INV_DST_COLOR: 309 return V_028780_BLEND_ONE_MINUS_DST_COLOR; 310 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 311 return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR; 312 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 313 return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA; 314 case PIPE_BLENDFACTOR_SRC1_COLOR: 315 return V_028780_BLEND_SRC1_COLOR; 316 case PIPE_BLENDFACTOR_SRC1_ALPHA: 317 return V_028780_BLEND_SRC1_ALPHA; 318 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 319 return V_028780_BLEND_INV_SRC1_COLOR; 320 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 321 return V_028780_BLEND_INV_SRC1_ALPHA; 322 default: 323 R600_ERR("Bad blend factor %d not supported!\n", blend_fact); 324 assert(0); 325 break; 326 } 327 return 0; 328} 329 330static uint32_t si_translate_blend_opt_function(int blend_func) 331{ 332 switch (blend_func) { 333 case PIPE_BLEND_ADD: 334 return V_028760_OPT_COMB_ADD; 335 case PIPE_BLEND_SUBTRACT: 336 return V_028760_OPT_COMB_SUBTRACT; 337 case PIPE_BLEND_REVERSE_SUBTRACT: 338 return V_028760_OPT_COMB_REVSUBTRACT; 339 case PIPE_BLEND_MIN: 340 return V_028760_OPT_COMB_MIN; 341 case PIPE_BLEND_MAX: 342 return V_028760_OPT_COMB_MAX; 343 default: 344 return V_028760_OPT_COMB_BLEND_DISABLED; 345 } 346} 347 348static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha) 349{ 350 switch (blend_fact) { 351 case PIPE_BLENDFACTOR_ZERO: 352 return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL; 353 case PIPE_BLENDFACTOR_ONE: 354 return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE; 355 case PIPE_BLENDFACTOR_SRC_COLOR: 356 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0 357 : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0; 358 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 359 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1 360 : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1; 361 case PIPE_BLENDFACTOR_SRC_ALPHA: 362 return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0; 363 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 364 return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1; 365 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 366 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE 367 : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 368 default: 369 return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 370 } 371} 372 373/** 374 * Get rid of DST in the blend factors by commuting the operands: 375 * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 376 */ 377static void si_blend_remove_dst(unsigned *func, unsigned *src_factor, 378 unsigned *dst_factor, unsigned expected_dst, 379 unsigned replacement_src) 380{ 381 if (*src_factor == expected_dst && 382 *dst_factor == PIPE_BLENDFACTOR_ZERO) { 383 *src_factor = PIPE_BLENDFACTOR_ZERO; 384 *dst_factor = replacement_src; 385 386 /* Commuting the operands requires reversing subtractions. */ 387 if (*func == PIPE_BLEND_SUBTRACT) 388 *func = PIPE_BLEND_REVERSE_SUBTRACT; 389 else if (*func == PIPE_BLEND_REVERSE_SUBTRACT) 390 *func = PIPE_BLEND_SUBTRACT; 391 } 392} 393 394static bool si_blend_factor_uses_dst(unsigned factor) 395{ 396 return factor == PIPE_BLENDFACTOR_DST_COLOR || 397 factor == PIPE_BLENDFACTOR_DST_ALPHA || 398 factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 399 factor == PIPE_BLENDFACTOR_INV_DST_ALPHA || 400 factor == PIPE_BLENDFACTOR_INV_DST_COLOR; 401} 402 403static void *si_create_blend_state_mode(struct pipe_context *ctx, 404 const struct pipe_blend_state *state, 405 unsigned mode) 406{ 407 struct si_context *sctx = (struct si_context*)ctx; 408 struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend); 409 struct si_pm4_state *pm4 = &blend->pm4; 410 uint32_t sx_mrt_blend_opt[8] = {0}; 411 uint32_t color_control = 0; 412 413 if (!blend) 414 return NULL; 415 416 blend->alpha_to_coverage = state->alpha_to_coverage; 417 blend->alpha_to_one = state->alpha_to_one; 418 blend->dual_src_blend = util_blend_state_is_dual(state, 0); 419 420 if (state->logicop_enable) { 421 color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4)); 422 } else { 423 color_control |= S_028808_ROP3(0xcc); 424 } 425 426 si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 427 S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 428 S_028B70_ALPHA_TO_MASK_OFFSET0(2) | 429 S_028B70_ALPHA_TO_MASK_OFFSET1(2) | 430 S_028B70_ALPHA_TO_MASK_OFFSET2(2) | 431 S_028B70_ALPHA_TO_MASK_OFFSET3(2)); 432 433 if (state->alpha_to_coverage) 434 blend->need_src_alpha_4bit |= 0xf; 435 436 blend->cb_target_mask = 0; 437 for (int i = 0; i < 8; i++) { 438 /* state->rt entries > 0 only written if independent blending */ 439 const int j = state->independent_blend_enable ? i : 0; 440 441 unsigned eqRGB = state->rt[j].rgb_func; 442 unsigned srcRGB = state->rt[j].rgb_src_factor; 443 unsigned dstRGB = state->rt[j].rgb_dst_factor; 444 unsigned eqA = state->rt[j].alpha_func; 445 unsigned srcA = state->rt[j].alpha_src_factor; 446 unsigned dstA = state->rt[j].alpha_dst_factor; 447 448 unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt; 449 unsigned blend_cntl = 0; 450 451 sx_mrt_blend_opt[i] = 452 S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | 453 S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED); 454 455 /* Only set dual source blending for MRT0 to avoid a hang. */ 456 if (i >= 1 && blend->dual_src_blend) { 457 /* Vulkan does this for dual source blending. */ 458 if (i == 1) 459 blend_cntl |= S_028780_ENABLE(1); 460 461 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 462 continue; 463 } 464 465 /* Only addition and subtraction equations are supported with 466 * dual source blending. 467 */ 468 if (blend->dual_src_blend && 469 (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX || 470 eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) { 471 assert(!"Unsupported equation for dual source blending"); 472 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 473 continue; 474 } 475 476 /* cb_render_state will disable unused ones */ 477 blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i); 478 479 if (!state->rt[j].colormask || !state->rt[j].blend_enable) { 480 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 481 continue; 482 } 483 484 /* Blending optimizations for Stoney. 485 * These transformations don't change the behavior. 486 * 487 * First, get rid of DST in the blend factors: 488 * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 489 */ 490 si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, 491 PIPE_BLENDFACTOR_DST_COLOR, 492 PIPE_BLENDFACTOR_SRC_COLOR); 493 si_blend_remove_dst(&eqA, &srcA, &dstA, 494 PIPE_BLENDFACTOR_DST_COLOR, 495 PIPE_BLENDFACTOR_SRC_COLOR); 496 si_blend_remove_dst(&eqA, &srcA, &dstA, 497 PIPE_BLENDFACTOR_DST_ALPHA, 498 PIPE_BLENDFACTOR_SRC_ALPHA); 499 500 /* Look up the ideal settings from tables. */ 501 srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false); 502 dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false); 503 srcA_opt = si_translate_blend_opt_factor(srcA, true); 504 dstA_opt = si_translate_blend_opt_factor(dstA, true); 505 506 /* Handle interdependencies. */ 507 if (si_blend_factor_uses_dst(srcRGB)) 508 dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 509 if (si_blend_factor_uses_dst(srcA)) 510 dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 511 512 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE && 513 (dstRGB == PIPE_BLENDFACTOR_ZERO || 514 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 515 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)) 516 dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 517 518 /* Set the final value. */ 519 sx_mrt_blend_opt[i] = 520 S_028760_COLOR_SRC_OPT(srcRGB_opt) | 521 S_028760_COLOR_DST_OPT(dstRGB_opt) | 522 S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) | 523 S_028760_ALPHA_SRC_OPT(srcA_opt) | 524 S_028760_ALPHA_DST_OPT(dstA_opt) | 525 S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA)); 526 527 /* Set blend state. */ 528 blend_cntl |= S_028780_ENABLE(1); 529 blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB)); 530 blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB)); 531 blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB)); 532 533 if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { 534 blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1); 535 blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA)); 536 blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA)); 537 blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA)); 538 } 539 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 540 541 blend->blend_enable_4bit |= 0xfu << (i * 4); 542 543 /* This is only important for formats without alpha. */ 544 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 545 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 546 srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 547 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 548 srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || 549 dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA) 550 blend->need_src_alpha_4bit |= 0xfu << (i * 4); 551 } 552 553 if (blend->cb_target_mask) { 554 color_control |= S_028808_MODE(mode); 555 } else { 556 color_control |= S_028808_MODE(V_028808_CB_DISABLE); 557 } 558 559 if (sctx->b.family == CHIP_STONEY) { 560 /* Disable RB+ blend optimizations for dual source blending. 561 * Vulkan does this. 562 */ 563 if (blend->dual_src_blend) { 564 for (int i = 0; i < 8; i++) { 565 sx_mrt_blend_opt[i] = 566 S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) | 567 S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE); 568 } 569 } 570 571 for (int i = 0; i < 8; i++) 572 si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4, 573 sx_mrt_blend_opt[i]); 574 575 /* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */ 576 if (blend->dual_src_blend || state->logicop_enable || 577 mode == V_028808_CB_RESOLVE) 578 color_control |= S_028808_DISABLE_DUAL_QUAD(1); 579 } 580 581 si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control); 582 return blend; 583} 584 585static void *si_create_blend_state(struct pipe_context *ctx, 586 const struct pipe_blend_state *state) 587{ 588 return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); 589} 590 591static void si_bind_blend_state(struct pipe_context *ctx, void *state) 592{ 593 struct si_context *sctx = (struct si_context *)ctx; 594 si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state); 595 si_mark_atom_dirty(sctx, &sctx->cb_render_state); 596 sctx->do_update_shaders = true; 597} 598 599static void si_delete_blend_state(struct pipe_context *ctx, void *state) 600{ 601 struct si_context *sctx = (struct si_context *)ctx; 602 si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state); 603} 604 605static void si_set_blend_color(struct pipe_context *ctx, 606 const struct pipe_blend_color *state) 607{ 608 struct si_context *sctx = (struct si_context *)ctx; 609 610 if (memcmp(&sctx->blend_color.state, state, sizeof(*state)) == 0) 611 return; 612 613 sctx->blend_color.state = *state; 614 si_mark_atom_dirty(sctx, &sctx->blend_color.atom); 615} 616 617static void si_emit_blend_color(struct si_context *sctx, struct r600_atom *atom) 618{ 619 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 620 621 radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4); 622 radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4); 623} 624 625/* 626 * Clipping 627 */ 628 629static void si_set_clip_state(struct pipe_context *ctx, 630 const struct pipe_clip_state *state) 631{ 632 struct si_context *sctx = (struct si_context *)ctx; 633 struct pipe_constant_buffer cb; 634 635 if (memcmp(&sctx->clip_state.state, state, sizeof(*state)) == 0) 636 return; 637 638 sctx->clip_state.state = *state; 639 si_mark_atom_dirty(sctx, &sctx->clip_state.atom); 640 641 cb.buffer = NULL; 642 cb.user_buffer = state->ucp; 643 cb.buffer_offset = 0; 644 cb.buffer_size = 4*4*8; 645 si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb); 646 pipe_resource_reference(&cb.buffer, NULL); 647} 648 649static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom) 650{ 651 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 652 653 radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4); 654 radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4); 655} 656 657#define SIX_BITS 0x3F 658 659static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom) 660{ 661 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 662 struct si_shader *vs = si_get_vs_state(sctx); 663 struct tgsi_shader_info *info = si_get_vs_info(sctx); 664 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 665 unsigned window_space = 666 info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; 667 unsigned clipdist_mask = 668 info->writes_clipvertex ? SIX_BITS : info->clipdist_writemask; 669 unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SIX_BITS; 670 unsigned culldist_mask = info->culldist_writemask << info->num_written_clipdistance; 671 unsigned total_mask; 672 bool misc_vec_ena; 673 674 if (vs->key.opt.hw_vs.clip_disable) { 675 assert(!info->culldist_writemask); 676 clipdist_mask = 0; 677 culldist_mask = 0; 678 } 679 total_mask = clipdist_mask | culldist_mask; 680 681 /* Clip distances on points have no effect, so need to be implemented 682 * as cull distances. This applies for the clipvertex case as well. 683 * 684 * Setting this for primitives other than points should have no adverse 685 * effects. 686 */ 687 clipdist_mask &= rs->clip_plane_enable; 688 culldist_mask |= clipdist_mask; 689 690 misc_vec_ena = info->writes_psize || info->writes_edgeflag || 691 info->writes_layer || info->writes_viewport_index; 692 693 radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL, 694 S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) | 695 S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) | 696 S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) | 697 S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) | 698 S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) | 699 S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) | 700 S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) | 701 S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) | 702 clipdist_mask | (culldist_mask << 8)); 703 radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL, 704 rs->pa_cl_clip_cntl | 705 ucp_mask | 706 S_028810_CLIP_DISABLE(window_space)); 707 708 /* reuse needs to be set off if we write oViewport */ 709 radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF, 710 S_028AB4_REUSE_OFF(info->writes_viewport_index)); 711} 712 713/* 714 * inferred state between framebuffer and rasterizer 715 */ 716static void si_update_poly_offset_state(struct si_context *sctx) 717{ 718 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 719 720 if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) 721 return; 722 723 /* Use the user format, not db_render_format, so that the polygon 724 * offset behaves as expected by applications. 725 */ 726 switch (sctx->framebuffer.state.zsbuf->texture->format) { 727 case PIPE_FORMAT_Z16_UNORM: 728 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]); 729 break; 730 default: /* 24-bit */ 731 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]); 732 break; 733 case PIPE_FORMAT_Z32_FLOAT: 734 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 735 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]); 736 break; 737 } 738} 739 740/* 741 * Rasterizer 742 */ 743 744static uint32_t si_translate_fill(uint32_t func) 745{ 746 switch(func) { 747 case PIPE_POLYGON_MODE_FILL: 748 return V_028814_X_DRAW_TRIANGLES; 749 case PIPE_POLYGON_MODE_LINE: 750 return V_028814_X_DRAW_LINES; 751 case PIPE_POLYGON_MODE_POINT: 752 return V_028814_X_DRAW_POINTS; 753 default: 754 assert(0); 755 return V_028814_X_DRAW_POINTS; 756 } 757} 758 759static void *si_create_rs_state(struct pipe_context *ctx, 760 const struct pipe_rasterizer_state *state) 761{ 762 struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer); 763 struct si_pm4_state *pm4 = &rs->pm4; 764 unsigned tmp, i; 765 float psize_min, psize_max; 766 767 if (!rs) { 768 return NULL; 769 } 770 771 rs->scissor_enable = state->scissor; 772 rs->clip_halfz = state->clip_halfz; 773 rs->two_side = state->light_twoside; 774 rs->multisample_enable = state->multisample; 775 rs->force_persample_interp = state->force_persample_interp; 776 rs->clip_plane_enable = state->clip_plane_enable; 777 rs->line_stipple_enable = state->line_stipple_enable; 778 rs->poly_stipple_enable = state->poly_stipple_enable; 779 rs->line_smooth = state->line_smooth; 780 rs->poly_smooth = state->poly_smooth; 781 rs->uses_poly_offset = state->offset_point || state->offset_line || 782 state->offset_tri; 783 rs->clamp_fragment_color = state->clamp_fragment_color; 784 rs->flatshade = state->flatshade; 785 rs->sprite_coord_enable = state->sprite_coord_enable; 786 rs->rasterizer_discard = state->rasterizer_discard; 787 rs->pa_sc_line_stipple = state->line_stipple_enable ? 788 S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | 789 S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0; 790 rs->pa_cl_clip_cntl = 791 S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) | 792 S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) | 793 S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) | 794 S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) | 795 S_028810_DX_LINEAR_ATTR_CLIP_ENA(1); 796 797 si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0, 798 S_0286D4_FLAT_SHADE_ENA(1) | 799 S_0286D4_PNT_SPRITE_ENA(1) | 800 S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | 801 S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) | 802 S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) | 803 S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) | 804 S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT)); 805 806 /* point size 12.4 fixed point */ 807 tmp = (unsigned)(state->point_size * 8.0); 808 si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp)); 809 810 if (state->point_size_per_vertex) { 811 psize_min = util_get_min_point_size(state); 812 psize_max = 8192; 813 } else { 814 /* Force the point size to be as if the vertex output was disabled. */ 815 psize_min = state->point_size; 816 psize_max = state->point_size; 817 } 818 /* Divide by two, because 0.5 = 1 pixel. */ 819 si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX, 820 S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) | 821 S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2))); 822 823 tmp = (unsigned)state->line_width * 8; 824 si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp)); 825 si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0, 826 S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | 827 S_028A48_MSAA_ENABLE(state->multisample || 828 state->poly_smooth || 829 state->line_smooth) | 830 S_028A48_VPORT_SCISSOR_ENABLE(1)); 831 832 si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL, 833 S_028BE4_PIX_CENTER(state->half_pixel_center) | 834 S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH)); 835 836 si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp)); 837 si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL, 838 S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) | 839 S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | 840 S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | 841 S_028814_FACE(!state->front_ccw) | 842 S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) | 843 S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) | 844 S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) | 845 S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL || 846 state->fill_back != PIPE_POLYGON_MODE_FILL) | 847 S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) | 848 S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back))); 849 si_pm4_set_reg(pm4, R_00B130_SPI_SHADER_USER_DATA_VS_0 + 850 SI_SGPR_VS_STATE_BITS * 4, state->clamp_vertex_color); 851 852 /* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. */ 853 for (i = 0; i < 3; i++) { 854 struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i]; 855 float offset_units = state->offset_units; 856 float offset_scale = state->offset_scale * 16.0f; 857 uint32_t pa_su_poly_offset_db_fmt_cntl = 0; 858 859 if (!state->offset_units_unscaled) { 860 switch (i) { 861 case 0: /* 16-bit zbuffer */ 862 offset_units *= 4.0f; 863 pa_su_poly_offset_db_fmt_cntl = 864 S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16); 865 break; 866 case 1: /* 24-bit zbuffer */ 867 offset_units *= 2.0f; 868 pa_su_poly_offset_db_fmt_cntl = 869 S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24); 870 break; 871 case 2: /* 32-bit zbuffer */ 872 offset_units *= 1.0f; 873 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | 874 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); 875 break; 876 } 877 } 878 879 si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 880 fui(offset_scale)); 881 si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, 882 fui(offset_units)); 883 si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, 884 fui(offset_scale)); 885 si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, 886 fui(offset_units)); 887 si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 888 pa_su_poly_offset_db_fmt_cntl); 889 } 890 891 return rs; 892} 893 894static void si_bind_rs_state(struct pipe_context *ctx, void *state) 895{ 896 struct si_context *sctx = (struct si_context *)ctx; 897 struct si_state_rasterizer *old_rs = 898 (struct si_state_rasterizer*)sctx->queued.named.rasterizer; 899 struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; 900 901 if (!state) 902 return; 903 904 if (!old_rs || old_rs->multisample_enable != rs->multisample_enable) { 905 si_mark_atom_dirty(sctx, &sctx->db_render_state); 906 907 /* Update the small primitive filter workaround if necessary. */ 908 if (sctx->b.family >= CHIP_POLARIS10 && 909 sctx->framebuffer.nr_samples > 1) 910 si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom); 911 } 912 913 r600_viewport_set_rast_deps(&sctx->b, rs->scissor_enable, rs->clip_halfz); 914 915 si_pm4_bind_state(sctx, rasterizer, rs); 916 si_update_poly_offset_state(sctx); 917 918 si_mark_atom_dirty(sctx, &sctx->clip_regs); 919 sctx->do_update_shaders = true; 920} 921 922static void si_delete_rs_state(struct pipe_context *ctx, void *state) 923{ 924 struct si_context *sctx = (struct si_context *)ctx; 925 926 if (sctx->queued.named.rasterizer == state) 927 si_pm4_bind_state(sctx, poly_offset, NULL); 928 si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state); 929} 930 931/* 932 * infeered state between dsa and stencil ref 933 */ 934static void si_emit_stencil_ref(struct si_context *sctx, struct r600_atom *atom) 935{ 936 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 937 struct pipe_stencil_ref *ref = &sctx->stencil_ref.state; 938 struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part; 939 940 radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2); 941 radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) | 942 S_028430_STENCILMASK(dsa->valuemask[0]) | 943 S_028430_STENCILWRITEMASK(dsa->writemask[0]) | 944 S_028430_STENCILOPVAL(1)); 945 radeon_emit(cs, S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) | 946 S_028434_STENCILMASK_BF(dsa->valuemask[1]) | 947 S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) | 948 S_028434_STENCILOPVAL_BF(1)); 949} 950 951static void si_set_stencil_ref(struct pipe_context *ctx, 952 const struct pipe_stencil_ref *state) 953{ 954 struct si_context *sctx = (struct si_context *)ctx; 955 956 if (memcmp(&sctx->stencil_ref.state, state, sizeof(*state)) == 0) 957 return; 958 959 sctx->stencil_ref.state = *state; 960 si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom); 961} 962 963 964/* 965 * DSA 966 */ 967 968static uint32_t si_translate_stencil_op(int s_op) 969{ 970 switch (s_op) { 971 case PIPE_STENCIL_OP_KEEP: 972 return V_02842C_STENCIL_KEEP; 973 case PIPE_STENCIL_OP_ZERO: 974 return V_02842C_STENCIL_ZERO; 975 case PIPE_STENCIL_OP_REPLACE: 976 return V_02842C_STENCIL_REPLACE_TEST; 977 case PIPE_STENCIL_OP_INCR: 978 return V_02842C_STENCIL_ADD_CLAMP; 979 case PIPE_STENCIL_OP_DECR: 980 return V_02842C_STENCIL_SUB_CLAMP; 981 case PIPE_STENCIL_OP_INCR_WRAP: 982 return V_02842C_STENCIL_ADD_WRAP; 983 case PIPE_STENCIL_OP_DECR_WRAP: 984 return V_02842C_STENCIL_SUB_WRAP; 985 case PIPE_STENCIL_OP_INVERT: 986 return V_02842C_STENCIL_INVERT; 987 default: 988 R600_ERR("Unknown stencil op %d", s_op); 989 assert(0); 990 break; 991 } 992 return 0; 993} 994 995static void *si_create_dsa_state(struct pipe_context *ctx, 996 const struct pipe_depth_stencil_alpha_state *state) 997{ 998 struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa); 999 struct si_pm4_state *pm4 = &dsa->pm4; 1000 unsigned db_depth_control; 1001 uint32_t db_stencil_control = 0; 1002 1003 if (!dsa) { 1004 return NULL; 1005 } 1006 1007 dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask; 1008 dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask; 1009 dsa->stencil_ref.writemask[0] = state->stencil[0].writemask; 1010 dsa->stencil_ref.writemask[1] = state->stencil[1].writemask; 1011 1012 db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | 1013 S_028800_Z_WRITE_ENABLE(state->depth.writemask) | 1014 S_028800_ZFUNC(state->depth.func) | 1015 S_028800_DEPTH_BOUNDS_ENABLE(state->depth.bounds_test); 1016 1017 /* stencil */ 1018 if (state->stencil[0].enabled) { 1019 db_depth_control |= S_028800_STENCIL_ENABLE(1); 1020 db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func); 1021 db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op)); 1022 db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op)); 1023 db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op)); 1024 1025 if (state->stencil[1].enabled) { 1026 db_depth_control |= S_028800_BACKFACE_ENABLE(1); 1027 db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func); 1028 db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op)); 1029 db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op)); 1030 db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op)); 1031 } 1032 } 1033 1034 /* alpha */ 1035 if (state->alpha.enabled) { 1036 dsa->alpha_func = state->alpha.func; 1037 1038 si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 + 1039 SI_SGPR_ALPHA_REF * 4, fui(state->alpha.ref_value)); 1040 } else { 1041 dsa->alpha_func = PIPE_FUNC_ALWAYS; 1042 } 1043 1044 si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control); 1045 si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); 1046 if (state->depth.bounds_test) { 1047 si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min)); 1048 si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max)); 1049 } 1050 1051 return dsa; 1052} 1053 1054static void si_bind_dsa_state(struct pipe_context *ctx, void *state) 1055{ 1056 struct si_context *sctx = (struct si_context *)ctx; 1057 struct si_state_dsa *dsa = state; 1058 1059 if (!state) 1060 return; 1061 1062 si_pm4_bind_state(sctx, dsa, dsa); 1063 1064 if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part, 1065 sizeof(struct si_dsa_stencil_ref_part)) != 0) { 1066 sctx->stencil_ref.dsa_part = dsa->stencil_ref; 1067 si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom); 1068 } 1069 sctx->do_update_shaders = true; 1070} 1071 1072static void si_delete_dsa_state(struct pipe_context *ctx, void *state) 1073{ 1074 struct si_context *sctx = (struct si_context *)ctx; 1075 si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state); 1076} 1077 1078static void *si_create_db_flush_dsa(struct si_context *sctx) 1079{ 1080 struct pipe_depth_stencil_alpha_state dsa = {}; 1081 1082 return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa); 1083} 1084 1085/* DB RENDER STATE */ 1086 1087static void si_set_active_query_state(struct pipe_context *ctx, boolean enable) 1088{ 1089 struct si_context *sctx = (struct si_context*)ctx; 1090 1091 /* Pipeline stat & streamout queries. */ 1092 if (enable) { 1093 sctx->b.flags &= ~R600_CONTEXT_STOP_PIPELINE_STATS; 1094 sctx->b.flags |= R600_CONTEXT_START_PIPELINE_STATS; 1095 } else { 1096 sctx->b.flags &= ~R600_CONTEXT_START_PIPELINE_STATS; 1097 sctx->b.flags |= R600_CONTEXT_STOP_PIPELINE_STATS; 1098 } 1099 1100 /* Occlusion queries. */ 1101 if (sctx->occlusion_queries_disabled != !enable) { 1102 sctx->occlusion_queries_disabled = !enable; 1103 si_mark_atom_dirty(sctx, &sctx->db_render_state); 1104 } 1105} 1106 1107static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable) 1108{ 1109 struct si_context *sctx = (struct si_context*)ctx; 1110 1111 si_mark_atom_dirty(sctx, &sctx->db_render_state); 1112} 1113 1114static void si_save_qbo_state(struct pipe_context *ctx, struct r600_qbo_state *st) 1115{ 1116 struct si_context *sctx = (struct si_context*)ctx; 1117 1118 st->saved_compute = sctx->cs_shader_state.program; 1119 1120 si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0); 1121 si_get_shader_buffers(sctx, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo); 1122} 1123 1124static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state) 1125{ 1126 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 1127 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 1128 unsigned db_shader_control; 1129 1130 radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2); 1131 1132 /* DB_RENDER_CONTROL */ 1133 if (sctx->dbcb_depth_copy_enabled || 1134 sctx->dbcb_stencil_copy_enabled) { 1135 radeon_emit(cs, 1136 S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) | 1137 S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) | 1138 S_028000_COPY_CENTROID(1) | 1139 S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample)); 1140 } else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) { 1141 radeon_emit(cs, 1142 S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) | 1143 S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace)); 1144 } else { 1145 radeon_emit(cs, 1146 S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) | 1147 S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear)); 1148 } 1149 1150 /* DB_COUNT_CONTROL (occlusion queries) */ 1151 if (sctx->b.num_occlusion_queries > 0 && 1152 !sctx->occlusion_queries_disabled) { 1153 bool perfect = sctx->b.num_perfect_occlusion_queries > 0; 1154 1155 if (sctx->b.chip_class >= CIK) { 1156 radeon_emit(cs, 1157 S_028004_PERFECT_ZPASS_COUNTS(perfect) | 1158 S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) | 1159 S_028004_ZPASS_ENABLE(1) | 1160 S_028004_SLICE_EVEN_ENABLE(1) | 1161 S_028004_SLICE_ODD_ENABLE(1)); 1162 } else { 1163 radeon_emit(cs, 1164 S_028004_PERFECT_ZPASS_COUNTS(perfect) | 1165 S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples)); 1166 } 1167 } else { 1168 /* Disable occlusion queries. */ 1169 if (sctx->b.chip_class >= CIK) { 1170 radeon_emit(cs, 0); 1171 } else { 1172 radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1)); 1173 } 1174 } 1175 1176 /* DB_RENDER_OVERRIDE2 */ 1177 radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 1178 S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) | 1179 S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) | 1180 S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4)); 1181 1182 db_shader_control = sctx->ps_db_shader_control; 1183 1184 /* Bug workaround for smoothing (overrasterization) on SI. */ 1185 if (sctx->b.chip_class == SI && sctx->smoothing_enabled) { 1186 db_shader_control &= C_02880C_Z_ORDER; 1187 db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z); 1188 } 1189 1190 /* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */ 1191 if (!rs || !rs->multisample_enable) 1192 db_shader_control &= C_02880C_MASK_EXPORT_ENABLE; 1193 1194 if (sctx->b.family == CHIP_STONEY && 1195 sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) 1196 db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1); 1197 1198 radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, 1199 db_shader_control); 1200} 1201 1202/* 1203 * format translation 1204 */ 1205static uint32_t si_translate_colorformat(enum pipe_format format) 1206{ 1207 const struct util_format_description *desc = util_format_description(format); 1208 1209#define HAS_SIZE(x,y,z,w) \ 1210 (desc->channel[0].size == (x) && desc->channel[1].size == (y) && \ 1211 desc->channel[2].size == (z) && desc->channel[3].size == (w)) 1212 1213 if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */ 1214 return V_028C70_COLOR_10_11_11; 1215 1216 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) 1217 return V_028C70_COLOR_INVALID; 1218 1219 /* hw cannot support mixed formats (except depth/stencil, since 1220 * stencil is not written to). */ 1221 if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) 1222 return V_028C70_COLOR_INVALID; 1223 1224 switch (desc->nr_channels) { 1225 case 1: 1226 switch (desc->channel[0].size) { 1227 case 8: 1228 return V_028C70_COLOR_8; 1229 case 16: 1230 return V_028C70_COLOR_16; 1231 case 32: 1232 return V_028C70_COLOR_32; 1233 } 1234 break; 1235 case 2: 1236 if (desc->channel[0].size == desc->channel[1].size) { 1237 switch (desc->channel[0].size) { 1238 case 8: 1239 return V_028C70_COLOR_8_8; 1240 case 16: 1241 return V_028C70_COLOR_16_16; 1242 case 32: 1243 return V_028C70_COLOR_32_32; 1244 } 1245 } else if (HAS_SIZE(8,24,0,0)) { 1246 return V_028C70_COLOR_24_8; 1247 } else if (HAS_SIZE(24,8,0,0)) { 1248 return V_028C70_COLOR_8_24; 1249 } 1250 break; 1251 case 3: 1252 if (HAS_SIZE(5,6,5,0)) { 1253 return V_028C70_COLOR_5_6_5; 1254 } else if (HAS_SIZE(32,8,24,0)) { 1255 return V_028C70_COLOR_X24_8_32_FLOAT; 1256 } 1257 break; 1258 case 4: 1259 if (desc->channel[0].size == desc->channel[1].size && 1260 desc->channel[0].size == desc->channel[2].size && 1261 desc->channel[0].size == desc->channel[3].size) { 1262 switch (desc->channel[0].size) { 1263 case 4: 1264 return V_028C70_COLOR_4_4_4_4; 1265 case 8: 1266 return V_028C70_COLOR_8_8_8_8; 1267 case 16: 1268 return V_028C70_COLOR_16_16_16_16; 1269 case 32: 1270 return V_028C70_COLOR_32_32_32_32; 1271 } 1272 } else if (HAS_SIZE(5,5,5,1)) { 1273 return V_028C70_COLOR_1_5_5_5; 1274 } else if (HAS_SIZE(10,10,10,2)) { 1275 return V_028C70_COLOR_2_10_10_10; 1276 } 1277 break; 1278 } 1279 return V_028C70_COLOR_INVALID; 1280} 1281 1282static uint32_t si_colorformat_endian_swap(uint32_t colorformat) 1283{ 1284 if (SI_BIG_ENDIAN) { 1285 switch(colorformat) { 1286 /* 8-bit buffers. */ 1287 case V_028C70_COLOR_8: 1288 return V_028C70_ENDIAN_NONE; 1289 1290 /* 16-bit buffers. */ 1291 case V_028C70_COLOR_5_6_5: 1292 case V_028C70_COLOR_1_5_5_5: 1293 case V_028C70_COLOR_4_4_4_4: 1294 case V_028C70_COLOR_16: 1295 case V_028C70_COLOR_8_8: 1296 return V_028C70_ENDIAN_8IN16; 1297 1298 /* 32-bit buffers. */ 1299 case V_028C70_COLOR_8_8_8_8: 1300 case V_028C70_COLOR_2_10_10_10: 1301 case V_028C70_COLOR_8_24: 1302 case V_028C70_COLOR_24_8: 1303 case V_028C70_COLOR_16_16: 1304 return V_028C70_ENDIAN_8IN32; 1305 1306 /* 64-bit buffers. */ 1307 case V_028C70_COLOR_16_16_16_16: 1308 return V_028C70_ENDIAN_8IN16; 1309 1310 case V_028C70_COLOR_32_32: 1311 return V_028C70_ENDIAN_8IN32; 1312 1313 /* 128-bit buffers. */ 1314 case V_028C70_COLOR_32_32_32_32: 1315 return V_028C70_ENDIAN_8IN32; 1316 default: 1317 return V_028C70_ENDIAN_NONE; /* Unsupported. */ 1318 } 1319 } else { 1320 return V_028C70_ENDIAN_NONE; 1321 } 1322} 1323 1324static uint32_t si_translate_dbformat(enum pipe_format format) 1325{ 1326 switch (format) { 1327 case PIPE_FORMAT_Z16_UNORM: 1328 return V_028040_Z_16; 1329 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1330 case PIPE_FORMAT_X8Z24_UNORM: 1331 case PIPE_FORMAT_Z24X8_UNORM: 1332 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1333 return V_028040_Z_24; /* deprecated on SI */ 1334 case PIPE_FORMAT_Z32_FLOAT: 1335 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1336 return V_028040_Z_32_FLOAT; 1337 default: 1338 return V_028040_Z_INVALID; 1339 } 1340} 1341 1342/* 1343 * Texture translation 1344 */ 1345 1346static uint32_t si_translate_texformat(struct pipe_screen *screen, 1347 enum pipe_format format, 1348 const struct util_format_description *desc, 1349 int first_non_void) 1350{ 1351 struct si_screen *sscreen = (struct si_screen*)screen; 1352 bool enable_compressed_formats = (sscreen->b.info.drm_major == 2 && 1353 sscreen->b.info.drm_minor >= 31) || 1354 sscreen->b.info.drm_major == 3; 1355 bool uniform = true; 1356 int i; 1357 1358 /* Colorspace (return non-RGB formats directly). */ 1359 switch (desc->colorspace) { 1360 /* Depth stencil formats */ 1361 case UTIL_FORMAT_COLORSPACE_ZS: 1362 switch (format) { 1363 case PIPE_FORMAT_Z16_UNORM: 1364 return V_008F14_IMG_DATA_FORMAT_16; 1365 case PIPE_FORMAT_X24S8_UINT: 1366 case PIPE_FORMAT_Z24X8_UNORM: 1367 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1368 return V_008F14_IMG_DATA_FORMAT_8_24; 1369 case PIPE_FORMAT_X8Z24_UNORM: 1370 case PIPE_FORMAT_S8X24_UINT: 1371 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1372 return V_008F14_IMG_DATA_FORMAT_24_8; 1373 case PIPE_FORMAT_S8_UINT: 1374 return V_008F14_IMG_DATA_FORMAT_8; 1375 case PIPE_FORMAT_Z32_FLOAT: 1376 return V_008F14_IMG_DATA_FORMAT_32; 1377 case PIPE_FORMAT_X32_S8X24_UINT: 1378 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1379 return V_008F14_IMG_DATA_FORMAT_X24_8_32; 1380 default: 1381 goto out_unknown; 1382 } 1383 1384 case UTIL_FORMAT_COLORSPACE_YUV: 1385 goto out_unknown; /* TODO */ 1386 1387 case UTIL_FORMAT_COLORSPACE_SRGB: 1388 if (desc->nr_channels != 4 && desc->nr_channels != 1) 1389 goto out_unknown; 1390 break; 1391 1392 default: 1393 break; 1394 } 1395 1396 if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { 1397 if (!enable_compressed_formats) 1398 goto out_unknown; 1399 1400 switch (format) { 1401 case PIPE_FORMAT_RGTC1_SNORM: 1402 case PIPE_FORMAT_LATC1_SNORM: 1403 case PIPE_FORMAT_RGTC1_UNORM: 1404 case PIPE_FORMAT_LATC1_UNORM: 1405 return V_008F14_IMG_DATA_FORMAT_BC4; 1406 case PIPE_FORMAT_RGTC2_SNORM: 1407 case PIPE_FORMAT_LATC2_SNORM: 1408 case PIPE_FORMAT_RGTC2_UNORM: 1409 case PIPE_FORMAT_LATC2_UNORM: 1410 return V_008F14_IMG_DATA_FORMAT_BC5; 1411 default: 1412 goto out_unknown; 1413 } 1414 } 1415 1416 if (desc->layout == UTIL_FORMAT_LAYOUT_ETC && 1417 sscreen->b.family == CHIP_STONEY) { 1418 switch (format) { 1419 case PIPE_FORMAT_ETC1_RGB8: 1420 case PIPE_FORMAT_ETC2_RGB8: 1421 case PIPE_FORMAT_ETC2_SRGB8: 1422 return V_008F14_IMG_DATA_FORMAT_ETC2_RGB; 1423 case PIPE_FORMAT_ETC2_RGB8A1: 1424 case PIPE_FORMAT_ETC2_SRGB8A1: 1425 return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1; 1426 case PIPE_FORMAT_ETC2_RGBA8: 1427 case PIPE_FORMAT_ETC2_SRGBA8: 1428 return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA; 1429 case PIPE_FORMAT_ETC2_R11_UNORM: 1430 case PIPE_FORMAT_ETC2_R11_SNORM: 1431 return V_008F14_IMG_DATA_FORMAT_ETC2_R; 1432 case PIPE_FORMAT_ETC2_RG11_UNORM: 1433 case PIPE_FORMAT_ETC2_RG11_SNORM: 1434 return V_008F14_IMG_DATA_FORMAT_ETC2_RG; 1435 default: 1436 goto out_unknown; 1437 } 1438 } 1439 1440 if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { 1441 if (!enable_compressed_formats) 1442 goto out_unknown; 1443 1444 switch (format) { 1445 case PIPE_FORMAT_BPTC_RGBA_UNORM: 1446 case PIPE_FORMAT_BPTC_SRGBA: 1447 return V_008F14_IMG_DATA_FORMAT_BC7; 1448 case PIPE_FORMAT_BPTC_RGB_FLOAT: 1449 case PIPE_FORMAT_BPTC_RGB_UFLOAT: 1450 return V_008F14_IMG_DATA_FORMAT_BC6; 1451 default: 1452 goto out_unknown; 1453 } 1454 } 1455 1456 if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 1457 switch (format) { 1458 case PIPE_FORMAT_R8G8_B8G8_UNORM: 1459 case PIPE_FORMAT_G8R8_B8R8_UNORM: 1460 return V_008F14_IMG_DATA_FORMAT_GB_GR; 1461 case PIPE_FORMAT_G8R8_G8B8_UNORM: 1462 case PIPE_FORMAT_R8G8_R8B8_UNORM: 1463 return V_008F14_IMG_DATA_FORMAT_BG_RG; 1464 default: 1465 goto out_unknown; 1466 } 1467 } 1468 1469 if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { 1470 if (!enable_compressed_formats) 1471 goto out_unknown; 1472 1473 if (!util_format_s3tc_enabled) { 1474 goto out_unknown; 1475 } 1476 1477 switch (format) { 1478 case PIPE_FORMAT_DXT1_RGB: 1479 case PIPE_FORMAT_DXT1_RGBA: 1480 case PIPE_FORMAT_DXT1_SRGB: 1481 case PIPE_FORMAT_DXT1_SRGBA: 1482 return V_008F14_IMG_DATA_FORMAT_BC1; 1483 case PIPE_FORMAT_DXT3_RGBA: 1484 case PIPE_FORMAT_DXT3_SRGBA: 1485 return V_008F14_IMG_DATA_FORMAT_BC2; 1486 case PIPE_FORMAT_DXT5_RGBA: 1487 case PIPE_FORMAT_DXT5_SRGBA: 1488 return V_008F14_IMG_DATA_FORMAT_BC3; 1489 default: 1490 goto out_unknown; 1491 } 1492 } 1493 1494 if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) { 1495 return V_008F14_IMG_DATA_FORMAT_5_9_9_9; 1496 } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) { 1497 return V_008F14_IMG_DATA_FORMAT_10_11_11; 1498 } 1499 1500 /* R8G8Bx_SNORM - TODO CxV8U8 */ 1501 1502 /* hw cannot support mixed formats (except depth/stencil, since only 1503 * depth is read).*/ 1504 if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) 1505 goto out_unknown; 1506 1507 /* See whether the components are of the same size. */ 1508 for (i = 1; i < desc->nr_channels; i++) { 1509 uniform = uniform && desc->channel[0].size == desc->channel[i].size; 1510 } 1511 1512 /* Non-uniform formats. */ 1513 if (!uniform) { 1514 switch(desc->nr_channels) { 1515 case 3: 1516 if (desc->channel[0].size == 5 && 1517 desc->channel[1].size == 6 && 1518 desc->channel[2].size == 5) { 1519 return V_008F14_IMG_DATA_FORMAT_5_6_5; 1520 } 1521 goto out_unknown; 1522 case 4: 1523 if (desc->channel[0].size == 5 && 1524 desc->channel[1].size == 5 && 1525 desc->channel[2].size == 5 && 1526 desc->channel[3].size == 1) { 1527 return V_008F14_IMG_DATA_FORMAT_1_5_5_5; 1528 } 1529 if (desc->channel[0].size == 10 && 1530 desc->channel[1].size == 10 && 1531 desc->channel[2].size == 10 && 1532 desc->channel[3].size == 2) { 1533 return V_008F14_IMG_DATA_FORMAT_2_10_10_10; 1534 } 1535 goto out_unknown; 1536 } 1537 goto out_unknown; 1538 } 1539 1540 if (first_non_void < 0 || first_non_void > 3) 1541 goto out_unknown; 1542 1543 /* uniform formats */ 1544 switch (desc->channel[first_non_void].size) { 1545 case 4: 1546 switch (desc->nr_channels) { 1547#if 0 /* Not supported for render targets */ 1548 case 2: 1549 return V_008F14_IMG_DATA_FORMAT_4_4; 1550#endif 1551 case 4: 1552 return V_008F14_IMG_DATA_FORMAT_4_4_4_4; 1553 } 1554 break; 1555 case 8: 1556 switch (desc->nr_channels) { 1557 case 1: 1558 return V_008F14_IMG_DATA_FORMAT_8; 1559 case 2: 1560 return V_008F14_IMG_DATA_FORMAT_8_8; 1561 case 4: 1562 return V_008F14_IMG_DATA_FORMAT_8_8_8_8; 1563 } 1564 break; 1565 case 16: 1566 switch (desc->nr_channels) { 1567 case 1: 1568 return V_008F14_IMG_DATA_FORMAT_16; 1569 case 2: 1570 return V_008F14_IMG_DATA_FORMAT_16_16; 1571 case 4: 1572 return V_008F14_IMG_DATA_FORMAT_16_16_16_16; 1573 } 1574 break; 1575 case 32: 1576 switch (desc->nr_channels) { 1577 case 1: 1578 return V_008F14_IMG_DATA_FORMAT_32; 1579 case 2: 1580 return V_008F14_IMG_DATA_FORMAT_32_32; 1581#if 0 /* Not supported for render targets */ 1582 case 3: 1583 return V_008F14_IMG_DATA_FORMAT_32_32_32; 1584#endif 1585 case 4: 1586 return V_008F14_IMG_DATA_FORMAT_32_32_32_32; 1587 } 1588 } 1589 1590out_unknown: 1591 /* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */ 1592 return ~0; 1593} 1594 1595static unsigned si_tex_wrap(unsigned wrap) 1596{ 1597 switch (wrap) { 1598 default: 1599 case PIPE_TEX_WRAP_REPEAT: 1600 return V_008F30_SQ_TEX_WRAP; 1601 case PIPE_TEX_WRAP_CLAMP: 1602 return V_008F30_SQ_TEX_CLAMP_HALF_BORDER; 1603 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 1604 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL; 1605 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 1606 return V_008F30_SQ_TEX_CLAMP_BORDER; 1607 case PIPE_TEX_WRAP_MIRROR_REPEAT: 1608 return V_008F30_SQ_TEX_MIRROR; 1609 case PIPE_TEX_WRAP_MIRROR_CLAMP: 1610 return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER; 1611 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 1612 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; 1613 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 1614 return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER; 1615 } 1616} 1617 1618static unsigned si_tex_mipfilter(unsigned filter) 1619{ 1620 switch (filter) { 1621 case PIPE_TEX_MIPFILTER_NEAREST: 1622 return V_008F38_SQ_TEX_Z_FILTER_POINT; 1623 case PIPE_TEX_MIPFILTER_LINEAR: 1624 return V_008F38_SQ_TEX_Z_FILTER_LINEAR; 1625 default: 1626 case PIPE_TEX_MIPFILTER_NONE: 1627 return V_008F38_SQ_TEX_Z_FILTER_NONE; 1628 } 1629} 1630 1631static unsigned si_tex_compare(unsigned compare) 1632{ 1633 switch (compare) { 1634 default: 1635 case PIPE_FUNC_NEVER: 1636 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER; 1637 case PIPE_FUNC_LESS: 1638 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS; 1639 case PIPE_FUNC_EQUAL: 1640 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL; 1641 case PIPE_FUNC_LEQUAL: 1642 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; 1643 case PIPE_FUNC_GREATER: 1644 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER; 1645 case PIPE_FUNC_NOTEQUAL: 1646 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; 1647 case PIPE_FUNC_GEQUAL: 1648 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; 1649 case PIPE_FUNC_ALWAYS: 1650 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS; 1651 } 1652} 1653 1654static unsigned si_tex_dim(unsigned res_target, unsigned view_target, 1655 unsigned nr_samples) 1656{ 1657 if (view_target == PIPE_TEXTURE_CUBE || 1658 view_target == PIPE_TEXTURE_CUBE_ARRAY) 1659 res_target = view_target; 1660 /* If interpreting cubemaps as something else, set 2D_ARRAY. */ 1661 else if (res_target == PIPE_TEXTURE_CUBE || 1662 res_target == PIPE_TEXTURE_CUBE_ARRAY) 1663 res_target = PIPE_TEXTURE_2D_ARRAY; 1664 1665 switch (res_target) { 1666 default: 1667 case PIPE_TEXTURE_1D: 1668 return V_008F1C_SQ_RSRC_IMG_1D; 1669 case PIPE_TEXTURE_1D_ARRAY: 1670 return V_008F1C_SQ_RSRC_IMG_1D_ARRAY; 1671 case PIPE_TEXTURE_2D: 1672 case PIPE_TEXTURE_RECT: 1673 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA : 1674 V_008F1C_SQ_RSRC_IMG_2D; 1675 case PIPE_TEXTURE_2D_ARRAY: 1676 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : 1677 V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 1678 case PIPE_TEXTURE_3D: 1679 return V_008F1C_SQ_RSRC_IMG_3D; 1680 case PIPE_TEXTURE_CUBE: 1681 case PIPE_TEXTURE_CUBE_ARRAY: 1682 return V_008F1C_SQ_RSRC_IMG_CUBE; 1683 } 1684} 1685 1686/* 1687 * Format support testing 1688 */ 1689 1690static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) 1691{ 1692 return si_translate_texformat(screen, format, util_format_description(format), 1693 util_format_get_first_non_void_channel(format)) != ~0U; 1694} 1695 1696static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen, 1697 const struct util_format_description *desc, 1698 int first_non_void) 1699{ 1700 unsigned type; 1701 int i; 1702 1703 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 1704 return V_008F0C_BUF_DATA_FORMAT_10_11_11; 1705 1706 assert(first_non_void >= 0); 1707 type = desc->channel[first_non_void].type; 1708 1709 if (type == UTIL_FORMAT_TYPE_FIXED) 1710 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1711 1712 if (desc->nr_channels == 4 && 1713 desc->channel[0].size == 10 && 1714 desc->channel[1].size == 10 && 1715 desc->channel[2].size == 10 && 1716 desc->channel[3].size == 2) 1717 return V_008F0C_BUF_DATA_FORMAT_2_10_10_10; 1718 1719 /* See whether the components are of the same size. */ 1720 for (i = 0; i < desc->nr_channels; i++) { 1721 if (desc->channel[first_non_void].size != desc->channel[i].size) 1722 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1723 } 1724 1725 switch (desc->channel[first_non_void].size) { 1726 case 8: 1727 switch (desc->nr_channels) { 1728 case 1: 1729 return V_008F0C_BUF_DATA_FORMAT_8; 1730 case 2: 1731 return V_008F0C_BUF_DATA_FORMAT_8_8; 1732 case 3: 1733 case 4: 1734 return V_008F0C_BUF_DATA_FORMAT_8_8_8_8; 1735 } 1736 break; 1737 case 16: 1738 switch (desc->nr_channels) { 1739 case 1: 1740 return V_008F0C_BUF_DATA_FORMAT_16; 1741 case 2: 1742 return V_008F0C_BUF_DATA_FORMAT_16_16; 1743 case 3: 1744 case 4: 1745 return V_008F0C_BUF_DATA_FORMAT_16_16_16_16; 1746 } 1747 break; 1748 case 32: 1749 /* From the Southern Islands ISA documentation about MTBUF: 1750 * 'Memory reads of data in memory that is 32 or 64 bits do not 1751 * undergo any format conversion.' 1752 */ 1753 if (type != UTIL_FORMAT_TYPE_FLOAT && 1754 !desc->channel[first_non_void].pure_integer) 1755 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1756 1757 switch (desc->nr_channels) { 1758 case 1: 1759 return V_008F0C_BUF_DATA_FORMAT_32; 1760 case 2: 1761 return V_008F0C_BUF_DATA_FORMAT_32_32; 1762 case 3: 1763 return V_008F0C_BUF_DATA_FORMAT_32_32_32; 1764 case 4: 1765 return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 1766 } 1767 break; 1768 } 1769 1770 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1771} 1772 1773static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen, 1774 const struct util_format_description *desc, 1775 int first_non_void) 1776{ 1777 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 1778 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 1779 1780 assert(first_non_void >= 0); 1781 1782 switch (desc->channel[first_non_void].type) { 1783 case UTIL_FORMAT_TYPE_SIGNED: 1784 if (desc->channel[first_non_void].normalized) 1785 return V_008F0C_BUF_NUM_FORMAT_SNORM; 1786 else if (desc->channel[first_non_void].pure_integer) 1787 return V_008F0C_BUF_NUM_FORMAT_SINT; 1788 else 1789 return V_008F0C_BUF_NUM_FORMAT_SSCALED; 1790 break; 1791 case UTIL_FORMAT_TYPE_UNSIGNED: 1792 if (desc->channel[first_non_void].normalized) 1793 return V_008F0C_BUF_NUM_FORMAT_UNORM; 1794 else if (desc->channel[first_non_void].pure_integer) 1795 return V_008F0C_BUF_NUM_FORMAT_UINT; 1796 else 1797 return V_008F0C_BUF_NUM_FORMAT_USCALED; 1798 break; 1799 case UTIL_FORMAT_TYPE_FLOAT: 1800 default: 1801 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 1802 } 1803} 1804 1805static unsigned si_is_vertex_format_supported(struct pipe_screen *screen, 1806 enum pipe_format format, 1807 unsigned usage) 1808{ 1809 const struct util_format_description *desc; 1810 int first_non_void; 1811 unsigned data_format; 1812 1813 assert((usage & ~(PIPE_BIND_SHADER_IMAGE | 1814 PIPE_BIND_SAMPLER_VIEW | 1815 PIPE_BIND_VERTEX_BUFFER)) == 0); 1816 1817 desc = util_format_description(format); 1818 1819 /* There are no native 8_8_8 or 16_16_16 data formats, and we currently 1820 * select 8_8_8_8 and 16_16_16_16 instead. This works reasonably well 1821 * for read-only access (with caveats surrounding bounds checks), but 1822 * obviously fails for write access which we have to implement for 1823 * shader images. Luckily, OpenGL doesn't expect this to be supported 1824 * anyway, and so the only impact is on PBO uploads / downloads, which 1825 * shouldn't be expected to be fast for GL_RGB anyway. 1826 */ 1827 if (desc->block.bits == 3 * 8 || 1828 desc->block.bits == 3 * 16) { 1829 if (usage & (PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW)) { 1830 usage &= ~(PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW); 1831 if (!usage) 1832 return 0; 1833 } 1834 } 1835 1836 first_non_void = util_format_get_first_non_void_channel(format); 1837 data_format = si_translate_buffer_dataformat(screen, desc, first_non_void); 1838 if (data_format == V_008F0C_BUF_DATA_FORMAT_INVALID) 1839 return 0; 1840 1841 return usage; 1842} 1843 1844static bool si_is_colorbuffer_format_supported(enum pipe_format format) 1845{ 1846 return si_translate_colorformat(format) != V_028C70_COLOR_INVALID && 1847 r600_translate_colorswap(format, false) != ~0U; 1848} 1849 1850static bool si_is_zs_format_supported(enum pipe_format format) 1851{ 1852 return si_translate_dbformat(format) != V_028040_Z_INVALID; 1853} 1854 1855static boolean si_is_format_supported(struct pipe_screen *screen, 1856 enum pipe_format format, 1857 enum pipe_texture_target target, 1858 unsigned sample_count, 1859 unsigned usage) 1860{ 1861 unsigned retval = 0; 1862 1863 if (target >= PIPE_MAX_TEXTURE_TYPES) { 1864 R600_ERR("r600: unsupported texture type %d\n", target); 1865 return false; 1866 } 1867 1868 if (!util_format_is_supported(format, usage)) 1869 return false; 1870 1871 if (sample_count > 1) { 1872 if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE)) 1873 return false; 1874 1875 if (usage & PIPE_BIND_SHADER_IMAGE) 1876 return false; 1877 1878 switch (sample_count) { 1879 case 2: 1880 case 4: 1881 case 8: 1882 break; 1883 case 16: 1884 if (format == PIPE_FORMAT_NONE) 1885 return true; 1886 else 1887 return false; 1888 default: 1889 return false; 1890 } 1891 } 1892 1893 if (usage & (PIPE_BIND_SAMPLER_VIEW | 1894 PIPE_BIND_SHADER_IMAGE)) { 1895 if (target == PIPE_BUFFER) { 1896 retval |= si_is_vertex_format_supported( 1897 screen, format, usage & (PIPE_BIND_SAMPLER_VIEW | 1898 PIPE_BIND_SHADER_IMAGE)); 1899 } else { 1900 if (si_is_sampler_format_supported(screen, format)) 1901 retval |= usage & (PIPE_BIND_SAMPLER_VIEW | 1902 PIPE_BIND_SHADER_IMAGE); 1903 } 1904 } 1905 1906 if ((usage & (PIPE_BIND_RENDER_TARGET | 1907 PIPE_BIND_DISPLAY_TARGET | 1908 PIPE_BIND_SCANOUT | 1909 PIPE_BIND_SHARED | 1910 PIPE_BIND_BLENDABLE)) && 1911 si_is_colorbuffer_format_supported(format)) { 1912 retval |= usage & 1913 (PIPE_BIND_RENDER_TARGET | 1914 PIPE_BIND_DISPLAY_TARGET | 1915 PIPE_BIND_SCANOUT | 1916 PIPE_BIND_SHARED); 1917 if (!util_format_is_pure_integer(format) && 1918 !util_format_is_depth_or_stencil(format)) 1919 retval |= usage & PIPE_BIND_BLENDABLE; 1920 } 1921 1922 if ((usage & PIPE_BIND_DEPTH_STENCIL) && 1923 si_is_zs_format_supported(format)) { 1924 retval |= PIPE_BIND_DEPTH_STENCIL; 1925 } 1926 1927 if (usage & PIPE_BIND_VERTEX_BUFFER) { 1928 retval |= si_is_vertex_format_supported(screen, format, 1929 PIPE_BIND_VERTEX_BUFFER); 1930 } 1931 1932 if ((usage & PIPE_BIND_LINEAR) && 1933 !util_format_is_compressed(format) && 1934 !(usage & PIPE_BIND_DEPTH_STENCIL)) 1935 retval |= PIPE_BIND_LINEAR; 1936 1937 return retval == usage; 1938} 1939 1940/* 1941 * framebuffer handling 1942 */ 1943 1944static void si_choose_spi_color_formats(struct r600_surface *surf, 1945 unsigned format, unsigned swap, 1946 unsigned ntype, bool is_depth) 1947{ 1948 /* Alpha is needed for alpha-to-coverage. 1949 * Blending may be with or without alpha. 1950 */ 1951 unsigned normal = 0; /* most optimal, may not support blending or export alpha */ 1952 unsigned alpha = 0; /* exports alpha, but may not support blending */ 1953 unsigned blend = 0; /* supports blending, but may not export alpha */ 1954 unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */ 1955 1956 /* Choose the SPI color formats. These are required values for Stoney/RB+. 1957 * Other chips have multiple choices, though they are not necessarily better. 1958 */ 1959 switch (format) { 1960 case V_028C70_COLOR_5_6_5: 1961 case V_028C70_COLOR_1_5_5_5: 1962 case V_028C70_COLOR_5_5_5_1: 1963 case V_028C70_COLOR_4_4_4_4: 1964 case V_028C70_COLOR_10_11_11: 1965 case V_028C70_COLOR_11_11_10: 1966 case V_028C70_COLOR_8: 1967 case V_028C70_COLOR_8_8: 1968 case V_028C70_COLOR_8_8_8_8: 1969 case V_028C70_COLOR_10_10_10_2: 1970 case V_028C70_COLOR_2_10_10_10: 1971 if (ntype == V_028C70_NUMBER_UINT) 1972 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR; 1973 else if (ntype == V_028C70_NUMBER_SINT) 1974 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR; 1975 else 1976 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR; 1977 break; 1978 1979 case V_028C70_COLOR_16: 1980 case V_028C70_COLOR_16_16: 1981 case V_028C70_COLOR_16_16_16_16: 1982 if (ntype == V_028C70_NUMBER_UNORM || 1983 ntype == V_028C70_NUMBER_SNORM) { 1984 /* UNORM16 and SNORM16 don't support blending */ 1985 if (ntype == V_028C70_NUMBER_UNORM) 1986 normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR; 1987 else 1988 normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR; 1989 1990 /* Use 32 bits per channel for blending. */ 1991 if (format == V_028C70_COLOR_16) { 1992 if (swap == V_028C70_SWAP_STD) { /* R */ 1993 blend = V_028714_SPI_SHADER_32_R; 1994 blend_alpha = V_028714_SPI_SHADER_32_AR; 1995 } else if (swap == V_028C70_SWAP_ALT_REV) /* A */ 1996 blend = blend_alpha = V_028714_SPI_SHADER_32_AR; 1997 else 1998 assert(0); 1999 } else if (format == V_028C70_COLOR_16_16) { 2000 if (swap == V_028C70_SWAP_STD) { /* RG */ 2001 blend = V_028714_SPI_SHADER_32_GR; 2002 blend_alpha = V_028714_SPI_SHADER_32_ABGR; 2003 } else if (swap == V_028C70_SWAP_ALT) /* RA */ 2004 blend = blend_alpha = V_028714_SPI_SHADER_32_AR; 2005 else 2006 assert(0); 2007 } else /* 16_16_16_16 */ 2008 blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR; 2009 } else if (ntype == V_028C70_NUMBER_UINT) 2010 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR; 2011 else if (ntype == V_028C70_NUMBER_SINT) 2012 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR; 2013 else if (ntype == V_028C70_NUMBER_FLOAT) 2014 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR; 2015 else 2016 assert(0); 2017 break; 2018 2019 case V_028C70_COLOR_32: 2020 if (swap == V_028C70_SWAP_STD) { /* R */ 2021 blend = normal = V_028714_SPI_SHADER_32_R; 2022 alpha = blend_alpha = V_028714_SPI_SHADER_32_AR; 2023 } else if (swap == V_028C70_SWAP_ALT_REV) /* A */ 2024 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR; 2025 else 2026 assert(0); 2027 break; 2028 2029 case V_028C70_COLOR_32_32: 2030 if (swap == V_028C70_SWAP_STD) { /* RG */ 2031 blend = normal = V_028714_SPI_SHADER_32_GR; 2032 alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR; 2033 } else if (swap == V_028C70_SWAP_ALT) /* RA */ 2034 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR; 2035 else 2036 assert(0); 2037 break; 2038 2039 case V_028C70_COLOR_32_32_32_32: 2040 case V_028C70_COLOR_8_24: 2041 case V_028C70_COLOR_24_8: 2042 case V_028C70_COLOR_X24_8_32_FLOAT: 2043 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR; 2044 break; 2045 2046 default: 2047 assert(0); 2048 return; 2049 } 2050 2051 /* The DB->CB copy needs 32_ABGR. */ 2052 if (is_depth) 2053 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR; 2054 2055 surf->spi_shader_col_format = normal; 2056 surf->spi_shader_col_format_alpha = alpha; 2057 surf->spi_shader_col_format_blend = blend; 2058 surf->spi_shader_col_format_blend_alpha = blend_alpha; 2059} 2060 2061static void si_initialize_color_surface(struct si_context *sctx, 2062 struct r600_surface *surf) 2063{ 2064 struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; 2065 unsigned color_info, color_attrib, color_view; 2066 unsigned format, swap, ntype, endian; 2067 const struct util_format_description *desc; 2068 int i; 2069 unsigned blend_clamp = 0, blend_bypass = 0; 2070 2071 color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) | 2072 S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer); 2073 2074 desc = util_format_description(surf->base.format); 2075 for (i = 0; i < 4; i++) { 2076 if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { 2077 break; 2078 } 2079 } 2080 if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) { 2081 ntype = V_028C70_NUMBER_FLOAT; 2082 } else { 2083 ntype = V_028C70_NUMBER_UNORM; 2084 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) 2085 ntype = V_028C70_NUMBER_SRGB; 2086 else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 2087 if (desc->channel[i].pure_integer) { 2088 ntype = V_028C70_NUMBER_SINT; 2089 } else { 2090 assert(desc->channel[i].normalized); 2091 ntype = V_028C70_NUMBER_SNORM; 2092 } 2093 } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { 2094 if (desc->channel[i].pure_integer) { 2095 ntype = V_028C70_NUMBER_UINT; 2096 } else { 2097 assert(desc->channel[i].normalized); 2098 ntype = V_028C70_NUMBER_UNORM; 2099 } 2100 } 2101 } 2102 2103 format = si_translate_colorformat(surf->base.format); 2104 if (format == V_028C70_COLOR_INVALID) { 2105 R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format); 2106 } 2107 assert(format != V_028C70_COLOR_INVALID); 2108 swap = r600_translate_colorswap(surf->base.format, false); 2109 endian = si_colorformat_endian_swap(format); 2110 2111 /* blend clamp should be set for all NORM/SRGB types */ 2112 if (ntype == V_028C70_NUMBER_UNORM || 2113 ntype == V_028C70_NUMBER_SNORM || 2114 ntype == V_028C70_NUMBER_SRGB) 2115 blend_clamp = 1; 2116 2117 /* set blend bypass according to docs if SINT/UINT or 2118 8/24 COLOR variants */ 2119 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT || 2120 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 || 2121 format == V_028C70_COLOR_X24_8_32_FLOAT) { 2122 blend_clamp = 0; 2123 blend_bypass = 1; 2124 } 2125 2126 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) && 2127 (format == V_028C70_COLOR_8 || 2128 format == V_028C70_COLOR_8_8 || 2129 format == V_028C70_COLOR_8_8_8_8)) 2130 surf->color_is_int8 = true; 2131 2132 color_info = S_028C70_FORMAT(format) | 2133 S_028C70_COMP_SWAP(swap) | 2134 S_028C70_BLEND_CLAMP(blend_clamp) | 2135 S_028C70_BLEND_BYPASS(blend_bypass) | 2136 S_028C70_SIMPLE_FLOAT(1) | 2137 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && 2138 ntype != V_028C70_NUMBER_SNORM && 2139 ntype != V_028C70_NUMBER_SRGB && 2140 format != V_028C70_COLOR_8_24 && 2141 format != V_028C70_COLOR_24_8) | 2142 S_028C70_NUMBER_TYPE(ntype) | 2143 S_028C70_ENDIAN(endian); 2144 2145 /* Intensity is implemented as Red, so treat it that way. */ 2146 color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1 || 2147 util_format_is_intensity(surf->base.format)); 2148 2149 if (rtex->resource.b.b.nr_samples > 1) { 2150 unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples); 2151 2152 color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | 2153 S_028C74_NUM_FRAGMENTS(log_samples); 2154 2155 if (rtex->fmask.size) { 2156 color_info |= S_028C70_COMPRESSION(1); 2157 unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height); 2158 2159 if (sctx->b.chip_class == SI) { 2160 /* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */ 2161 color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); 2162 } 2163 } 2164 } 2165 2166 surf->cb_color_view = color_view; 2167 surf->cb_color_info = color_info; 2168 surf->cb_color_attrib = color_attrib; 2169 2170 if (sctx->b.chip_class >= VI) { 2171 unsigned max_uncompressed_block_size = 2; 2172 2173 if (rtex->resource.b.b.nr_samples > 1) { 2174 if (rtex->surface.bpe == 1) 2175 max_uncompressed_block_size = 0; 2176 else if (rtex->surface.bpe == 2) 2177 max_uncompressed_block_size = 1; 2178 } 2179 2180 surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) | 2181 S_028C78_INDEPENDENT_64B_BLOCKS(1); 2182 } 2183 2184 /* This must be set for fast clear to work without FMASK. */ 2185 if (!rtex->fmask.size && sctx->b.chip_class == SI) { 2186 unsigned bankh = util_logbase2(rtex->surface.bankh); 2187 surf->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh); 2188 } 2189 2190 /* Determine pixel shader export format */ 2191 si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth); 2192 2193 surf->color_initialized = true; 2194} 2195 2196static void si_init_depth_surface(struct si_context *sctx, 2197 struct r600_surface *surf) 2198{ 2199 struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; 2200 unsigned level = surf->base.u.tex.level; 2201 struct radeon_surf_level *levelinfo = &rtex->surface.level[level]; 2202 unsigned format; 2203 uint32_t z_info, s_info, db_depth_info; 2204 uint64_t z_offs, s_offs; 2205 uint32_t db_htile_data_base, db_htile_surface; 2206 2207 format = si_translate_dbformat(rtex->db_render_format); 2208 2209 if (format == V_028040_Z_INVALID) { 2210 R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format); 2211 } 2212 assert(format != V_028040_Z_INVALID); 2213 2214 s_offs = z_offs = rtex->resource.gpu_address; 2215 z_offs += rtex->surface.level[level].offset; 2216 s_offs += rtex->surface.stencil_level[level].offset; 2217 2218 db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!rtex->tc_compatible_htile); 2219 2220 z_info = S_028040_FORMAT(format); 2221 if (rtex->resource.b.b.nr_samples > 1) { 2222 z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples)); 2223 } 2224 2225 if (rtex->surface.flags & RADEON_SURF_SBUFFER) 2226 s_info = S_028044_FORMAT(V_028044_STENCIL_8); 2227 else 2228 s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID); 2229 2230 if (sctx->b.chip_class >= CIK) { 2231 struct radeon_info *info = &sctx->screen->b.info; 2232 unsigned index = rtex->surface.tiling_index[level]; 2233 unsigned stencil_index = rtex->surface.stencil_tiling_index[level]; 2234 unsigned macro_index = rtex->surface.macro_tile_index; 2235 unsigned tile_mode = info->si_tile_mode_array[index]; 2236 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index]; 2237 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index]; 2238 2239 db_depth_info |= 2240 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) | 2241 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) | 2242 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) | 2243 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) | 2244 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) | 2245 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode)); 2246 z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode)); 2247 s_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode)); 2248 } else { 2249 unsigned tile_mode_index = si_tile_mode_index(rtex, level, false); 2250 z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index); 2251 tile_mode_index = si_tile_mode_index(rtex, level, true); 2252 s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index); 2253 } 2254 2255 /* HiZ aka depth buffer htile */ 2256 /* use htile only for first level */ 2257 if (rtex->htile_buffer && !level) { 2258 z_info |= S_028040_TILE_SURFACE_ENABLE(1) | 2259 S_028040_ALLOW_EXPCLEAR(1); 2260 2261 if (rtex->surface.flags & RADEON_SURF_SBUFFER) { 2262 /* Workaround: For a not yet understood reason, the 2263 * combination of MSAA, fast stencil clear and stencil 2264 * decompress messes with subsequent stencil buffer 2265 * uses. Problem was reproduced on Verde, Bonaire, 2266 * Tonga, and Carrizo. 2267 * 2268 * Disabling EXPCLEAR works around the problem. 2269 * 2270 * Check piglit's arb_texture_multisample-stencil-clear 2271 * test if you want to try changing this. 2272 */ 2273 if (rtex->resource.b.b.nr_samples <= 1) 2274 s_info |= S_028044_ALLOW_EXPCLEAR(1); 2275 } else if (!rtex->tc_compatible_htile) { 2276 /* Use all of the htile_buffer for depth if there's no stencil. 2277 * This must not be set when TC-compatible HTILE is enabled 2278 * due to a hw bug. 2279 */ 2280 s_info |= S_028044_TILE_STENCIL_DISABLE(1); 2281 } 2282 2283 uint64_t va = rtex->htile_buffer->gpu_address; 2284 db_htile_data_base = va >> 8; 2285 db_htile_surface = S_028ABC_FULL_CACHE(1); 2286 2287 if (rtex->tc_compatible_htile) { 2288 db_htile_surface |= S_028ABC_TC_COMPATIBLE(1); 2289 2290 switch (rtex->resource.b.b.nr_samples) { 2291 case 0: 2292 case 1: 2293 z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5); 2294 break; 2295 case 2: 2296 case 4: 2297 z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3); 2298 break; 2299 case 8: 2300 z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2); 2301 break; 2302 default: 2303 assert(0); 2304 } 2305 } 2306 } else { 2307 db_htile_data_base = 0; 2308 db_htile_surface = 0; 2309 } 2310 2311 assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0); 2312 2313 surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) | 2314 S_028008_SLICE_MAX(surf->base.u.tex.last_layer); 2315 surf->db_htile_data_base = db_htile_data_base; 2316 surf->db_depth_info = db_depth_info; 2317 surf->db_z_info = z_info; 2318 surf->db_stencil_info = s_info; 2319 surf->db_depth_base = z_offs >> 8; 2320 surf->db_stencil_base = s_offs >> 8; 2321 surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) | 2322 S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1); 2323 surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x * 2324 levelinfo->nblk_y) / 64 - 1); 2325 surf->db_htile_surface = db_htile_surface; 2326 2327 surf->depth_initialized = true; 2328} 2329 2330static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state) 2331{ 2332 for (int i = 0; i < state->nr_cbufs; ++i) { 2333 struct r600_surface *surf = NULL; 2334 struct r600_texture *rtex; 2335 2336 if (!state->cbufs[i]) 2337 continue; 2338 surf = (struct r600_surface*)state->cbufs[i]; 2339 rtex = (struct r600_texture*)surf->base.texture; 2340 2341 p_atomic_dec(&rtex->framebuffers_bound); 2342 } 2343} 2344 2345static void si_set_framebuffer_state(struct pipe_context *ctx, 2346 const struct pipe_framebuffer_state *state) 2347{ 2348 struct si_context *sctx = (struct si_context *)ctx; 2349 struct pipe_constant_buffer constbuf = {0}; 2350 struct r600_surface *surf = NULL; 2351 struct r600_texture *rtex; 2352 bool old_any_dst_linear = sctx->framebuffer.any_dst_linear; 2353 unsigned old_nr_samples = sctx->framebuffer.nr_samples; 2354 int i; 2355 2356 for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 2357 if (!sctx->framebuffer.state.cbufs[i]) 2358 continue; 2359 2360 rtex = (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture; 2361 if (rtex->dcc_gather_statistics) 2362 vi_separate_dcc_stop_query(ctx, rtex); 2363 } 2364 2365 /* Only flush TC when changing the framebuffer state, because 2366 * the only client not using TC that can change textures is 2367 * the framebuffer. 2368 * 2369 * Flush all CB and DB caches here because all buffers can be used 2370 * for write by both TC (with shader image stores) and CB/DB. 2371 */ 2372 sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | 2373 SI_CONTEXT_INV_GLOBAL_L2 | 2374 SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER | 2375 SI_CONTEXT_CS_PARTIAL_FLUSH; 2376 2377 /* Take the maximum of the old and new count. If the new count is lower, 2378 * dirtying is needed to disable the unbound colorbuffers. 2379 */ 2380 sctx->framebuffer.dirty_cbufs |= 2381 (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1; 2382 sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf; 2383 2384 si_dec_framebuffer_counters(&sctx->framebuffer.state); 2385 util_copy_framebuffer_state(&sctx->framebuffer.state, state); 2386 2387 sctx->framebuffer.colorbuf_enabled_4bit = 0; 2388 sctx->framebuffer.spi_shader_col_format = 0; 2389 sctx->framebuffer.spi_shader_col_format_alpha = 0; 2390 sctx->framebuffer.spi_shader_col_format_blend = 0; 2391 sctx->framebuffer.spi_shader_col_format_blend_alpha = 0; 2392 sctx->framebuffer.color_is_int8 = 0; 2393 2394 sctx->framebuffer.compressed_cb_mask = 0; 2395 sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); 2396 sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples); 2397 sctx->framebuffer.any_dst_linear = false; 2398 2399 for (i = 0; i < state->nr_cbufs; i++) { 2400 if (!state->cbufs[i]) 2401 continue; 2402 2403 surf = (struct r600_surface*)state->cbufs[i]; 2404 rtex = (struct r600_texture*)surf->base.texture; 2405 2406 if (!surf->color_initialized) { 2407 si_initialize_color_surface(sctx, surf); 2408 } 2409 2410 sctx->framebuffer.colorbuf_enabled_4bit |= 0xf << (i * 4); 2411 sctx->framebuffer.spi_shader_col_format |= 2412 surf->spi_shader_col_format << (i * 4); 2413 sctx->framebuffer.spi_shader_col_format_alpha |= 2414 surf->spi_shader_col_format_alpha << (i * 4); 2415 sctx->framebuffer.spi_shader_col_format_blend |= 2416 surf->spi_shader_col_format_blend << (i * 4); 2417 sctx->framebuffer.spi_shader_col_format_blend_alpha |= 2418 surf->spi_shader_col_format_blend_alpha << (i * 4); 2419 2420 if (surf->color_is_int8) 2421 sctx->framebuffer.color_is_int8 |= 1 << i; 2422 2423 if (rtex->fmask.size) { 2424 sctx->framebuffer.compressed_cb_mask |= 1 << i; 2425 } 2426 2427 if (rtex->surface.is_linear) 2428 sctx->framebuffer.any_dst_linear = true; 2429 2430 r600_context_add_resource_size(ctx, surf->base.texture); 2431 2432 p_atomic_inc(&rtex->framebuffers_bound); 2433 2434 if (rtex->dcc_gather_statistics) { 2435 /* Dirty tracking must be enabled for DCC usage analysis. */ 2436 sctx->framebuffer.compressed_cb_mask |= 1 << i; 2437 vi_separate_dcc_start_query(ctx, rtex); 2438 } 2439 } 2440 2441 if (state->zsbuf) { 2442 surf = (struct r600_surface*)state->zsbuf; 2443 rtex = (struct r600_texture*)surf->base.texture; 2444 2445 if (!surf->depth_initialized) { 2446 si_init_depth_surface(sctx, surf); 2447 } 2448 r600_context_add_resource_size(ctx, surf->base.texture); 2449 } 2450 2451 si_update_poly_offset_state(sctx); 2452 si_mark_atom_dirty(sctx, &sctx->cb_render_state); 2453 si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); 2454 2455 if (sctx->framebuffer.any_dst_linear != old_any_dst_linear) 2456 si_mark_atom_dirty(sctx, &sctx->msaa_config); 2457 2458 if (sctx->framebuffer.nr_samples != old_nr_samples) { 2459 si_mark_atom_dirty(sctx, &sctx->msaa_config); 2460 si_mark_atom_dirty(sctx, &sctx->db_render_state); 2461 2462 /* Set sample locations as fragment shader constants. */ 2463 switch (sctx->framebuffer.nr_samples) { 2464 case 1: 2465 constbuf.user_buffer = sctx->b.sample_locations_1x; 2466 break; 2467 case 2: 2468 constbuf.user_buffer = sctx->b.sample_locations_2x; 2469 break; 2470 case 4: 2471 constbuf.user_buffer = sctx->b.sample_locations_4x; 2472 break; 2473 case 8: 2474 constbuf.user_buffer = sctx->b.sample_locations_8x; 2475 break; 2476 case 16: 2477 constbuf.user_buffer = sctx->b.sample_locations_16x; 2478 break; 2479 default: 2480 R600_ERR("Requested an invalid number of samples %i.\n", 2481 sctx->framebuffer.nr_samples); 2482 assert(0); 2483 } 2484 constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; 2485 si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf); 2486 2487 si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom); 2488 } 2489 2490 sctx->need_check_render_feedback = true; 2491 sctx->do_update_shaders = true; 2492} 2493 2494static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom) 2495{ 2496 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 2497 struct pipe_framebuffer_state *state = &sctx->framebuffer.state; 2498 unsigned i, nr_cbufs = state->nr_cbufs; 2499 struct r600_texture *tex = NULL; 2500 struct r600_surface *cb = NULL; 2501 unsigned cb_color_info = 0; 2502 2503 /* Colorbuffers. */ 2504 for (i = 0; i < nr_cbufs; i++) { 2505 const struct radeon_surf_level *level_info; 2506 unsigned pitch_tile_max, slice_tile_max, tile_mode_index; 2507 unsigned cb_color_base, cb_color_fmask, cb_color_attrib; 2508 unsigned cb_color_pitch, cb_color_slice, cb_color_fmask_slice; 2509 2510 if (!(sctx->framebuffer.dirty_cbufs & (1 << i))) 2511 continue; 2512 2513 cb = (struct r600_surface*)state->cbufs[i]; 2514 if (!cb) { 2515 radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 2516 S_028C70_FORMAT(V_028C70_COLOR_INVALID)); 2517 continue; 2518 } 2519 2520 tex = (struct r600_texture *)cb->base.texture; 2521 level_info = &tex->surface.level[cb->base.u.tex.level]; 2522 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2523 &tex->resource, RADEON_USAGE_READWRITE, 2524 tex->resource.b.b.nr_samples > 1 ? 2525 RADEON_PRIO_COLOR_BUFFER_MSAA : 2526 RADEON_PRIO_COLOR_BUFFER); 2527 2528 if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) { 2529 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2530 tex->cmask_buffer, RADEON_USAGE_READWRITE, 2531 RADEON_PRIO_CMASK); 2532 } 2533 2534 if (tex->dcc_separate_buffer) 2535 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2536 tex->dcc_separate_buffer, 2537 RADEON_USAGE_READWRITE, 2538 RADEON_PRIO_DCC); 2539 2540 /* Compute mutable surface parameters. */ 2541 pitch_tile_max = level_info->nblk_x / 8 - 1; 2542 slice_tile_max = level_info->nblk_x * 2543 level_info->nblk_y / 64 - 1; 2544 tile_mode_index = si_tile_mode_index(tex, cb->base.u.tex.level, false); 2545 2546 cb_color_base = (tex->resource.gpu_address + level_info->offset) >> 8; 2547 cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max); 2548 cb_color_slice = S_028C68_TILE_MAX(slice_tile_max); 2549 cb_color_attrib = cb->cb_color_attrib | 2550 S_028C74_TILE_MODE_INDEX(tile_mode_index); 2551 2552 if (tex->fmask.size) { 2553 if (sctx->b.chip_class >= CIK) 2554 cb_color_pitch |= S_028C64_FMASK_TILE_MAX(tex->fmask.pitch_in_pixels / 8 - 1); 2555 cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tex->fmask.tile_mode_index); 2556 cb_color_fmask = (tex->resource.gpu_address + tex->fmask.offset) >> 8; 2557 cb_color_fmask_slice = S_028C88_TILE_MAX(tex->fmask.slice_tile_max); 2558 } else { 2559 /* This must be set for fast clear to work without FMASK. */ 2560 if (sctx->b.chip_class >= CIK) 2561 cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max); 2562 cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index); 2563 cb_color_fmask = cb_color_base; 2564 cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max); 2565 } 2566 2567 cb_color_info = cb->cb_color_info | tex->cb_color_info; 2568 2569 if (tex->dcc_offset && cb->base.u.tex.level < tex->surface.num_dcc_levels) { 2570 bool is_msaa_resolve_dst = state->cbufs[0] && 2571 state->cbufs[0]->texture->nr_samples > 1 && 2572 state->cbufs[1] == &cb->base && 2573 state->cbufs[1]->texture->nr_samples <= 1; 2574 2575 if (!is_msaa_resolve_dst) 2576 cb_color_info |= S_028C70_DCC_ENABLE(1); 2577 } 2578 2579 radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 2580 sctx->b.chip_class >= VI ? 14 : 13); 2581 radeon_emit(cs, cb_color_base); /* R_028C60_CB_COLOR0_BASE */ 2582 radeon_emit(cs, cb_color_pitch); /* R_028C64_CB_COLOR0_PITCH */ 2583 radeon_emit(cs, cb_color_slice); /* R_028C68_CB_COLOR0_SLICE */ 2584 radeon_emit(cs, cb->cb_color_view); /* R_028C6C_CB_COLOR0_VIEW */ 2585 radeon_emit(cs, cb_color_info); /* R_028C70_CB_COLOR0_INFO */ 2586 radeon_emit(cs, cb_color_attrib); /* R_028C74_CB_COLOR0_ATTRIB */ 2587 radeon_emit(cs, cb->cb_dcc_control); /* R_028C78_CB_COLOR0_DCC_CONTROL */ 2588 radeon_emit(cs, tex->cmask.base_address_reg); /* R_028C7C_CB_COLOR0_CMASK */ 2589 radeon_emit(cs, tex->cmask.slice_tile_max); /* R_028C80_CB_COLOR0_CMASK_SLICE */ 2590 radeon_emit(cs, cb_color_fmask); /* R_028C84_CB_COLOR0_FMASK */ 2591 radeon_emit(cs, cb_color_fmask_slice); /* R_028C88_CB_COLOR0_FMASK_SLICE */ 2592 radeon_emit(cs, tex->color_clear_value[0]); /* R_028C8C_CB_COLOR0_CLEAR_WORD0 */ 2593 radeon_emit(cs, tex->color_clear_value[1]); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */ 2594 2595 if (sctx->b.chip_class >= VI) /* R_028C94_CB_COLOR0_DCC_BASE */ 2596 radeon_emit(cs, ((!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) + 2597 tex->dcc_offset + 2598 tex->surface.level[cb->base.u.tex.level].dcc_offset) >> 8); 2599 } 2600 for (; i < 8 ; i++) 2601 if (sctx->framebuffer.dirty_cbufs & (1 << i)) 2602 radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); 2603 2604 /* ZS buffer. */ 2605 if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) { 2606 struct r600_surface *zb = (struct r600_surface*)state->zsbuf; 2607 struct r600_texture *rtex = (struct r600_texture*)zb->base.texture; 2608 2609 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2610 &rtex->resource, RADEON_USAGE_READWRITE, 2611 zb->base.texture->nr_samples > 1 ? 2612 RADEON_PRIO_DEPTH_BUFFER_MSAA : 2613 RADEON_PRIO_DEPTH_BUFFER); 2614 2615 if (zb->db_htile_data_base) { 2616 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2617 rtex->htile_buffer, RADEON_USAGE_READWRITE, 2618 RADEON_PRIO_HTILE); 2619 } 2620 2621 radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view); 2622 radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base); 2623 2624 radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9); 2625 radeon_emit(cs, zb->db_depth_info); /* R_02803C_DB_DEPTH_INFO */ 2626 radeon_emit(cs, zb->db_z_info | /* R_028040_DB_Z_INFO */ 2627 S_028040_ZRANGE_PRECISION(rtex->depth_clear_value != 0)); 2628 radeon_emit(cs, zb->db_stencil_info); /* R_028044_DB_STENCIL_INFO */ 2629 radeon_emit(cs, zb->db_depth_base); /* R_028048_DB_Z_READ_BASE */ 2630 radeon_emit(cs, zb->db_stencil_base); /* R_02804C_DB_STENCIL_READ_BASE */ 2631 radeon_emit(cs, zb->db_depth_base); /* R_028050_DB_Z_WRITE_BASE */ 2632 radeon_emit(cs, zb->db_stencil_base); /* R_028054_DB_STENCIL_WRITE_BASE */ 2633 radeon_emit(cs, zb->db_depth_size); /* R_028058_DB_DEPTH_SIZE */ 2634 radeon_emit(cs, zb->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */ 2635 2636 radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2); 2637 radeon_emit(cs, rtex->stencil_clear_value); /* R_028028_DB_STENCIL_CLEAR */ 2638 radeon_emit(cs, fui(rtex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */ 2639 2640 radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface); 2641 } else if (sctx->framebuffer.dirty_zsbuf) { 2642 radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2); 2643 radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */ 2644 radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */ 2645 } 2646 2647 /* Framebuffer dimensions. */ 2648 /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */ 2649 radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR, 2650 S_028208_BR_X(state->width) | S_028208_BR_Y(state->height)); 2651 2652 sctx->framebuffer.dirty_cbufs = 0; 2653 sctx->framebuffer.dirty_zsbuf = false; 2654} 2655 2656static void si_emit_msaa_sample_locs(struct si_context *sctx, 2657 struct r600_atom *atom) 2658{ 2659 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 2660 unsigned nr_samples = sctx->framebuffer.nr_samples; 2661 2662 /* Smoothing (only possible with nr_samples == 1) uses the same 2663 * sample locations as the MSAA it simulates. 2664 */ 2665 if (nr_samples <= 1 && sctx->smoothing_enabled) 2666 nr_samples = SI_NUM_SMOOTH_AA_SAMPLES; 2667 2668 /* On Polaris, the small primitive filter uses the sample locations 2669 * even when MSAA is off, so we need to make sure they're set to 0. 2670 */ 2671 if (sctx->b.family >= CHIP_POLARIS10) 2672 nr_samples = MAX2(nr_samples, 1); 2673 2674 if (nr_samples >= 1 && 2675 (nr_samples != sctx->msaa_sample_locs.nr_samples)) { 2676 sctx->msaa_sample_locs.nr_samples = nr_samples; 2677 cayman_emit_msaa_sample_locs(cs, nr_samples); 2678 } 2679 2680 if (sctx->b.family >= CHIP_POLARIS10) { 2681 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 2682 unsigned small_prim_filter_cntl = 2683 S_028830_SMALL_PRIM_FILTER_ENABLE(1) | 2684 S_028830_LINE_FILTER_DISABLE(1); /* line bug */ 2685 2686 /* The alternative of setting sample locations to 0 would 2687 * require a DB flush to avoid Z errors, see 2688 * https://bugs.freedesktop.org/show_bug.cgi?id=96908 2689 */ 2690 if (sctx->framebuffer.nr_samples > 1 && rs && !rs->multisample_enable) 2691 small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE; 2692 2693 radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, 2694 small_prim_filter_cntl); 2695 } 2696} 2697 2698static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom) 2699{ 2700 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 2701 unsigned num_tile_pipes = sctx->screen->b.info.num_tile_pipes; 2702 /* 33% faster rendering to linear color buffers */ 2703 bool dst_is_linear = sctx->framebuffer.any_dst_linear; 2704 unsigned sc_mode_cntl_1 = 2705 S_028A4C_WALK_SIZE(dst_is_linear) | 2706 S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) | 2707 S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) | 2708 /* always 1: */ 2709 S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) | 2710 S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) | 2711 S_028A4C_TILE_WALK_ORDER_ENABLE(1) | 2712 S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) | 2713 S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | 2714 S_028A4C_FORCE_EOV_REZ_ENABLE(1); 2715 2716 cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples, 2717 sctx->ps_iter_samples, 2718 sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0, 2719 sc_mode_cntl_1); 2720} 2721 2722static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples) 2723{ 2724 struct si_context *sctx = (struct si_context *)ctx; 2725 2726 if (sctx->ps_iter_samples == min_samples) 2727 return; 2728 2729 sctx->ps_iter_samples = min_samples; 2730 sctx->do_update_shaders = true; 2731 2732 if (sctx->framebuffer.nr_samples > 1) 2733 si_mark_atom_dirty(sctx, &sctx->msaa_config); 2734} 2735 2736/* 2737 * Samplers 2738 */ 2739 2740/** 2741 * Build the sampler view descriptor for a buffer texture. 2742 * @param state 256-bit descriptor; only the high 128 bits are filled in 2743 */ 2744void 2745si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf, 2746 enum pipe_format format, 2747 unsigned offset, unsigned size, 2748 uint32_t *state) 2749{ 2750 const struct util_format_description *desc; 2751 int first_non_void; 2752 unsigned stride; 2753 unsigned num_records; 2754 unsigned num_format, data_format; 2755 2756 desc = util_format_description(format); 2757 first_non_void = util_format_get_first_non_void_channel(format); 2758 stride = desc->block.bits / 8; 2759 num_format = si_translate_buffer_numformat(&screen->b.b, desc, first_non_void); 2760 data_format = si_translate_buffer_dataformat(&screen->b.b, desc, first_non_void); 2761 2762 num_records = size / stride; 2763 num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride); 2764 2765 if (screen->b.chip_class >= VI) 2766 num_records *= stride; 2767 2768 state[4] = 0; 2769 state[5] = S_008F04_STRIDE(stride); 2770 state[6] = num_records; 2771 state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 2772 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 2773 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 2774 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | 2775 S_008F0C_NUM_FORMAT(num_format) | 2776 S_008F0C_DATA_FORMAT(data_format); 2777} 2778 2779/** 2780 * Build the sampler view descriptor for a texture. 2781 */ 2782void 2783si_make_texture_descriptor(struct si_screen *screen, 2784 struct r600_texture *tex, 2785 bool sampler, 2786 enum pipe_texture_target target, 2787 enum pipe_format pipe_format, 2788 const unsigned char state_swizzle[4], 2789 unsigned first_level, unsigned last_level, 2790 unsigned first_layer, unsigned last_layer, 2791 unsigned width, unsigned height, unsigned depth, 2792 uint32_t *state, 2793 uint32_t *fmask_state) 2794{ 2795 struct pipe_resource *res = &tex->resource.b.b; 2796 const struct util_format_description *desc; 2797 unsigned char swizzle[4]; 2798 int first_non_void; 2799 unsigned num_format, data_format, type; 2800 uint64_t va; 2801 2802 desc = util_format_description(pipe_format); 2803 2804 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 2805 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; 2806 const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1}; 2807 2808 switch (pipe_format) { 2809 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2810 case PIPE_FORMAT_X24S8_UINT: 2811 case PIPE_FORMAT_X32_S8X24_UINT: 2812 case PIPE_FORMAT_X8Z24_UNORM: 2813 util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); 2814 break; 2815 default: 2816 util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle); 2817 } 2818 } else { 2819 util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle); 2820 } 2821 2822 first_non_void = util_format_get_first_non_void_channel(pipe_format); 2823 2824 switch (pipe_format) { 2825 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2826 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2827 break; 2828 default: 2829 if (first_non_void < 0) { 2830 if (util_format_is_compressed(pipe_format)) { 2831 switch (pipe_format) { 2832 case PIPE_FORMAT_DXT1_SRGB: 2833 case PIPE_FORMAT_DXT1_SRGBA: 2834 case PIPE_FORMAT_DXT3_SRGBA: 2835 case PIPE_FORMAT_DXT5_SRGBA: 2836 case PIPE_FORMAT_BPTC_SRGBA: 2837 case PIPE_FORMAT_ETC2_SRGB8: 2838 case PIPE_FORMAT_ETC2_SRGB8A1: 2839 case PIPE_FORMAT_ETC2_SRGBA8: 2840 num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 2841 break; 2842 case PIPE_FORMAT_RGTC1_SNORM: 2843 case PIPE_FORMAT_LATC1_SNORM: 2844 case PIPE_FORMAT_RGTC2_SNORM: 2845 case PIPE_FORMAT_LATC2_SNORM: 2846 case PIPE_FORMAT_ETC2_R11_SNORM: 2847 case PIPE_FORMAT_ETC2_RG11_SNORM: 2848 /* implies float, so use SNORM/UNORM to determine 2849 whether data is signed or not */ 2850 case PIPE_FORMAT_BPTC_RGB_FLOAT: 2851 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 2852 break; 2853 default: 2854 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2855 break; 2856 } 2857 } else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 2858 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2859 } else { 2860 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 2861 } 2862 } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { 2863 num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 2864 } else { 2865 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2866 2867 switch (desc->channel[first_non_void].type) { 2868 case UTIL_FORMAT_TYPE_FLOAT: 2869 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 2870 break; 2871 case UTIL_FORMAT_TYPE_SIGNED: 2872 if (desc->channel[first_non_void].normalized) 2873 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 2874 else if (desc->channel[first_non_void].pure_integer) 2875 num_format = V_008F14_IMG_NUM_FORMAT_SINT; 2876 else 2877 num_format = V_008F14_IMG_NUM_FORMAT_SSCALED; 2878 break; 2879 case UTIL_FORMAT_TYPE_UNSIGNED: 2880 if (desc->channel[first_non_void].normalized) 2881 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2882 else if (desc->channel[first_non_void].pure_integer) 2883 num_format = V_008F14_IMG_NUM_FORMAT_UINT; 2884 else 2885 num_format = V_008F14_IMG_NUM_FORMAT_USCALED; 2886 } 2887 } 2888 } 2889 2890 data_format = si_translate_texformat(&screen->b.b, pipe_format, desc, first_non_void); 2891 if (data_format == ~0) { 2892 data_format = 0; 2893 } 2894 2895 if (!sampler && 2896 (res->target == PIPE_TEXTURE_CUBE || 2897 res->target == PIPE_TEXTURE_CUBE_ARRAY || 2898 res->target == PIPE_TEXTURE_3D)) { 2899 /* For the purpose of shader images, treat cube maps and 3D 2900 * textures as 2D arrays. For 3D textures, the address 2901 * calculations for mipmaps are different, so we rely on the 2902 * caller to effectively disable mipmaps. 2903 */ 2904 type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 2905 2906 assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0)); 2907 } else { 2908 type = si_tex_dim(res->target, target, res->nr_samples); 2909 } 2910 2911 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { 2912 height = 1; 2913 depth = res->array_size; 2914 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || 2915 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) { 2916 if (sampler || res->target != PIPE_TEXTURE_3D) 2917 depth = res->array_size; 2918 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE) 2919 depth = res->array_size / 6; 2920 2921 state[0] = 0; 2922 state[1] = (S_008F14_DATA_FORMAT(data_format) | 2923 S_008F14_NUM_FORMAT(num_format)); 2924 state[2] = (S_008F18_WIDTH(width - 1) | 2925 S_008F18_HEIGHT(height - 1) | 2926 S_008F18_PERF_MOD(4)); 2927 state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) | 2928 S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | 2929 S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | 2930 S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) | 2931 S_008F1C_BASE_LEVEL(res->nr_samples > 1 ? 2932 0 : first_level) | 2933 S_008F1C_LAST_LEVEL(res->nr_samples > 1 ? 2934 util_logbase2(res->nr_samples) : 2935 last_level) | 2936 S_008F1C_POW2_PAD(res->last_level > 0) | 2937 S_008F1C_TYPE(type)); 2938 state[4] = S_008F20_DEPTH(depth - 1); 2939 state[5] = (S_008F24_BASE_ARRAY(first_layer) | 2940 S_008F24_LAST_ARRAY(last_layer)); 2941 state[6] = 0; 2942 state[7] = 0; 2943 2944 if (tex->dcc_offset) { 2945 unsigned swap = r600_translate_colorswap(pipe_format, false); 2946 2947 state[6] = S_008F28_ALPHA_IS_ON_MSB(swap <= 1); 2948 } else { 2949 /* The last dword is unused by hw. The shader uses it to clear 2950 * bits in the first dword of sampler state. 2951 */ 2952 if (screen->b.chip_class <= CIK && res->nr_samples <= 1) { 2953 if (first_level == last_level) 2954 state[7] = C_008F30_MAX_ANISO_RATIO; 2955 else 2956 state[7] = 0xffffffff; 2957 } 2958 } 2959 2960 /* Initialize the sampler view for FMASK. */ 2961 if (tex->fmask.size) { 2962 uint32_t fmask_format; 2963 2964 va = tex->resource.gpu_address + tex->fmask.offset; 2965 2966 switch (res->nr_samples) { 2967 case 2: 2968 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2; 2969 break; 2970 case 4: 2971 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4; 2972 break; 2973 case 8: 2974 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8; 2975 break; 2976 default: 2977 assert(0); 2978 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID; 2979 } 2980 2981 fmask_state[0] = va >> 8; 2982 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | 2983 S_008F14_DATA_FORMAT(fmask_format) | 2984 S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT); 2985 fmask_state[2] = S_008F18_WIDTH(width - 1) | 2986 S_008F18_HEIGHT(height - 1); 2987 fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | 2988 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | 2989 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | 2990 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | 2991 S_008F1C_TILING_INDEX(tex->fmask.tile_mode_index) | 2992 S_008F1C_TYPE(si_tex_dim(res->target, target, 0)); 2993 fmask_state[4] = S_008F20_DEPTH(depth - 1) | 2994 S_008F20_PITCH(tex->fmask.pitch_in_pixels - 1); 2995 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer) | 2996 S_008F24_LAST_ARRAY(last_layer); 2997 fmask_state[6] = 0; 2998 fmask_state[7] = 0; 2999 } 3000} 3001 3002/** 3003 * Create a sampler view. 3004 * 3005 * @param ctx context 3006 * @param texture texture 3007 * @param state sampler view template 3008 * @param width0 width0 override (for compressed textures as int) 3009 * @param height0 height0 override (for compressed textures as int) 3010 * @param force_level set the base address to the level (for compressed textures) 3011 */ 3012struct pipe_sampler_view * 3013si_create_sampler_view_custom(struct pipe_context *ctx, 3014 struct pipe_resource *texture, 3015 const struct pipe_sampler_view *state, 3016 unsigned width0, unsigned height0, 3017 unsigned force_level) 3018{ 3019 struct si_context *sctx = (struct si_context*)ctx; 3020 struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view); 3021 struct r600_texture *tmp = (struct r600_texture*)texture; 3022 unsigned base_level, first_level, last_level; 3023 unsigned char state_swizzle[4]; 3024 unsigned height, depth, width; 3025 unsigned last_layer = state->u.tex.last_layer; 3026 enum pipe_format pipe_format; 3027 const struct radeon_surf_level *surflevel; 3028 3029 if (!view) 3030 return NULL; 3031 3032 /* initialize base object */ 3033 view->base = *state; 3034 view->base.texture = NULL; 3035 view->base.reference.count = 1; 3036 view->base.context = ctx; 3037 3038 assert(texture); 3039 pipe_resource_reference(&view->base.texture, texture); 3040 3041 if (state->format == PIPE_FORMAT_X24S8_UINT || 3042 state->format == PIPE_FORMAT_S8X24_UINT || 3043 state->format == PIPE_FORMAT_X32_S8X24_UINT || 3044 state->format == PIPE_FORMAT_S8_UINT) 3045 view->is_stencil_sampler = true; 3046 3047 /* Buffer resource. */ 3048 if (texture->target == PIPE_BUFFER) { 3049 si_make_buffer_descriptor(sctx->screen, 3050 (struct r600_resource *)texture, 3051 state->format, 3052 state->u.buf.offset, 3053 state->u.buf.size, 3054 view->state); 3055 return &view->base; 3056 } 3057 3058 state_swizzle[0] = state->swizzle_r; 3059 state_swizzle[1] = state->swizzle_g; 3060 state_swizzle[2] = state->swizzle_b; 3061 state_swizzle[3] = state->swizzle_a; 3062 3063 base_level = 0; 3064 first_level = state->u.tex.first_level; 3065 last_level = state->u.tex.last_level; 3066 width = width0; 3067 height = height0; 3068 depth = texture->depth0; 3069 3070 if (force_level) { 3071 assert(force_level == first_level && 3072 force_level == last_level); 3073 base_level = force_level; 3074 first_level = 0; 3075 last_level = 0; 3076 width = u_minify(width, force_level); 3077 height = u_minify(height, force_level); 3078 depth = u_minify(depth, force_level); 3079 } 3080 3081 /* This is not needed if state trackers set last_layer correctly. */ 3082 if (state->target == PIPE_TEXTURE_1D || 3083 state->target == PIPE_TEXTURE_2D || 3084 state->target == PIPE_TEXTURE_RECT || 3085 state->target == PIPE_TEXTURE_CUBE) 3086 last_layer = state->u.tex.first_layer; 3087 3088 /* Texturing with separate depth and stencil. */ 3089 pipe_format = state->format; 3090 3091 /* Depth/stencil texturing sometimes needs separate texture. */ 3092 if (tmp->is_depth && !r600_can_sample_zs(tmp, view->is_stencil_sampler)) { 3093 if (!tmp->flushed_depth_texture && 3094 !r600_init_flushed_depth_texture(ctx, texture, NULL)) { 3095 pipe_resource_reference(&view->base.texture, NULL); 3096 FREE(view); 3097 return NULL; 3098 } 3099 3100 assert(tmp->flushed_depth_texture); 3101 3102 /* Override format for the case where the flushed texture 3103 * contains only Z or only S. 3104 */ 3105 if (tmp->flushed_depth_texture->resource.b.b.format != tmp->resource.b.b.format) 3106 pipe_format = tmp->flushed_depth_texture->resource.b.b.format; 3107 3108 tmp = tmp->flushed_depth_texture; 3109 } 3110 3111 surflevel = tmp->surface.level; 3112 3113 if (tmp->db_compatible) { 3114 if (!view->is_stencil_sampler) 3115 pipe_format = tmp->db_render_format; 3116 3117 switch (pipe_format) { 3118 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 3119 pipe_format = PIPE_FORMAT_Z32_FLOAT; 3120 break; 3121 case PIPE_FORMAT_X8Z24_UNORM: 3122 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 3123 /* Z24 is always stored like this for DB 3124 * compatibility. 3125 */ 3126 pipe_format = PIPE_FORMAT_Z24X8_UNORM; 3127 break; 3128 case PIPE_FORMAT_X24S8_UINT: 3129 case PIPE_FORMAT_S8X24_UINT: 3130 case PIPE_FORMAT_X32_S8X24_UINT: 3131 pipe_format = PIPE_FORMAT_S8_UINT; 3132 surflevel = tmp->surface.stencil_level; 3133 break; 3134 default:; 3135 } 3136 } 3137 3138 vi_dcc_disable_if_incompatible_format(&sctx->b, texture, 3139 state->u.tex.first_level, 3140 state->format); 3141 3142 si_make_texture_descriptor(sctx->screen, tmp, true, 3143 state->target, pipe_format, state_swizzle, 3144 first_level, last_level, 3145 state->u.tex.first_layer, last_layer, 3146 width, height, depth, 3147 view->state, view->fmask_state); 3148 3149 view->base_level_info = &surflevel[base_level]; 3150 view->base_level = base_level; 3151 view->block_width = util_format_get_blockwidth(pipe_format); 3152 return &view->base; 3153} 3154 3155static struct pipe_sampler_view * 3156si_create_sampler_view(struct pipe_context *ctx, 3157 struct pipe_resource *texture, 3158 const struct pipe_sampler_view *state) 3159{ 3160 return si_create_sampler_view_custom(ctx, texture, state, 3161 texture ? texture->width0 : 0, 3162 texture ? texture->height0 : 0, 0); 3163} 3164 3165static void si_sampler_view_destroy(struct pipe_context *ctx, 3166 struct pipe_sampler_view *state) 3167{ 3168 struct si_sampler_view *view = (struct si_sampler_view *)state; 3169 3170 pipe_resource_reference(&state->texture, NULL); 3171 FREE(view); 3172} 3173 3174static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter) 3175{ 3176 return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER || 3177 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER || 3178 (linear_filter && 3179 (wrap == PIPE_TEX_WRAP_CLAMP || 3180 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP)); 3181} 3182 3183static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state) 3184{ 3185 bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST || 3186 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST; 3187 3188 return (state->border_color.ui[0] || state->border_color.ui[1] || 3189 state->border_color.ui[2] || state->border_color.ui[3]) && 3190 (wrap_mode_uses_border_color(state->wrap_s, linear_filter) || 3191 wrap_mode_uses_border_color(state->wrap_t, linear_filter) || 3192 wrap_mode_uses_border_color(state->wrap_r, linear_filter)); 3193} 3194 3195static void *si_create_sampler_state(struct pipe_context *ctx, 3196 const struct pipe_sampler_state *state) 3197{ 3198 struct si_context *sctx = (struct si_context *)ctx; 3199 struct r600_common_screen *rscreen = sctx->b.screen; 3200 struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state); 3201 unsigned border_color_type, border_color_index = 0; 3202 unsigned max_aniso = rscreen->force_aniso >= 0 ? rscreen->force_aniso 3203 : state->max_anisotropy; 3204 unsigned max_aniso_ratio = r600_tex_aniso_filter(max_aniso); 3205 3206 if (!rstate) { 3207 return NULL; 3208 } 3209 3210 if (!sampler_state_needs_border_color(state)) 3211 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; 3212 else if (state->border_color.f[0] == 0 && 3213 state->border_color.f[1] == 0 && 3214 state->border_color.f[2] == 0 && 3215 state->border_color.f[3] == 0) 3216 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; 3217 else if (state->border_color.f[0] == 0 && 3218 state->border_color.f[1] == 0 && 3219 state->border_color.f[2] == 0 && 3220 state->border_color.f[3] == 1) 3221 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK; 3222 else if (state->border_color.f[0] == 1 && 3223 state->border_color.f[1] == 1 && 3224 state->border_color.f[2] == 1 && 3225 state->border_color.f[3] == 1) 3226 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE; 3227 else { 3228 int i; 3229 3230 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER; 3231 3232 /* Check if the border has been uploaded already. */ 3233 for (i = 0; i < sctx->border_color_count; i++) 3234 if (memcmp(&sctx->border_color_table[i], &state->border_color, 3235 sizeof(state->border_color)) == 0) 3236 break; 3237 3238 if (i >= SI_MAX_BORDER_COLORS) { 3239 /* Getting 4096 unique border colors is very unlikely. */ 3240 fprintf(stderr, "radeonsi: The border color table is full. " 3241 "Any new border colors will be just black. " 3242 "Please file a bug.\n"); 3243 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; 3244 } else { 3245 if (i == sctx->border_color_count) { 3246 /* Upload a new border color. */ 3247 memcpy(&sctx->border_color_table[i], &state->border_color, 3248 sizeof(state->border_color)); 3249 util_memcpy_cpu_to_le32(&sctx->border_color_map[i], 3250 &state->border_color, 3251 sizeof(state->border_color)); 3252 sctx->border_color_count++; 3253 } 3254 3255 border_color_index = i; 3256 } 3257 } 3258 3259#ifdef DEBUG 3260 rstate->magic = SI_SAMPLER_STATE_MAGIC; 3261#endif 3262 rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) | 3263 S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) | 3264 S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) | 3265 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | 3266 S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) | 3267 S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) | 3268 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | 3269 S_008F30_ANISO_BIAS(max_aniso_ratio) | 3270 S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) | 3271 S_008F30_COMPAT_MODE(sctx->b.chip_class >= VI)); 3272 rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | 3273 S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) | 3274 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0)); 3275 rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | 3276 S_008F38_XY_MAG_FILTER(eg_tex_filter(state->mag_img_filter, max_aniso)) | 3277 S_008F38_XY_MIN_FILTER(eg_tex_filter(state->min_img_filter, max_aniso)) | 3278 S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) | 3279 S_008F38_MIP_POINT_PRECLAMP(1) | 3280 S_008F38_DISABLE_LSB_CEIL(1) | 3281 S_008F38_FILTER_PREC_FIX(1) | 3282 S_008F38_ANISO_OVERRIDE(sctx->b.chip_class >= VI)); 3283 rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) | 3284 S_008F3C_BORDER_COLOR_TYPE(border_color_type); 3285 return rstate; 3286} 3287 3288static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) 3289{ 3290 struct si_context *sctx = (struct si_context *)ctx; 3291 3292 if (sctx->sample_mask.sample_mask == (uint16_t)sample_mask) 3293 return; 3294 3295 sctx->sample_mask.sample_mask = sample_mask; 3296 si_mark_atom_dirty(sctx, &sctx->sample_mask.atom); 3297} 3298 3299static void si_emit_sample_mask(struct si_context *sctx, struct r600_atom *atom) 3300{ 3301 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 3302 unsigned mask = sctx->sample_mask.sample_mask; 3303 3304 /* Needed for line and polygon smoothing as well as for the Polaris 3305 * small primitive filter. We expect the state tracker to take care of 3306 * this for us. 3307 */ 3308 assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 || 3309 (mask & 1 && sctx->blitter->running)); 3310 3311 radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); 3312 radeon_emit(cs, mask | (mask << 16)); 3313 radeon_emit(cs, mask | (mask << 16)); 3314} 3315 3316static void si_delete_sampler_state(struct pipe_context *ctx, void *state) 3317{ 3318#ifdef DEBUG 3319 struct si_sampler_state *s = state; 3320 3321 assert(s->magic == SI_SAMPLER_STATE_MAGIC); 3322 s->magic = 0; 3323#endif 3324 free(state); 3325} 3326 3327/* 3328 * Vertex elements & buffers 3329 */ 3330 3331static void *si_create_vertex_elements(struct pipe_context *ctx, 3332 unsigned count, 3333 const struct pipe_vertex_element *elements) 3334{ 3335 struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element); 3336 int i; 3337 3338 assert(count <= SI_MAX_ATTRIBS); 3339 if (!v) 3340 return NULL; 3341 3342 v->count = count; 3343 for (i = 0; i < count; ++i) { 3344 const struct util_format_description *desc; 3345 unsigned data_format, num_format; 3346 int first_non_void; 3347 3348 desc = util_format_description(elements[i].src_format); 3349 first_non_void = util_format_get_first_non_void_channel(elements[i].src_format); 3350 data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); 3351 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); 3352 3353 v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 3354 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 3355 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 3356 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | 3357 S_008F0C_NUM_FORMAT(num_format) | 3358 S_008F0C_DATA_FORMAT(data_format); 3359 v->format_size[i] = desc->block.bits / 8; 3360 3361 /* The hardware always treats the 2-bit alpha channel as 3362 * unsigned, so a shader workaround is needed. 3363 */ 3364 if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) { 3365 if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) { 3366 v->fix_fetch |= SI_FIX_FETCH_A2_SNORM << (2 * i); 3367 } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) { 3368 v->fix_fetch |= SI_FIX_FETCH_A2_SSCALED << (2 * i); 3369 } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) { 3370 /* This isn't actually used in OpenGL. */ 3371 v->fix_fetch |= SI_FIX_FETCH_A2_SINT << (2 * i); 3372 } 3373 } 3374 3375 /* We work around the fact that 8_8_8 and 16_16_16 data formats 3376 * do not exist by using the corresponding 4-component formats. 3377 * This requires a fixup of the descriptor for bounds checks. 3378 */ 3379 if (desc->block.bits == 3 * 8 || 3380 desc->block.bits == 3 * 16) { 3381 v->fix_size3 |= (desc->block.bits / 24) << (2 * i); 3382 } 3383 } 3384 memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count); 3385 3386 return v; 3387} 3388 3389static void si_bind_vertex_elements(struct pipe_context *ctx, void *state) 3390{ 3391 struct si_context *sctx = (struct si_context *)ctx; 3392 struct si_vertex_element *v = (struct si_vertex_element*)state; 3393 3394 sctx->vertex_elements = v; 3395 sctx->vertex_buffers_dirty = true; 3396 sctx->do_update_shaders = true; 3397} 3398 3399static void si_delete_vertex_element(struct pipe_context *ctx, void *state) 3400{ 3401 struct si_context *sctx = (struct si_context *)ctx; 3402 3403 if (sctx->vertex_elements == state) 3404 sctx->vertex_elements = NULL; 3405 FREE(state); 3406} 3407 3408static void si_set_vertex_buffers(struct pipe_context *ctx, 3409 unsigned start_slot, unsigned count, 3410 const struct pipe_vertex_buffer *buffers) 3411{ 3412 struct si_context *sctx = (struct si_context *)ctx; 3413 struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot; 3414 int i; 3415 3416 assert(start_slot + count <= ARRAY_SIZE(sctx->vertex_buffer)); 3417 3418 if (buffers) { 3419 for (i = 0; i < count; i++) { 3420 const struct pipe_vertex_buffer *src = buffers + i; 3421 struct pipe_vertex_buffer *dsti = dst + i; 3422 struct pipe_resource *buf = src->buffer; 3423 3424 pipe_resource_reference(&dsti->buffer, buf); 3425 dsti->buffer_offset = src->buffer_offset; 3426 dsti->stride = src->stride; 3427 r600_context_add_resource_size(ctx, buf); 3428 if (buf) 3429 r600_resource(buf)->bind_history |= PIPE_BIND_VERTEX_BUFFER; 3430 } 3431 } else { 3432 for (i = 0; i < count; i++) { 3433 pipe_resource_reference(&dst[i].buffer, NULL); 3434 } 3435 } 3436 sctx->vertex_buffers_dirty = true; 3437} 3438 3439static void si_set_index_buffer(struct pipe_context *ctx, 3440 const struct pipe_index_buffer *ib) 3441{ 3442 struct si_context *sctx = (struct si_context *)ctx; 3443 3444 if (ib) { 3445 struct pipe_resource *buf = ib->buffer; 3446 3447 pipe_resource_reference(&sctx->index_buffer.buffer, buf); 3448 memcpy(&sctx->index_buffer, ib, sizeof(*ib)); 3449 r600_context_add_resource_size(ctx, buf); 3450 if (buf) 3451 r600_resource(buf)->bind_history |= PIPE_BIND_INDEX_BUFFER; 3452 } else { 3453 pipe_resource_reference(&sctx->index_buffer.buffer, NULL); 3454 } 3455} 3456 3457/* 3458 * Misc 3459 */ 3460 3461static void si_set_tess_state(struct pipe_context *ctx, 3462 const float default_outer_level[4], 3463 const float default_inner_level[2]) 3464{ 3465 struct si_context *sctx = (struct si_context *)ctx; 3466 struct pipe_constant_buffer cb; 3467 float array[8]; 3468 3469 memcpy(array, default_outer_level, sizeof(float) * 4); 3470 memcpy(array+4, default_inner_level, sizeof(float) * 2); 3471 3472 cb.buffer = NULL; 3473 cb.user_buffer = NULL; 3474 cb.buffer_size = sizeof(array); 3475 3476 si_upload_const_buffer(sctx, (struct r600_resource**)&cb.buffer, 3477 (void*)array, sizeof(array), 3478 &cb.buffer_offset); 3479 3480 si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb); 3481 pipe_resource_reference(&cb.buffer, NULL); 3482} 3483 3484static void si_texture_barrier(struct pipe_context *ctx) 3485{ 3486 struct si_context *sctx = (struct si_context *)ctx; 3487 3488 sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | 3489 SI_CONTEXT_INV_GLOBAL_L2 | 3490 SI_CONTEXT_FLUSH_AND_INV_CB | 3491 SI_CONTEXT_CS_PARTIAL_FLUSH; 3492} 3493 3494/* This only ensures coherency for shader image/buffer stores. */ 3495static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) 3496{ 3497 struct si_context *sctx = (struct si_context *)ctx; 3498 3499 /* Subsequent commands must wait for all shader invocations to 3500 * complete. */ 3501 sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | 3502 SI_CONTEXT_CS_PARTIAL_FLUSH; 3503 3504 if (flags & PIPE_BARRIER_CONSTANT_BUFFER) 3505 sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 | 3506 SI_CONTEXT_INV_VMEM_L1; 3507 3508 if (flags & (PIPE_BARRIER_VERTEX_BUFFER | 3509 PIPE_BARRIER_SHADER_BUFFER | 3510 PIPE_BARRIER_TEXTURE | 3511 PIPE_BARRIER_IMAGE | 3512 PIPE_BARRIER_STREAMOUT_BUFFER | 3513 PIPE_BARRIER_GLOBAL_BUFFER)) { 3514 /* As far as I can tell, L1 contents are written back to L2 3515 * automatically at end of shader, but the contents of other 3516 * L1 caches might still be stale. */ 3517 sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1; 3518 } 3519 3520 if (flags & PIPE_BARRIER_INDEX_BUFFER) { 3521 /* Indices are read through TC L2 since VI. 3522 * L1 isn't used. 3523 */ 3524 if (sctx->screen->b.chip_class <= CIK) 3525 sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; 3526 } 3527 3528 if (flags & PIPE_BARRIER_FRAMEBUFFER) 3529 sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; 3530 3531 if (flags & (PIPE_BARRIER_FRAMEBUFFER | 3532 PIPE_BARRIER_INDIRECT_BUFFER)) 3533 sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; 3534} 3535 3536static void *si_create_blend_custom(struct si_context *sctx, unsigned mode) 3537{ 3538 struct pipe_blend_state blend; 3539 3540 memset(&blend, 0, sizeof(blend)); 3541 blend.independent_blend_enable = true; 3542 blend.rt[0].colormask = 0xf; 3543 return si_create_blend_state_mode(&sctx->b.b, &blend, mode); 3544} 3545 3546static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw, 3547 bool include_draw_vbo) 3548{ 3549 si_need_cs_space((struct si_context*)ctx); 3550} 3551 3552static void si_init_config(struct si_context *sctx); 3553 3554void si_init_state_functions(struct si_context *sctx) 3555{ 3556 si_init_external_atom(sctx, &sctx->b.render_cond_atom, &sctx->atoms.s.render_cond); 3557 si_init_external_atom(sctx, &sctx->b.streamout.begin_atom, &sctx->atoms.s.streamout_begin); 3558 si_init_external_atom(sctx, &sctx->b.streamout.enable_atom, &sctx->atoms.s.streamout_enable); 3559 si_init_external_atom(sctx, &sctx->b.scissors.atom, &sctx->atoms.s.scissors); 3560 si_init_external_atom(sctx, &sctx->b.viewports.atom, &sctx->atoms.s.viewports); 3561 3562 si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state); 3563 si_init_atom(sctx, &sctx->msaa_sample_locs.atom, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs); 3564 si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state); 3565 si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config); 3566 si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask); 3567 si_init_atom(sctx, &sctx->cb_render_state, &sctx->atoms.s.cb_render_state, si_emit_cb_render_state); 3568 si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color); 3569 si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs); 3570 si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state); 3571 si_init_atom(sctx, &sctx->stencil_ref.atom, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref); 3572 3573 sctx->b.b.create_blend_state = si_create_blend_state; 3574 sctx->b.b.bind_blend_state = si_bind_blend_state; 3575 sctx->b.b.delete_blend_state = si_delete_blend_state; 3576 sctx->b.b.set_blend_color = si_set_blend_color; 3577 3578 sctx->b.b.create_rasterizer_state = si_create_rs_state; 3579 sctx->b.b.bind_rasterizer_state = si_bind_rs_state; 3580 sctx->b.b.delete_rasterizer_state = si_delete_rs_state; 3581 3582 sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state; 3583 sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state; 3584 sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state; 3585 3586 sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx); 3587 sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE); 3588 sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS); 3589 sctx->custom_blend_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR); 3590 sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, V_028808_CB_DCC_DECOMPRESS); 3591 3592 sctx->b.b.set_clip_state = si_set_clip_state; 3593 sctx->b.b.set_stencil_ref = si_set_stencil_ref; 3594 3595 sctx->b.b.set_framebuffer_state = si_set_framebuffer_state; 3596 sctx->b.b.get_sample_position = cayman_get_sample_position; 3597 3598 sctx->b.b.create_sampler_state = si_create_sampler_state; 3599 sctx->b.b.delete_sampler_state = si_delete_sampler_state; 3600 3601 sctx->b.b.create_sampler_view = si_create_sampler_view; 3602 sctx->b.b.sampler_view_destroy = si_sampler_view_destroy; 3603 3604 sctx->b.b.set_sample_mask = si_set_sample_mask; 3605 3606 sctx->b.b.create_vertex_elements_state = si_create_vertex_elements; 3607 sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements; 3608 sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element; 3609 sctx->b.b.set_vertex_buffers = si_set_vertex_buffers; 3610 sctx->b.b.set_index_buffer = si_set_index_buffer; 3611 3612 sctx->b.b.texture_barrier = si_texture_barrier; 3613 sctx->b.b.memory_barrier = si_memory_barrier; 3614 sctx->b.b.set_min_samples = si_set_min_samples; 3615 sctx->b.b.set_tess_state = si_set_tess_state; 3616 3617 sctx->b.b.set_active_query_state = si_set_active_query_state; 3618 sctx->b.set_occlusion_query_state = si_set_occlusion_query_state; 3619 sctx->b.save_qbo_state = si_save_qbo_state; 3620 sctx->b.need_gfx_cs_space = si_need_gfx_cs_space; 3621 3622 sctx->b.b.draw_vbo = si_draw_vbo; 3623 3624 si_init_config(sctx); 3625} 3626 3627static uint32_t si_get_bo_metadata_word1(struct r600_common_screen *rscreen) 3628{ 3629 return (ATI_VENDOR_ID << 16) | rscreen->info.pci_id; 3630} 3631 3632static void si_query_opaque_metadata(struct r600_common_screen *rscreen, 3633 struct r600_texture *rtex, 3634 struct radeon_bo_metadata *md) 3635{ 3636 struct si_screen *sscreen = (struct si_screen*)rscreen; 3637 struct pipe_resource *res = &rtex->resource.b.b; 3638 static const unsigned char swizzle[] = { 3639 PIPE_SWIZZLE_X, 3640 PIPE_SWIZZLE_Y, 3641 PIPE_SWIZZLE_Z, 3642 PIPE_SWIZZLE_W 3643 }; 3644 uint32_t desc[8], i; 3645 bool is_array = util_resource_is_array_texture(res); 3646 3647 /* DRM 2.x.x doesn't support this. */ 3648 if (rscreen->info.drm_major != 3) 3649 return; 3650 3651 assert(rtex->dcc_separate_buffer == NULL); 3652 assert(rtex->fmask.size == 0); 3653 3654 /* Metadata image format format version 1: 3655 * [0] = 1 (metadata format identifier) 3656 * [1] = (VENDOR_ID << 16) | PCI_ID 3657 * [2:9] = image descriptor for the whole resource 3658 * [2] is always 0, because the base address is cleared 3659 * [9] is the DCC offset bits [39:8] from the beginning of 3660 * the buffer 3661 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level 3662 */ 3663 3664 md->metadata[0] = 1; /* metadata image format version 1 */ 3665 3666 /* TILE_MODE_INDEX is ambiguous without a PCI ID. */ 3667 md->metadata[1] = si_get_bo_metadata_word1(rscreen); 3668 3669 si_make_texture_descriptor(sscreen, rtex, true, 3670 res->target, res->format, 3671 swizzle, 0, res->last_level, 0, 3672 is_array ? res->array_size - 1 : 0, 3673 res->width0, res->height0, res->depth0, 3674 desc, NULL); 3675 3676 si_set_mutable_tex_desc_fields(rtex, &rtex->surface.level[0], 0, 0, 3677 rtex->surface.blk_w, false, desc); 3678 3679 /* Clear the base address and set the relative DCC offset. */ 3680 desc[0] = 0; 3681 desc[1] &= C_008F14_BASE_ADDRESS_HI; 3682 desc[7] = rtex->dcc_offset >> 8; 3683 3684 /* Dwords [2:9] contain the image descriptor. */ 3685 memcpy(&md->metadata[2], desc, sizeof(desc)); 3686 3687 /* Dwords [10:..] contain the mipmap level offsets. */ 3688 for (i = 0; i <= res->last_level; i++) 3689 md->metadata[10+i] = rtex->surface.level[i].offset >> 8; 3690 3691 md->size_metadata = (11 + res->last_level) * 4; 3692} 3693 3694static void si_apply_opaque_metadata(struct r600_common_screen *rscreen, 3695 struct r600_texture *rtex, 3696 struct radeon_bo_metadata *md) 3697{ 3698 uint32_t *desc = &md->metadata[2]; 3699 3700 if (rscreen->chip_class < VI) 3701 return; 3702 3703 /* Return if DCC is enabled. The texture should be set up with it 3704 * already. 3705 */ 3706 if (md->size_metadata >= 11 * 4 && 3707 md->metadata[0] != 0 && 3708 md->metadata[1] == si_get_bo_metadata_word1(rscreen) && 3709 G_008F28_COMPRESSION_EN(desc[6])) { 3710 assert(rtex->dcc_offset == ((uint64_t)desc[7] << 8)); 3711 return; 3712 } 3713 3714 /* Disable DCC. These are always set by texture_from_handle and must 3715 * be cleared here. 3716 */ 3717 rtex->dcc_offset = 0; 3718} 3719 3720void si_init_screen_state_functions(struct si_screen *sscreen) 3721{ 3722 sscreen->b.b.is_format_supported = si_is_format_supported; 3723 sscreen->b.query_opaque_metadata = si_query_opaque_metadata; 3724 sscreen->b.apply_opaque_metadata = si_apply_opaque_metadata; 3725} 3726 3727static void 3728si_write_harvested_raster_configs(struct si_context *sctx, 3729 struct si_pm4_state *pm4, 3730 unsigned raster_config, 3731 unsigned raster_config_1) 3732{ 3733 unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1); 3734 unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1); 3735 unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask; 3736 unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16); 3737 unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2); 3738 unsigned rb_per_se = num_rb / num_se; 3739 unsigned se_mask[4]; 3740 unsigned se; 3741 3742 se_mask[0] = ((1 << rb_per_se) - 1); 3743 se_mask[1] = (se_mask[0] << rb_per_se); 3744 se_mask[2] = (se_mask[1] << rb_per_se); 3745 se_mask[3] = (se_mask[2] << rb_per_se); 3746 3747 se_mask[0] &= rb_mask; 3748 se_mask[1] &= rb_mask; 3749 se_mask[2] &= rb_mask; 3750 se_mask[3] &= rb_mask; 3751 3752 assert(num_se == 1 || num_se == 2 || num_se == 4); 3753 assert(sh_per_se == 1 || sh_per_se == 2); 3754 assert(rb_per_pkr == 1 || rb_per_pkr == 2); 3755 3756 /* XXX: I can't figure out what the *_XSEL and *_YSEL 3757 * fields are for, so I'm leaving them as their default 3758 * values. */ 3759 3760 for (se = 0; se < num_se; se++) { 3761 unsigned raster_config_se = raster_config; 3762 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); 3763 unsigned pkr1_mask = pkr0_mask << rb_per_pkr; 3764 int idx = (se / 2) * 2; 3765 3766 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { 3767 raster_config_se &= C_028350_SE_MAP; 3768 3769 if (!se_mask[idx]) { 3770 raster_config_se |= 3771 S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3); 3772 } else { 3773 raster_config_se |= 3774 S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0); 3775 } 3776 } 3777 3778 pkr0_mask &= rb_mask; 3779 pkr1_mask &= rb_mask; 3780 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { 3781 raster_config_se &= C_028350_PKR_MAP; 3782 3783 if (!pkr0_mask) { 3784 raster_config_se |= 3785 S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3); 3786 } else { 3787 raster_config_se |= 3788 S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0); 3789 } 3790 } 3791 3792 if (rb_per_se >= 2) { 3793 unsigned rb0_mask = 1 << (se * rb_per_se); 3794 unsigned rb1_mask = rb0_mask << 1; 3795 3796 rb0_mask &= rb_mask; 3797 rb1_mask &= rb_mask; 3798 if (!rb0_mask || !rb1_mask) { 3799 raster_config_se &= C_028350_RB_MAP_PKR0; 3800 3801 if (!rb0_mask) { 3802 raster_config_se |= 3803 S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3); 3804 } else { 3805 raster_config_se |= 3806 S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0); 3807 } 3808 } 3809 3810 if (rb_per_se > 2) { 3811 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); 3812 rb1_mask = rb0_mask << 1; 3813 rb0_mask &= rb_mask; 3814 rb1_mask &= rb_mask; 3815 if (!rb0_mask || !rb1_mask) { 3816 raster_config_se &= C_028350_RB_MAP_PKR1; 3817 3818 if (!rb0_mask) { 3819 raster_config_se |= 3820 S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3); 3821 } else { 3822 raster_config_se |= 3823 S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0); 3824 } 3825 } 3826 } 3827 } 3828 3829 /* GRBM_GFX_INDEX has a different offset on SI and CI+ */ 3830 if (sctx->b.chip_class < CIK) 3831 si_pm4_set_reg(pm4, GRBM_GFX_INDEX, 3832 SE_INDEX(se) | SH_BROADCAST_WRITES | 3833 INSTANCE_BROADCAST_WRITES); 3834 else 3835 si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX, 3836 S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) | 3837 S_030800_INSTANCE_BROADCAST_WRITES(1)); 3838 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se); 3839 } 3840 3841 /* GRBM_GFX_INDEX has a different offset on SI and CI+ */ 3842 if (sctx->b.chip_class < CIK) 3843 si_pm4_set_reg(pm4, GRBM_GFX_INDEX, 3844 SE_BROADCAST_WRITES | SH_BROADCAST_WRITES | 3845 INSTANCE_BROADCAST_WRITES); 3846 else { 3847 si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX, 3848 S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | 3849 S_030800_INSTANCE_BROADCAST_WRITES(1)); 3850 3851 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || 3852 (!se_mask[2] && !se_mask[3]))) { 3853 raster_config_1 &= C_028354_SE_PAIR_MAP; 3854 3855 if (!se_mask[0] && !se_mask[1]) { 3856 raster_config_1 |= 3857 S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3); 3858 } else { 3859 raster_config_1 |= 3860 S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0); 3861 } 3862 } 3863 3864 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); 3865 } 3866} 3867 3868static void si_init_config(struct si_context *sctx) 3869{ 3870 struct si_screen *sscreen = sctx->screen; 3871 unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16); 3872 unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask; 3873 unsigned raster_config, raster_config_1; 3874 uint64_t border_color_va = sctx->border_color_buffer->gpu_address; 3875 struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); 3876 3877 if (!pm4) 3878 return; 3879 3880 si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL); 3881 si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1)); 3882 si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1)); 3883 si_pm4_cmd_end(pm4, false); 3884 3885 si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); 3886 si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); 3887 3888 /* FIXME calculate these values somehow ??? */ 3889 si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES); 3890 si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40); 3891 si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2); 3892 3893 si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0); 3894 si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); 3895 3896 si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); 3897 si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0); 3898 if (sctx->b.chip_class < CIK) 3899 si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | 3900 S_008A14_CLIP_VTX_REORDER_ENA(1)); 3901 3902 si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210); 3903 si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98); 3904 3905 si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0); 3906 3907 switch (sctx->screen->b.family) { 3908 case CHIP_TAHITI: 3909 case CHIP_PITCAIRN: 3910 raster_config = 0x2a00126a; 3911 raster_config_1 = 0x00000000; 3912 break; 3913 case CHIP_VERDE: 3914 raster_config = 0x0000124a; 3915 raster_config_1 = 0x00000000; 3916 break; 3917 case CHIP_OLAND: 3918 raster_config = 0x00000082; 3919 raster_config_1 = 0x00000000; 3920 break; 3921 case CHIP_HAINAN: 3922 raster_config = 0x00000000; 3923 raster_config_1 = 0x00000000; 3924 break; 3925 case CHIP_BONAIRE: 3926 raster_config = 0x16000012; 3927 raster_config_1 = 0x00000000; 3928 break; 3929 case CHIP_HAWAII: 3930 raster_config = 0x3a00161a; 3931 raster_config_1 = 0x0000002e; 3932 break; 3933 case CHIP_FIJI: 3934 if (sscreen->b.info.cik_macrotile_mode_array[0] == 0x000000e8) { 3935 /* old kernels with old tiling config */ 3936 raster_config = 0x16000012; 3937 raster_config_1 = 0x0000002a; 3938 } else { 3939 raster_config = 0x3a00161a; 3940 raster_config_1 = 0x0000002e; 3941 } 3942 break; 3943 case CHIP_POLARIS10: 3944 raster_config = 0x16000012; 3945 raster_config_1 = 0x0000002a; 3946 break; 3947 case CHIP_POLARIS11: 3948 case CHIP_POLARIS12: 3949 raster_config = 0x16000012; 3950 raster_config_1 = 0x00000000; 3951 break; 3952 case CHIP_TONGA: 3953 raster_config = 0x16000012; 3954 raster_config_1 = 0x0000002a; 3955 break; 3956 case CHIP_ICELAND: 3957 if (num_rb == 1) 3958 raster_config = 0x00000000; 3959 else 3960 raster_config = 0x00000002; 3961 raster_config_1 = 0x00000000; 3962 break; 3963 case CHIP_CARRIZO: 3964 raster_config = 0x00000002; 3965 raster_config_1 = 0x00000000; 3966 break; 3967 case CHIP_KAVERI: 3968 /* KV should be 0x00000002, but that causes problems with radeon */ 3969 raster_config = 0x00000000; /* 0x00000002 */ 3970 raster_config_1 = 0x00000000; 3971 break; 3972 case CHIP_KABINI: 3973 case CHIP_MULLINS: 3974 case CHIP_STONEY: 3975 raster_config = 0x00000000; 3976 raster_config_1 = 0x00000000; 3977 break; 3978 default: 3979 fprintf(stderr, 3980 "radeonsi: Unknown GPU, using 0 for raster_config\n"); 3981 raster_config = 0x00000000; 3982 raster_config_1 = 0x00000000; 3983 break; 3984 } 3985 3986 /* Always use the default config when all backends are enabled 3987 * (or when we failed to determine the enabled backends). 3988 */ 3989 if (!rb_mask || util_bitcount(rb_mask) >= num_rb) { 3990 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 3991 raster_config); 3992 if (sctx->b.chip_class >= CIK) 3993 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 3994 raster_config_1); 3995 } else { 3996 si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1); 3997 } 3998 3999 si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); 4000 si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); 4001 si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR, 4002 S_028244_BR_X(16384) | S_028244_BR_Y(16384)); 4003 si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0); 4004 si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR, 4005 S_028034_BR_X(16384) | S_028034_BR_Y(16384)); 4006 4007 si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); 4008 si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 4009 S_028230_ER_TRI(0xA) | 4010 S_028230_ER_POINT(0xA) | 4011 S_028230_ER_RECT(0xA) | 4012 /* Required by DX10_DIAMOND_TEST_ENA: */ 4013 S_028230_ER_LINE_LR(0x1A) | 4014 S_028230_ER_LINE_RL(0x26) | 4015 S_028230_ER_LINE_TB(0xA) | 4016 S_028230_ER_LINE_BT(0xA)); 4017 /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */ 4018 si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); 4019 si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0); 4020 si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); 4021 si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); 4022 si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0); 4023 si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0); 4024 4025 si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0); 4026 si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0); 4027 si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0); 4028 4029 if (sctx->b.chip_class >= CIK) { 4030 /* If this is 0, Bonaire can hang even if GS isn't being used. 4031 * Other chips are unaffected. These are suboptimal values, 4032 * but we don't use on-chip GS. 4033 */ 4034 si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL, 4035 S_028A44_ES_VERTS_PER_SUBGRP(64) | 4036 S_028A44_GS_PRIMS_PER_SUBGRP(4)); 4037 4038 si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff)); 4039 si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0); 4040 si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff)); 4041 si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff)); 4042 4043 if (sscreen->b.info.num_good_compute_units / 4044 (sscreen->b.info.max_se * sscreen->b.info.max_sh_per_se) <= 4) { 4045 /* Too few available compute units per SH. Disallowing 4046 * VS to run on CU0 could hurt us more than late VS 4047 * allocation would help. 4048 * 4049 * LATE_ALLOC_VS = 2 is the highest safe number. 4050 */ 4051 si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff)); 4052 si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2)); 4053 } else { 4054 /* Set LATE_ALLOC_VS == 31. It should be less than 4055 * the number of scratch waves. Limitations: 4056 * - VS can't execute on CU0. 4057 * - If HS writes outputs to LDS, LS can't execute on CU0. 4058 */ 4059 si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe)); 4060 si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31)); 4061 } 4062 4063 si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff)); 4064 } 4065 4066 if (sctx->b.chip_class >= VI) { 4067 unsigned vgt_tess_distribution; 4068 4069 si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL, 4070 S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) | 4071 S_028424_OVERWRITE_COMBINER_WATERMARK(4)); 4072 if (sctx->b.family < CHIP_POLARIS10) 4073 si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30); 4074 si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32); 4075 4076 vgt_tess_distribution = 4077 S_028B50_ACCUM_ISOLINE(32) | 4078 S_028B50_ACCUM_TRI(11) | 4079 S_028B50_ACCUM_QUAD(11) | 4080 S_028B50_DONUT_SPLIT(16); 4081 4082 /* Testing with Unigine Heaven extreme tesselation yielded best results 4083 * with TRAP_SPLIT = 3. 4084 */ 4085 if (sctx->b.family == CHIP_FIJI || 4086 sctx->b.family >= CHIP_POLARIS10) 4087 vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3); 4088 4089 si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution); 4090 } else { 4091 si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); 4092 si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16); 4093 } 4094 4095 if (sctx->b.family == CHIP_STONEY) 4096 si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0); 4097 4098 si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); 4099 if (sctx->b.chip_class >= CIK) 4100 si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40); 4101 si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ, 4102 RADEON_PRIO_BORDER_COLORS); 4103 4104 si_pm4_upload_indirect_buffer(sctx, pm4); 4105 sctx->init_config = pm4; 4106} 4107