si_state.c revision b9b9540a604883e620de872537b89d47d4bceb68
1/* 2 * Copyright 2012 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Christian König <christian.koenig@amd.com> 25 */ 26 27#include "si_pipe.h" 28#include "sid.h" 29#include "radeon/r600_cs.h" 30#include "radeon/r600_query.h" 31 32#include "util/u_dual_blend.h" 33#include "util/u_format.h" 34#include "util/u_format_s3tc.h" 35#include "util/u_memory.h" 36#include "util/u_resource.h" 37 38/* Initialize an external atom (owned by ../radeon). */ 39static void 40si_init_external_atom(struct si_context *sctx, struct r600_atom *atom, 41 struct r600_atom **list_elem) 42{ 43 atom->id = list_elem - sctx->atoms.array + 1; 44 *list_elem = atom; 45} 46 47/* Initialize an atom owned by radeonsi. 
*/ 48void si_init_atom(struct si_context *sctx, struct r600_atom *atom, 49 struct r600_atom **list_elem, 50 void (*emit_func)(struct si_context *ctx, struct r600_atom *state)) 51{ 52 atom->emit = (void*)emit_func; 53 atom->id = list_elem - sctx->atoms.array + 1; /* index+1 in the atom array */ 54 *list_elem = atom; 55} 56 57static unsigned si_map_swizzle(unsigned swizzle) 58{ 59 switch (swizzle) { 60 case PIPE_SWIZZLE_Y: 61 return V_008F0C_SQ_SEL_Y; 62 case PIPE_SWIZZLE_Z: 63 return V_008F0C_SQ_SEL_Z; 64 case PIPE_SWIZZLE_W: 65 return V_008F0C_SQ_SEL_W; 66 case PIPE_SWIZZLE_0: 67 return V_008F0C_SQ_SEL_0; 68 case PIPE_SWIZZLE_1: 69 return V_008F0C_SQ_SEL_1; 70 default: /* PIPE_SWIZZLE_X */ 71 return V_008F0C_SQ_SEL_X; 72 } 73} 74 75static uint32_t S_FIXED(float value, uint32_t frac_bits) 76{ 77 return value * (1 << frac_bits); 78} 79 80/* 12.4 fixed-point */ 81static unsigned si_pack_float_12p4(float x) 82{ 83 return x <= 0 ? 0 : 84 x >= 4096 ? 0xffff : x * 16; 85} 86 87/* 88 * Inferred framebuffer and blender state. 89 * 90 * CB_TARGET_MASK is emitted here to avoid a hang with dual source blending 91 * if there is not enough PS outputs. 92 */ 93static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *atom) 94{ 95 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 96 struct si_state_blend *blend = sctx->queued.named.blend; 97 /* CB_COLORn_INFO.FORMAT=INVALID should disable unbound colorbuffers, 98 * but you never know. */ 99 uint32_t cb_target_mask = sctx->framebuffer.colorbuf_enabled_4bit; 100 unsigned i; 101 102 if (blend) 103 cb_target_mask &= blend->cb_target_mask; 104 105 /* Avoid a hang that happens when dual source blending is enabled 106 * but there is not enough color outputs. This is undefined behavior, 107 * so disable color writes completely. 108 * 109 * Reproducible with Unigine Heaven 4.0 and drirc missing. 
110 */ 111 if (blend && blend->dual_src_blend && 112 sctx->ps_shader.cso && 113 (sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3) 114 cb_target_mask = 0; 115 116 radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, cb_target_mask); 117 118 /* STONEY-specific register settings. */ 119 if (sctx->b.family == CHIP_STONEY) { 120 unsigned spi_shader_col_format = 121 sctx->ps_shader.cso ? 122 sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format : 0; 123 unsigned sx_ps_downconvert = 0; 124 unsigned sx_blend_opt_epsilon = 0; 125 unsigned sx_blend_opt_control = 0; 126 127 for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 128 struct r600_surface *surf = 129 (struct r600_surface*)sctx->framebuffer.state.cbufs[i]; 130 unsigned format, swap, spi_format, colormask; 131 bool has_alpha, has_rgb; 132 133 if (!surf) 134 continue; 135 136 format = G_028C70_FORMAT(surf->cb_color_info); 137 swap = G_028C70_COMP_SWAP(surf->cb_color_info); 138 spi_format = (spi_shader_col_format >> (i * 4)) & 0xf; 139 colormask = (cb_target_mask >> (i * 4)) & 0xf; 140 141 /* Set if RGB and A are present. */ 142 has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib); 143 144 if (format == V_028C70_COLOR_8 || 145 format == V_028C70_COLOR_16 || 146 format == V_028C70_COLOR_32) 147 has_rgb = !has_alpha; 148 else 149 has_rgb = true; 150 151 /* Check the colormask and export format. */ 152 if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A))) 153 has_rgb = false; 154 if (!(colormask & PIPE_MASK_A)) 155 has_alpha = false; 156 157 if (spi_format == V_028714_SPI_SHADER_ZERO) { 158 has_rgb = false; 159 has_alpha = false; 160 } 161 162 /* Disable value checking for disabled channels. */ 163 if (!has_rgb) 164 sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4); 165 if (!has_alpha) 166 sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4); 167 168 /* Enable down-conversion for 32bpp and smaller formats. 
*/ 169 switch (format) { 170 case V_028C70_COLOR_8: 171 case V_028C70_COLOR_8_8: 172 case V_028C70_COLOR_8_8_8_8: 173 /* For 1 and 2-channel formats, use the superset thereof. */ 174 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR || 175 spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 176 spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 177 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4); 178 sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4); 179 } 180 break; 181 182 case V_028C70_COLOR_5_6_5: 183 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 184 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4); 185 sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4); 186 } 187 break; 188 189 case V_028C70_COLOR_1_5_5_5: 190 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 191 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4); 192 sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4); 193 } 194 break; 195 196 case V_028C70_COLOR_4_4_4_4: 197 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 198 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4); 199 sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4); 200 } 201 break; 202 203 case V_028C70_COLOR_32: 204 if (swap == V_0280A0_SWAP_STD && 205 spi_format == V_028714_SPI_SHADER_32_R) 206 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); 207 else if (swap == V_0280A0_SWAP_ALT_REV && 208 spi_format == V_028714_SPI_SHADER_32_AR) 209 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4); 210 break; 211 212 case V_028C70_COLOR_16: 213 case V_028C70_COLOR_16_16: 214 /* For 1-channel formats, use the superset thereof. 
*/ 215 if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR || 216 spi_format == V_028714_SPI_SHADER_SNORM16_ABGR || 217 spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 218 spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 219 if (swap == V_0280A0_SWAP_STD || 220 swap == V_0280A0_SWAP_STD_REV) 221 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4); 222 else 223 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4); 224 } 225 break; 226 227 case V_028C70_COLOR_10_11_11: 228 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 229 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4); 230 sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4); 231 } 232 break; 233 234 case V_028C70_COLOR_2_10_10_10: 235 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 236 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4); 237 sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4); 238 } 239 break; 240 } 241 } 242 243 if (sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) { 244 sx_ps_downconvert = 0; 245 sx_blend_opt_epsilon = 0; 246 sx_blend_opt_control = 0; 247 } 248 249 radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3); 250 radeon_emit(cs, sx_ps_downconvert); /* R_028754_SX_PS_DOWNCONVERT */ 251 radeon_emit(cs, sx_blend_opt_epsilon); /* R_028758_SX_BLEND_OPT_EPSILON */ 252 radeon_emit(cs, sx_blend_opt_control); /* R_02875C_SX_BLEND_OPT_CONTROL */ 253 } 254} 255 256/* 257 * Blender functions 258 */ 259 260static uint32_t si_translate_blend_function(int blend_func) 261{ 262 switch (blend_func) { 263 case PIPE_BLEND_ADD: 264 return V_028780_COMB_DST_PLUS_SRC; 265 case PIPE_BLEND_SUBTRACT: 266 return V_028780_COMB_SRC_MINUS_DST; 267 case PIPE_BLEND_REVERSE_SUBTRACT: 268 return V_028780_COMB_DST_MINUS_SRC; 269 case PIPE_BLEND_MIN: 270 return V_028780_COMB_MIN_DST_SRC; 271 case PIPE_BLEND_MAX: 272 return V_028780_COMB_MAX_DST_SRC; 273 default: 274 R600_ERR("Unknown blend function %d\n", blend_func); 275 assert(0); 276 break; 277 } 278 
return 0; 279} 280 281static uint32_t si_translate_blend_factor(int blend_fact) 282{ 283 switch (blend_fact) { 284 case PIPE_BLENDFACTOR_ONE: 285 return V_028780_BLEND_ONE; 286 case PIPE_BLENDFACTOR_SRC_COLOR: 287 return V_028780_BLEND_SRC_COLOR; 288 case PIPE_BLENDFACTOR_SRC_ALPHA: 289 return V_028780_BLEND_SRC_ALPHA; 290 case PIPE_BLENDFACTOR_DST_ALPHA: 291 return V_028780_BLEND_DST_ALPHA; 292 case PIPE_BLENDFACTOR_DST_COLOR: 293 return V_028780_BLEND_DST_COLOR; 294 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 295 return V_028780_BLEND_SRC_ALPHA_SATURATE; 296 case PIPE_BLENDFACTOR_CONST_COLOR: 297 return V_028780_BLEND_CONSTANT_COLOR; 298 case PIPE_BLENDFACTOR_CONST_ALPHA: 299 return V_028780_BLEND_CONSTANT_ALPHA; 300 case PIPE_BLENDFACTOR_ZERO: 301 return V_028780_BLEND_ZERO; 302 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 303 return V_028780_BLEND_ONE_MINUS_SRC_COLOR; 304 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 305 return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; 306 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 307 return V_028780_BLEND_ONE_MINUS_DST_ALPHA; 308 case PIPE_BLENDFACTOR_INV_DST_COLOR: 309 return V_028780_BLEND_ONE_MINUS_DST_COLOR; 310 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 311 return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR; 312 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 313 return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA; 314 case PIPE_BLENDFACTOR_SRC1_COLOR: 315 return V_028780_BLEND_SRC1_COLOR; 316 case PIPE_BLENDFACTOR_SRC1_ALPHA: 317 return V_028780_BLEND_SRC1_ALPHA; 318 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 319 return V_028780_BLEND_INV_SRC1_COLOR; 320 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 321 return V_028780_BLEND_INV_SRC1_ALPHA; 322 default: 323 R600_ERR("Bad blend factor %d not supported!\n", blend_fact); 324 assert(0); 325 break; 326 } 327 return 0; 328} 329 330static uint32_t si_translate_blend_opt_function(int blend_func) 331{ 332 switch (blend_func) { 333 case PIPE_BLEND_ADD: 334 return V_028760_OPT_COMB_ADD; 335 case PIPE_BLEND_SUBTRACT: 336 return 
V_028760_OPT_COMB_SUBTRACT; 337 case PIPE_BLEND_REVERSE_SUBTRACT: 338 return V_028760_OPT_COMB_REVSUBTRACT; 339 case PIPE_BLEND_MIN: 340 return V_028760_OPT_COMB_MIN; 341 case PIPE_BLEND_MAX: 342 return V_028760_OPT_COMB_MAX; 343 default: 344 return V_028760_OPT_COMB_BLEND_DISABLED; 345 } 346} 347 348static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha) 349{ 350 switch (blend_fact) { 351 case PIPE_BLENDFACTOR_ZERO: 352 return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL; 353 case PIPE_BLENDFACTOR_ONE: 354 return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE; 355 case PIPE_BLENDFACTOR_SRC_COLOR: 356 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0 357 : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0; 358 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 359 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1 360 : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1; 361 case PIPE_BLENDFACTOR_SRC_ALPHA: 362 return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0; 363 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 364 return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1; 365 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 366 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE 367 : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 368 default: 369 return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 370 } 371} 372 373/** 374 * Get rid of DST in the blend factors by commuting the operands: 375 * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 376 */ 377static void si_blend_remove_dst(unsigned *func, unsigned *src_factor, 378 unsigned *dst_factor, unsigned expected_dst, 379 unsigned replacement_src) 380{ 381 if (*src_factor == expected_dst && 382 *dst_factor == PIPE_BLENDFACTOR_ZERO) { 383 *src_factor = PIPE_BLENDFACTOR_ZERO; 384 *dst_factor = replacement_src; 385 386 /* Commuting the operands requires reversing subtractions. 
*/ 387 if (*func == PIPE_BLEND_SUBTRACT) 388 *func = PIPE_BLEND_REVERSE_SUBTRACT; 389 else if (*func == PIPE_BLEND_REVERSE_SUBTRACT) 390 *func = PIPE_BLEND_SUBTRACT; 391 } 392} 393 394static bool si_blend_factor_uses_dst(unsigned factor) 395{ 396 return factor == PIPE_BLENDFACTOR_DST_COLOR || 397 factor == PIPE_BLENDFACTOR_DST_ALPHA || 398 factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 399 factor == PIPE_BLENDFACTOR_INV_DST_ALPHA || 400 factor == PIPE_BLENDFACTOR_INV_DST_COLOR; 401} 402 403static void *si_create_blend_state_mode(struct pipe_context *ctx, 404 const struct pipe_blend_state *state, 405 unsigned mode) 406{ 407 struct si_context *sctx = (struct si_context*)ctx; 408 struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend); 409 struct si_pm4_state *pm4 = &blend->pm4; 410 uint32_t sx_mrt_blend_opt[8] = {0}; 411 uint32_t color_control = 0; 412 413 if (!blend) 414 return NULL; 415 416 blend->alpha_to_coverage = state->alpha_to_coverage; 417 blend->alpha_to_one = state->alpha_to_one; 418 blend->dual_src_blend = util_blend_state_is_dual(state, 0); 419 420 if (state->logicop_enable) { 421 color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4)); 422 } else { 423 color_control |= S_028808_ROP3(0xcc); 424 } 425 426 si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 427 S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 428 S_028B70_ALPHA_TO_MASK_OFFSET0(2) | 429 S_028B70_ALPHA_TO_MASK_OFFSET1(2) | 430 S_028B70_ALPHA_TO_MASK_OFFSET2(2) | 431 S_028B70_ALPHA_TO_MASK_OFFSET3(2)); 432 433 if (state->alpha_to_coverage) 434 blend->need_src_alpha_4bit |= 0xf; 435 436 blend->cb_target_mask = 0; 437 for (int i = 0; i < 8; i++) { 438 /* state->rt entries > 0 only written if independent blending */ 439 const int j = state->independent_blend_enable ? 
i : 0; 440 441 unsigned eqRGB = state->rt[j].rgb_func; 442 unsigned srcRGB = state->rt[j].rgb_src_factor; 443 unsigned dstRGB = state->rt[j].rgb_dst_factor; 444 unsigned eqA = state->rt[j].alpha_func; 445 unsigned srcA = state->rt[j].alpha_src_factor; 446 unsigned dstA = state->rt[j].alpha_dst_factor; 447 448 unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt; 449 unsigned blend_cntl = 0; 450 451 sx_mrt_blend_opt[i] = 452 S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | 453 S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED); 454 455 /* Only set dual source blending for MRT0 to avoid a hang. */ 456 if (i >= 1 && blend->dual_src_blend) { 457 /* Vulkan does this for dual source blending. */ 458 if (i == 1) 459 blend_cntl |= S_028780_ENABLE(1); 460 461 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 462 continue; 463 } 464 465 /* Only addition and subtraction equations are supported with 466 * dual source blending. 467 */ 468 if (blend->dual_src_blend && 469 (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX || 470 eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) { 471 assert(!"Unsupported equation for dual source blending"); 472 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 473 continue; 474 } 475 476 /* cb_render_state will disable unused ones */ 477 blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i); 478 479 if (!state->rt[j].colormask || !state->rt[j].blend_enable) { 480 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 481 continue; 482 } 483 484 /* Blending optimizations for Stoney. 485 * These transformations don't change the behavior. 
486 * 487 * First, get rid of DST in the blend factors: 488 * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 489 */ 490 si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, 491 PIPE_BLENDFACTOR_DST_COLOR, 492 PIPE_BLENDFACTOR_SRC_COLOR); 493 si_blend_remove_dst(&eqA, &srcA, &dstA, 494 PIPE_BLENDFACTOR_DST_COLOR, 495 PIPE_BLENDFACTOR_SRC_COLOR); 496 si_blend_remove_dst(&eqA, &srcA, &dstA, 497 PIPE_BLENDFACTOR_DST_ALPHA, 498 PIPE_BLENDFACTOR_SRC_ALPHA); 499 500 /* Look up the ideal settings from tables. */ 501 srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false); 502 dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false); 503 srcA_opt = si_translate_blend_opt_factor(srcA, true); 504 dstA_opt = si_translate_blend_opt_factor(dstA, true); 505 506 /* Handle interdependencies. */ 507 if (si_blend_factor_uses_dst(srcRGB)) 508 dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 509 if (si_blend_factor_uses_dst(srcA)) 510 dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 511 512 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE && 513 (dstRGB == PIPE_BLENDFACTOR_ZERO || 514 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 515 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)) 516 dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 517 518 /* Set the final value. */ 519 sx_mrt_blend_opt[i] = 520 S_028760_COLOR_SRC_OPT(srcRGB_opt) | 521 S_028760_COLOR_DST_OPT(dstRGB_opt) | 522 S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) | 523 S_028760_ALPHA_SRC_OPT(srcA_opt) | 524 S_028760_ALPHA_DST_OPT(dstA_opt) | 525 S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA)); 526 527 /* Set blend state. 
*/ 528 blend_cntl |= S_028780_ENABLE(1); 529 blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB)); 530 blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB)); 531 blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB)); 532 533 if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { 534 blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1); 535 blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA)); 536 blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA)); 537 blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA)); 538 } 539 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 540 541 blend->blend_enable_4bit |= 0xfu << (i * 4); 542 543 /* This is only important for formats without alpha. */ 544 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 545 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 546 srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 547 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 548 srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || 549 dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA) 550 blend->need_src_alpha_4bit |= 0xfu << (i * 4); 551 } 552 553 if (blend->cb_target_mask) { 554 color_control |= S_028808_MODE(mode); 555 } else { 556 color_control |= S_028808_MODE(V_028808_CB_DISABLE); 557 } 558 559 if (sctx->b.family == CHIP_STONEY) { 560 /* Disable RB+ blend optimizations for dual source blending. 561 * Vulkan does this. 562 */ 563 if (blend->dual_src_blend) { 564 for (int i = 0; i < 8; i++) { 565 sx_mrt_blend_opt[i] = 566 S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) | 567 S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE); 568 } 569 } 570 571 for (int i = 0; i < 8; i++) 572 si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4, 573 sx_mrt_blend_opt[i]); 574 575 /* RB+ doesn't work with dual source blending, logic op, and RESOLVE. 
*/ 576 if (blend->dual_src_blend || state->logicop_enable || 577 mode == V_028808_CB_RESOLVE) 578 color_control |= S_028808_DISABLE_DUAL_QUAD(1); 579 } 580 581 si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control); 582 return blend; 583} 584 585static void *si_create_blend_state(struct pipe_context *ctx, 586 const struct pipe_blend_state *state) 587{ 588 return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); 589} 590 591static void si_bind_blend_state(struct pipe_context *ctx, void *state) 592{ 593 struct si_context *sctx = (struct si_context *)ctx; 594 si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state); 595 si_mark_atom_dirty(sctx, &sctx->cb_render_state); 596 sctx->do_update_shaders = true; 597} 598 599static void si_delete_blend_state(struct pipe_context *ctx, void *state) 600{ 601 struct si_context *sctx = (struct si_context *)ctx; 602 si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state); 603} 604 605static void si_set_blend_color(struct pipe_context *ctx, 606 const struct pipe_blend_color *state) 607{ 608 struct si_context *sctx = (struct si_context *)ctx; 609 610 if (memcmp(&sctx->blend_color.state, state, sizeof(*state)) == 0) 611 return; 612 613 sctx->blend_color.state = *state; 614 si_mark_atom_dirty(sctx, &sctx->blend_color.atom); 615} 616 617static void si_emit_blend_color(struct si_context *sctx, struct r600_atom *atom) 618{ 619 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 620 621 radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4); 622 radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4); 623} 624 625/* 626 * Clipping 627 */ 628 629static void si_set_clip_state(struct pipe_context *ctx, 630 const struct pipe_clip_state *state) 631{ 632 struct si_context *sctx = (struct si_context *)ctx; 633 struct pipe_constant_buffer cb; 634 635 if (memcmp(&sctx->clip_state.state, state, sizeof(*state)) == 0) 636 return; 637 638 sctx->clip_state.state = *state; 639 si_mark_atom_dirty(sctx, 
&sctx->clip_state.atom); 640 641 cb.buffer = NULL; 642 cb.user_buffer = state->ucp; 643 cb.buffer_offset = 0; 644 cb.buffer_size = 4*4*8; 645 si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb); 646 pipe_resource_reference(&cb.buffer, NULL); 647} 648 649static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom) 650{ 651 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 652 653 radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4); 654 radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4); 655} 656 657#define SIX_BITS 0x3F 658 659static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom) 660{ 661 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 662 struct si_shader *vs = si_get_vs_state(sctx); 663 struct tgsi_shader_info *info = si_get_vs_info(sctx); 664 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 665 unsigned window_space = 666 info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; 667 unsigned clipdist_mask = 668 info->writes_clipvertex ? SIX_BITS : info->clipdist_writemask; 669 unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SIX_BITS; 670 unsigned culldist_mask = info->culldist_writemask << info->num_written_clipdistance; 671 unsigned total_mask; 672 bool misc_vec_ena; 673 674 if (vs->key.opt.hw_vs.clip_disable) { 675 assert(!info->culldist_writemask); 676 clipdist_mask = 0; 677 culldist_mask = 0; 678 } 679 total_mask = clipdist_mask | culldist_mask; 680 681 /* Clip distances on points have no effect, so need to be implemented 682 * as cull distances. This applies for the clipvertex case as well. 683 * 684 * Setting this for primitives other than points should have no adverse 685 * effects. 
686 */ 687 clipdist_mask &= rs->clip_plane_enable; 688 culldist_mask |= clipdist_mask; 689 690 misc_vec_ena = info->writes_psize || info->writes_edgeflag || 691 info->writes_layer || info->writes_viewport_index; 692 693 radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL, 694 S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) | 695 S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) | 696 S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) | 697 S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) | 698 S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) | 699 S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) | 700 S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) | 701 S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) | 702 clipdist_mask | (culldist_mask << 8)); 703 radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL, 704 rs->pa_cl_clip_cntl | 705 ucp_mask | 706 S_028810_CLIP_DISABLE(window_space)); 707 708 /* reuse needs to be set off if we write oViewport */ 709 radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF, 710 S_028AB4_REUSE_OFF(info->writes_viewport_index)); 711} 712 713/* 714 * inferred state between framebuffer and rasterizer 715 */ 716static void si_update_poly_offset_state(struct si_context *sctx) 717{ 718 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 719 720 if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) 721 return; 722 723 /* Use the user format, not db_render_format, so that the polygon 724 * offset behaves as expected by applications. 
725 */ 726 switch (sctx->framebuffer.state.zsbuf->texture->format) { 727 case PIPE_FORMAT_Z16_UNORM: 728 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]); 729 break; 730 default: /* 24-bit */ 731 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]); 732 break; 733 case PIPE_FORMAT_Z32_FLOAT: 734 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 735 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]); 736 break; 737 } 738} 739 740/* 741 * Rasterizer 742 */ 743 744static uint32_t si_translate_fill(uint32_t func) 745{ 746 switch(func) { 747 case PIPE_POLYGON_MODE_FILL: 748 return V_028814_X_DRAW_TRIANGLES; 749 case PIPE_POLYGON_MODE_LINE: 750 return V_028814_X_DRAW_LINES; 751 case PIPE_POLYGON_MODE_POINT: 752 return V_028814_X_DRAW_POINTS; 753 default: 754 assert(0); 755 return V_028814_X_DRAW_POINTS; 756 } 757} 758 759static void *si_create_rs_state(struct pipe_context *ctx, 760 const struct pipe_rasterizer_state *state) 761{ 762 struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer); 763 struct si_pm4_state *pm4 = &rs->pm4; 764 unsigned tmp, i; 765 float psize_min, psize_max; 766 767 if (!rs) { 768 return NULL; 769 } 770 771 rs->scissor_enable = state->scissor; 772 rs->clip_halfz = state->clip_halfz; 773 rs->two_side = state->light_twoside; 774 rs->multisample_enable = state->multisample; 775 rs->force_persample_interp = state->force_persample_interp; 776 rs->clip_plane_enable = state->clip_plane_enable; 777 rs->line_stipple_enable = state->line_stipple_enable; 778 rs->poly_stipple_enable = state->poly_stipple_enable; 779 rs->line_smooth = state->line_smooth; 780 rs->poly_smooth = state->poly_smooth; 781 rs->uses_poly_offset = state->offset_point || state->offset_line || 782 state->offset_tri; 783 rs->clamp_fragment_color = state->clamp_fragment_color; 784 rs->flatshade = state->flatshade; 785 rs->sprite_coord_enable = state->sprite_coord_enable; 786 rs->rasterizer_discard = state->rasterizer_discard; 787 rs->pa_sc_line_stipple = 
state->line_stipple_enable ? 788 S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | 789 S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0; 790 rs->pa_cl_clip_cntl = 791 S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) | 792 S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) | 793 S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) | 794 S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) | 795 S_028810_DX_LINEAR_ATTR_CLIP_ENA(1); 796 797 si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0, 798 S_0286D4_FLAT_SHADE_ENA(1) | 799 S_0286D4_PNT_SPRITE_ENA(1) | 800 S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | 801 S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) | 802 S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) | 803 S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) | 804 S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT)); 805 806 /* point size 12.4 fixed point */ 807 tmp = (unsigned)(state->point_size * 8.0); 808 si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp)); 809 810 if (state->point_size_per_vertex) { 811 psize_min = util_get_min_point_size(state); 812 psize_max = 8192; 813 } else { 814 /* Force the point size to be as if the vertex output was disabled. */ 815 psize_min = state->point_size; 816 psize_max = state->point_size; 817 } 818 /* Divide by two, because 0.5 = 1 pixel. 
*/ 819 si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX, 820 S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) | 821 S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2))); 822 823 tmp = (unsigned)state->line_width * 8; 824 si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp)); 825 si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0, 826 S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | 827 S_028A48_MSAA_ENABLE(state->multisample || 828 state->poly_smooth || 829 state->line_smooth) | 830 S_028A48_VPORT_SCISSOR_ENABLE(1)); 831 832 si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL, 833 S_028BE4_PIX_CENTER(state->half_pixel_center) | 834 S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH)); 835 836 si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp)); 837 si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL, 838 S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) | 839 S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | 840 S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | 841 S_028814_FACE(!state->front_ccw) | 842 S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) | 843 S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) | 844 S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) | 845 S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL || 846 state->fill_back != PIPE_POLYGON_MODE_FILL) | 847 S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) | 848 S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back))); 849 si_pm4_set_reg(pm4, R_00B130_SPI_SHADER_USER_DATA_VS_0 + 850 SI_SGPR_VS_STATE_BITS * 4, state->clamp_vertex_color); 851 852 /* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. 
*/ 853 for (i = 0; i < 3; i++) { 854 struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i]; 855 float offset_units = state->offset_units; 856 float offset_scale = state->offset_scale * 16.0f; 857 uint32_t pa_su_poly_offset_db_fmt_cntl = 0; 858 859 if (!state->offset_units_unscaled) { 860 switch (i) { 861 case 0: /* 16-bit zbuffer */ 862 offset_units *= 4.0f; 863 pa_su_poly_offset_db_fmt_cntl = 864 S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16); 865 break; 866 case 1: /* 24-bit zbuffer */ 867 offset_units *= 2.0f; 868 pa_su_poly_offset_db_fmt_cntl = 869 S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24); 870 break; 871 case 2: /* 32-bit zbuffer */ 872 offset_units *= 1.0f; 873 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | 874 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); 875 break; 876 } 877 } 878 879 si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 880 fui(offset_scale)); 881 si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, 882 fui(offset_units)); 883 si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, 884 fui(offset_scale)); 885 si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, 886 fui(offset_units)); 887 si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 888 pa_su_poly_offset_db_fmt_cntl); 889 } 890 891 return rs; 892} 893 894static void si_bind_rs_state(struct pipe_context *ctx, void *state) 895{ 896 struct si_context *sctx = (struct si_context *)ctx; 897 struct si_state_rasterizer *old_rs = 898 (struct si_state_rasterizer*)sctx->queued.named.rasterizer; 899 struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; 900 901 if (!state) 902 return; 903 904 if (!old_rs || old_rs->multisample_enable != rs->multisample_enable) { 905 si_mark_atom_dirty(sctx, &sctx->db_render_state); 906 907 /* Update the small primitive filter workaround if necessary. 
*/ 908 if (sctx->b.family >= CHIP_POLARIS10 && 909 sctx->framebuffer.nr_samples > 1) 910 si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom); 911 } 912 913 r600_viewport_set_rast_deps(&sctx->b, rs->scissor_enable, rs->clip_halfz); 914 915 si_pm4_bind_state(sctx, rasterizer, rs); 916 si_update_poly_offset_state(sctx); 917 918 si_mark_atom_dirty(sctx, &sctx->clip_regs); 919 sctx->do_update_shaders = true; 920} 921 /* Delete a rasterizer state object. If it is the currently queued rasterizer, the poly_offset state is unbound (bound to NULL) before the rasterizer state is deleted. */ 922static void si_delete_rs_state(struct pipe_context *ctx, void *state) 923{ 924 struct si_context *sctx = (struct si_context *)ctx; 925 926 if (sctx->queued.named.rasterizer == state) 927 si_pm4_bind_state(sctx, poly_offset, NULL); 928 si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state); 929} 930 931/* 932 * inferred state between dsa and stencil ref 933 */ 934static void si_emit_stencil_ref(struct si_context *sctx, struct r600_atom *atom) 935{ 936 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 937 struct pipe_stencil_ref *ref = &sctx->stencil_ref.state; 938 struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part; 939 /* Two consecutive registers are written: index [0] values first, then the index [1] values through the _BF macros. */ 940 radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2); 941 radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) | 942 S_028430_STENCILMASK(dsa->valuemask[0]) | 943 S_028430_STENCILWRITEMASK(dsa->writemask[0]) | 944 S_028430_STENCILOPVAL(1)); 945 radeon_emit(cs, S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) | 946 S_028434_STENCILMASK_BF(dsa->valuemask[1]) | 947 S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) | 948 S_028434_STENCILOPVAL_BF(1)); 949} 950 /* Store a new stencil reference value and mark the stencil_ref atom dirty. Returns early, without dirtying anything, when the new value is byte-identical to the stored one. */ 951static void si_set_stencil_ref(struct pipe_context *ctx, 952 const struct pipe_stencil_ref *state) 953{ 954 struct si_context *sctx = (struct si_context *)ctx; 955 956 if (memcmp(&sctx->stencil_ref.state, state, sizeof(*state)) == 0) 957 return; 958 959 sctx->stencil_ref.state = *state; 960 si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom); 961} 962 963 964/* 965 * DSA 966 */ 967 /* Map a PIPE_STENCIL_OP_* value to the matching V_02842C_STENCIL_* value. An unknown value logs an error, asserts, and returns 0. */ 968static uint32_t si_translate_stencil_op(int s_op) 969{
970 switch (s_op) { 971 case PIPE_STENCIL_OP_KEEP: 972 return V_02842C_STENCIL_KEEP; 973 case PIPE_STENCIL_OP_ZERO: 974 return V_02842C_STENCIL_ZERO; 975 case PIPE_STENCIL_OP_REPLACE: 976 return V_02842C_STENCIL_REPLACE_TEST; 977 case PIPE_STENCIL_OP_INCR: 978 return V_02842C_STENCIL_ADD_CLAMP; 979 case PIPE_STENCIL_OP_DECR: 980 return V_02842C_STENCIL_SUB_CLAMP; 981 case PIPE_STENCIL_OP_INCR_WRAP: 982 return V_02842C_STENCIL_ADD_WRAP; 983 case PIPE_STENCIL_OP_DECR_WRAP: 984 return V_02842C_STENCIL_SUB_WRAP; 985 case PIPE_STENCIL_OP_INVERT: 986 return V_02842C_STENCIL_INVERT; 987 default: 988 R600_ERR("Unknown stencil op %d", s_op); 989 assert(0); 990 break; 991 } 992 return 0; 993} 994 /* Create a depth/stencil/alpha state object. Records the front/back stencil value and write masks, builds DB_DEPTH_CONTROL and DB_STENCIL_CONTROL (back-face stencil fields are only filled in when front-face stencil is enabled too), records the alpha function (PIPE_FUNC_ALWAYS when alpha test is disabled) and writes the alpha reference into the PS user-data register, and programs the depth-bounds registers only when the bounds test is enabled. Returns NULL if the allocation fails. */ 995static void *si_create_dsa_state(struct pipe_context *ctx, 996 const struct pipe_depth_stencil_alpha_state *state) 997{ 998 struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa); 999 struct si_pm4_state *pm4 = &dsa->pm4; 1000 unsigned db_depth_control; 1001 uint32_t db_stencil_control = 0; 1002 1003 if (!dsa) { 1004 return NULL; 1005 } 1006 1007 dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask; 1008 dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask; 1009 dsa->stencil_ref.writemask[0] = state->stencil[0].writemask; 1010 dsa->stencil_ref.writemask[1] = state->stencil[1].writemask; 1011 1012 db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | 1013 S_028800_Z_WRITE_ENABLE(state->depth.writemask) | 1014 S_028800_ZFUNC(state->depth.func) | 1015 S_028800_DEPTH_BOUNDS_ENABLE(state->depth.bounds_test); 1016 1017 /* stencil */ 1018 if (state->stencil[0].enabled) { 1019 db_depth_control |= S_028800_STENCIL_ENABLE(1); 1020 db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func); 1021 db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op)); 1022 db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op)); 1023 db_stencil_control |= 
S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op)); 1024 1025 if (state->stencil[1].enabled) { 1026 db_depth_control |= S_028800_BACKFACE_ENABLE(1); 1027 db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func); 1028 db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op)); 1029 db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op)); 1030 db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op)); 1031 } 1032 } 1033 1034 /* alpha */ 1035 if (state->alpha.enabled) { 1036 dsa->alpha_func = state->alpha.func; 1037 1038 si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 + 1039 SI_SGPR_ALPHA_REF * 4, fui(state->alpha.ref_value)); 1040 } else { 1041 dsa->alpha_func = PIPE_FUNC_ALWAYS; 1042 } 1043 1044 si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control); 1045 si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); 1046 if (state->depth.bounds_test) { 1047 si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min)); 1048 si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max)); 1049 } 1050 1051 return dsa; 1052} 1053 /* Bind a DSA state object; a NULL state is ignored. If the object's stencil masks differ from the ones tracked in sctx->stencil_ref.dsa_part, they are copied over and the stencil_ref atom is marked dirty. A shader update is always requested. */ 1054static void si_bind_dsa_state(struct pipe_context *ctx, void *state) 1055{ 1056 struct si_context *sctx = (struct si_context *)ctx; 1057 struct si_state_dsa *dsa = state; 1058 1059 if (!state) 1060 return; 1061 1062 si_pm4_bind_state(sctx, dsa, dsa); 1063 1064 if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part, 1065 sizeof(struct si_dsa_stencil_ref_part)) != 0) { 1066 sctx->stencil_ref.dsa_part = dsa->stencil_ref; 1067 si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom); 1068 } 1069 sctx->do_update_shaders = true; 1070} 1071 /* Delete a DSA state object through si_pm4_delete_state(). */ 1072static void si_delete_dsa_state(struct pipe_context *ctx, void *state) 1073{ 1074 struct si_context *sctx = (struct si_context *)ctx; 1075 si_pm4_delete_state(sctx, dsa, (struct si_state_dsa 
*)state); 1076} 1077 /* Create a DSA state object from a zero-initialized pipe_depth_stencil_alpha_state, going through the context's create_depth_stencil_alpha_state hook. */ 1078static void *si_create_db_flush_dsa(struct si_context *sctx) 1079{ 1080 struct pipe_depth_stencil_alpha_state dsa = {}; 1081 1082 return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa); 1083} 1084 1085/* DB RENDER STATE */ 1086 /* Enable or disable active queries. Sets exactly one of the START/STOP pipeline-stats context flags (clearing the other) and, if the occlusion_queries_disabled flag actually changes, marks db_render_state dirty. */ 1087static void si_set_active_query_state(struct pipe_context *ctx, boolean enable) 1088{ 1089 struct si_context *sctx = (struct si_context*)ctx; 1090 1091 /* Pipeline stat & streamout queries. */ 1092 if (enable) { 1093 sctx->b.flags &= ~R600_CONTEXT_STOP_PIPELINE_STATS; 1094 sctx->b.flags |= R600_CONTEXT_START_PIPELINE_STATS; 1095 } else { 1096 sctx->b.flags &= ~R600_CONTEXT_START_PIPELINE_STATS; 1097 sctx->b.flags |= R600_CONTEXT_STOP_PIPELINE_STATS; 1098 } 1099 1100 /* Occlusion queries. */ 1101 if (sctx->occlusion_queries_disabled != !enable) { 1102 sctx->occlusion_queries_disabled = !enable; 1103 si_mark_atom_dirty(sctx, &sctx->db_render_state); 1104 } 1105} 1106 /* Mark db_render_state dirty. The `enable` argument is not read in this function. */ 1107static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable) 1108{ 1109 struct si_context *sctx = (struct si_context*)ctx; 1110 1111 si_mark_atom_dirty(sctx, &sctx->db_render_state); 1112} 1113 /* Save compute state into *st: the current compute program, compute constant buffer 0, and the compute shader buffers requested with arguments (0, 3) -- presumably start slot and count; confirm against si_get_shader_buffers(). */ 1114static void si_save_qbo_state(struct pipe_context *ctx, struct r600_qbo_state *st) 1115{ 1116 struct si_context *sctx = (struct si_context*)ctx; 1117 1118 st->saved_compute = sctx->cs_shader_state.program; 1119 1120 si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0); 1121 si_get_shader_buffers(sctx, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo); 1122} 1123 /* Emit DB_RENDER_CONTROL and DB_COUNT_CONTROL (as a two-register sequence), then DB_RENDER_OVERRIDE2 and DB_SHADER_CONTROL, all derived from the current context state. */ 1124static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state) 1125{ 1126 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 1127 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 1128 unsigned db_shader_control; 1129 1130 radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2); 1131 1132 /* DB_RENDER_CONTROL */ 1133 if (sctx->dbcb_depth_copy_enabled || 1134 sctx->dbcb_stencil_copy_enabled) { 1135 
radeon_emit(cs, 1136 S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) | 1137 S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) | 1138 S_028000_COPY_CENTROID(1) | 1139 S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample)); 1140 } else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) { 1141 radeon_emit(cs, 1142 S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) | 1143 S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace)); 1144 } else { 1145 radeon_emit(cs, 1146 S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) | 1147 S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear)); 1148 } 1149 1150 /* DB_COUNT_CONTROL (occlusion queries) */ 1151 if (sctx->b.num_occlusion_queries > 0 && 1152 !sctx->occlusion_queries_disabled) { 1153 bool perfect = sctx->b.num_perfect_occlusion_queries > 0; 1154 1155 if (sctx->b.chip_class >= CIK) { 1156 radeon_emit(cs, 1157 S_028004_PERFECT_ZPASS_COUNTS(perfect) | 1158 S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) | 1159 S_028004_ZPASS_ENABLE(1) | 1160 S_028004_SLICE_EVEN_ENABLE(1) | 1161 S_028004_SLICE_ODD_ENABLE(1)); 1162 } else { 1163 radeon_emit(cs, 1164 S_028004_PERFECT_ZPASS_COUNTS(perfect) | 1165 S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples)); 1166 } 1167 } else { 1168 /* Disable occlusion queries. */ 1169 if (sctx->b.chip_class >= CIK) { 1170 radeon_emit(cs, 0); 1171 } else { 1172 radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1)); 1173 } 1174 } 1175 1176 /* DB_RENDER_OVERRIDE2 */ 1177 radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 1178 S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) | 1179 S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) | 1180 S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4)); 1181 1182 db_shader_control = sctx->ps_db_shader_control; 1183 1184 /* Bug workaround for smoothing (overrasterization) on SI. 
*/ 1185 if (sctx->b.chip_class == SI && sctx->smoothing_enabled) { 1186 db_shader_control &= C_02880C_Z_ORDER; 1187 db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z); 1188 } 1189 1190 /* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */ 1191 if (!rs || !rs->multisample_enable) 1192 db_shader_control &= C_02880C_MASK_EXPORT_ENABLE; 1193 1194 if (sctx->b.family == CHIP_STONEY && 1195 sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) 1196 db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1); 1197 1198 radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, 1199 db_shader_control); 1200} 1201 1202/* 1203 * format translation 1204 */ /* Translate a pipe format into a V_028C70_COLOR_* value based on its channel count and per-channel bit sizes. Returns V_028C70_COLOR_INVALID for non-plain layouts (other than R11G11B10_FLOAT), for mixed formats outside the ZS colorspace, and for any channel layout not listed. */ 1205static uint32_t si_translate_colorformat(enum pipe_format format) 1206{ 1207 const struct util_format_description *desc = util_format_description(format); 1208 1209#define HAS_SIZE(x,y,z,w) \ 1210 (desc->channel[0].size == (x) && desc->channel[1].size == (y) && \ 1211 desc->channel[2].size == (z) && desc->channel[3].size == (w)) 1212 1213 if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */ 1214 return V_028C70_COLOR_10_11_11; 1215 1216 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) 1217 return V_028C70_COLOR_INVALID; 1218 1219 /* hw cannot support mixed formats (except depth/stencil, since 1220 * stencil is not written to).
*/ 1221 if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) 1222 return V_028C70_COLOR_INVALID; 1223 1224 switch (desc->nr_channels) { 1225 case 1: 1226 switch (desc->channel[0].size) { 1227 case 8: 1228 return V_028C70_COLOR_8; 1229 case 16: 1230 return V_028C70_COLOR_16; 1231 case 32: 1232 return V_028C70_COLOR_32; 1233 } 1234 break; 1235 case 2: 1236 if (desc->channel[0].size == desc->channel[1].size) { 1237 switch (desc->channel[0].size) { 1238 case 8: 1239 return V_028C70_COLOR_8_8; 1240 case 16: 1241 return V_028C70_COLOR_16_16; 1242 case 32: 1243 return V_028C70_COLOR_32_32; 1244 } 1245 } else if (HAS_SIZE(8,24,0,0)) { 1246 return V_028C70_COLOR_24_8; 1247 } else if (HAS_SIZE(24,8,0,0)) { 1248 return V_028C70_COLOR_8_24; 1249 } 1250 break; 1251 case 3: 1252 if (HAS_SIZE(5,6,5,0)) { 1253 return V_028C70_COLOR_5_6_5; 1254 } else if (HAS_SIZE(32,8,24,0)) { 1255 return V_028C70_COLOR_X24_8_32_FLOAT; 1256 } 1257 break; 1258 case 4: 1259 if (desc->channel[0].size == desc->channel[1].size && 1260 desc->channel[0].size == desc->channel[2].size && 1261 desc->channel[0].size == desc->channel[3].size) { 1262 switch (desc->channel[0].size) { 1263 case 4: 1264 return V_028C70_COLOR_4_4_4_4; 1265 case 8: 1266 return V_028C70_COLOR_8_8_8_8; 1267 case 16: 1268 return V_028C70_COLOR_16_16_16_16; 1269 case 32: 1270 return V_028C70_COLOR_32_32_32_32; 1271 } 1272 } else if (HAS_SIZE(5,5,5,1)) { 1273 return V_028C70_COLOR_1_5_5_5; 1274 } else if (HAS_SIZE(10,10,10,2)) { 1275 return V_028C70_COLOR_2_10_10_10; 1276 } 1277 break; 1278 } 1279 return V_028C70_COLOR_INVALID; 1280} 1281 /* Select the V_028C70_ENDIAN_* swap mode for a V_028C70_COLOR_* format. When SI_BIG_ENDIAN is false the result is always V_028C70_ENDIAN_NONE; otherwise the mode is chosen per format, with ENDIAN_NONE for formats not listed. */ 1282static uint32_t si_colorformat_endian_swap(uint32_t colorformat) 1283{ 1284 if (SI_BIG_ENDIAN) { 1285 switch(colorformat) { 1286 /* 8-bit buffers. */ 1287 case V_028C70_COLOR_8: 1288 return V_028C70_ENDIAN_NONE; 1289 1290 /* 16-bit buffers.
*/ 1291 case V_028C70_COLOR_5_6_5: 1292 case V_028C70_COLOR_1_5_5_5: 1293 case V_028C70_COLOR_4_4_4_4: 1294 case V_028C70_COLOR_16: 1295 case V_028C70_COLOR_8_8: 1296 return V_028C70_ENDIAN_8IN16; 1297 1298 /* 32-bit buffers. */ 1299 case V_028C70_COLOR_8_8_8_8: 1300 case V_028C70_COLOR_2_10_10_10: 1301 case V_028C70_COLOR_8_24: 1302 case V_028C70_COLOR_24_8: 1303 case V_028C70_COLOR_16_16: 1304 return V_028C70_ENDIAN_8IN32; 1305 1306 /* 64-bit buffers. */ 1307 case V_028C70_COLOR_16_16_16_16: 1308 return V_028C70_ENDIAN_8IN16; 1309 1310 case V_028C70_COLOR_32_32: 1311 return V_028C70_ENDIAN_8IN32; 1312 1313 /* 128-bit buffers. */ 1314 case V_028C70_COLOR_32_32_32_32: 1315 return V_028C70_ENDIAN_8IN32; 1316 default: 1317 return V_028C70_ENDIAN_NONE; /* Unsupported. */ 1318 } 1319 } else { 1320 return V_028C70_ENDIAN_NONE; 1321 } 1322} 1323 /* Translate a depth/stencil pipe format into a V_028040_Z_* value; any format not listed yields V_028040_Z_INVALID. */ 1324static uint32_t si_translate_dbformat(enum pipe_format format) 1325{ 1326 switch (format) { 1327 case PIPE_FORMAT_Z16_UNORM: 1328 return V_028040_Z_16; 1329 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1330 case PIPE_FORMAT_X8Z24_UNORM: 1331 case PIPE_FORMAT_Z24X8_UNORM: 1332 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1333 return V_028040_Z_24; /* deprecated on SI */ 1334 case PIPE_FORMAT_Z32_FLOAT: 1335 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1336 return V_028040_Z_32_FLOAT; 1337 default: 1338 return V_028040_Z_INVALID; 1339 } 1340} 1341 1342/* 1343 * Texture translation 1344 */ 1345 /* Translate a pipe format into a V_008F14_IMG_DATA_FORMAT_* value. Returns ~0 when the format cannot be handled. RGTC, BPTC and S3TC layouts are only accepted when enable_compressed_formats is true (DRM 2.31 or later, or DRM major 3). */ 1346static uint32_t si_translate_texformat(struct pipe_screen *screen, 1347 enum pipe_format format, 1348 const struct util_format_description *desc, 1349 int first_non_void) 1350{ 1351 struct si_screen *sscreen = (struct si_screen*)screen; 1352 bool enable_compressed_formats = (sscreen->b.info.drm_major == 2 && 1353 sscreen->b.info.drm_minor >= 31) || 1354 sscreen->b.info.drm_major == 3; 1355 bool uniform = true; 1356 int i; 1357 1358 /* Colorspace (return non-RGB formats directly).
*/ 1359 switch (desc->colorspace) { 1360 /* Depth stencil formats */ 1361 case UTIL_FORMAT_COLORSPACE_ZS: 1362 switch (format) { 1363 case PIPE_FORMAT_Z16_UNORM: 1364 return V_008F14_IMG_DATA_FORMAT_16; 1365 case PIPE_FORMAT_X24S8_UINT: 1366 case PIPE_FORMAT_Z24X8_UNORM: 1367 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1368 return V_008F14_IMG_DATA_FORMAT_8_24; 1369 case PIPE_FORMAT_X8Z24_UNORM: 1370 case PIPE_FORMAT_S8X24_UINT: 1371 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1372 return V_008F14_IMG_DATA_FORMAT_24_8; 1373 case PIPE_FORMAT_S8_UINT: 1374 return V_008F14_IMG_DATA_FORMAT_8; 1375 case PIPE_FORMAT_Z32_FLOAT: 1376 return V_008F14_IMG_DATA_FORMAT_32; 1377 case PIPE_FORMAT_X32_S8X24_UINT: 1378 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1379 return V_008F14_IMG_DATA_FORMAT_X24_8_32; 1380 default: 1381 goto out_unknown; 1382 } 1383 1384 case UTIL_FORMAT_COLORSPACE_YUV: 1385 goto out_unknown; /* TODO */ 1386 1387 case UTIL_FORMAT_COLORSPACE_SRGB: 1388 if (desc->nr_channels != 4 && desc->nr_channels != 1) 1389 goto out_unknown; 1390 break; 1391 1392 default: 1393 break; 1394 } 1395 1396 if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { 1397 if (!enable_compressed_formats) 1398 goto out_unknown; 1399 1400 switch (format) { 1401 case PIPE_FORMAT_RGTC1_SNORM: 1402 case PIPE_FORMAT_LATC1_SNORM: 1403 case PIPE_FORMAT_RGTC1_UNORM: 1404 case PIPE_FORMAT_LATC1_UNORM: 1405 return V_008F14_IMG_DATA_FORMAT_BC4; 1406 case PIPE_FORMAT_RGTC2_SNORM: 1407 case PIPE_FORMAT_LATC2_SNORM: 1408 case PIPE_FORMAT_RGTC2_UNORM: 1409 case PIPE_FORMAT_LATC2_UNORM: 1410 return V_008F14_IMG_DATA_FORMAT_BC5; 1411 default: 1412 goto out_unknown; 1413 } 1414 } 1415 1416 if (desc->layout == UTIL_FORMAT_LAYOUT_ETC && 1417 sscreen->b.family == CHIP_STONEY) { 1418 switch (format) { 1419 case PIPE_FORMAT_ETC1_RGB8: 1420 case PIPE_FORMAT_ETC2_RGB8: 1421 case PIPE_FORMAT_ETC2_SRGB8: 1422 return V_008F14_IMG_DATA_FORMAT_ETC2_RGB; 1423 case PIPE_FORMAT_ETC2_RGB8A1: 1424 case PIPE_FORMAT_ETC2_SRGB8A1: 1425 return 
V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1; 1426 case PIPE_FORMAT_ETC2_RGBA8: 1427 case PIPE_FORMAT_ETC2_SRGBA8: 1428 return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA; 1429 case PIPE_FORMAT_ETC2_R11_UNORM: 1430 case PIPE_FORMAT_ETC2_R11_SNORM: 1431 return V_008F14_IMG_DATA_FORMAT_ETC2_R; 1432 case PIPE_FORMAT_ETC2_RG11_UNORM: 1433 case PIPE_FORMAT_ETC2_RG11_SNORM: 1434 return V_008F14_IMG_DATA_FORMAT_ETC2_RG; 1435 default: 1436 goto out_unknown; 1437 } 1438 } 1439 1440 if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { 1441 if (!enable_compressed_formats) 1442 goto out_unknown; 1443 1444 switch (format) { 1445 case PIPE_FORMAT_BPTC_RGBA_UNORM: 1446 case PIPE_FORMAT_BPTC_SRGBA: 1447 return V_008F14_IMG_DATA_FORMAT_BC7; 1448 case PIPE_FORMAT_BPTC_RGB_FLOAT: 1449 case PIPE_FORMAT_BPTC_RGB_UFLOAT: 1450 return V_008F14_IMG_DATA_FORMAT_BC6; 1451 default: 1452 goto out_unknown; 1453 } 1454 } 1455 1456 if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 1457 switch (format) { 1458 case PIPE_FORMAT_R8G8_B8G8_UNORM: 1459 case PIPE_FORMAT_G8R8_B8R8_UNORM: 1460 return V_008F14_IMG_DATA_FORMAT_GB_GR; 1461 case PIPE_FORMAT_G8R8_G8B8_UNORM: 1462 case PIPE_FORMAT_R8G8_R8B8_UNORM: 1463 return V_008F14_IMG_DATA_FORMAT_BG_RG; 1464 default: 1465 goto out_unknown; 1466 } 1467 } 1468 1469 if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { 1470 if (!enable_compressed_formats) 1471 goto out_unknown; 1472 1473 if (!util_format_s3tc_enabled) { 1474 goto out_unknown; 1475 } 1476 1477 switch (format) { 1478 case PIPE_FORMAT_DXT1_RGB: 1479 case PIPE_FORMAT_DXT1_RGBA: 1480 case PIPE_FORMAT_DXT1_SRGB: 1481 case PIPE_FORMAT_DXT1_SRGBA: 1482 return V_008F14_IMG_DATA_FORMAT_BC1; 1483 case PIPE_FORMAT_DXT3_RGBA: 1484 case PIPE_FORMAT_DXT3_SRGBA: 1485 return V_008F14_IMG_DATA_FORMAT_BC2; 1486 case PIPE_FORMAT_DXT5_RGBA: 1487 case PIPE_FORMAT_DXT5_SRGBA: 1488 return V_008F14_IMG_DATA_FORMAT_BC3; 1489 default: 1490 goto out_unknown; 1491 } 1492 } 1493 1494 if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) { 1495 return 
V_008F14_IMG_DATA_FORMAT_5_9_9_9; 1496 } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) { 1497 return V_008F14_IMG_DATA_FORMAT_10_11_11; 1498 } 1499 1500 /* R8G8Bx_SNORM - TODO CxV8U8 */ 1501 1502 /* hw cannot support mixed formats (except depth/stencil, since only 1503 * depth is read).*/ 1504 if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) 1505 goto out_unknown; 1506 1507 /* See whether the components are of the same size. */ 1508 for (i = 1; i < desc->nr_channels; i++) { 1509 uniform = uniform && desc->channel[0].size == desc->channel[i].size; 1510 } 1511 1512 /* Non-uniform formats. */ 1513 if (!uniform) { 1514 switch(desc->nr_channels) { 1515 case 3: 1516 if (desc->channel[0].size == 5 && 1517 desc->channel[1].size == 6 && 1518 desc->channel[2].size == 5) { 1519 return V_008F14_IMG_DATA_FORMAT_5_6_5; 1520 } 1521 goto out_unknown; 1522 case 4: 1523 if (desc->channel[0].size == 5 && 1524 desc->channel[1].size == 5 && 1525 desc->channel[2].size == 5 && 1526 desc->channel[3].size == 1) { 1527 return V_008F14_IMG_DATA_FORMAT_1_5_5_5; 1528 } 1529 if (desc->channel[0].size == 10 && 1530 desc->channel[1].size == 10 && 1531 desc->channel[2].size == 10 && 1532 desc->channel[3].size == 2) { 1533 return V_008F14_IMG_DATA_FORMAT_2_10_10_10; 1534 } 1535 goto out_unknown; 1536 } 1537 goto out_unknown; 1538 } 1539 1540 if (first_non_void < 0 || first_non_void > 3) 1541 goto out_unknown; 1542 1543 /* uniform formats */ 1544 switch (desc->channel[first_non_void].size) { 1545 case 4: 1546 switch (desc->nr_channels) { 1547#if 0 /* Not supported for render targets */ 1548 case 2: 1549 return V_008F14_IMG_DATA_FORMAT_4_4; 1550#endif 1551 case 4: 1552 return V_008F14_IMG_DATA_FORMAT_4_4_4_4; 1553 } 1554 break; 1555 case 8: 1556 switch (desc->nr_channels) { 1557 case 1: 1558 return V_008F14_IMG_DATA_FORMAT_8; 1559 case 2: 1560 return V_008F14_IMG_DATA_FORMAT_8_8; 1561 case 4: 1562 return V_008F14_IMG_DATA_FORMAT_8_8_8_8; 1563 } 1564 break; 1565 case 16: 1566 
switch (desc->nr_channels) { 1567 case 1: 1568 return V_008F14_IMG_DATA_FORMAT_16; 1569 case 2: 1570 return V_008F14_IMG_DATA_FORMAT_16_16; 1571 case 4: 1572 return V_008F14_IMG_DATA_FORMAT_16_16_16_16; 1573 } 1574 break; 1575 case 32: 1576 switch (desc->nr_channels) { 1577 case 1: 1578 return V_008F14_IMG_DATA_FORMAT_32; 1579 case 2: 1580 return V_008F14_IMG_DATA_FORMAT_32_32; 1581#if 0 /* Not supported for render targets */ 1582 case 3: 1583 return V_008F14_IMG_DATA_FORMAT_32_32_32; 1584#endif 1585 case 4: 1586 return V_008F14_IMG_DATA_FORMAT_32_32_32_32; 1587 } 1588 } 1589 1590out_unknown: 1591 /* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */ 1592 return ~0; 1593} 1594 /* Map a PIPE_TEX_WRAP_* mode to the matching V_008F30_SQ_TEX_* value; unknown modes fall through to V_008F30_SQ_TEX_WRAP. */ 1595static unsigned si_tex_wrap(unsigned wrap) 1596{ 1597 switch (wrap) { 1598 default: 1599 case PIPE_TEX_WRAP_REPEAT: 1600 return V_008F30_SQ_TEX_WRAP; 1601 case PIPE_TEX_WRAP_CLAMP: 1602 return V_008F30_SQ_TEX_CLAMP_HALF_BORDER; 1603 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 1604 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL; 1605 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 1606 return V_008F30_SQ_TEX_CLAMP_BORDER; 1607 case PIPE_TEX_WRAP_MIRROR_REPEAT: 1608 return V_008F30_SQ_TEX_MIRROR; 1609 case PIPE_TEX_WRAP_MIRROR_CLAMP: 1610 return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER; 1611 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 1612 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; 1613 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 1614 return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER; 1615 } 1616} 1617 /* Map a PIPE_TEX_MIPFILTER_* mode to the matching V_008F38_SQ_TEX_Z_FILTER_* value; unknown modes map to Z_FILTER_NONE. */ 1618static unsigned si_tex_mipfilter(unsigned filter) 1619{ 1620 switch (filter) { 1621 case PIPE_TEX_MIPFILTER_NEAREST: 1622 return V_008F38_SQ_TEX_Z_FILTER_POINT; 1623 case PIPE_TEX_MIPFILTER_LINEAR: 1624 return V_008F38_SQ_TEX_Z_FILTER_LINEAR; 1625 default: 1626 case PIPE_TEX_MIPFILTER_NONE: 1627 return V_008F38_SQ_TEX_Z_FILTER_NONE; 1628 } 1629} 1630 /* Map a PIPE_FUNC_* comparison to the matching V_008F30_SQ_TEX_DEPTH_COMPARE_* value; unknown values map to COMPARE_NEVER. */ 1631static unsigned si_tex_compare(unsigned compare) 1632{ 1633 switch (compare) { 1634 default: 1635 case PIPE_FUNC_NEVER: 1636 return 
V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER; 1637 case PIPE_FUNC_LESS: 1638 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS; 1639 case PIPE_FUNC_EQUAL: 1640 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL; 1641 case PIPE_FUNC_LEQUAL: 1642 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; 1643 case PIPE_FUNC_GREATER: 1644 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER; 1645 case PIPE_FUNC_NOTEQUAL: 1646 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; 1647 case PIPE_FUNC_GEQUAL: 1648 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; 1649 case PIPE_FUNC_ALWAYS: 1650 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS; 1651 } 1652} 1653 /* Choose the V_008F1C_SQ_RSRC_IMG_* resource type. A cube or cube-array view target overrides the resource target; a cube resource viewed as anything else is treated as a 2D array. 2D, RECT and 2D-array targets select the MSAA variant when nr_samples > 1. Unknown targets fall through to IMG_1D. */ 1654static unsigned si_tex_dim(unsigned res_target, unsigned view_target, 1655 unsigned nr_samples) 1656{ 1657 if (view_target == PIPE_TEXTURE_CUBE || 1658 view_target == PIPE_TEXTURE_CUBE_ARRAY) 1659 res_target = view_target; 1660 /* If interpreting cubemaps as something else, set 2D_ARRAY. */ 1661 else if (res_target == PIPE_TEXTURE_CUBE || 1662 res_target == PIPE_TEXTURE_CUBE_ARRAY) 1663 res_target = PIPE_TEXTURE_2D_ARRAY; 1664 1665 switch (res_target) { 1666 default: 1667 case PIPE_TEXTURE_1D: 1668 return V_008F1C_SQ_RSRC_IMG_1D; 1669 case PIPE_TEXTURE_1D_ARRAY: 1670 return V_008F1C_SQ_RSRC_IMG_1D_ARRAY; 1671 case PIPE_TEXTURE_2D: 1672 case PIPE_TEXTURE_RECT: 1673 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA : 1674 V_008F1C_SQ_RSRC_IMG_2D; 1675 case PIPE_TEXTURE_2D_ARRAY: 1676 return nr_samples > 1 ? 
V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : 1677 V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 1678 case PIPE_TEXTURE_3D: 1679 return V_008F1C_SQ_RSRC_IMG_3D; 1680 case PIPE_TEXTURE_CUBE: 1681 case PIPE_TEXTURE_CUBE_ARRAY: 1682 return V_008F1C_SQ_RSRC_IMG_CUBE; 1683 } 1684} 1685 1686/* 1687 * Format support testing 1688 */ 1689 /* True when si_translate_texformat() can translate the format, i.e. its result is not ~0U. */ 1690static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) 1691{ 1692 return si_translate_texformat(screen, format, util_format_description(format), 1693 util_format_get_first_non_void_channel(format)) != ~0U; 1694} 1695 /* Translate a format description into a V_008F0C_BUF_DATA_FORMAT_* value. R11G11B10_FLOAT and 10/10/10/2 are special-cased; otherwise every channel must have the same size as channel[first_non_void], and the result is picked from that size and the channel count (3-channel 8-bit and 16-bit formats map to the 4-channel data formats). Returns V_008F0C_BUF_DATA_FORMAT_INVALID when nothing matches. NOTE(review): first_non_void < 0 is guarded only by assert(); with asserts compiled out a negative value would be used as an array index -- confirm that callers never pass one. */ 1696static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen, 1697 const struct util_format_description *desc, 1698 int first_non_void) 1699{ 1700 int i; 1701 1702 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 1703 return V_008F0C_BUF_DATA_FORMAT_10_11_11; 1704 1705 assert(first_non_void >= 0); 1706 1707 if (desc->nr_channels == 4 && 1708 desc->channel[0].size == 10 && 1709 desc->channel[1].size == 10 && 1710 desc->channel[2].size == 10 && 1711 desc->channel[3].size == 2) 1712 return V_008F0C_BUF_DATA_FORMAT_2_10_10_10; 1713 1714 /* See whether the components are of the same size.
*/ 1715 for (i = 0; i < desc->nr_channels; i++) { 1716 if (desc->channel[first_non_void].size != desc->channel[i].size) 1717 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1718 } 1719 1720 switch (desc->channel[first_non_void].size) { 1721 case 8: 1722 switch (desc->nr_channels) { 1723 case 1: 1724 return V_008F0C_BUF_DATA_FORMAT_8; 1725 case 2: 1726 return V_008F0C_BUF_DATA_FORMAT_8_8; 1727 case 3: 1728 case 4: 1729 return V_008F0C_BUF_DATA_FORMAT_8_8_8_8; 1730 } 1731 break; 1732 case 16: 1733 switch (desc->nr_channels) { 1734 case 1: 1735 return V_008F0C_BUF_DATA_FORMAT_16; 1736 case 2: 1737 return V_008F0C_BUF_DATA_FORMAT_16_16; 1738 case 3: 1739 case 4: 1740 return V_008F0C_BUF_DATA_FORMAT_16_16_16_16; 1741 } 1742 break; 1743 case 32: 1744 switch (desc->nr_channels) { 1745 case 1: 1746 return V_008F0C_BUF_DATA_FORMAT_32; 1747 case 2: 1748 return V_008F0C_BUF_DATA_FORMAT_32_32; 1749 case 3: 1750 return V_008F0C_BUF_DATA_FORMAT_32_32_32; 1751 case 4: 1752 return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 1753 } 1754 break; 1755 } 1756 1757 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1758} 1759 /* Select the V_008F0C_BUF_NUM_FORMAT_* value from the first non-void channel. R11G11B10_FLOAT is always FLOAT. Signed/fixed and unsigned channels become SINT/UINT when the channel is at least 32 bits wide or pure integer, SNORM/UNORM when normalized, and SSCALED/USCALED otherwise. Float and any other channel type yield FLOAT. */ 1760static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen, 1761 const struct util_format_description *desc, 1762 int first_non_void) 1763{ 1764 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 1765 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 1766 1767 assert(first_non_void >= 0); 1768 1769 switch (desc->channel[first_non_void].type) { 1770 case UTIL_FORMAT_TYPE_SIGNED: 1771 case UTIL_FORMAT_TYPE_FIXED: 1772 if (desc->channel[first_non_void].size >= 32 || 1773 desc->channel[first_non_void].pure_integer) 1774 return V_008F0C_BUF_NUM_FORMAT_SINT; 1775 else if (desc->channel[first_non_void].normalized) 1776 return V_008F0C_BUF_NUM_FORMAT_SNORM; 1777 else 1778 return V_008F0C_BUF_NUM_FORMAT_SSCALED; 1779 break; 1780 case UTIL_FORMAT_TYPE_UNSIGNED: 1781 if (desc->channel[first_non_void].size >= 32 || 1782 desc->channel[first_non_void].pure_integer) 1783 return 
V_008F0C_BUF_NUM_FORMAT_UINT; 1784 else if (desc->channel[first_non_void].normalized) 1785 return V_008F0C_BUF_NUM_FORMAT_UNORM; 1786 else 1787 return V_008F0C_BUF_NUM_FORMAT_USCALED; 1788 break; 1789 case UTIL_FORMAT_TYPE_FLOAT: 1790 default: 1791 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 1792 } 1793} 1794 /* Return the subset of the requested `usage` bind flags that a buffer of this format supports, or 0 if none. Only SHADER_IMAGE, SAMPLER_VIEW and VERTEX_BUFFER bits may be passed (asserted). For 24-bit and 48-bit blocks the SHADER_IMAGE and SAMPLER_VIEW bits are dropped; the remaining bits are returned only when the format has a valid buffer data format. */ 1795static unsigned si_is_vertex_format_supported(struct pipe_screen *screen, 1796 enum pipe_format format, 1797 unsigned usage) 1798{ 1799 const struct util_format_description *desc; 1800 int first_non_void; 1801 unsigned data_format; 1802 1803 assert((usage & ~(PIPE_BIND_SHADER_IMAGE | 1804 PIPE_BIND_SAMPLER_VIEW | 1805 PIPE_BIND_VERTEX_BUFFER)) == 0); 1806 1807 desc = util_format_description(format); 1808 1809 /* There are no native 8_8_8 or 16_16_16 data formats, and we currently 1810 * select 8_8_8_8 and 16_16_16_16 instead. This works reasonably well 1811 * for read-only access (with caveats surrounding bounds checks), but 1812 * obviously fails for write access which we have to implement for 1813 * shader images. Luckily, OpenGL doesn't expect this to be supported 1814 * anyway, and so the only impact is on PBO uploads / downloads, which 1815 * shouldn't be expected to be fast for GL_RGB anyway.
1816 */ 1817 if (desc->block.bits == 3 * 8 || 1818 desc->block.bits == 3 * 16) { 1819 if (usage & (PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW)) { 1820 usage &= ~(PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW); 1821 if (!usage) 1822 return 0; 1823 } 1824 } 1825 1826 first_non_void = util_format_get_first_non_void_channel(format); 1827 data_format = si_translate_buffer_dataformat(screen, desc, first_non_void); 1828 if (data_format == V_008F0C_BUF_DATA_FORMAT_INVALID) 1829 return 0; 1830 1831 return usage; 1832} 1833 /* True when the format has both a valid color format translation and a valid color swap (r600_translate_colorswap() result other than ~0U). */ 1834static bool si_is_colorbuffer_format_supported(enum pipe_format format) 1835{ 1836 return si_translate_colorformat(format) != V_028C70_COLOR_INVALID && 1837 r600_translate_colorswap(format, false) != ~0U; 1838} 1839 /* True when the format translates to something other than V_028040_Z_INVALID. */ 1840static bool si_is_zs_format_supported(enum pipe_format format) 1841{ 1842 return si_translate_dbformat(format) != V_028040_Z_INVALID; 1843} 1844 /* Format-support query: accumulates the supported bind flags in retval and returns true only when retval equals the requested usage. Rejects out-of-range targets and formats failing util_format_is_supported(). For sample_count > 1 it requires PIPE_CAP_TEXTURE_MULTISAMPLE, rejects shader-image usage, accepts 2, 4 or 8 samples, and accepts 16 samples only for PIPE_FORMAT_NONE. */ 1845static boolean si_is_format_supported(struct pipe_screen *screen, 1846 enum pipe_format format, 1847 enum pipe_texture_target target, 1848 unsigned sample_count, 1849 unsigned usage) 1850{ 1851 unsigned retval = 0; 1852 1853 if (target >= PIPE_MAX_TEXTURE_TYPES) { 1854 R600_ERR("r600: unsupported texture type %d\n", target); 1855 return false; 1856 } 1857 1858 if (!util_format_is_supported(format, usage)) 1859 return false; 1860 1861 if (sample_count > 1) { 1862 if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE)) 1863 return false; 1864 1865 if (usage & PIPE_BIND_SHADER_IMAGE) 1866 return false; 1867 1868 switch (sample_count) { 1869 case 2: 1870 case 4: 1871 case 8: 1872 break; 1873 case 16: 1874 if (format == PIPE_FORMAT_NONE) 1875 return true; 1876 else 1877 return false; 1878 default: 1879 return false; 1880 } 1881 } 1882 1883 if (usage & (PIPE_BIND_SAMPLER_VIEW | 1884 PIPE_BIND_SHADER_IMAGE)) { 1885 if (target == PIPE_BUFFER) { 1886 retval |= si_is_vertex_format_supported( 1887 screen, format, usage & (PIPE_BIND_SAMPLER_VIEW | 1888 PIPE_BIND_SHADER_IMAGE)); 1889 } 
else { 1890 if (si_is_sampler_format_supported(screen, format)) 1891 retval |= usage & (PIPE_BIND_SAMPLER_VIEW | 1892 PIPE_BIND_SHADER_IMAGE); 1893 } 1894 } 1895 1896 if ((usage & (PIPE_BIND_RENDER_TARGET | 1897 PIPE_BIND_DISPLAY_TARGET | 1898 PIPE_BIND_SCANOUT | 1899 PIPE_BIND_SHARED | 1900 PIPE_BIND_BLENDABLE)) && 1901 si_is_colorbuffer_format_supported(format)) { 1902 retval |= usage & 1903 (PIPE_BIND_RENDER_TARGET | 1904 PIPE_BIND_DISPLAY_TARGET | 1905 PIPE_BIND_SCANOUT | 1906 PIPE_BIND_SHARED); 1907 if (!util_format_is_pure_integer(format) && 1908 !util_format_is_depth_or_stencil(format)) 1909 retval |= usage & PIPE_BIND_BLENDABLE; 1910 } 1911 1912 if ((usage & PIPE_BIND_DEPTH_STENCIL) && 1913 si_is_zs_format_supported(format)) { 1914 retval |= PIPE_BIND_DEPTH_STENCIL; 1915 } 1916 1917 if (usage & PIPE_BIND_VERTEX_BUFFER) { 1918 retval |= si_is_vertex_format_supported(screen, format, 1919 PIPE_BIND_VERTEX_BUFFER); 1920 } 1921 1922 if ((usage & PIPE_BIND_LINEAR) && 1923 !util_format_is_compressed(format) && 1924 !(usage & PIPE_BIND_DEPTH_STENCIL)) 1925 retval |= PIPE_BIND_LINEAR; 1926 1927 return retval == usage; 1928} 1929 1930/* 1931 * framebuffer handling 1932 */ 1933 1934static void si_choose_spi_color_formats(struct r600_surface *surf, 1935 unsigned format, unsigned swap, 1936 unsigned ntype, bool is_depth) 1937{ 1938 /* Alpha is needed for alpha-to-coverage. 1939 * Blending may be with or without alpha. 1940 */ 1941 unsigned normal = 0; /* most optimal, may not support blending or export alpha */ 1942 unsigned alpha = 0; /* exports alpha, but may not support blending */ 1943 unsigned blend = 0; /* supports blending, but may not export alpha */ 1944 unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */ 1945 1946 /* Choose the SPI color formats. These are required values for Stoney/RB+. 1947 * Other chips have multiple choices, though they are not necessarily better. 
1948 */ 1949 switch (format) { 1950 case V_028C70_COLOR_5_6_5: 1951 case V_028C70_COLOR_1_5_5_5: 1952 case V_028C70_COLOR_5_5_5_1: 1953 case V_028C70_COLOR_4_4_4_4: 1954 case V_028C70_COLOR_10_11_11: 1955 case V_028C70_COLOR_11_11_10: 1956 case V_028C70_COLOR_8: 1957 case V_028C70_COLOR_8_8: 1958 case V_028C70_COLOR_8_8_8_8: 1959 case V_028C70_COLOR_10_10_10_2: 1960 case V_028C70_COLOR_2_10_10_10: 1961 if (ntype == V_028C70_NUMBER_UINT) 1962 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR; 1963 else if (ntype == V_028C70_NUMBER_SINT) 1964 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR; 1965 else 1966 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR; 1967 break; 1968 1969 case V_028C70_COLOR_16: 1970 case V_028C70_COLOR_16_16: 1971 case V_028C70_COLOR_16_16_16_16: 1972 if (ntype == V_028C70_NUMBER_UNORM || 1973 ntype == V_028C70_NUMBER_SNORM) { 1974 /* UNORM16 and SNORM16 don't support blending */ 1975 if (ntype == V_028C70_NUMBER_UNORM) 1976 normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR; 1977 else 1978 normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR; 1979 1980 /* Use 32 bits per channel for blending. 
*/ 1981 if (format == V_028C70_COLOR_16) { 1982 if (swap == V_028C70_SWAP_STD) { /* R */ 1983 blend = V_028714_SPI_SHADER_32_R; 1984 blend_alpha = V_028714_SPI_SHADER_32_AR; 1985 } else if (swap == V_028C70_SWAP_ALT_REV) /* A */ 1986 blend = blend_alpha = V_028714_SPI_SHADER_32_AR; 1987 else 1988 assert(0); 1989 } else if (format == V_028C70_COLOR_16_16) { 1990 if (swap == V_028C70_SWAP_STD) { /* RG */ 1991 blend = V_028714_SPI_SHADER_32_GR; 1992 blend_alpha = V_028714_SPI_SHADER_32_ABGR; 1993 } else if (swap == V_028C70_SWAP_ALT) /* RA */ 1994 blend = blend_alpha = V_028714_SPI_SHADER_32_AR; 1995 else 1996 assert(0); 1997 } else /* 16_16_16_16 */ 1998 blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR; 1999 } else if (ntype == V_028C70_NUMBER_UINT) 2000 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR; 2001 else if (ntype == V_028C70_NUMBER_SINT) 2002 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR; 2003 else if (ntype == V_028C70_NUMBER_FLOAT) 2004 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR; 2005 else 2006 assert(0); 2007 break; 2008 2009 case V_028C70_COLOR_32: 2010 if (swap == V_028C70_SWAP_STD) { /* R */ 2011 blend = normal = V_028714_SPI_SHADER_32_R; 2012 alpha = blend_alpha = V_028714_SPI_SHADER_32_AR; 2013 } else if (swap == V_028C70_SWAP_ALT_REV) /* A */ 2014 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR; 2015 else 2016 assert(0); 2017 break; 2018 2019 case V_028C70_COLOR_32_32: 2020 if (swap == V_028C70_SWAP_STD) { /* RG */ 2021 blend = normal = V_028714_SPI_SHADER_32_GR; 2022 alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR; 2023 } else if (swap == V_028C70_SWAP_ALT) /* RA */ 2024 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR; 2025 else 2026 assert(0); 2027 break; 2028 2029 case V_028C70_COLOR_32_32_32_32: 2030 case V_028C70_COLOR_8_24: 2031 case V_028C70_COLOR_24_8: 2032 case V_028C70_COLOR_X24_8_32_FLOAT: 2033 alpha = blend = blend_alpha = 
normal = V_028714_SPI_SHADER_32_ABGR; 2034 break; 2035 2036 default: 2037 assert(0); 2038 return; 2039 } 2040 2041 /* The DB->CB copy needs 32_ABGR. */ 2042 if (is_depth) 2043 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR; 2044 2045 surf->spi_shader_col_format = normal; 2046 surf->spi_shader_col_format_alpha = alpha; 2047 surf->spi_shader_col_format_blend = blend; 2048 surf->spi_shader_col_format_blend_alpha = blend_alpha; 2049} 2050 2051static void si_initialize_color_surface(struct si_context *sctx, 2052 struct r600_surface *surf) 2053{ 2054 struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; 2055 unsigned color_info, color_attrib, color_view; 2056 unsigned format, swap, ntype, endian; 2057 const struct util_format_description *desc; 2058 int i; 2059 unsigned blend_clamp = 0, blend_bypass = 0; 2060 2061 color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) | 2062 S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer); 2063 2064 desc = util_format_description(surf->base.format); 2065 for (i = 0; i < 4; i++) { 2066 if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { 2067 break; 2068 } 2069 } 2070 if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) { 2071 ntype = V_028C70_NUMBER_FLOAT; 2072 } else { 2073 ntype = V_028C70_NUMBER_UNORM; 2074 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) 2075 ntype = V_028C70_NUMBER_SRGB; 2076 else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 2077 if (desc->channel[i].pure_integer) { 2078 ntype = V_028C70_NUMBER_SINT; 2079 } else { 2080 assert(desc->channel[i].normalized); 2081 ntype = V_028C70_NUMBER_SNORM; 2082 } 2083 } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { 2084 if (desc->channel[i].pure_integer) { 2085 ntype = V_028C70_NUMBER_UINT; 2086 } else { 2087 assert(desc->channel[i].normalized); 2088 ntype = V_028C70_NUMBER_UNORM; 2089 } 2090 } 2091 } 2092 2093 format = si_translate_colorformat(surf->base.format); 2094 if (format == 
V_028C70_COLOR_INVALID) { 2095 R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format); 2096 } 2097 assert(format != V_028C70_COLOR_INVALID); 2098 swap = r600_translate_colorswap(surf->base.format, false); 2099 endian = si_colorformat_endian_swap(format); 2100 2101 /* blend clamp should be set for all NORM/SRGB types */ 2102 if (ntype == V_028C70_NUMBER_UNORM || 2103 ntype == V_028C70_NUMBER_SNORM || 2104 ntype == V_028C70_NUMBER_SRGB) 2105 blend_clamp = 1; 2106 2107 /* set blend bypass according to docs if SINT/UINT or 2108 8/24 COLOR variants */ 2109 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT || 2110 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 || 2111 format == V_028C70_COLOR_X24_8_32_FLOAT) { 2112 blend_clamp = 0; 2113 blend_bypass = 1; 2114 } 2115 2116 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) && 2117 (format == V_028C70_COLOR_8 || 2118 format == V_028C70_COLOR_8_8 || 2119 format == V_028C70_COLOR_8_8_8_8)) 2120 surf->color_is_int8 = true; 2121 2122 color_info = S_028C70_FORMAT(format) | 2123 S_028C70_COMP_SWAP(swap) | 2124 S_028C70_BLEND_CLAMP(blend_clamp) | 2125 S_028C70_BLEND_BYPASS(blend_bypass) | 2126 S_028C70_SIMPLE_FLOAT(1) | 2127 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && 2128 ntype != V_028C70_NUMBER_SNORM && 2129 ntype != V_028C70_NUMBER_SRGB && 2130 format != V_028C70_COLOR_8_24 && 2131 format != V_028C70_COLOR_24_8) | 2132 S_028C70_NUMBER_TYPE(ntype) | 2133 S_028C70_ENDIAN(endian); 2134 2135 /* Intensity is implemented as Red, so treat it that way. 
*/ 2136 color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1 || 2137 util_format_is_intensity(surf->base.format)); 2138 2139 if (rtex->resource.b.b.nr_samples > 1) { 2140 unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples); 2141 2142 color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | 2143 S_028C74_NUM_FRAGMENTS(log_samples); 2144 2145 if (rtex->fmask.size) { 2146 color_info |= S_028C70_COMPRESSION(1); 2147 unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height); 2148 2149 if (sctx->b.chip_class == SI) { 2150 /* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */ 2151 color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); 2152 } 2153 } 2154 } 2155 2156 surf->cb_color_view = color_view; 2157 surf->cb_color_info = color_info; 2158 surf->cb_color_attrib = color_attrib; 2159 2160 if (sctx->b.chip_class >= VI) { 2161 unsigned max_uncompressed_block_size = 2; 2162 2163 if (rtex->resource.b.b.nr_samples > 1) { 2164 if (rtex->surface.bpe == 1) 2165 max_uncompressed_block_size = 0; 2166 else if (rtex->surface.bpe == 2) 2167 max_uncompressed_block_size = 1; 2168 } 2169 2170 surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) | 2171 S_028C78_INDEPENDENT_64B_BLOCKS(1); 2172 } 2173 2174 /* This must be set for fast clear to work without FMASK. 
*/ 2175 if (!rtex->fmask.size && sctx->b.chip_class == SI) { 2176 unsigned bankh = util_logbase2(rtex->surface.bankh); 2177 surf->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh); 2178 } 2179 2180 /* Determine pixel shader export format */ 2181 si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth); 2182 2183 surf->color_initialized = true; 2184} 2185 2186static void si_init_depth_surface(struct si_context *sctx, 2187 struct r600_surface *surf) 2188{ 2189 struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; 2190 unsigned level = surf->base.u.tex.level; 2191 struct radeon_surf_level *levelinfo = &rtex->surface.level[level]; 2192 unsigned format; 2193 uint32_t z_info, s_info, db_depth_info; 2194 uint64_t z_offs, s_offs; 2195 uint32_t db_htile_data_base, db_htile_surface; 2196 2197 format = si_translate_dbformat(rtex->db_render_format); 2198 2199 if (format == V_028040_Z_INVALID) { 2200 R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format); 2201 } 2202 assert(format != V_028040_Z_INVALID); 2203 2204 s_offs = z_offs = rtex->resource.gpu_address; 2205 z_offs += rtex->surface.level[level].offset; 2206 s_offs += rtex->surface.stencil_level[level].offset; 2207 2208 db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!rtex->tc_compatible_htile); 2209 2210 z_info = S_028040_FORMAT(format); 2211 if (rtex->resource.b.b.nr_samples > 1) { 2212 z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples)); 2213 } 2214 2215 if (rtex->surface.flags & RADEON_SURF_SBUFFER) 2216 s_info = S_028044_FORMAT(V_028044_STENCIL_8); 2217 else 2218 s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID); 2219 2220 if (sctx->b.chip_class >= CIK) { 2221 struct radeon_info *info = &sctx->screen->b.info; 2222 unsigned index = rtex->surface.tiling_index[level]; 2223 unsigned stencil_index = rtex->surface.stencil_tiling_index[level]; 2224 unsigned macro_index = rtex->surface.macro_tile_index; 2225 unsigned tile_mode = 
info->si_tile_mode_array[index]; 2226 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index]; 2227 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index]; 2228 2229 db_depth_info |= 2230 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) | 2231 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) | 2232 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) | 2233 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) | 2234 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) | 2235 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode)); 2236 z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode)); 2237 s_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode)); 2238 } else { 2239 unsigned tile_mode_index = si_tile_mode_index(rtex, level, false); 2240 z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index); 2241 tile_mode_index = si_tile_mode_index(rtex, level, true); 2242 s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index); 2243 } 2244 2245 /* HiZ aka depth buffer htile */ 2246 /* use htile only for first level */ 2247 if (rtex->htile_buffer && !level) { 2248 z_info |= S_028040_TILE_SURFACE_ENABLE(1) | 2249 S_028040_ALLOW_EXPCLEAR(1); 2250 2251 if (rtex->surface.flags & RADEON_SURF_SBUFFER) { 2252 /* Workaround: For a not yet understood reason, the 2253 * combination of MSAA, fast stencil clear and stencil 2254 * decompress messes with subsequent stencil buffer 2255 * uses. Problem was reproduced on Verde, Bonaire, 2256 * Tonga, and Carrizo. 2257 * 2258 * Disabling EXPCLEAR works around the problem. 2259 * 2260 * Check piglit's arb_texture_multisample-stencil-clear 2261 * test if you want to try changing this. 2262 */ 2263 if (rtex->resource.b.b.nr_samples <= 1) 2264 s_info |= S_028044_ALLOW_EXPCLEAR(1); 2265 } else if (!rtex->tc_compatible_htile) { 2266 /* Use all of the htile_buffer for depth if there's no stencil. 2267 * This must not be set when TC-compatible HTILE is enabled 2268 * due to a hw bug. 
2269 */ 2270 s_info |= S_028044_TILE_STENCIL_DISABLE(1); 2271 } 2272 2273 uint64_t va = rtex->htile_buffer->gpu_address; 2274 db_htile_data_base = va >> 8; 2275 db_htile_surface = S_028ABC_FULL_CACHE(1); 2276 2277 if (rtex->tc_compatible_htile) { 2278 db_htile_surface |= S_028ABC_TC_COMPATIBLE(1); 2279 2280 switch (rtex->resource.b.b.nr_samples) { 2281 case 0: 2282 case 1: 2283 z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5); 2284 break; 2285 case 2: 2286 case 4: 2287 z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3); 2288 break; 2289 case 8: 2290 z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2); 2291 break; 2292 default: 2293 assert(0); 2294 } 2295 } 2296 } else { 2297 db_htile_data_base = 0; 2298 db_htile_surface = 0; 2299 } 2300 2301 assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0); 2302 2303 surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) | 2304 S_028008_SLICE_MAX(surf->base.u.tex.last_layer); 2305 surf->db_htile_data_base = db_htile_data_base; 2306 surf->db_depth_info = db_depth_info; 2307 surf->db_z_info = z_info; 2308 surf->db_stencil_info = s_info; 2309 surf->db_depth_base = z_offs >> 8; 2310 surf->db_stencil_base = s_offs >> 8; 2311 surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) | 2312 S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1); 2313 surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x * 2314 levelinfo->nblk_y) / 64 - 1); 2315 surf->db_htile_surface = db_htile_surface; 2316 2317 surf->depth_initialized = true; 2318} 2319 2320static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state) 2321{ 2322 for (int i = 0; i < state->nr_cbufs; ++i) { 2323 struct r600_surface *surf = NULL; 2324 struct r600_texture *rtex; 2325 2326 if (!state->cbufs[i]) 2327 continue; 2328 surf = (struct r600_surface*)state->cbufs[i]; 2329 rtex = (struct r600_texture*)surf->base.texture; 2330 2331 p_atomic_dec(&rtex->framebuffers_bound); 2332 } 2333} 2334 2335static void 
si_set_framebuffer_state(struct pipe_context *ctx, 2336 const struct pipe_framebuffer_state *state) 2337{ 2338 struct si_context *sctx = (struct si_context *)ctx; 2339 struct pipe_constant_buffer constbuf = {0}; 2340 struct r600_surface *surf = NULL; 2341 struct r600_texture *rtex; 2342 bool old_any_dst_linear = sctx->framebuffer.any_dst_linear; 2343 unsigned old_nr_samples = sctx->framebuffer.nr_samples; 2344 int i; 2345 2346 for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 2347 if (!sctx->framebuffer.state.cbufs[i]) 2348 continue; 2349 2350 rtex = (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture; 2351 if (rtex->dcc_gather_statistics) 2352 vi_separate_dcc_stop_query(ctx, rtex); 2353 } 2354 2355 /* Only flush TC when changing the framebuffer state, because 2356 * the only client not using TC that can change textures is 2357 * the framebuffer. 2358 * 2359 * Flush all CB and DB caches here because all buffers can be used 2360 * for write by both TC (with shader image stores) and CB/DB. 2361 */ 2362 sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | 2363 SI_CONTEXT_INV_GLOBAL_L2 | 2364 SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER | 2365 SI_CONTEXT_CS_PARTIAL_FLUSH; 2366 2367 /* Take the maximum of the old and new count. If the new count is lower, 2368 * dirtying is needed to disable the unbound colorbuffers. 
2369 */ 2370 sctx->framebuffer.dirty_cbufs |= 2371 (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1; 2372 sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf; 2373 2374 si_dec_framebuffer_counters(&sctx->framebuffer.state); 2375 util_copy_framebuffer_state(&sctx->framebuffer.state, state); 2376 2377 sctx->framebuffer.colorbuf_enabled_4bit = 0; 2378 sctx->framebuffer.spi_shader_col_format = 0; 2379 sctx->framebuffer.spi_shader_col_format_alpha = 0; 2380 sctx->framebuffer.spi_shader_col_format_blend = 0; 2381 sctx->framebuffer.spi_shader_col_format_blend_alpha = 0; 2382 sctx->framebuffer.color_is_int8 = 0; 2383 2384 sctx->framebuffer.compressed_cb_mask = 0; 2385 sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); 2386 sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples); 2387 sctx->framebuffer.any_dst_linear = false; 2388 2389 for (i = 0; i < state->nr_cbufs; i++) { 2390 if (!state->cbufs[i]) 2391 continue; 2392 2393 surf = (struct r600_surface*)state->cbufs[i]; 2394 rtex = (struct r600_texture*)surf->base.texture; 2395 2396 if (!surf->color_initialized) { 2397 si_initialize_color_surface(sctx, surf); 2398 } 2399 2400 sctx->framebuffer.colorbuf_enabled_4bit |= 0xf << (i * 4); 2401 sctx->framebuffer.spi_shader_col_format |= 2402 surf->spi_shader_col_format << (i * 4); 2403 sctx->framebuffer.spi_shader_col_format_alpha |= 2404 surf->spi_shader_col_format_alpha << (i * 4); 2405 sctx->framebuffer.spi_shader_col_format_blend |= 2406 surf->spi_shader_col_format_blend << (i * 4); 2407 sctx->framebuffer.spi_shader_col_format_blend_alpha |= 2408 surf->spi_shader_col_format_blend_alpha << (i * 4); 2409 2410 if (surf->color_is_int8) 2411 sctx->framebuffer.color_is_int8 |= 1 << i; 2412 2413 if (rtex->fmask.size) { 2414 sctx->framebuffer.compressed_cb_mask |= 1 << i; 2415 } 2416 2417 if (rtex->surface.is_linear) 2418 sctx->framebuffer.any_dst_linear = true; 2419 2420 
r600_context_add_resource_size(ctx, surf->base.texture); 2421 2422 p_atomic_inc(&rtex->framebuffers_bound); 2423 2424 if (rtex->dcc_gather_statistics) { 2425 /* Dirty tracking must be enabled for DCC usage analysis. */ 2426 sctx->framebuffer.compressed_cb_mask |= 1 << i; 2427 vi_separate_dcc_start_query(ctx, rtex); 2428 } 2429 } 2430 2431 if (state->zsbuf) { 2432 surf = (struct r600_surface*)state->zsbuf; 2433 rtex = (struct r600_texture*)surf->base.texture; 2434 2435 if (!surf->depth_initialized) { 2436 si_init_depth_surface(sctx, surf); 2437 } 2438 r600_context_add_resource_size(ctx, surf->base.texture); 2439 } 2440 2441 si_update_poly_offset_state(sctx); 2442 si_mark_atom_dirty(sctx, &sctx->cb_render_state); 2443 si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); 2444 2445 if (sctx->framebuffer.any_dst_linear != old_any_dst_linear) 2446 si_mark_atom_dirty(sctx, &sctx->msaa_config); 2447 2448 if (sctx->framebuffer.nr_samples != old_nr_samples) { 2449 si_mark_atom_dirty(sctx, &sctx->msaa_config); 2450 si_mark_atom_dirty(sctx, &sctx->db_render_state); 2451 2452 /* Set sample locations as fragment shader constants. 
*/ 2453 switch (sctx->framebuffer.nr_samples) { 2454 case 1: 2455 constbuf.user_buffer = sctx->b.sample_locations_1x; 2456 break; 2457 case 2: 2458 constbuf.user_buffer = sctx->b.sample_locations_2x; 2459 break; 2460 case 4: 2461 constbuf.user_buffer = sctx->b.sample_locations_4x; 2462 break; 2463 case 8: 2464 constbuf.user_buffer = sctx->b.sample_locations_8x; 2465 break; 2466 case 16: 2467 constbuf.user_buffer = sctx->b.sample_locations_16x; 2468 break; 2469 default: 2470 R600_ERR("Requested an invalid number of samples %i.\n", 2471 sctx->framebuffer.nr_samples); 2472 assert(0); 2473 } 2474 constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; 2475 si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf); 2476 2477 si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom); 2478 } 2479 2480 sctx->need_check_render_feedback = true; 2481 sctx->do_update_shaders = true; 2482} 2483 2484static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom) 2485{ 2486 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 2487 struct pipe_framebuffer_state *state = &sctx->framebuffer.state; 2488 unsigned i, nr_cbufs = state->nr_cbufs; 2489 struct r600_texture *tex = NULL; 2490 struct r600_surface *cb = NULL; 2491 unsigned cb_color_info = 0; 2492 2493 /* Colorbuffers. 
*/ 2494 for (i = 0; i < nr_cbufs; i++) { 2495 const struct radeon_surf_level *level_info; 2496 unsigned pitch_tile_max, slice_tile_max, tile_mode_index; 2497 unsigned cb_color_base, cb_color_fmask, cb_color_attrib; 2498 unsigned cb_color_pitch, cb_color_slice, cb_color_fmask_slice; 2499 2500 if (!(sctx->framebuffer.dirty_cbufs & (1 << i))) 2501 continue; 2502 2503 cb = (struct r600_surface*)state->cbufs[i]; 2504 if (!cb) { 2505 radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 2506 S_028C70_FORMAT(V_028C70_COLOR_INVALID)); 2507 continue; 2508 } 2509 2510 tex = (struct r600_texture *)cb->base.texture; 2511 level_info = &tex->surface.level[cb->base.u.tex.level]; 2512 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2513 &tex->resource, RADEON_USAGE_READWRITE, 2514 tex->resource.b.b.nr_samples > 1 ? 2515 RADEON_PRIO_COLOR_BUFFER_MSAA : 2516 RADEON_PRIO_COLOR_BUFFER); 2517 2518 if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) { 2519 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2520 tex->cmask_buffer, RADEON_USAGE_READWRITE, 2521 RADEON_PRIO_CMASK); 2522 } 2523 2524 if (tex->dcc_separate_buffer) 2525 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2526 tex->dcc_separate_buffer, 2527 RADEON_USAGE_READWRITE, 2528 RADEON_PRIO_DCC); 2529 2530 /* Compute mutable surface parameters. 
*/ 2531 pitch_tile_max = level_info->nblk_x / 8 - 1; 2532 slice_tile_max = level_info->nblk_x * 2533 level_info->nblk_y / 64 - 1; 2534 tile_mode_index = si_tile_mode_index(tex, cb->base.u.tex.level, false); 2535 2536 cb_color_base = (tex->resource.gpu_address + level_info->offset) >> 8; 2537 cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max); 2538 cb_color_slice = S_028C68_TILE_MAX(slice_tile_max); 2539 cb_color_attrib = cb->cb_color_attrib | 2540 S_028C74_TILE_MODE_INDEX(tile_mode_index); 2541 2542 if (tex->fmask.size) { 2543 if (sctx->b.chip_class >= CIK) 2544 cb_color_pitch |= S_028C64_FMASK_TILE_MAX(tex->fmask.pitch_in_pixels / 8 - 1); 2545 cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tex->fmask.tile_mode_index); 2546 cb_color_fmask = (tex->resource.gpu_address + tex->fmask.offset) >> 8; 2547 cb_color_fmask_slice = S_028C88_TILE_MAX(tex->fmask.slice_tile_max); 2548 } else { 2549 /* This must be set for fast clear to work without FMASK. */ 2550 if (sctx->b.chip_class >= CIK) 2551 cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max); 2552 cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index); 2553 cb_color_fmask = cb_color_base; 2554 cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max); 2555 } 2556 2557 cb_color_info = cb->cb_color_info | tex->cb_color_info; 2558 2559 if (tex->dcc_offset && cb->base.u.tex.level < tex->surface.num_dcc_levels) { 2560 bool is_msaa_resolve_dst = state->cbufs[0] && 2561 state->cbufs[0]->texture->nr_samples > 1 && 2562 state->cbufs[1] == &cb->base && 2563 state->cbufs[1]->texture->nr_samples <= 1; 2564 2565 if (!is_msaa_resolve_dst) 2566 cb_color_info |= S_028C70_DCC_ENABLE(1); 2567 } 2568 2569 radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 2570 sctx->b.chip_class >= VI ? 
14 : 13); 2571 radeon_emit(cs, cb_color_base); /* R_028C60_CB_COLOR0_BASE */ 2572 radeon_emit(cs, cb_color_pitch); /* R_028C64_CB_COLOR0_PITCH */ 2573 radeon_emit(cs, cb_color_slice); /* R_028C68_CB_COLOR0_SLICE */ 2574 radeon_emit(cs, cb->cb_color_view); /* R_028C6C_CB_COLOR0_VIEW */ 2575 radeon_emit(cs, cb_color_info); /* R_028C70_CB_COLOR0_INFO */ 2576 radeon_emit(cs, cb_color_attrib); /* R_028C74_CB_COLOR0_ATTRIB */ 2577 radeon_emit(cs, cb->cb_dcc_control); /* R_028C78_CB_COLOR0_DCC_CONTROL */ 2578 radeon_emit(cs, tex->cmask.base_address_reg); /* R_028C7C_CB_COLOR0_CMASK */ 2579 radeon_emit(cs, tex->cmask.slice_tile_max); /* R_028C80_CB_COLOR0_CMASK_SLICE */ 2580 radeon_emit(cs, cb_color_fmask); /* R_028C84_CB_COLOR0_FMASK */ 2581 radeon_emit(cs, cb_color_fmask_slice); /* R_028C88_CB_COLOR0_FMASK_SLICE */ 2582 radeon_emit(cs, tex->color_clear_value[0]); /* R_028C8C_CB_COLOR0_CLEAR_WORD0 */ 2583 radeon_emit(cs, tex->color_clear_value[1]); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */ 2584 2585 if (sctx->b.chip_class >= VI) /* R_028C94_CB_COLOR0_DCC_BASE */ 2586 radeon_emit(cs, ((!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) + 2587 tex->dcc_offset + 2588 tex->surface.level[cb->base.u.tex.level].dcc_offset) >> 8); 2589 } 2590 for (; i < 8 ; i++) 2591 if (sctx->framebuffer.dirty_cbufs & (1 << i)) 2592 radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); 2593 2594 /* ZS buffer. */ 2595 if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) { 2596 struct r600_surface *zb = (struct r600_surface*)state->zsbuf; 2597 struct r600_texture *rtex = (struct r600_texture*)zb->base.texture; 2598 2599 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2600 &rtex->resource, RADEON_USAGE_READWRITE, 2601 zb->base.texture->nr_samples > 1 ? 
2602 RADEON_PRIO_DEPTH_BUFFER_MSAA : 2603 RADEON_PRIO_DEPTH_BUFFER); 2604 2605 if (zb->db_htile_data_base) { 2606 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2607 rtex->htile_buffer, RADEON_USAGE_READWRITE, 2608 RADEON_PRIO_HTILE); 2609 } 2610 2611 radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view); 2612 radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base); 2613 2614 radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9); 2615 radeon_emit(cs, zb->db_depth_info); /* R_02803C_DB_DEPTH_INFO */ 2616 radeon_emit(cs, zb->db_z_info | /* R_028040_DB_Z_INFO */ 2617 S_028040_ZRANGE_PRECISION(rtex->depth_clear_value != 0)); 2618 radeon_emit(cs, zb->db_stencil_info); /* R_028044_DB_STENCIL_INFO */ 2619 radeon_emit(cs, zb->db_depth_base); /* R_028048_DB_Z_READ_BASE */ 2620 radeon_emit(cs, zb->db_stencil_base); /* R_02804C_DB_STENCIL_READ_BASE */ 2621 radeon_emit(cs, zb->db_depth_base); /* R_028050_DB_Z_WRITE_BASE */ 2622 radeon_emit(cs, zb->db_stencil_base); /* R_028054_DB_STENCIL_WRITE_BASE */ 2623 radeon_emit(cs, zb->db_depth_size); /* R_028058_DB_DEPTH_SIZE */ 2624 radeon_emit(cs, zb->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */ 2625 2626 radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2); 2627 radeon_emit(cs, rtex->stencil_clear_value); /* R_028028_DB_STENCIL_CLEAR */ 2628 radeon_emit(cs, fui(rtex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */ 2629 2630 radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface); 2631 } else if (sctx->framebuffer.dirty_zsbuf) { 2632 radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2); 2633 radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */ 2634 radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */ 2635 } 2636 2637 /* Framebuffer dimensions. 
*/ 2638 /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */ 2639 radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR, 2640 S_028208_BR_X(state->width) | S_028208_BR_Y(state->height)); 2641 2642 sctx->framebuffer.dirty_cbufs = 0; 2643 sctx->framebuffer.dirty_zsbuf = false; 2644} 2645 2646static void si_emit_msaa_sample_locs(struct si_context *sctx, 2647 struct r600_atom *atom) 2648{ 2649 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 2650 unsigned nr_samples = sctx->framebuffer.nr_samples; 2651 2652 /* Smoothing (only possible with nr_samples == 1) uses the same 2653 * sample locations as the MSAA it simulates. 2654 */ 2655 if (nr_samples <= 1 && sctx->smoothing_enabled) 2656 nr_samples = SI_NUM_SMOOTH_AA_SAMPLES; 2657 2658 /* On Polaris, the small primitive filter uses the sample locations 2659 * even when MSAA is off, so we need to make sure they're set to 0. 2660 */ 2661 if (sctx->b.family >= CHIP_POLARIS10) 2662 nr_samples = MAX2(nr_samples, 1); 2663 2664 if (nr_samples >= 1 && 2665 (nr_samples != sctx->msaa_sample_locs.nr_samples)) { 2666 sctx->msaa_sample_locs.nr_samples = nr_samples; 2667 cayman_emit_msaa_sample_locs(cs, nr_samples); 2668 } 2669 2670 if (sctx->b.family >= CHIP_POLARIS10) { 2671 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 2672 unsigned small_prim_filter_cntl = 2673 S_028830_SMALL_PRIM_FILTER_ENABLE(1) | 2674 S_028830_LINE_FILTER_DISABLE(1); /* line bug */ 2675 2676 /* The alternative of setting sample locations to 0 would 2677 * require a DB flush to avoid Z errors, see 2678 * https://bugs.freedesktop.org/show_bug.cgi?id=96908 2679 */ 2680 if (sctx->framebuffer.nr_samples > 1 && rs && !rs->multisample_enable) 2681 small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE; 2682 2683 radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, 2684 small_prim_filter_cntl); 2685 } 2686} 2687 2688static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom) 2689{ 2690 struct 
radeon_winsys_cs *cs = sctx->b.gfx.cs; 2691 unsigned num_tile_pipes = sctx->screen->b.info.num_tile_pipes; 2692 /* 33% faster rendering to linear color buffers */ 2693 bool dst_is_linear = sctx->framebuffer.any_dst_linear; 2694 unsigned sc_mode_cntl_1 = 2695 S_028A4C_WALK_SIZE(dst_is_linear) | 2696 S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) | 2697 S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) | 2698 /* always 1: */ 2699 S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) | 2700 S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) | 2701 S_028A4C_TILE_WALK_ORDER_ENABLE(1) | 2702 S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) | 2703 S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | 2704 S_028A4C_FORCE_EOV_REZ_ENABLE(1); 2705 2706 cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples, 2707 sctx->ps_iter_samples, 2708 sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0, 2709 sc_mode_cntl_1); 2710} 2711 2712static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples) 2713{ 2714 struct si_context *sctx = (struct si_context *)ctx; 2715 2716 if (sctx->ps_iter_samples == min_samples) 2717 return; 2718 2719 sctx->ps_iter_samples = min_samples; 2720 sctx->do_update_shaders = true; 2721 2722 if (sctx->framebuffer.nr_samples > 1) 2723 si_mark_atom_dirty(sctx, &sctx->msaa_config); 2724} 2725 2726/* 2727 * Samplers 2728 */ 2729 2730/** 2731 * Build the sampler view descriptor for a buffer texture. 
2732 * @param state 256-bit descriptor; only the high 128 bits are filled in 2733 */ 2734void 2735si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf, 2736 enum pipe_format format, 2737 unsigned offset, unsigned size, 2738 uint32_t *state) 2739{ 2740 const struct util_format_description *desc; 2741 int first_non_void; 2742 unsigned stride; 2743 unsigned num_records; 2744 unsigned num_format, data_format; 2745 2746 desc = util_format_description(format); 2747 first_non_void = util_format_get_first_non_void_channel(format); 2748 stride = desc->block.bits / 8; 2749 num_format = si_translate_buffer_numformat(&screen->b.b, desc, first_non_void); 2750 data_format = si_translate_buffer_dataformat(&screen->b.b, desc, first_non_void); 2751 2752 num_records = size / stride; 2753 num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride); 2754 2755 if (screen->b.chip_class >= VI) 2756 num_records *= stride; 2757 2758 state[4] = 0; 2759 state[5] = S_008F04_STRIDE(stride); 2760 state[6] = num_records; 2761 state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 2762 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 2763 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 2764 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | 2765 S_008F0C_NUM_FORMAT(num_format) | 2766 S_008F0C_DATA_FORMAT(data_format); 2767} 2768 2769/** 2770 * Build the sampler view descriptor for a texture. 
2771 */ 2772void 2773si_make_texture_descriptor(struct si_screen *screen, 2774 struct r600_texture *tex, 2775 bool sampler, 2776 enum pipe_texture_target target, 2777 enum pipe_format pipe_format, 2778 const unsigned char state_swizzle[4], 2779 unsigned first_level, unsigned last_level, 2780 unsigned first_layer, unsigned last_layer, 2781 unsigned width, unsigned height, unsigned depth, 2782 uint32_t *state, 2783 uint32_t *fmask_state) 2784{ 2785 struct pipe_resource *res = &tex->resource.b.b; 2786 const struct util_format_description *desc; 2787 unsigned char swizzle[4]; 2788 int first_non_void; 2789 unsigned num_format, data_format, type; 2790 uint64_t va; 2791 2792 desc = util_format_description(pipe_format); 2793 2794 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 2795 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; 2796 const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1}; 2797 2798 switch (pipe_format) { 2799 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2800 case PIPE_FORMAT_X24S8_UINT: 2801 case PIPE_FORMAT_X32_S8X24_UINT: 2802 case PIPE_FORMAT_X8Z24_UNORM: 2803 util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); 2804 break; 2805 default: 2806 util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle); 2807 } 2808 } else { 2809 util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle); 2810 } 2811 2812 first_non_void = util_format_get_first_non_void_channel(pipe_format); 2813 2814 switch (pipe_format) { 2815 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2816 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2817 break; 2818 default: 2819 if (first_non_void < 0) { 2820 if (util_format_is_compressed(pipe_format)) { 2821 switch (pipe_format) { 2822 case PIPE_FORMAT_DXT1_SRGB: 2823 case PIPE_FORMAT_DXT1_SRGBA: 2824 case PIPE_FORMAT_DXT3_SRGBA: 2825 case PIPE_FORMAT_DXT5_SRGBA: 2826 case PIPE_FORMAT_BPTC_SRGBA: 2827 case PIPE_FORMAT_ETC2_SRGB8: 2828 case PIPE_FORMAT_ETC2_SRGB8A1: 2829 case PIPE_FORMAT_ETC2_SRGBA8: 2830 num_format = 
V_008F14_IMG_NUM_FORMAT_SRGB; 2831 break; 2832 case PIPE_FORMAT_RGTC1_SNORM: 2833 case PIPE_FORMAT_LATC1_SNORM: 2834 case PIPE_FORMAT_RGTC2_SNORM: 2835 case PIPE_FORMAT_LATC2_SNORM: 2836 case PIPE_FORMAT_ETC2_R11_SNORM: 2837 case PIPE_FORMAT_ETC2_RG11_SNORM: 2838 /* implies float, so use SNORM/UNORM to determine 2839 whether data is signed or not */ 2840 case PIPE_FORMAT_BPTC_RGB_FLOAT: 2841 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 2842 break; 2843 default: 2844 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2845 break; 2846 } 2847 } else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 2848 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2849 } else { 2850 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 2851 } 2852 } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { 2853 num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 2854 } else { 2855 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2856 2857 switch (desc->channel[first_non_void].type) { 2858 case UTIL_FORMAT_TYPE_FLOAT: 2859 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 2860 break; 2861 case UTIL_FORMAT_TYPE_SIGNED: 2862 if (desc->channel[first_non_void].normalized) 2863 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 2864 else if (desc->channel[first_non_void].pure_integer) 2865 num_format = V_008F14_IMG_NUM_FORMAT_SINT; 2866 else 2867 num_format = V_008F14_IMG_NUM_FORMAT_SSCALED; 2868 break; 2869 case UTIL_FORMAT_TYPE_UNSIGNED: 2870 if (desc->channel[first_non_void].normalized) 2871 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2872 else if (desc->channel[first_non_void].pure_integer) 2873 num_format = V_008F14_IMG_NUM_FORMAT_UINT; 2874 else 2875 num_format = V_008F14_IMG_NUM_FORMAT_USCALED; 2876 } 2877 } 2878 } 2879 2880 data_format = si_translate_texformat(&screen->b.b, pipe_format, desc, first_non_void); 2881 if (data_format == ~0) { 2882 data_format = 0; 2883 } 2884 2885 if (!sampler && 2886 (res->target == PIPE_TEXTURE_CUBE || 2887 res->target == PIPE_TEXTURE_CUBE_ARRAY || 2888 res->target == PIPE_TEXTURE_3D)) { 
2889 /* For the purpose of shader images, treat cube maps and 3D 2890 * textures as 2D arrays. For 3D textures, the address 2891 * calculations for mipmaps are different, so we rely on the 2892 * caller to effectively disable mipmaps. 2893 */ 2894 type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 2895 2896 assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0)); 2897 } else { 2898 type = si_tex_dim(res->target, target, res->nr_samples); 2899 } 2900 2901 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { 2902 height = 1; 2903 depth = res->array_size; 2904 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || 2905 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) { 2906 if (sampler || res->target != PIPE_TEXTURE_3D) 2907 depth = res->array_size; 2908 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE) 2909 depth = res->array_size / 6; 2910 2911 state[0] = 0; 2912 state[1] = (S_008F14_DATA_FORMAT(data_format) | 2913 S_008F14_NUM_FORMAT(num_format)); 2914 state[2] = (S_008F18_WIDTH(width - 1) | 2915 S_008F18_HEIGHT(height - 1) | 2916 S_008F18_PERF_MOD(4)); 2917 state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) | 2918 S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | 2919 S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | 2920 S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) | 2921 S_008F1C_BASE_LEVEL(res->nr_samples > 1 ? 2922 0 : first_level) | 2923 S_008F1C_LAST_LEVEL(res->nr_samples > 1 ? 2924 util_logbase2(res->nr_samples) : 2925 last_level) | 2926 S_008F1C_POW2_PAD(res->last_level > 0) | 2927 S_008F1C_TYPE(type)); 2928 state[4] = S_008F20_DEPTH(depth - 1); 2929 state[5] = (S_008F24_BASE_ARRAY(first_layer) | 2930 S_008F24_LAST_ARRAY(last_layer)); 2931 state[6] = 0; 2932 state[7] = 0; 2933 2934 if (tex->dcc_offset) { 2935 unsigned swap = r600_translate_colorswap(pipe_format, false); 2936 2937 state[6] = S_008F28_ALPHA_IS_ON_MSB(swap <= 1); 2938 } else { 2939 /* The last dword is unused by hw. The shader uses it to clear 2940 * bits in the first dword of sampler state. 
2941 */ 2942 if (screen->b.chip_class <= CIK && res->nr_samples <= 1) { 2943 if (first_level == last_level) 2944 state[7] = C_008F30_MAX_ANISO_RATIO; 2945 else 2946 state[7] = 0xffffffff; 2947 } 2948 } 2949 2950 /* Initialize the sampler view for FMASK. */ 2951 if (tex->fmask.size) { 2952 uint32_t fmask_format; 2953 2954 va = tex->resource.gpu_address + tex->fmask.offset; 2955 2956 switch (res->nr_samples) { 2957 case 2: 2958 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2; 2959 break; 2960 case 4: 2961 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4; 2962 break; 2963 case 8: 2964 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8; 2965 break; 2966 default: 2967 assert(0); 2968 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID; 2969 } 2970 2971 fmask_state[0] = va >> 8; 2972 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | 2973 S_008F14_DATA_FORMAT(fmask_format) | 2974 S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT); 2975 fmask_state[2] = S_008F18_WIDTH(width - 1) | 2976 S_008F18_HEIGHT(height - 1); 2977 fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | 2978 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | 2979 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | 2980 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | 2981 S_008F1C_TILING_INDEX(tex->fmask.tile_mode_index) | 2982 S_008F1C_TYPE(si_tex_dim(res->target, target, 0)); 2983 fmask_state[4] = S_008F20_DEPTH(depth - 1) | 2984 S_008F20_PITCH(tex->fmask.pitch_in_pixels - 1); 2985 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer) | 2986 S_008F24_LAST_ARRAY(last_layer); 2987 fmask_state[6] = 0; 2988 fmask_state[7] = 0; 2989 } 2990} 2991 2992/** 2993 * Create a sampler view. 
2994 * 2995 * @param ctx context 2996 * @param texture texture 2997 * @param state sampler view template 2998 * @param width0 width0 override (for compressed textures as int) 2999 * @param height0 height0 override (for compressed textures as int) 3000 * @param force_level set the base address to the level (for compressed textures) 3001 */ 3002struct pipe_sampler_view * 3003si_create_sampler_view_custom(struct pipe_context *ctx, 3004 struct pipe_resource *texture, 3005 const struct pipe_sampler_view *state, 3006 unsigned width0, unsigned height0, 3007 unsigned force_level) 3008{ 3009 struct si_context *sctx = (struct si_context*)ctx; 3010 struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view); 3011 struct r600_texture *tmp = (struct r600_texture*)texture; 3012 unsigned base_level, first_level, last_level; 3013 unsigned char state_swizzle[4]; 3014 unsigned height, depth, width; 3015 unsigned last_layer = state->u.tex.last_layer; 3016 enum pipe_format pipe_format; 3017 const struct radeon_surf_level *surflevel; 3018 3019 if (!view) 3020 return NULL; 3021 3022 /* initialize base object */ 3023 view->base = *state; 3024 view->base.texture = NULL; 3025 view->base.reference.count = 1; 3026 view->base.context = ctx; 3027 3028 assert(texture); 3029 pipe_resource_reference(&view->base.texture, texture); 3030 3031 if (state->format == PIPE_FORMAT_X24S8_UINT || 3032 state->format == PIPE_FORMAT_S8X24_UINT || 3033 state->format == PIPE_FORMAT_X32_S8X24_UINT || 3034 state->format == PIPE_FORMAT_S8_UINT) 3035 view->is_stencil_sampler = true; 3036 3037 /* Buffer resource. 
*/ 3038 if (texture->target == PIPE_BUFFER) { 3039 si_make_buffer_descriptor(sctx->screen, 3040 (struct r600_resource *)texture, 3041 state->format, 3042 state->u.buf.offset, 3043 state->u.buf.size, 3044 view->state); 3045 return &view->base; 3046 } 3047 3048 state_swizzle[0] = state->swizzle_r; 3049 state_swizzle[1] = state->swizzle_g; 3050 state_swizzle[2] = state->swizzle_b; 3051 state_swizzle[3] = state->swizzle_a; 3052 3053 base_level = 0; 3054 first_level = state->u.tex.first_level; 3055 last_level = state->u.tex.last_level; 3056 width = width0; 3057 height = height0; 3058 depth = texture->depth0; 3059 3060 if (force_level) { 3061 assert(force_level == first_level && 3062 force_level == last_level); 3063 base_level = force_level; 3064 first_level = 0; 3065 last_level = 0; 3066 width = u_minify(width, force_level); 3067 height = u_minify(height, force_level); 3068 depth = u_minify(depth, force_level); 3069 } 3070 3071 /* This is not needed if state trackers set last_layer correctly. */ 3072 if (state->target == PIPE_TEXTURE_1D || 3073 state->target == PIPE_TEXTURE_2D || 3074 state->target == PIPE_TEXTURE_RECT || 3075 state->target == PIPE_TEXTURE_CUBE) 3076 last_layer = state->u.tex.first_layer; 3077 3078 /* Texturing with separate depth and stencil. */ 3079 pipe_format = state->format; 3080 3081 /* Depth/stencil texturing sometimes needs separate texture. */ 3082 if (tmp->is_depth && !r600_can_sample_zs(tmp, view->is_stencil_sampler)) { 3083 if (!tmp->flushed_depth_texture && 3084 !r600_init_flushed_depth_texture(ctx, texture, NULL)) { 3085 pipe_resource_reference(&view->base.texture, NULL); 3086 FREE(view); 3087 return NULL; 3088 } 3089 3090 assert(tmp->flushed_depth_texture); 3091 3092 /* Override format for the case where the flushed texture 3093 * contains only Z or only S. 
3094 */ 3095 if (tmp->flushed_depth_texture->resource.b.b.format != tmp->resource.b.b.format) 3096 pipe_format = tmp->flushed_depth_texture->resource.b.b.format; 3097 3098 tmp = tmp->flushed_depth_texture; 3099 } 3100 3101 surflevel = tmp->surface.level; 3102 3103 if (tmp->db_compatible) { 3104 if (!view->is_stencil_sampler) 3105 pipe_format = tmp->db_render_format; 3106 3107 switch (pipe_format) { 3108 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 3109 pipe_format = PIPE_FORMAT_Z32_FLOAT; 3110 break; 3111 case PIPE_FORMAT_X8Z24_UNORM: 3112 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 3113 /* Z24 is always stored like this for DB 3114 * compatibility. 3115 */ 3116 pipe_format = PIPE_FORMAT_Z24X8_UNORM; 3117 break; 3118 case PIPE_FORMAT_X24S8_UINT: 3119 case PIPE_FORMAT_S8X24_UINT: 3120 case PIPE_FORMAT_X32_S8X24_UINT: 3121 pipe_format = PIPE_FORMAT_S8_UINT; 3122 surflevel = tmp->surface.stencil_level; 3123 break; 3124 default:; 3125 } 3126 } 3127 3128 vi_dcc_disable_if_incompatible_format(&sctx->b, texture, 3129 state->u.tex.first_level, 3130 state->format); 3131 3132 si_make_texture_descriptor(sctx->screen, tmp, true, 3133 state->target, pipe_format, state_swizzle, 3134 first_level, last_level, 3135 state->u.tex.first_layer, last_layer, 3136 width, height, depth, 3137 view->state, view->fmask_state); 3138 3139 view->base_level_info = &surflevel[base_level]; 3140 view->base_level = base_level; 3141 view->block_width = util_format_get_blockwidth(pipe_format); 3142 return &view->base; 3143} 3144 3145static struct pipe_sampler_view * 3146si_create_sampler_view(struct pipe_context *ctx, 3147 struct pipe_resource *texture, 3148 const struct pipe_sampler_view *state) 3149{ 3150 return si_create_sampler_view_custom(ctx, texture, state, 3151 texture ? texture->width0 : 0, 3152 texture ? 
texture->height0 : 0, 0); 3153} 3154 3155static void si_sampler_view_destroy(struct pipe_context *ctx, 3156 struct pipe_sampler_view *state) 3157{ 3158 struct si_sampler_view *view = (struct si_sampler_view *)state; 3159 3160 pipe_resource_reference(&state->texture, NULL); 3161 FREE(view); 3162} 3163 3164static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter) 3165{ 3166 return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER || 3167 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER || 3168 (linear_filter && 3169 (wrap == PIPE_TEX_WRAP_CLAMP || 3170 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP)); 3171} 3172 3173static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state) 3174{ 3175 bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST || 3176 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST; 3177 3178 return (state->border_color.ui[0] || state->border_color.ui[1] || 3179 state->border_color.ui[2] || state->border_color.ui[3]) && 3180 (wrap_mode_uses_border_color(state->wrap_s, linear_filter) || 3181 wrap_mode_uses_border_color(state->wrap_t, linear_filter) || 3182 wrap_mode_uses_border_color(state->wrap_r, linear_filter)); 3183} 3184 3185static void *si_create_sampler_state(struct pipe_context *ctx, 3186 const struct pipe_sampler_state *state) 3187{ 3188 struct si_context *sctx = (struct si_context *)ctx; 3189 struct r600_common_screen *rscreen = sctx->b.screen; 3190 struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state); 3191 unsigned border_color_type, border_color_index = 0; 3192 unsigned max_aniso = rscreen->force_aniso >= 0 ? 
rscreen->force_aniso 3193 : state->max_anisotropy; 3194 unsigned max_aniso_ratio = r600_tex_aniso_filter(max_aniso); 3195 3196 if (!rstate) { 3197 return NULL; 3198 } 3199 3200 if (!sampler_state_needs_border_color(state)) 3201 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; 3202 else if (state->border_color.f[0] == 0 && 3203 state->border_color.f[1] == 0 && 3204 state->border_color.f[2] == 0 && 3205 state->border_color.f[3] == 0) 3206 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; 3207 else if (state->border_color.f[0] == 0 && 3208 state->border_color.f[1] == 0 && 3209 state->border_color.f[2] == 0 && 3210 state->border_color.f[3] == 1) 3211 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK; 3212 else if (state->border_color.f[0] == 1 && 3213 state->border_color.f[1] == 1 && 3214 state->border_color.f[2] == 1 && 3215 state->border_color.f[3] == 1) 3216 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE; 3217 else { 3218 int i; 3219 3220 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER; 3221 3222 /* Check if the border has been uploaded already. */ 3223 for (i = 0; i < sctx->border_color_count; i++) 3224 if (memcmp(&sctx->border_color_table[i], &state->border_color, 3225 sizeof(state->border_color)) == 0) 3226 break; 3227 3228 if (i >= SI_MAX_BORDER_COLORS) { 3229 /* Getting 4096 unique border colors is very unlikely. */ 3230 fprintf(stderr, "radeonsi: The border color table is full. " 3231 "Any new border colors will be just black. " 3232 "Please file a bug.\n"); 3233 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; 3234 } else { 3235 if (i == sctx->border_color_count) { 3236 /* Upload a new border color. 
*/ 3237 memcpy(&sctx->border_color_table[i], &state->border_color, 3238 sizeof(state->border_color)); 3239 util_memcpy_cpu_to_le32(&sctx->border_color_map[i], 3240 &state->border_color, 3241 sizeof(state->border_color)); 3242 sctx->border_color_count++; 3243 } 3244 3245 border_color_index = i; 3246 } 3247 } 3248 3249#ifdef DEBUG 3250 rstate->magic = SI_SAMPLER_STATE_MAGIC; 3251#endif 3252 rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) | 3253 S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) | 3254 S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) | 3255 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | 3256 S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) | 3257 S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) | 3258 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | 3259 S_008F30_ANISO_BIAS(max_aniso_ratio) | 3260 S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) | 3261 S_008F30_COMPAT_MODE(sctx->b.chip_class >= VI)); 3262 rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | 3263 S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) | 3264 S_008F34_PERF_MIP(max_aniso_ratio ? 
max_aniso_ratio + 6 : 0)); 3265 rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | 3266 S_008F38_XY_MAG_FILTER(eg_tex_filter(state->mag_img_filter, max_aniso)) | 3267 S_008F38_XY_MIN_FILTER(eg_tex_filter(state->min_img_filter, max_aniso)) | 3268 S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) | 3269 S_008F38_MIP_POINT_PRECLAMP(1) | 3270 S_008F38_DISABLE_LSB_CEIL(1) | 3271 S_008F38_FILTER_PREC_FIX(1) | 3272 S_008F38_ANISO_OVERRIDE(sctx->b.chip_class >= VI)); 3273 rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) | 3274 S_008F3C_BORDER_COLOR_TYPE(border_color_type); 3275 return rstate; 3276} 3277 3278static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) 3279{ 3280 struct si_context *sctx = (struct si_context *)ctx; 3281 3282 if (sctx->sample_mask.sample_mask == (uint16_t)sample_mask) 3283 return; 3284 3285 sctx->sample_mask.sample_mask = sample_mask; 3286 si_mark_atom_dirty(sctx, &sctx->sample_mask.atom); 3287} 3288 3289static void si_emit_sample_mask(struct si_context *sctx, struct r600_atom *atom) 3290{ 3291 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 3292 unsigned mask = sctx->sample_mask.sample_mask; 3293 3294 /* Needed for line and polygon smoothing as well as for the Polaris 3295 * small primitive filter. We expect the state tracker to take care of 3296 * this for us. 
3297 */ 3298 assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 || 3299 (mask & 1 && sctx->blitter->running)); 3300 3301 radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); 3302 radeon_emit(cs, mask | (mask << 16)); 3303 radeon_emit(cs, mask | (mask << 16)); 3304} 3305 3306static void si_delete_sampler_state(struct pipe_context *ctx, void *state) 3307{ 3308#ifdef DEBUG 3309 struct si_sampler_state *s = state; 3310 3311 assert(s->magic == SI_SAMPLER_STATE_MAGIC); 3312 s->magic = 0; 3313#endif 3314 free(state); 3315} 3316 3317/* 3318 * Vertex elements & buffers 3319 */ 3320 3321static void *si_create_vertex_elements(struct pipe_context *ctx, 3322 unsigned count, 3323 const struct pipe_vertex_element *elements) 3324{ 3325 struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element); 3326 int i; 3327 3328 assert(count <= SI_MAX_ATTRIBS); 3329 if (!v) 3330 return NULL; 3331 3332 v->count = count; 3333 for (i = 0; i < count; ++i) { 3334 const struct util_format_description *desc; 3335 const struct util_format_channel_description *channel; 3336 unsigned data_format, num_format; 3337 int first_non_void; 3338 unsigned vbo_index = elements[i].vertex_buffer_index; 3339 3340 if (vbo_index >= SI_NUM_VERTEX_BUFFERS) { 3341 FREE(v); 3342 return NULL; 3343 } 3344 3345 desc = util_format_description(elements[i].src_format); 3346 first_non_void = util_format_get_first_non_void_channel(elements[i].src_format); 3347 data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); 3348 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); 3349 channel = &desc->channel[first_non_void]; 3350 3351 v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 3352 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 3353 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 3354 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | 3355 S_008F0C_NUM_FORMAT(num_format) | 3356 
S_008F0C_DATA_FORMAT(data_format); 3357 v->format_size[i] = desc->block.bits / 8; 3358 3359 /* The hardware always treats the 2-bit alpha channel as 3360 * unsigned, so a shader workaround is needed. 3361 */ 3362 if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) { 3363 if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) { 3364 v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SNORM << (4 * i); 3365 } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) { 3366 v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SSCALED << (4 * i); 3367 } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) { 3368 /* This isn't actually used in OpenGL. */ 3369 v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SINT << (4 * i); 3370 } 3371 } else if (channel->type == UTIL_FORMAT_TYPE_FIXED) { 3372 if (desc->swizzle[3] == PIPE_SWIZZLE_1) 3373 v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_FIXED << (4 * i); 3374 else 3375 v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_FIXED << (4 * i); 3376 } else if (channel->size == 32 && !channel->pure_integer) { 3377 if (channel->type == UTIL_FORMAT_TYPE_SIGNED) { 3378 if (channel->normalized) { 3379 if (desc->swizzle[3] == PIPE_SWIZZLE_1) 3380 v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_SNORM << (4 * i); 3381 else 3382 v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_SNORM << (4 * i); 3383 } else { 3384 v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_SSCALED << (4 * i); 3385 } 3386 } else if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED) { 3387 if (channel->normalized) { 3388 if (desc->swizzle[3] == PIPE_SWIZZLE_1) 3389 v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_UNORM << (4 * i); 3390 else 3391 v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_UNORM << (4 * i); 3392 } else { 3393 v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_USCALED << (4 * i); 3394 } 3395 } 3396 } 3397 3398 /* We work around the fact that 8_8_8 and 16_16_16 data formats 3399 * do not exist by using the corresponding 4-component formats. 3400 * This requires a fixup of the descriptor for bounds checks. 
3401 */ 3402 if (desc->block.bits == 3 * 8 || 3403 desc->block.bits == 3 * 16) { 3404 v->fix_size3 |= (desc->block.bits / 24) << (2 * i); 3405 } 3406 } 3407 memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count); 3408 3409 return v; 3410} 3411 3412static void si_bind_vertex_elements(struct pipe_context *ctx, void *state) 3413{ 3414 struct si_context *sctx = (struct si_context *)ctx; 3415 struct si_vertex_element *v = (struct si_vertex_element*)state; 3416 3417 sctx->vertex_elements = v; 3418 sctx->vertex_buffers_dirty = true; 3419 sctx->do_update_shaders = true; 3420} 3421 3422static void si_delete_vertex_element(struct pipe_context *ctx, void *state) 3423{ 3424 struct si_context *sctx = (struct si_context *)ctx; 3425 3426 if (sctx->vertex_elements == state) 3427 sctx->vertex_elements = NULL; 3428 FREE(state); 3429} 3430 3431static void si_set_vertex_buffers(struct pipe_context *ctx, 3432 unsigned start_slot, unsigned count, 3433 const struct pipe_vertex_buffer *buffers) 3434{ 3435 struct si_context *sctx = (struct si_context *)ctx; 3436 struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot; 3437 int i; 3438 3439 assert(start_slot + count <= ARRAY_SIZE(sctx->vertex_buffer)); 3440 3441 if (buffers) { 3442 for (i = 0; i < count; i++) { 3443 const struct pipe_vertex_buffer *src = buffers + i; 3444 struct pipe_vertex_buffer *dsti = dst + i; 3445 struct pipe_resource *buf = src->buffer; 3446 3447 pipe_resource_reference(&dsti->buffer, buf); 3448 dsti->buffer_offset = src->buffer_offset; 3449 dsti->stride = src->stride; 3450 r600_context_add_resource_size(ctx, buf); 3451 if (buf) 3452 r600_resource(buf)->bind_history |= PIPE_BIND_VERTEX_BUFFER; 3453 } 3454 } else { 3455 for (i = 0; i < count; i++) { 3456 pipe_resource_reference(&dst[i].buffer, NULL); 3457 } 3458 } 3459 sctx->vertex_buffers_dirty = true; 3460} 3461 3462static void si_set_index_buffer(struct pipe_context *ctx, 3463 const struct pipe_index_buffer *ib) 3464{ 3465 struct 
si_context *sctx = (struct si_context *)ctx; 3466 3467 if (ib) { 3468 struct pipe_resource *buf = ib->buffer; 3469 3470 pipe_resource_reference(&sctx->index_buffer.buffer, buf); 3471 memcpy(&sctx->index_buffer, ib, sizeof(*ib)); 3472 r600_context_add_resource_size(ctx, buf); 3473 if (buf) 3474 r600_resource(buf)->bind_history |= PIPE_BIND_INDEX_BUFFER; 3475 } else { 3476 pipe_resource_reference(&sctx->index_buffer.buffer, NULL); 3477 } 3478} 3479 3480/* 3481 * Misc 3482 */ 3483 3484static void si_set_tess_state(struct pipe_context *ctx, 3485 const float default_outer_level[4], 3486 const float default_inner_level[2]) 3487{ 3488 struct si_context *sctx = (struct si_context *)ctx; 3489 struct pipe_constant_buffer cb; 3490 float array[8]; 3491 3492 memcpy(array, default_outer_level, sizeof(float) * 4); 3493 memcpy(array+4, default_inner_level, sizeof(float) * 2); 3494 3495 cb.buffer = NULL; 3496 cb.user_buffer = NULL; 3497 cb.buffer_size = sizeof(array); 3498 3499 si_upload_const_buffer(sctx, (struct r600_resource**)&cb.buffer, 3500 (void*)array, sizeof(array), 3501 &cb.buffer_offset); 3502 3503 si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb); 3504 pipe_resource_reference(&cb.buffer, NULL); 3505} 3506 3507static void si_texture_barrier(struct pipe_context *ctx, unsigned flags) 3508{ 3509 struct si_context *sctx = (struct si_context *)ctx; 3510 3511 sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | 3512 SI_CONTEXT_INV_GLOBAL_L2 | 3513 SI_CONTEXT_FLUSH_AND_INV_CB; 3514} 3515 3516/* This only ensures coherency for shader image/buffer stores. */ 3517static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) 3518{ 3519 struct si_context *sctx = (struct si_context *)ctx; 3520 3521 /* Subsequent commands must wait for all shader invocations to 3522 * complete. 
*/ 3523 sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | 3524 SI_CONTEXT_CS_PARTIAL_FLUSH; 3525 3526 if (flags & PIPE_BARRIER_CONSTANT_BUFFER) 3527 sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 | 3528 SI_CONTEXT_INV_VMEM_L1; 3529 3530 if (flags & (PIPE_BARRIER_VERTEX_BUFFER | 3531 PIPE_BARRIER_SHADER_BUFFER | 3532 PIPE_BARRIER_TEXTURE | 3533 PIPE_BARRIER_IMAGE | 3534 PIPE_BARRIER_STREAMOUT_BUFFER | 3535 PIPE_BARRIER_GLOBAL_BUFFER)) { 3536 /* As far as I can tell, L1 contents are written back to L2 3537 * automatically at end of shader, but the contents of other 3538 * L1 caches might still be stale. */ 3539 sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1; 3540 } 3541 3542 if (flags & PIPE_BARRIER_INDEX_BUFFER) { 3543 /* Indices are read through TC L2 since VI. 3544 * L1 isn't used. 3545 */ 3546 if (sctx->screen->b.chip_class <= CIK) 3547 sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; 3548 } 3549 3550 if (flags & PIPE_BARRIER_FRAMEBUFFER) 3551 sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; 3552 3553 if (flags & (PIPE_BARRIER_FRAMEBUFFER | 3554 PIPE_BARRIER_INDIRECT_BUFFER)) 3555 sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; 3556} 3557 3558static void *si_create_blend_custom(struct si_context *sctx, unsigned mode) 3559{ 3560 struct pipe_blend_state blend; 3561 3562 memset(&blend, 0, sizeof(blend)); 3563 blend.independent_blend_enable = true; 3564 blend.rt[0].colormask = 0xf; 3565 return si_create_blend_state_mode(&sctx->b.b, &blend, mode); 3566} 3567 3568static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw, 3569 bool include_draw_vbo) 3570{ 3571 si_need_cs_space((struct si_context*)ctx); 3572} 3573 3574static void si_init_config(struct si_context *sctx); 3575 3576void si_init_state_functions(struct si_context *sctx) 3577{ 3578 si_init_external_atom(sctx, &sctx->b.render_cond_atom, &sctx->atoms.s.render_cond); 3579 si_init_external_atom(sctx, &sctx->b.streamout.begin_atom, &sctx->atoms.s.streamout_begin); 3580 si_init_external_atom(sctx, 
&sctx->b.streamout.enable_atom, &sctx->atoms.s.streamout_enable); 3581 si_init_external_atom(sctx, &sctx->b.scissors.atom, &sctx->atoms.s.scissors); 3582 si_init_external_atom(sctx, &sctx->b.viewports.atom, &sctx->atoms.s.viewports); 3583 3584 si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state); 3585 si_init_atom(sctx, &sctx->msaa_sample_locs.atom, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs); 3586 si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state); 3587 si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config); 3588 si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask); 3589 si_init_atom(sctx, &sctx->cb_render_state, &sctx->atoms.s.cb_render_state, si_emit_cb_render_state); 3590 si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color); 3591 si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs); 3592 si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state); 3593 si_init_atom(sctx, &sctx->stencil_ref.atom, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref); 3594 3595 sctx->b.b.create_blend_state = si_create_blend_state; 3596 sctx->b.b.bind_blend_state = si_bind_blend_state; 3597 sctx->b.b.delete_blend_state = si_delete_blend_state; 3598 sctx->b.b.set_blend_color = si_set_blend_color; 3599 3600 sctx->b.b.create_rasterizer_state = si_create_rs_state; 3601 sctx->b.b.bind_rasterizer_state = si_bind_rs_state; 3602 sctx->b.b.delete_rasterizer_state = si_delete_rs_state; 3603 3604 sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state; 3605 sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state; 3606 sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state; 3607 3608 sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx); 3609 sctx->custom_blend_resolve = 
si_create_blend_custom(sctx, V_028808_CB_RESOLVE); 3610 sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS); 3611 sctx->custom_blend_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR); 3612 sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, V_028808_CB_DCC_DECOMPRESS); 3613 3614 sctx->b.b.set_clip_state = si_set_clip_state; 3615 sctx->b.b.set_stencil_ref = si_set_stencil_ref; 3616 3617 sctx->b.b.set_framebuffer_state = si_set_framebuffer_state; 3618 sctx->b.b.get_sample_position = cayman_get_sample_position; 3619 3620 sctx->b.b.create_sampler_state = si_create_sampler_state; 3621 sctx->b.b.delete_sampler_state = si_delete_sampler_state; 3622 3623 sctx->b.b.create_sampler_view = si_create_sampler_view; 3624 sctx->b.b.sampler_view_destroy = si_sampler_view_destroy; 3625 3626 sctx->b.b.set_sample_mask = si_set_sample_mask; 3627 3628 sctx->b.b.create_vertex_elements_state = si_create_vertex_elements; 3629 sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements; 3630 sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element; 3631 sctx->b.b.set_vertex_buffers = si_set_vertex_buffers; 3632 sctx->b.b.set_index_buffer = si_set_index_buffer; 3633 3634 sctx->b.b.texture_barrier = si_texture_barrier; 3635 sctx->b.b.memory_barrier = si_memory_barrier; 3636 sctx->b.b.set_min_samples = si_set_min_samples; 3637 sctx->b.b.set_tess_state = si_set_tess_state; 3638 3639 sctx->b.b.set_active_query_state = si_set_active_query_state; 3640 sctx->b.set_occlusion_query_state = si_set_occlusion_query_state; 3641 sctx->b.save_qbo_state = si_save_qbo_state; 3642 sctx->b.need_gfx_cs_space = si_need_gfx_cs_space; 3643 3644 sctx->b.b.draw_vbo = si_draw_vbo; 3645 3646 si_init_config(sctx); 3647} 3648 3649static uint32_t si_get_bo_metadata_word1(struct r600_common_screen *rscreen) 3650{ 3651 return (ATI_VENDOR_ID << 16) | rscreen->info.pci_id; 3652} 3653 3654static void si_query_opaque_metadata(struct 
r600_common_screen *rscreen, 3655 struct r600_texture *rtex, 3656 struct radeon_bo_metadata *md) 3657{ 3658 struct si_screen *sscreen = (struct si_screen*)rscreen; 3659 struct pipe_resource *res = &rtex->resource.b.b; 3660 static const unsigned char swizzle[] = { 3661 PIPE_SWIZZLE_X, 3662 PIPE_SWIZZLE_Y, 3663 PIPE_SWIZZLE_Z, 3664 PIPE_SWIZZLE_W 3665 }; 3666 uint32_t desc[8], i; 3667 bool is_array = util_resource_is_array_texture(res); 3668 3669 /* DRM 2.x.x doesn't support this. */ 3670 if (rscreen->info.drm_major != 3) 3671 return; 3672 3673 assert(rtex->dcc_separate_buffer == NULL); 3674 assert(rtex->fmask.size == 0); 3675 3676 /* Metadata image format format version 1: 3677 * [0] = 1 (metadata format identifier) 3678 * [1] = (VENDOR_ID << 16) | PCI_ID 3679 * [2:9] = image descriptor for the whole resource 3680 * [2] is always 0, because the base address is cleared 3681 * [9] is the DCC offset bits [39:8] from the beginning of 3682 * the buffer 3683 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level 3684 */ 3685 3686 md->metadata[0] = 1; /* metadata image format version 1 */ 3687 3688 /* TILE_MODE_INDEX is ambiguous without a PCI ID. */ 3689 md->metadata[1] = si_get_bo_metadata_word1(rscreen); 3690 3691 si_make_texture_descriptor(sscreen, rtex, true, 3692 res->target, res->format, 3693 swizzle, 0, res->last_level, 0, 3694 is_array ? res->array_size - 1 : 0, 3695 res->width0, res->height0, res->depth0, 3696 desc, NULL); 3697 3698 si_set_mutable_tex_desc_fields(rtex, &rtex->surface.level[0], 0, 0, 3699 rtex->surface.blk_w, false, desc); 3700 3701 /* Clear the base address and set the relative DCC offset. */ 3702 desc[0] = 0; 3703 desc[1] &= C_008F14_BASE_ADDRESS_HI; 3704 desc[7] = rtex->dcc_offset >> 8; 3705 3706 /* Dwords [2:9] contain the image descriptor. */ 3707 memcpy(&md->metadata[2], desc, sizeof(desc)); 3708 3709 /* Dwords [10:..] contain the mipmap level offsets. 
*/ 3710 for (i = 0; i <= res->last_level; i++) 3711 md->metadata[10+i] = rtex->surface.level[i].offset >> 8; 3712 3713 md->size_metadata = (11 + res->last_level) * 4; 3714} 3715 3716static void si_apply_opaque_metadata(struct r600_common_screen *rscreen, 3717 struct r600_texture *rtex, 3718 struct radeon_bo_metadata *md) 3719{ 3720 uint32_t *desc = &md->metadata[2]; 3721 3722 if (rscreen->chip_class < VI) 3723 return; 3724 3725 /* Return if DCC is enabled. The texture should be set up with it 3726 * already. 3727 */ 3728 if (md->size_metadata >= 11 * 4 && 3729 md->metadata[0] != 0 && 3730 md->metadata[1] == si_get_bo_metadata_word1(rscreen) && 3731 G_008F28_COMPRESSION_EN(desc[6])) { 3732 assert(rtex->dcc_offset == ((uint64_t)desc[7] << 8)); 3733 return; 3734 } 3735 3736 /* Disable DCC. These are always set by texture_from_handle and must 3737 * be cleared here. 3738 */ 3739 rtex->dcc_offset = 0; 3740} 3741 3742void si_init_screen_state_functions(struct si_screen *sscreen) 3743{ 3744 sscreen->b.b.is_format_supported = si_is_format_supported; 3745 sscreen->b.query_opaque_metadata = si_query_opaque_metadata; 3746 sscreen->b.apply_opaque_metadata = si_apply_opaque_metadata; 3747} 3748 3749static void 3750si_write_harvested_raster_configs(struct si_context *sctx, 3751 struct si_pm4_state *pm4, 3752 unsigned raster_config, 3753 unsigned raster_config_1) 3754{ 3755 unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1); 3756 unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1); 3757 unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask; 3758 unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16); 3759 unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2); 3760 unsigned rb_per_se = num_rb / num_se; 3761 unsigned se_mask[4]; 3762 unsigned se; 3763 3764 se_mask[0] = ((1 << rb_per_se) - 1); 3765 se_mask[1] = (se_mask[0] << rb_per_se); 3766 se_mask[2] = (se_mask[1] << rb_per_se); 3767 se_mask[3] = (se_mask[2] << rb_per_se); 3768 3769 
se_mask[0] &= rb_mask; 3770 se_mask[1] &= rb_mask; 3771 se_mask[2] &= rb_mask; 3772 se_mask[3] &= rb_mask; 3773 3774 assert(num_se == 1 || num_se == 2 || num_se == 4); 3775 assert(sh_per_se == 1 || sh_per_se == 2); 3776 assert(rb_per_pkr == 1 || rb_per_pkr == 2); 3777 3778 /* XXX: I can't figure out what the *_XSEL and *_YSEL 3779 * fields are for, so I'm leaving them as their default 3780 * values. */ 3781 3782 for (se = 0; se < num_se; se++) { 3783 unsigned raster_config_se = raster_config; 3784 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); 3785 unsigned pkr1_mask = pkr0_mask << rb_per_pkr; 3786 int idx = (se / 2) * 2; 3787 3788 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { 3789 raster_config_se &= C_028350_SE_MAP; 3790 3791 if (!se_mask[idx]) { 3792 raster_config_se |= 3793 S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3); 3794 } else { 3795 raster_config_se |= 3796 S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0); 3797 } 3798 } 3799 3800 pkr0_mask &= rb_mask; 3801 pkr1_mask &= rb_mask; 3802 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { 3803 raster_config_se &= C_028350_PKR_MAP; 3804 3805 if (!pkr0_mask) { 3806 raster_config_se |= 3807 S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3); 3808 } else { 3809 raster_config_se |= 3810 S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0); 3811 } 3812 } 3813 3814 if (rb_per_se >= 2) { 3815 unsigned rb0_mask = 1 << (se * rb_per_se); 3816 unsigned rb1_mask = rb0_mask << 1; 3817 3818 rb0_mask &= rb_mask; 3819 rb1_mask &= rb_mask; 3820 if (!rb0_mask || !rb1_mask) { 3821 raster_config_se &= C_028350_RB_MAP_PKR0; 3822 3823 if (!rb0_mask) { 3824 raster_config_se |= 3825 S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3); 3826 } else { 3827 raster_config_se |= 3828 S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0); 3829 } 3830 } 3831 3832 if (rb_per_se > 2) { 3833 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); 3834 rb1_mask = rb0_mask << 1; 3835 rb0_mask &= rb_mask; 3836 rb1_mask &= 
rb_mask; 3837 if (!rb0_mask || !rb1_mask) { 3838 raster_config_se &= C_028350_RB_MAP_PKR1; 3839 3840 if (!rb0_mask) { 3841 raster_config_se |= 3842 S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3); 3843 } else { 3844 raster_config_se |= 3845 S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0); 3846 } 3847 } 3848 } 3849 } 3850 3851 /* GRBM_GFX_INDEX has a different offset on SI and CI+ */ 3852 if (sctx->b.chip_class < CIK) 3853 si_pm4_set_reg(pm4, GRBM_GFX_INDEX, 3854 SE_INDEX(se) | SH_BROADCAST_WRITES | 3855 INSTANCE_BROADCAST_WRITES); 3856 else 3857 si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX, 3858 S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) | 3859 S_030800_INSTANCE_BROADCAST_WRITES(1)); 3860 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se); 3861 } 3862 3863 /* GRBM_GFX_INDEX has a different offset on SI and CI+ */ 3864 if (sctx->b.chip_class < CIK) 3865 si_pm4_set_reg(pm4, GRBM_GFX_INDEX, 3866 SE_BROADCAST_WRITES | SH_BROADCAST_WRITES | 3867 INSTANCE_BROADCAST_WRITES); 3868 else { 3869 si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX, 3870 S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | 3871 S_030800_INSTANCE_BROADCAST_WRITES(1)); 3872 3873 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || 3874 (!se_mask[2] && !se_mask[3]))) { 3875 raster_config_1 &= C_028354_SE_PAIR_MAP; 3876 3877 if (!se_mask[0] && !se_mask[1]) { 3878 raster_config_1 |= 3879 S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3); 3880 } else { 3881 raster_config_1 |= 3882 S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0); 3883 } 3884 } 3885 3886 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); 3887 } 3888} 3889 3890static void si_init_config(struct si_context *sctx) 3891{ 3892 struct si_screen *sscreen = sctx->screen; 3893 unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16); 3894 unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask; 3895 unsigned raster_config, raster_config_1; 3896 
uint64_t border_color_va = sctx->border_color_buffer->gpu_address; 3897 struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); 3898 3899 if (!pm4) 3900 return; 3901 3902 si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL); 3903 si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1)); 3904 si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1)); 3905 si_pm4_cmd_end(pm4, false); 3906 3907 si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); 3908 si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); 3909 3910 /* FIXME calculate these values somehow ??? */ 3911 si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES); 3912 si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40); 3913 si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2); 3914 3915 si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0); 3916 si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); 3917 3918 si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); 3919 si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0); 3920 if (sctx->b.chip_class < CIK) 3921 si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | 3922 S_008A14_CLIP_VTX_REORDER_ENA(1)); 3923 3924 si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210); 3925 si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98); 3926 3927 si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0); 3928 3929 switch (sctx->screen->b.family) { 3930 case CHIP_TAHITI: 3931 case CHIP_PITCAIRN: 3932 raster_config = 0x2a00126a; 3933 raster_config_1 = 0x00000000; 3934 break; 3935 case CHIP_VERDE: 3936 raster_config = 0x0000124a; 3937 raster_config_1 = 0x00000000; 3938 break; 3939 case CHIP_OLAND: 3940 raster_config = 0x00000082; 3941 raster_config_1 = 0x00000000; 3942 break; 3943 case CHIP_HAINAN: 3944 raster_config = 0x00000000; 3945 raster_config_1 = 0x00000000; 3946 break; 3947 case CHIP_BONAIRE: 3948 raster_config = 0x16000012; 3949 raster_config_1 = 0x00000000; 3950 break; 3951 case CHIP_HAWAII: 3952 
raster_config = 0x3a00161a; 3953 raster_config_1 = 0x0000002e; 3954 break; 3955 case CHIP_FIJI: 3956 if (sscreen->b.info.cik_macrotile_mode_array[0] == 0x000000e8) { 3957 /* old kernels with old tiling config */ 3958 raster_config = 0x16000012; 3959 raster_config_1 = 0x0000002a; 3960 } else { 3961 raster_config = 0x3a00161a; 3962 raster_config_1 = 0x0000002e; 3963 } 3964 break; 3965 case CHIP_POLARIS10: 3966 raster_config = 0x16000012; 3967 raster_config_1 = 0x0000002a; 3968 break; 3969 case CHIP_POLARIS11: 3970 case CHIP_POLARIS12: 3971 raster_config = 0x16000012; 3972 raster_config_1 = 0x00000000; 3973 break; 3974 case CHIP_TONGA: 3975 raster_config = 0x16000012; 3976 raster_config_1 = 0x0000002a; 3977 break; 3978 case CHIP_ICELAND: 3979 if (num_rb == 1) 3980 raster_config = 0x00000000; 3981 else 3982 raster_config = 0x00000002; 3983 raster_config_1 = 0x00000000; 3984 break; 3985 case CHIP_CARRIZO: 3986 raster_config = 0x00000002; 3987 raster_config_1 = 0x00000000; 3988 break; 3989 case CHIP_KAVERI: 3990 /* KV should be 0x00000002, but that causes problems with radeon */ 3991 raster_config = 0x00000000; /* 0x00000002 */ 3992 raster_config_1 = 0x00000000; 3993 break; 3994 case CHIP_KABINI: 3995 case CHIP_MULLINS: 3996 case CHIP_STONEY: 3997 raster_config = 0x00000000; 3998 raster_config_1 = 0x00000000; 3999 break; 4000 default: 4001 fprintf(stderr, 4002 "radeonsi: Unknown GPU, using 0 for raster_config\n"); 4003 raster_config = 0x00000000; 4004 raster_config_1 = 0x00000000; 4005 break; 4006 } 4007 4008 /* Always use the default config when all backends are enabled 4009 * (or when we failed to determine the enabled backends). 
4010 */ 4011 if (!rb_mask || util_bitcount(rb_mask) >= num_rb) { 4012 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 4013 raster_config); 4014 if (sctx->b.chip_class >= CIK) 4015 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 4016 raster_config_1); 4017 } else { 4018 si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1); 4019 } 4020 4021 si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); 4022 si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); 4023 si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR, 4024 S_028244_BR_X(16384) | S_028244_BR_Y(16384)); 4025 si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0); 4026 si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR, 4027 S_028034_BR_X(16384) | S_028034_BR_Y(16384)); 4028 4029 si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); 4030 si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 4031 S_028230_ER_TRI(0xA) | 4032 S_028230_ER_POINT(0xA) | 4033 S_028230_ER_RECT(0xA) | 4034 /* Required by DX10_DIAMOND_TEST_ENA: */ 4035 S_028230_ER_LINE_LR(0x1A) | 4036 S_028230_ER_LINE_RL(0x26) | 4037 S_028230_ER_LINE_TB(0xA) | 4038 S_028230_ER_LINE_BT(0xA)); 4039 /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */ 4040 si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); 4041 si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0); 4042 si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); 4043 si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); 4044 si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0); 4045 si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0); 4046 4047 si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0); 4048 si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0); 4049 si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0); 4050 4051 if (sctx->b.chip_class >= CIK) { 4052 /* If this is 0, Bonaire can hang even if GS isn't being used. 4053 * Other chips are unaffected. 
These are suboptimal values, 4054 * but we don't use on-chip GS. 4055 */ 4056 si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL, 4057 S_028A44_ES_VERTS_PER_SUBGRP(64) | 4058 S_028A44_GS_PRIMS_PER_SUBGRP(4)); 4059 4060 si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff)); 4061 si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0); 4062 si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff)); 4063 si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff)); 4064 4065 if (sscreen->b.info.num_good_compute_units / 4066 (sscreen->b.info.max_se * sscreen->b.info.max_sh_per_se) <= 4) { 4067 /* Too few available compute units per SH. Disallowing 4068 * VS to run on CU0 could hurt us more than late VS 4069 * allocation would help. 4070 * 4071 * LATE_ALLOC_VS = 2 is the highest safe number. 4072 */ 4073 si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff)); 4074 si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2)); 4075 } else { 4076 /* Set LATE_ALLOC_VS == 31. It should be less than 4077 * the number of scratch waves. Limitations: 4078 * - VS can't execute on CU0. 4079 * - If HS writes outputs to LDS, LS can't execute on CU0. 
4080 */ 4081 si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe)); 4082 si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31)); 4083 } 4084 4085 si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff)); 4086 } 4087 4088 if (sctx->b.chip_class >= VI) { 4089 unsigned vgt_tess_distribution; 4090 4091 si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL, 4092 S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) | 4093 S_028424_OVERWRITE_COMBINER_WATERMARK(4)); 4094 if (sctx->b.family < CHIP_POLARIS10) 4095 si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30); 4096 si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32); 4097 4098 vgt_tess_distribution = 4099 S_028B50_ACCUM_ISOLINE(32) | 4100 S_028B50_ACCUM_TRI(11) | 4101 S_028B50_ACCUM_QUAD(11) | 4102 S_028B50_DONUT_SPLIT(16); 4103 4104 /* Testing with Unigine Heaven extreme tesselation yielded best results 4105 * with TRAP_SPLIT = 3. 4106 */ 4107 if (sctx->b.family == CHIP_FIJI || 4108 sctx->b.family >= CHIP_POLARIS10) 4109 vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3); 4110 4111 si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution); 4112 } else { 4113 si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); 4114 si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16); 4115 } 4116 4117 if (sctx->b.family == CHIP_STONEY) 4118 si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0); 4119 4120 si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); 4121 if (sctx->b.chip_class >= CIK) 4122 si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40); 4123 si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ, 4124 RADEON_PRIO_BORDER_COLORS); 4125 4126 si_pm4_upload_indirect_buffer(sctx, pm4); 4127 sctx->init_config = pm4; 4128} 4129