si_state.c revision 9c63fd90561c32bccd905d8e0abf4864941f359a
/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Christian König <christian.koenig@amd.com>
 */

#include "si_pipe.h"
#include "si_shader.h"
#include "sid.h"
#include "radeon/r600_cs.h"

#include "util/u_dual_blend.h"
#include "util/u_format.h"
#include "util/u_format_s3tc.h"
#include "util/u_memory.h"
#include "util/u_pstipple.h"
#include "util/u_resource.h"

/* Initialize an external atom (owned by ../radeon).
 * The atom's ID is derived from its slot in sctx->atoms; IDs are 1-based so
 * that 0 can mean "no atom".
 */
static void
si_init_external_atom(struct si_context *sctx, struct r600_atom *atom,
		      struct r600_atom **list_elem)
{
	atom->id = list_elem - sctx->atoms.array + 1;
	*list_elem = atom;
}

/* Initialize an atom owned by radeonsi and register its emit callback.
 */
void si_init_atom(struct si_context *sctx, struct r600_atom *atom,
		  struct r600_atom **list_elem,
		  void (*emit_func)(struct si_context *ctx, struct r600_atom *state))
{
	atom->emit = (void*)emit_func;
	atom->id = list_elem - sctx->atoms.array + 1; /* index+1 in the atom array */
	*list_elem = atom;
}

/* Translate a gallium PIPE_SWIZZLE_* value to the hardware SQ_SEL_* encoding
 * used in resource descriptors. Unknown values fall back to SEL_X.
 */
static unsigned si_map_swizzle(unsigned swizzle)
{
	switch (swizzle) {
	case PIPE_SWIZZLE_Y:
		return V_008F0C_SQ_SEL_Y;
	case PIPE_SWIZZLE_Z:
		return V_008F0C_SQ_SEL_Z;
	case PIPE_SWIZZLE_W:
		return V_008F0C_SQ_SEL_W;
	case PIPE_SWIZZLE_0:
		return V_008F0C_SQ_SEL_0;
	case PIPE_SWIZZLE_1:
		return V_008F0C_SQ_SEL_1;
	default: /* PIPE_SWIZZLE_X */
		return V_008F0C_SQ_SEL_X;
	}
}

/* Convert a float to fixed point with the given number of fractional bits.
 * NOTE(review): truncates rather than rounds, and assumes the value fits in
 * the target field — callers pass small, clamped values.
 */
static uint32_t S_FIXED(float value, uint32_t frac_bits)
{
	return value * (1 << frac_bits);
}

/* 12.4 fixed-point: clamp to [0, 4096) and scale by 16 (4 fractional bits). */
static unsigned si_pack_float_12p4(float x)
{
	return x <= 0 ? 0 :
	       x >= 4096 ? 0xffff : x * 16;
}

/*
 * Inferred framebuffer and blender state.
 *
 * One of the reasons CB_TARGET_MASK must be derived from the framebuffer state
 * is that:
 * - The blend state mask is 0xf most of the time.
 * - The COLOR1 format isn't INVALID because of possible dual-source blending,
 *   so COLOR1 is enabled pretty much all the time.
 * So CB_TARGET_MASK is the only register that can disable COLOR1.
 *
 * Another reason is to avoid a hang with dual source blending.
 */
static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	struct si_state_blend *blend = sctx->queued.named.blend;
	uint32_t cb_target_mask = 0, i;

	/* Enable all channels of every bound colorbuffer, then intersect with
	 * the blend state's per-RT colormask. */
	for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++)
		if (sctx->framebuffer.state.cbufs[i])
			cb_target_mask |= 0xf << (4*i);

	if (blend)
		cb_target_mask &= blend->cb_target_mask;

	/* Avoid a hang that happens when dual source blending is enabled
	 * but there is not enough color outputs. This is undefined behavior,
	 * so disable color writes completely.
	 *
	 * Reproducible with Unigine Heaven 4.0 and drirc missing.
	 */
	if (blend && blend->dual_src_blend &&
	    sctx->ps_shader.cso &&
	    (sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3)
		cb_target_mask = 0;

	radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, cb_target_mask);

	/* STONEY-specific register settings (RB+ / blend optimizations). */
	if (sctx->b.family == CHIP_STONEY) {
		unsigned spi_shader_col_format =
			sctx->ps_shader.cso ?
			sctx->ps_shader.current->key.ps.epilog.spi_shader_col_format : 0;
		unsigned sx_ps_downconvert = 0;
		unsigned sx_blend_opt_epsilon = 0;
		unsigned sx_blend_opt_control = 0;

		for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
			struct r600_surface *surf =
				(struct r600_surface*)sctx->framebuffer.state.cbufs[i];
			unsigned format, swap, spi_format, colormask;
			bool has_alpha, has_rgb;

			if (!surf)
				continue;

			format = G_028C70_FORMAT(surf->cb_color_info);
			swap = G_028C70_COMP_SWAP(surf->cb_color_info);
			spi_format = (spi_shader_col_format >> (i * 4)) & 0xf;
			colormask = (cb_target_mask >> (i * 4)) & 0xf;

			/* Set if RGB and A are present.
			 * FORCE_DST_ALPHA_1 means the surface has no real
			 * alpha channel. */
			has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib);

			/* Single-channel hw formats hold either R or A,
			 * never both. */
			if (format == V_028C70_COLOR_8 ||
			    format == V_028C70_COLOR_16 ||
			    format == V_028C70_COLOR_32)
				has_rgb = !has_alpha;
			else
				has_rgb = true;

			/* Check the colormask and export format. */
			if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A)))
				has_rgb = false;
			if (!(colormask & PIPE_MASK_A))
				has_alpha = false;

			if (spi_format == V_028714_SPI_SHADER_ZERO) {
				has_rgb = false;
				has_alpha = false;
			}

			/* Disable value checking for disabled channels. */
			if (!has_rgb)
				sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
			if (!has_alpha)
				sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);

			/* Enable down-conversion for 32bpp and smaller formats. */
			switch (format) {
			case V_028C70_COLOR_8:
			case V_028C70_COLOR_8_8:
			case V_028C70_COLOR_8_8_8_8:
				/* For 1 and 2-channel formats, use the superset thereof.
				 */
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
				    spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
				    spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4);
				}
				break;

			case V_028C70_COLOR_5_6_5:
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4);
				}
				break;

			case V_028C70_COLOR_1_5_5_5:
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4);
				}
				break;

			case V_028C70_COLOR_4_4_4_4:
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4);
				}
				break;

			case V_028C70_COLOR_32:
				if (swap == V_0280A0_SWAP_STD &&
				    spi_format == V_028714_SPI_SHADER_32_R)
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
				else if (swap == V_0280A0_SWAP_ALT_REV &&
					 spi_format == V_028714_SPI_SHADER_32_AR)
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
				break;

			case V_028C70_COLOR_16:
			case V_028C70_COLOR_16_16:
				/* For 1-channel formats, use the superset thereof.
				 */
				if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
				    spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
				    spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
				    spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
					if (swap == V_0280A0_SWAP_STD ||
					    swap == V_0280A0_SWAP_STD_REV)
						sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
					else
						sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
				}
				break;

			case V_028C70_COLOR_10_11_11:
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4);
				}
				break;

			case V_028C70_COLOR_2_10_10_10:
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
				}
				break;
			}
		}

		if (sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) {
			sx_ps_downconvert = 0;
			sx_blend_opt_epsilon = 0;
			sx_blend_opt_control = 0;
		}

		radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3);
		radeon_emit(cs, sx_ps_downconvert);	/* R_028754_SX_PS_DOWNCONVERT */
		radeon_emit(cs, sx_blend_opt_epsilon);	/* R_028758_SX_BLEND_OPT_EPSILON */
		radeon_emit(cs, sx_blend_opt_control);	/* R_02875C_SX_BLEND_OPT_CONTROL */
	}
}

/*
 * Blender functions
 */

/* Translate PIPE_BLEND_* equations to the CB_BLENDn_CONTROL COMB_FCN field. */
static uint32_t si_translate_blend_function(int blend_func)
{
	switch (blend_func) {
	case PIPE_BLEND_ADD:
		return V_028780_COMB_DST_PLUS_SRC;
	case PIPE_BLEND_SUBTRACT:
		return V_028780_COMB_SRC_MINUS_DST;
	case PIPE_BLEND_REVERSE_SUBTRACT:
		return V_028780_COMB_DST_MINUS_SRC;
	case PIPE_BLEND_MIN:
		return V_028780_COMB_MIN_DST_SRC;
	case PIPE_BLEND_MAX:
		return V_028780_COMB_MAX_DST_SRC;
	default:
		R600_ERR("Unknown blend function %d\n", blend_func);
		assert(0);
		break;
	}
	return 0;
}

/* Translate PIPE_BLENDFACTOR_* to the CB_BLENDn_CONTROL SRCBLEND/DESTBLEND
 * encoding. */
static uint32_t si_translate_blend_factor(int blend_fact)
{
	switch (blend_fact) {
	case PIPE_BLENDFACTOR_ONE:
		return V_028780_BLEND_ONE;
	case PIPE_BLENDFACTOR_SRC_COLOR:
		return V_028780_BLEND_SRC_COLOR;
	case PIPE_BLENDFACTOR_SRC_ALPHA:
		return V_028780_BLEND_SRC_ALPHA;
	case PIPE_BLENDFACTOR_DST_ALPHA:
		return V_028780_BLEND_DST_ALPHA;
	case PIPE_BLENDFACTOR_DST_COLOR:
		return V_028780_BLEND_DST_COLOR;
	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
		return V_028780_BLEND_SRC_ALPHA_SATURATE;
	case PIPE_BLENDFACTOR_CONST_COLOR:
		return V_028780_BLEND_CONSTANT_COLOR;
	case PIPE_BLENDFACTOR_CONST_ALPHA:
		return V_028780_BLEND_CONSTANT_ALPHA;
	case PIPE_BLENDFACTOR_ZERO:
		return V_028780_BLEND_ZERO;
	case PIPE_BLENDFACTOR_INV_SRC_COLOR:
		return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
	case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
		return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
	case PIPE_BLENDFACTOR_INV_DST_ALPHA:
		return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
	case PIPE_BLENDFACTOR_INV_DST_COLOR:
		return V_028780_BLEND_ONE_MINUS_DST_COLOR;
	case PIPE_BLENDFACTOR_INV_CONST_COLOR:
		return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR;
	case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
		return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA;
	case PIPE_BLENDFACTOR_SRC1_COLOR:
		return V_028780_BLEND_SRC1_COLOR;
	case PIPE_BLENDFACTOR_SRC1_ALPHA:
		return V_028780_BLEND_SRC1_ALPHA;
	case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
		return V_028780_BLEND_INV_SRC1_COLOR;
	case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
		return V_028780_BLEND_INV_SRC1_ALPHA;
	default:
		R600_ERR("Bad blend factor %d not supported!\n", blend_fact);
		assert(0);
		break;
	}
	return 0;
}

/* Translate a blend equation to the SX_MRTn_BLEND_OPT COMB_FCN encoding
 * (RB+ blend optimization). Unsupported equations disable the optimization. */
static uint32_t si_translate_blend_opt_function(int blend_func)
{
	switch (blend_func) {
	case PIPE_BLEND_ADD:
		return V_028760_OPT_COMB_ADD;
	case PIPE_BLEND_SUBTRACT:
		return V_028760_OPT_COMB_SUBTRACT;
	case PIPE_BLEND_REVERSE_SUBTRACT:
		return V_028760_OPT_COMB_REVSUBTRACT;
	case PIPE_BLEND_MIN:
		return V_028760_OPT_COMB_MIN;
	case PIPE_BLEND_MAX:
		return V_028760_OPT_COMB_MAX;
	default:
		return V_028760_OPT_COMB_BLEND_DISABLED;
	}
}

/* Translate a blend factor to the SX_MRTn_BLEND_OPT SRC/DST_OPT encoding,
 * describing which source components the factor preserves/ignores. */
static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha)
{
	switch (blend_fact) {
	case PIPE_BLENDFACTOR_ZERO:
		return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
	case PIPE_BLENDFACTOR_ONE:
		return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
	case PIPE_BLENDFACTOR_SRC_COLOR:
		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
				: V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
	case PIPE_BLENDFACTOR_INV_SRC_COLOR:
		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
				: V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
	case PIPE_BLENDFACTOR_SRC_ALPHA:
		return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
	case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
		return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
				: V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
	default:
		return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
	}
}

/**
 * Get rid of DST in the blend factors by commuting the operands:
 *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
 */
static void si_blend_remove_dst(unsigned *func, unsigned *src_factor,
				unsigned *dst_factor, unsigned expected_dst,
				unsigned replacement_src)
{
	if (*src_factor == expected_dst &&
	    *dst_factor == PIPE_BLENDFACTOR_ZERO) {
		*src_factor = PIPE_BLENDFACTOR_ZERO;
		*dst_factor = replacement_src;

		/* Commuting the operands requires reversing subtractions.
*/ 395 if (*func == PIPE_BLEND_SUBTRACT) 396 *func = PIPE_BLEND_REVERSE_SUBTRACT; 397 else if (*func == PIPE_BLEND_REVERSE_SUBTRACT) 398 *func = PIPE_BLEND_SUBTRACT; 399 } 400} 401 402static bool si_blend_factor_uses_dst(unsigned factor) 403{ 404 return factor == PIPE_BLENDFACTOR_DST_COLOR || 405 factor == PIPE_BLENDFACTOR_DST_ALPHA || 406 factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 407 factor == PIPE_BLENDFACTOR_INV_DST_ALPHA || 408 factor == PIPE_BLENDFACTOR_INV_DST_COLOR; 409} 410 411static void *si_create_blend_state_mode(struct pipe_context *ctx, 412 const struct pipe_blend_state *state, 413 unsigned mode) 414{ 415 struct si_context *sctx = (struct si_context*)ctx; 416 struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend); 417 struct si_pm4_state *pm4 = &blend->pm4; 418 uint32_t sx_mrt_blend_opt[8] = {0}; 419 uint32_t color_control = 0; 420 421 if (!blend) 422 return NULL; 423 424 blend->alpha_to_coverage = state->alpha_to_coverage; 425 blend->alpha_to_one = state->alpha_to_one; 426 blend->dual_src_blend = util_blend_state_is_dual(state, 0); 427 428 if (state->logicop_enable) { 429 color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4)); 430 } else { 431 color_control |= S_028808_ROP3(0xcc); 432 } 433 434 si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 435 S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 436 S_028B70_ALPHA_TO_MASK_OFFSET0(2) | 437 S_028B70_ALPHA_TO_MASK_OFFSET1(2) | 438 S_028B70_ALPHA_TO_MASK_OFFSET2(2) | 439 S_028B70_ALPHA_TO_MASK_OFFSET3(2)); 440 441 if (state->alpha_to_coverage) 442 blend->need_src_alpha_4bit |= 0xf; 443 444 blend->cb_target_mask = 0; 445 for (int i = 0; i < 8; i++) { 446 /* state->rt entries > 0 only written if independent blending */ 447 const int j = state->independent_blend_enable ? 
i : 0; 448 449 unsigned eqRGB = state->rt[j].rgb_func; 450 unsigned srcRGB = state->rt[j].rgb_src_factor; 451 unsigned dstRGB = state->rt[j].rgb_dst_factor; 452 unsigned eqA = state->rt[j].alpha_func; 453 unsigned srcA = state->rt[j].alpha_src_factor; 454 unsigned dstA = state->rt[j].alpha_dst_factor; 455 456 unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt; 457 unsigned blend_cntl = 0; 458 459 sx_mrt_blend_opt[i] = 460 S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | 461 S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED); 462 463 /* Only set dual source blending for MRT0 to avoid a hang. */ 464 if (i >= 1 && blend->dual_src_blend) 465 continue; 466 467 /* Only addition and subtraction equations are supported with 468 * dual source blending. 469 */ 470 if (blend->dual_src_blend && 471 (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX || 472 eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) { 473 assert(!"Unsupported equation for dual source blending"); 474 continue; 475 } 476 477 if (!state->rt[j].colormask) 478 continue; 479 480 /* cb_render_state will disable unused ones */ 481 blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i); 482 483 if (!state->rt[j].blend_enable) { 484 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 485 continue; 486 } 487 488 /* Blending optimizations for Stoney. 489 * These transformations don't change the behavior. 490 * 491 * First, get rid of DST in the blend factors: 492 * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 493 */ 494 si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, 495 PIPE_BLENDFACTOR_DST_COLOR, 496 PIPE_BLENDFACTOR_SRC_COLOR); 497 si_blend_remove_dst(&eqA, &srcA, &dstA, 498 PIPE_BLENDFACTOR_DST_COLOR, 499 PIPE_BLENDFACTOR_SRC_COLOR); 500 si_blend_remove_dst(&eqA, &srcA, &dstA, 501 PIPE_BLENDFACTOR_DST_ALPHA, 502 PIPE_BLENDFACTOR_SRC_ALPHA); 503 504 /* Look up the ideal settings from tables. 
*/ 505 srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false); 506 dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false); 507 srcA_opt = si_translate_blend_opt_factor(srcA, true); 508 dstA_opt = si_translate_blend_opt_factor(dstA, true); 509 510 /* Handle interdependencies. */ 511 if (si_blend_factor_uses_dst(srcRGB)) 512 dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 513 if (si_blend_factor_uses_dst(srcA)) 514 dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 515 516 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE && 517 (dstRGB == PIPE_BLENDFACTOR_ZERO || 518 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 519 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)) 520 dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 521 522 /* Set the final value. */ 523 sx_mrt_blend_opt[i] = 524 S_028760_COLOR_SRC_OPT(srcRGB_opt) | 525 S_028760_COLOR_DST_OPT(dstRGB_opt) | 526 S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) | 527 S_028760_ALPHA_SRC_OPT(srcA_opt) | 528 S_028760_ALPHA_DST_OPT(dstA_opt) | 529 S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA)); 530 531 /* Set blend state. */ 532 blend_cntl |= S_028780_ENABLE(1); 533 blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB)); 534 blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB)); 535 blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB)); 536 537 if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { 538 blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1); 539 blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA)); 540 blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA)); 541 blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA)); 542 } 543 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 544 545 blend->blend_enable_4bit |= 0xfu << (i * 4); 546 547 /* This is only important for formats without alpha. 
*/ 548 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 549 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 550 srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 551 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 552 srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || 553 dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA) 554 blend->need_src_alpha_4bit |= 0xfu << (i * 4); 555 } 556 557 if (blend->cb_target_mask) { 558 color_control |= S_028808_MODE(mode); 559 } else { 560 color_control |= S_028808_MODE(V_028808_CB_DISABLE); 561 } 562 563 if (sctx->b.family == CHIP_STONEY) { 564 for (int i = 0; i < 8; i++) 565 si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4, 566 sx_mrt_blend_opt[i]); 567 568 /* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */ 569 if (blend->dual_src_blend || state->logicop_enable || 570 mode == V_028808_CB_RESOLVE) 571 color_control |= S_028808_DISABLE_DUAL_QUAD(1); 572 } 573 574 si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control); 575 return blend; 576} 577 578static void *si_create_blend_state(struct pipe_context *ctx, 579 const struct pipe_blend_state *state) 580{ 581 return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); 582} 583 584static void si_bind_blend_state(struct pipe_context *ctx, void *state) 585{ 586 struct si_context *sctx = (struct si_context *)ctx; 587 si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state); 588 si_mark_atom_dirty(sctx, &sctx->cb_render_state); 589 sctx->do_update_shaders = true; 590} 591 592static void si_delete_blend_state(struct pipe_context *ctx, void *state) 593{ 594 struct si_context *sctx = (struct si_context *)ctx; 595 si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state); 596} 597 598static void si_set_blend_color(struct pipe_context *ctx, 599 const struct pipe_blend_color *state) 600{ 601 struct si_context *sctx = (struct si_context *)ctx; 602 603 if (memcmp(&sctx->blend_color.state, state, sizeof(*state)) == 0) 604 return; 605 606 sctx->blend_color.state = *state; 
	si_mark_atom_dirty(sctx, &sctx->blend_color.atom);
}

/* Emit the constant blend color (CB_BLEND_RED..ALPHA). */
static void si_emit_blend_color(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;

	radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
	radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4);
}

/*
 * Clipping
 */

/* pipe_context::set_clip_state hook. Besides dirtying the clip-state atom,
 * uploads the user clip planes as a constant buffer for the vertex shader. */
static void si_set_clip_state(struct pipe_context *ctx,
			      const struct pipe_clip_state *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_constant_buffer cb;

	if (memcmp(&sctx->clip_state.state, state, sizeof(*state)) == 0)
		return;

	sctx->clip_state.state = *state;
	si_mark_atom_dirty(sctx, &sctx->clip_state.atom);

	cb.buffer = NULL;
	cb.user_buffer = state->ucp;
	cb.buffer_offset = 0;
	cb.buffer_size = 4*4*8; /* 8 planes * vec4 * 4 bytes */
	si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb);
	pipe_resource_reference(&cb.buffer, NULL);
}

/* Emit the 6 user clip planes (PA_CL_UCP_*). */
static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;

	radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4);
	radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4);
}

#define SIX_BITS 0x3F

/* Emit clip/cull distance enables and related VS output controls, derived
 * from the current vertex-stage shader info and rasterizer state. */
static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	struct tgsi_shader_info *info = si_get_vs_info(sctx);
	unsigned window_space =
	   info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
	/* Writing gl_ClipVertex can produce any clip distance, so enable
	 * all six. */
	unsigned clipdist_mask =
		info->writes_clipvertex ? SIX_BITS : info->clipdist_writemask;

	radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
		S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) |
		S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) |
		S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) |
	        S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) |
		S_02881C_VS_OUT_CCDIST0_VEC_ENA((clipdist_mask & 0x0F) != 0) |
		S_02881C_VS_OUT_CCDIST1_VEC_ENA((clipdist_mask & 0xF0) != 0) |
		S_02881C_VS_OUT_MISC_VEC_ENA(info->writes_psize ||
					    info->writes_edgeflag ||
					    info->writes_layer ||
					     info->writes_viewport_index) |
		S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1) |
		(sctx->queued.named.rasterizer->clip_plane_enable &
		 clipdist_mask));
	radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
		sctx->queued.named.rasterizer->pa_cl_clip_cntl |
		(clipdist_mask ? 0 :
		 sctx->queued.named.rasterizer->clip_plane_enable & SIX_BITS) |
		S_028810_CLIP_DISABLE(window_space));

	/* reuse needs to be set off if we write oViewport */
	radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF,
			       S_028AB4_REUSE_OFF(info->writes_viewport_index));
}

/*
 * inferred state between framebuffer and rasterizer
 */
/* Bind the precomputed polygon-offset PM4 state matching the bound zbuffer
 * format (index 0 = 16-bit, 1 = 24-bit/default, 2 = 32-bit float). */
static void si_update_poly_offset_state(struct si_context *sctx)
{
	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;

	if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf)
		return;

	switch (sctx->framebuffer.state.zsbuf->texture->format) {
	case PIPE_FORMAT_Z16_UNORM:
		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]);
		break;
	default: /* 24-bit */
		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]);
		break;
	case PIPE_FORMAT_Z32_FLOAT:
	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]);
		break;
	}
}

/*
 * Rasterizer
 */

/* Translate PIPE_POLYGON_MODE_* to the PA_SU_SC_MODE_CNTL PTYPE encoding. */
static uint32_t si_translate_fill(uint32_t func)
{
716 switch(func) { 717 case PIPE_POLYGON_MODE_FILL: 718 return V_028814_X_DRAW_TRIANGLES; 719 case PIPE_POLYGON_MODE_LINE: 720 return V_028814_X_DRAW_LINES; 721 case PIPE_POLYGON_MODE_POINT: 722 return V_028814_X_DRAW_POINTS; 723 default: 724 assert(0); 725 return V_028814_X_DRAW_POINTS; 726 } 727} 728 729static void *si_create_rs_state(struct pipe_context *ctx, 730 const struct pipe_rasterizer_state *state) 731{ 732 struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer); 733 struct si_pm4_state *pm4 = &rs->pm4; 734 unsigned tmp, i; 735 float psize_min, psize_max; 736 737 if (!rs) { 738 return NULL; 739 } 740 741 rs->scissor_enable = state->scissor; 742 rs->two_side = state->light_twoside; 743 rs->multisample_enable = state->multisample; 744 rs->force_persample_interp = state->force_persample_interp; 745 rs->clip_plane_enable = state->clip_plane_enable; 746 rs->line_stipple_enable = state->line_stipple_enable; 747 rs->poly_stipple_enable = state->poly_stipple_enable; 748 rs->line_smooth = state->line_smooth; 749 rs->poly_smooth = state->poly_smooth; 750 rs->uses_poly_offset = state->offset_point || state->offset_line || 751 state->offset_tri; 752 rs->clamp_fragment_color = state->clamp_fragment_color; 753 rs->flatshade = state->flatshade; 754 rs->sprite_coord_enable = state->sprite_coord_enable; 755 rs->rasterizer_discard = state->rasterizer_discard; 756 rs->pa_sc_line_stipple = state->line_stipple_enable ? 
757 S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | 758 S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0; 759 rs->pa_cl_clip_cntl = 760 S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) | 761 S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) | 762 S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) | 763 S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) | 764 S_028810_DX_LINEAR_ATTR_CLIP_ENA(1); 765 766 si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0, 767 S_0286D4_FLAT_SHADE_ENA(1) | 768 S_0286D4_PNT_SPRITE_ENA(1) | 769 S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | 770 S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) | 771 S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) | 772 S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) | 773 S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT)); 774 775 /* point size 12.4 fixed point */ 776 tmp = (unsigned)(state->point_size * 8.0); 777 si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp)); 778 779 if (state->point_size_per_vertex) { 780 psize_min = util_get_min_point_size(state); 781 psize_max = 8192; 782 } else { 783 /* Force the point size to be as if the vertex output was disabled. */ 784 psize_min = state->point_size; 785 psize_max = state->point_size; 786 } 787 /* Divide by two, because 0.5 = 1 pixel. 
*/ 788 si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX, 789 S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) | 790 S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2))); 791 792 tmp = (unsigned)state->line_width * 8; 793 si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp)); 794 si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0, 795 S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | 796 S_028A48_MSAA_ENABLE(state->multisample || 797 state->poly_smooth || 798 state->line_smooth) | 799 S_028A48_VPORT_SCISSOR_ENABLE(1)); 800 801 si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL, 802 S_028BE4_PIX_CENTER(state->half_pixel_center) | 803 S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH)); 804 805 si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp)); 806 si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL, 807 S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) | 808 S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | 809 S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | 810 S_028814_FACE(!state->front_ccw) | 811 S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) | 812 S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) | 813 S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) | 814 S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL || 815 state->fill_back != PIPE_POLYGON_MODE_FILL) | 816 S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) | 817 S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back))); 818 si_pm4_set_reg(pm4, R_00B130_SPI_SHADER_USER_DATA_VS_0 + 819 SI_SGPR_VS_STATE_BITS * 4, state->clamp_vertex_color); 820 821 /* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. 
*/ 822 for (i = 0; i < 3; i++) { 823 struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i]; 824 float offset_units = state->offset_units; 825 float offset_scale = state->offset_scale * 16.0f; 826 uint32_t pa_su_poly_offset_db_fmt_cntl = 0; 827 828 if (!state->offset_units_unscaled) { 829 switch (i) { 830 case 0: /* 16-bit zbuffer */ 831 offset_units *= 4.0f; 832 pa_su_poly_offset_db_fmt_cntl = 833 S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16); 834 break; 835 case 1: /* 24-bit zbuffer */ 836 offset_units *= 2.0f; 837 pa_su_poly_offset_db_fmt_cntl = 838 S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24); 839 break; 840 case 2: /* 32-bit zbuffer */ 841 offset_units *= 1.0f; 842 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | 843 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); 844 break; 845 } 846 } 847 848 si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 849 fui(offset_scale)); 850 si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, 851 fui(offset_units)); 852 si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, 853 fui(offset_scale)); 854 si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, 855 fui(offset_units)); 856 si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 857 pa_su_poly_offset_db_fmt_cntl); 858 } 859 860 return rs; 861} 862 863static void si_bind_rs_state(struct pipe_context *ctx, void *state) 864{ 865 struct si_context *sctx = (struct si_context *)ctx; 866 struct si_state_rasterizer *old_rs = 867 (struct si_state_rasterizer*)sctx->queued.named.rasterizer; 868 struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; 869 870 if (!state) 871 return; 872 873 if (sctx->framebuffer.nr_samples > 1 && 874 (!old_rs || old_rs->multisample_enable != rs->multisample_enable)) { 875 si_mark_atom_dirty(sctx, &sctx->db_render_state); 876 877 if (sctx->b.family >= CHIP_POLARIS10) 878 si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom); 879 } 880 881 r600_set_scissor_enable(&sctx->b, rs->scissor_enable); 882 
	si_pm4_bind_state(sctx, rasterizer, rs);
	si_update_poly_offset_state(sctx);

	si_mark_atom_dirty(sctx, &sctx->clip_regs);
	sctx->do_update_shaders = true;
}

/* pipe_context::delete_rasterizer_state hook; also unbinds the matching
 * poly_offset state if this CSO is currently bound. */
static void si_delete_rs_state(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;

	if (sctx->queued.named.rasterizer == state)
		si_pm4_bind_state(sctx, poly_offset, NULL);
	si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state);
}

/*
 * inferred state between dsa and stencil ref
 */
/* Emit DB_STENCILREFMASK(_BF): ref value from the stencil-ref state, mask and
 * writemask from the DSA state's cached parts. */
static void si_emit_stencil_ref(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	struct pipe_stencil_ref *ref = &sctx->stencil_ref.state;
	struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part;

	radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
	radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) |
			S_028430_STENCILMASK(dsa->valuemask[0]) |
			S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
			S_028430_STENCILOPVAL(1));
	radeon_emit(cs, S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
			S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
			S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
			S_028434_STENCILOPVAL_BF(1));
}

/* pipe_context::set_stencil_ref hook; no-op when unchanged. */
static void si_set_stencil_ref(struct pipe_context *ctx,
			       const struct pipe_stencil_ref *state)
{
	struct si_context *sctx = (struct si_context *)ctx;

	if (memcmp(&sctx->stencil_ref.state, state, sizeof(*state)) == 0)
		return;

	sctx->stencil_ref.state = *state;
	si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
}


/*
 * DSA
 */

/* Translate PIPE_STENCIL_OP_* to the DB_STENCIL_CONTROL encoding. */
static uint32_t si_translate_stencil_op(int s_op)
{
	switch (s_op) {
	case PIPE_STENCIL_OP_KEEP:
		return V_02842C_STENCIL_KEEP;
	case PIPE_STENCIL_OP_ZERO:
		return V_02842C_STENCIL_ZERO;
	case PIPE_STENCIL_OP_REPLACE:
		return V_02842C_STENCIL_REPLACE_TEST;
	case
PIPE_STENCIL_OP_INCR: 946 return V_02842C_STENCIL_ADD_CLAMP; 947 case PIPE_STENCIL_OP_DECR: 948 return V_02842C_STENCIL_SUB_CLAMP; 949 case PIPE_STENCIL_OP_INCR_WRAP: 950 return V_02842C_STENCIL_ADD_WRAP; 951 case PIPE_STENCIL_OP_DECR_WRAP: 952 return V_02842C_STENCIL_SUB_WRAP; 953 case PIPE_STENCIL_OP_INVERT: 954 return V_02842C_STENCIL_INVERT; 955 default: 956 R600_ERR("Unknown stencil op %d", s_op); 957 assert(0); 958 break; 959 } 960 return 0; 961} 962 963static void *si_create_dsa_state(struct pipe_context *ctx, 964 const struct pipe_depth_stencil_alpha_state *state) 965{ 966 struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa); 967 struct si_pm4_state *pm4 = &dsa->pm4; 968 unsigned db_depth_control; 969 uint32_t db_stencil_control = 0; 970 971 if (!dsa) { 972 return NULL; 973 } 974 975 dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask; 976 dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask; 977 dsa->stencil_ref.writemask[0] = state->stencil[0].writemask; 978 dsa->stencil_ref.writemask[1] = state->stencil[1].writemask; 979 980 db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | 981 S_028800_Z_WRITE_ENABLE(state->depth.writemask) | 982 S_028800_ZFUNC(state->depth.func) | 983 S_028800_DEPTH_BOUNDS_ENABLE(state->depth.bounds_test); 984 985 /* stencil */ 986 if (state->stencil[0].enabled) { 987 db_depth_control |= S_028800_STENCIL_ENABLE(1); 988 db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func); 989 db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op)); 990 db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op)); 991 db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op)); 992 993 if (state->stencil[1].enabled) { 994 db_depth_control |= S_028800_BACKFACE_ENABLE(1); 995 db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func); 996 db_stencil_control |= 
S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op)); 997 db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op)); 998 db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op)); 999 } 1000 } 1001 1002 /* alpha */ 1003 if (state->alpha.enabled) { 1004 dsa->alpha_func = state->alpha.func; 1005 1006 si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 + 1007 SI_SGPR_ALPHA_REF * 4, fui(state->alpha.ref_value)); 1008 } else { 1009 dsa->alpha_func = PIPE_FUNC_ALWAYS; 1010 } 1011 1012 si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control); 1013 si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); 1014 if (state->depth.bounds_test) { 1015 si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min)); 1016 si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max)); 1017 } 1018 1019 return dsa; 1020} 1021 1022static void si_bind_dsa_state(struct pipe_context *ctx, void *state) 1023{ 1024 struct si_context *sctx = (struct si_context *)ctx; 1025 struct si_state_dsa *dsa = state; 1026 1027 if (!state) 1028 return; 1029 1030 si_pm4_bind_state(sctx, dsa, dsa); 1031 1032 if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part, 1033 sizeof(struct si_dsa_stencil_ref_part)) != 0) { 1034 sctx->stencil_ref.dsa_part = dsa->stencil_ref; 1035 si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom); 1036 } 1037 sctx->do_update_shaders = true; 1038} 1039 1040static void si_delete_dsa_state(struct pipe_context *ctx, void *state) 1041{ 1042 struct si_context *sctx = (struct si_context *)ctx; 1043 si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state); 1044} 1045 1046static void *si_create_db_flush_dsa(struct si_context *sctx) 1047{ 1048 struct pipe_depth_stencil_alpha_state dsa = {}; 1049 1050 return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa); 1051} 1052 1053/* DB RENDER STATE */ 1054 1055static 
void si_set_active_query_state(struct pipe_context *ctx, boolean enable) 1056{ 1057 struct si_context *sctx = (struct si_context*)ctx; 1058 1059 /* Pipeline stat & streamout queries. */ 1060 if (enable) { 1061 sctx->b.flags &= ~R600_CONTEXT_STOP_PIPELINE_STATS; 1062 sctx->b.flags |= R600_CONTEXT_START_PIPELINE_STATS; 1063 } else { 1064 sctx->b.flags &= ~R600_CONTEXT_START_PIPELINE_STATS; 1065 sctx->b.flags |= R600_CONTEXT_STOP_PIPELINE_STATS; 1066 } 1067 1068 /* Occlusion queries. */ 1069 if (sctx->occlusion_queries_disabled != !enable) { 1070 sctx->occlusion_queries_disabled = !enable; 1071 si_mark_atom_dirty(sctx, &sctx->db_render_state); 1072 } 1073} 1074 1075static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable) 1076{ 1077 struct si_context *sctx = (struct si_context*)ctx; 1078 1079 si_mark_atom_dirty(sctx, &sctx->db_render_state); 1080} 1081 1082static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state) 1083{ 1084 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 1085 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 1086 unsigned db_shader_control; 1087 1088 radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2); 1089 1090 /* DB_RENDER_CONTROL */ 1091 if (sctx->dbcb_depth_copy_enabled || 1092 sctx->dbcb_stencil_copy_enabled) { 1093 radeon_emit(cs, 1094 S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) | 1095 S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) | 1096 S_028000_COPY_CENTROID(1) | 1097 S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample)); 1098 } else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) { 1099 radeon_emit(cs, 1100 S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) | 1101 S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace)); 1102 } else { 1103 radeon_emit(cs, 1104 S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) | 1105 S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear)); 1106 } 1107 1108 /* DB_COUNT_CONTROL (occlusion 
queries) */ 1109 if (sctx->b.num_occlusion_queries > 0 && 1110 !sctx->occlusion_queries_disabled) { 1111 bool perfect = sctx->b.num_perfect_occlusion_queries > 0; 1112 1113 if (sctx->b.chip_class >= CIK) { 1114 radeon_emit(cs, 1115 S_028004_PERFECT_ZPASS_COUNTS(perfect) | 1116 S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) | 1117 S_028004_ZPASS_ENABLE(1) | 1118 S_028004_SLICE_EVEN_ENABLE(1) | 1119 S_028004_SLICE_ODD_ENABLE(1)); 1120 } else { 1121 radeon_emit(cs, 1122 S_028004_PERFECT_ZPASS_COUNTS(perfect) | 1123 S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples)); 1124 } 1125 } else { 1126 /* Disable occlusion queries. */ 1127 if (sctx->b.chip_class >= CIK) { 1128 radeon_emit(cs, 0); 1129 } else { 1130 radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1)); 1131 } 1132 } 1133 1134 /* DB_RENDER_OVERRIDE2 */ 1135 radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 1136 S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) | 1137 S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) | 1138 S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4)); 1139 1140 db_shader_control = S_02880C_ALPHA_TO_MASK_DISABLE(sctx->framebuffer.cb0_is_integer) | 1141 sctx->ps_db_shader_control; 1142 1143 /* Bug workaround for smoothing (overrasterization) on SI. */ 1144 if (sctx->b.chip_class == SI && sctx->smoothing_enabled) { 1145 db_shader_control &= C_02880C_Z_ORDER; 1146 db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z); 1147 } 1148 1149 /* Disable the gl_SampleMask fragment shader output if MSAA is disabled. 
*/ 1150 if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable)) 1151 db_shader_control &= C_02880C_MASK_EXPORT_ENABLE; 1152 1153 if (sctx->b.family == CHIP_STONEY && 1154 sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) 1155 db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1); 1156 1157 radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, 1158 db_shader_control); 1159} 1160 1161/* 1162 * format translation 1163 */ 1164static uint32_t si_translate_colorformat(enum pipe_format format) 1165{ 1166 const struct util_format_description *desc = util_format_description(format); 1167 1168#define HAS_SIZE(x,y,z,w) \ 1169 (desc->channel[0].size == (x) && desc->channel[1].size == (y) && \ 1170 desc->channel[2].size == (z) && desc->channel[3].size == (w)) 1171 1172 if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */ 1173 return V_028C70_COLOR_10_11_11; 1174 1175 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) 1176 return V_028C70_COLOR_INVALID; 1177 1178 /* hw cannot support mixed formats (except depth/stencil, since 1179 * stencil is not written to). 
*/ 1180 if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) 1181 return V_028C70_COLOR_INVALID; 1182 1183 switch (desc->nr_channels) { 1184 case 1: 1185 switch (desc->channel[0].size) { 1186 case 8: 1187 return V_028C70_COLOR_8; 1188 case 16: 1189 return V_028C70_COLOR_16; 1190 case 32: 1191 return V_028C70_COLOR_32; 1192 } 1193 break; 1194 case 2: 1195 if (desc->channel[0].size == desc->channel[1].size) { 1196 switch (desc->channel[0].size) { 1197 case 8: 1198 return V_028C70_COLOR_8_8; 1199 case 16: 1200 return V_028C70_COLOR_16_16; 1201 case 32: 1202 return V_028C70_COLOR_32_32; 1203 } 1204 } else if (HAS_SIZE(8,24,0,0)) { 1205 return V_028C70_COLOR_24_8; 1206 } else if (HAS_SIZE(24,8,0,0)) { 1207 return V_028C70_COLOR_8_24; 1208 } 1209 break; 1210 case 3: 1211 if (HAS_SIZE(5,6,5,0)) { 1212 return V_028C70_COLOR_5_6_5; 1213 } else if (HAS_SIZE(32,8,24,0)) { 1214 return V_028C70_COLOR_X24_8_32_FLOAT; 1215 } 1216 break; 1217 case 4: 1218 if (desc->channel[0].size == desc->channel[1].size && 1219 desc->channel[0].size == desc->channel[2].size && 1220 desc->channel[0].size == desc->channel[3].size) { 1221 switch (desc->channel[0].size) { 1222 case 4: 1223 return V_028C70_COLOR_4_4_4_4; 1224 case 8: 1225 return V_028C70_COLOR_8_8_8_8; 1226 case 16: 1227 return V_028C70_COLOR_16_16_16_16; 1228 case 32: 1229 return V_028C70_COLOR_32_32_32_32; 1230 } 1231 } else if (HAS_SIZE(5,5,5,1)) { 1232 return V_028C70_COLOR_1_5_5_5; 1233 } else if (HAS_SIZE(10,10,10,2)) { 1234 return V_028C70_COLOR_2_10_10_10; 1235 } 1236 break; 1237 } 1238 return V_028C70_COLOR_INVALID; 1239} 1240 1241static uint32_t si_colorformat_endian_swap(uint32_t colorformat) 1242{ 1243 if (SI_BIG_ENDIAN) { 1244 switch(colorformat) { 1245 /* 8-bit buffers. */ 1246 case V_028C70_COLOR_8: 1247 return V_028C70_ENDIAN_NONE; 1248 1249 /* 16-bit buffers. 
*/ 1250 case V_028C70_COLOR_5_6_5: 1251 case V_028C70_COLOR_1_5_5_5: 1252 case V_028C70_COLOR_4_4_4_4: 1253 case V_028C70_COLOR_16: 1254 case V_028C70_COLOR_8_8: 1255 return V_028C70_ENDIAN_8IN16; 1256 1257 /* 32-bit buffers. */ 1258 case V_028C70_COLOR_8_8_8_8: 1259 case V_028C70_COLOR_2_10_10_10: 1260 case V_028C70_COLOR_8_24: 1261 case V_028C70_COLOR_24_8: 1262 case V_028C70_COLOR_16_16: 1263 return V_028C70_ENDIAN_8IN32; 1264 1265 /* 64-bit buffers. */ 1266 case V_028C70_COLOR_16_16_16_16: 1267 return V_028C70_ENDIAN_8IN16; 1268 1269 case V_028C70_COLOR_32_32: 1270 return V_028C70_ENDIAN_8IN32; 1271 1272 /* 128-bit buffers. */ 1273 case V_028C70_COLOR_32_32_32_32: 1274 return V_028C70_ENDIAN_8IN32; 1275 default: 1276 return V_028C70_ENDIAN_NONE; /* Unsupported. */ 1277 } 1278 } else { 1279 return V_028C70_ENDIAN_NONE; 1280 } 1281} 1282 1283static uint32_t si_translate_dbformat(enum pipe_format format) 1284{ 1285 switch (format) { 1286 case PIPE_FORMAT_Z16_UNORM: 1287 return V_028040_Z_16; 1288 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1289 case PIPE_FORMAT_X8Z24_UNORM: 1290 case PIPE_FORMAT_Z24X8_UNORM: 1291 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1292 return V_028040_Z_24; /* deprecated on SI */ 1293 case PIPE_FORMAT_Z32_FLOAT: 1294 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1295 return V_028040_Z_32_FLOAT; 1296 default: 1297 return V_028040_Z_INVALID; 1298 } 1299} 1300 1301/* 1302 * Texture translation 1303 */ 1304 1305static uint32_t si_translate_texformat(struct pipe_screen *screen, 1306 enum pipe_format format, 1307 const struct util_format_description *desc, 1308 int first_non_void) 1309{ 1310 struct si_screen *sscreen = (struct si_screen*)screen; 1311 bool enable_compressed_formats = (sscreen->b.info.drm_major == 2 && 1312 sscreen->b.info.drm_minor >= 31) || 1313 sscreen->b.info.drm_major == 3; 1314 bool uniform = true; 1315 int i; 1316 1317 /* Colorspace (return non-RGB formats directly). 
*/ 1318 switch (desc->colorspace) { 1319 /* Depth stencil formats */ 1320 case UTIL_FORMAT_COLORSPACE_ZS: 1321 switch (format) { 1322 case PIPE_FORMAT_Z16_UNORM: 1323 return V_008F14_IMG_DATA_FORMAT_16; 1324 case PIPE_FORMAT_X24S8_UINT: 1325 case PIPE_FORMAT_Z24X8_UNORM: 1326 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1327 return V_008F14_IMG_DATA_FORMAT_8_24; 1328 case PIPE_FORMAT_X8Z24_UNORM: 1329 case PIPE_FORMAT_S8X24_UINT: 1330 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1331 return V_008F14_IMG_DATA_FORMAT_24_8; 1332 case PIPE_FORMAT_S8_UINT: 1333 return V_008F14_IMG_DATA_FORMAT_8; 1334 case PIPE_FORMAT_Z32_FLOAT: 1335 return V_008F14_IMG_DATA_FORMAT_32; 1336 case PIPE_FORMAT_X32_S8X24_UINT: 1337 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1338 return V_008F14_IMG_DATA_FORMAT_X24_8_32; 1339 default: 1340 goto out_unknown; 1341 } 1342 1343 case UTIL_FORMAT_COLORSPACE_YUV: 1344 goto out_unknown; /* TODO */ 1345 1346 case UTIL_FORMAT_COLORSPACE_SRGB: 1347 if (desc->nr_channels != 4 && desc->nr_channels != 1) 1348 goto out_unknown; 1349 break; 1350 1351 default: 1352 break; 1353 } 1354 1355 if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { 1356 if (!enable_compressed_formats) 1357 goto out_unknown; 1358 1359 switch (format) { 1360 case PIPE_FORMAT_RGTC1_SNORM: 1361 case PIPE_FORMAT_LATC1_SNORM: 1362 case PIPE_FORMAT_RGTC1_UNORM: 1363 case PIPE_FORMAT_LATC1_UNORM: 1364 return V_008F14_IMG_DATA_FORMAT_BC4; 1365 case PIPE_FORMAT_RGTC2_SNORM: 1366 case PIPE_FORMAT_LATC2_SNORM: 1367 case PIPE_FORMAT_RGTC2_UNORM: 1368 case PIPE_FORMAT_LATC2_UNORM: 1369 return V_008F14_IMG_DATA_FORMAT_BC5; 1370 default: 1371 goto out_unknown; 1372 } 1373 } 1374 1375 if (desc->layout == UTIL_FORMAT_LAYOUT_ETC && 1376 sscreen->b.family == CHIP_STONEY) { 1377 switch (format) { 1378 case PIPE_FORMAT_ETC1_RGB8: 1379 case PIPE_FORMAT_ETC2_RGB8: 1380 case PIPE_FORMAT_ETC2_SRGB8: 1381 return V_008F14_IMG_DATA_FORMAT_ETC2_RGB; 1382 case PIPE_FORMAT_ETC2_RGB8A1: 1383 case PIPE_FORMAT_ETC2_SRGB8A1: 1384 return 
V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1; 1385 case PIPE_FORMAT_ETC2_RGBA8: 1386 case PIPE_FORMAT_ETC2_SRGBA8: 1387 return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA; 1388 case PIPE_FORMAT_ETC2_R11_UNORM: 1389 case PIPE_FORMAT_ETC2_R11_SNORM: 1390 return V_008F14_IMG_DATA_FORMAT_ETC2_R; 1391 case PIPE_FORMAT_ETC2_RG11_UNORM: 1392 case PIPE_FORMAT_ETC2_RG11_SNORM: 1393 return V_008F14_IMG_DATA_FORMAT_ETC2_RG; 1394 default: 1395 goto out_unknown; 1396 } 1397 } 1398 1399 if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { 1400 if (!enable_compressed_formats) 1401 goto out_unknown; 1402 1403 switch (format) { 1404 case PIPE_FORMAT_BPTC_RGBA_UNORM: 1405 case PIPE_FORMAT_BPTC_SRGBA: 1406 return V_008F14_IMG_DATA_FORMAT_BC7; 1407 case PIPE_FORMAT_BPTC_RGB_FLOAT: 1408 case PIPE_FORMAT_BPTC_RGB_UFLOAT: 1409 return V_008F14_IMG_DATA_FORMAT_BC6; 1410 default: 1411 goto out_unknown; 1412 } 1413 } 1414 1415 if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 1416 switch (format) { 1417 case PIPE_FORMAT_R8G8_B8G8_UNORM: 1418 case PIPE_FORMAT_G8R8_B8R8_UNORM: 1419 return V_008F14_IMG_DATA_FORMAT_GB_GR; 1420 case PIPE_FORMAT_G8R8_G8B8_UNORM: 1421 case PIPE_FORMAT_R8G8_R8B8_UNORM: 1422 return V_008F14_IMG_DATA_FORMAT_BG_RG; 1423 default: 1424 goto out_unknown; 1425 } 1426 } 1427 1428 if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { 1429 if (!enable_compressed_formats) 1430 goto out_unknown; 1431 1432 if (!util_format_s3tc_enabled) { 1433 goto out_unknown; 1434 } 1435 1436 switch (format) { 1437 case PIPE_FORMAT_DXT1_RGB: 1438 case PIPE_FORMAT_DXT1_RGBA: 1439 case PIPE_FORMAT_DXT1_SRGB: 1440 case PIPE_FORMAT_DXT1_SRGBA: 1441 return V_008F14_IMG_DATA_FORMAT_BC1; 1442 case PIPE_FORMAT_DXT3_RGBA: 1443 case PIPE_FORMAT_DXT3_SRGBA: 1444 return V_008F14_IMG_DATA_FORMAT_BC2; 1445 case PIPE_FORMAT_DXT5_RGBA: 1446 case PIPE_FORMAT_DXT5_SRGBA: 1447 return V_008F14_IMG_DATA_FORMAT_BC3; 1448 default: 1449 goto out_unknown; 1450 } 1451 } 1452 1453 if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) { 1454 return 
V_008F14_IMG_DATA_FORMAT_5_9_9_9; 1455 } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) { 1456 return V_008F14_IMG_DATA_FORMAT_10_11_11; 1457 } 1458 1459 /* R8G8Bx_SNORM - TODO CxV8U8 */ 1460 1461 /* hw cannot support mixed formats (except depth/stencil, since only 1462 * depth is read).*/ 1463 if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) 1464 goto out_unknown; 1465 1466 /* See whether the components are of the same size. */ 1467 for (i = 1; i < desc->nr_channels; i++) { 1468 uniform = uniform && desc->channel[0].size == desc->channel[i].size; 1469 } 1470 1471 /* Non-uniform formats. */ 1472 if (!uniform) { 1473 switch(desc->nr_channels) { 1474 case 3: 1475 if (desc->channel[0].size == 5 && 1476 desc->channel[1].size == 6 && 1477 desc->channel[2].size == 5) { 1478 return V_008F14_IMG_DATA_FORMAT_5_6_5; 1479 } 1480 goto out_unknown; 1481 case 4: 1482 if (desc->channel[0].size == 5 && 1483 desc->channel[1].size == 5 && 1484 desc->channel[2].size == 5 && 1485 desc->channel[3].size == 1) { 1486 return V_008F14_IMG_DATA_FORMAT_1_5_5_5; 1487 } 1488 if (desc->channel[0].size == 10 && 1489 desc->channel[1].size == 10 && 1490 desc->channel[2].size == 10 && 1491 desc->channel[3].size == 2) { 1492 return V_008F14_IMG_DATA_FORMAT_2_10_10_10; 1493 } 1494 goto out_unknown; 1495 } 1496 goto out_unknown; 1497 } 1498 1499 if (first_non_void < 0 || first_non_void > 3) 1500 goto out_unknown; 1501 1502 /* uniform formats */ 1503 switch (desc->channel[first_non_void].size) { 1504 case 4: 1505 switch (desc->nr_channels) { 1506#if 0 /* Not supported for render targets */ 1507 case 2: 1508 return V_008F14_IMG_DATA_FORMAT_4_4; 1509#endif 1510 case 4: 1511 return V_008F14_IMG_DATA_FORMAT_4_4_4_4; 1512 } 1513 break; 1514 case 8: 1515 switch (desc->nr_channels) { 1516 case 1: 1517 return V_008F14_IMG_DATA_FORMAT_8; 1518 case 2: 1519 return V_008F14_IMG_DATA_FORMAT_8_8; 1520 case 4: 1521 return V_008F14_IMG_DATA_FORMAT_8_8_8_8; 1522 } 1523 break; 1524 case 16: 1525 
switch (desc->nr_channels) { 1526 case 1: 1527 return V_008F14_IMG_DATA_FORMAT_16; 1528 case 2: 1529 return V_008F14_IMG_DATA_FORMAT_16_16; 1530 case 4: 1531 return V_008F14_IMG_DATA_FORMAT_16_16_16_16; 1532 } 1533 break; 1534 case 32: 1535 switch (desc->nr_channels) { 1536 case 1: 1537 return V_008F14_IMG_DATA_FORMAT_32; 1538 case 2: 1539 return V_008F14_IMG_DATA_FORMAT_32_32; 1540#if 0 /* Not supported for render targets */ 1541 case 3: 1542 return V_008F14_IMG_DATA_FORMAT_32_32_32; 1543#endif 1544 case 4: 1545 return V_008F14_IMG_DATA_FORMAT_32_32_32_32; 1546 } 1547 } 1548 1549out_unknown: 1550 /* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */ 1551 return ~0; 1552} 1553 1554static unsigned si_tex_wrap(unsigned wrap) 1555{ 1556 switch (wrap) { 1557 default: 1558 case PIPE_TEX_WRAP_REPEAT: 1559 return V_008F30_SQ_TEX_WRAP; 1560 case PIPE_TEX_WRAP_CLAMP: 1561 return V_008F30_SQ_TEX_CLAMP_HALF_BORDER; 1562 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 1563 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL; 1564 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 1565 return V_008F30_SQ_TEX_CLAMP_BORDER; 1566 case PIPE_TEX_WRAP_MIRROR_REPEAT: 1567 return V_008F30_SQ_TEX_MIRROR; 1568 case PIPE_TEX_WRAP_MIRROR_CLAMP: 1569 return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER; 1570 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 1571 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; 1572 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 1573 return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER; 1574 } 1575} 1576 1577static unsigned si_tex_mipfilter(unsigned filter) 1578{ 1579 switch (filter) { 1580 case PIPE_TEX_MIPFILTER_NEAREST: 1581 return V_008F38_SQ_TEX_Z_FILTER_POINT; 1582 case PIPE_TEX_MIPFILTER_LINEAR: 1583 return V_008F38_SQ_TEX_Z_FILTER_LINEAR; 1584 default: 1585 case PIPE_TEX_MIPFILTER_NONE: 1586 return V_008F38_SQ_TEX_Z_FILTER_NONE; 1587 } 1588} 1589 1590static unsigned si_tex_compare(unsigned compare) 1591{ 1592 switch (compare) { 1593 default: 1594 case PIPE_FUNC_NEVER: 1595 return 
V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER; 1596 case PIPE_FUNC_LESS: 1597 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS; 1598 case PIPE_FUNC_EQUAL: 1599 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL; 1600 case PIPE_FUNC_LEQUAL: 1601 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; 1602 case PIPE_FUNC_GREATER: 1603 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER; 1604 case PIPE_FUNC_NOTEQUAL: 1605 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; 1606 case PIPE_FUNC_GEQUAL: 1607 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; 1608 case PIPE_FUNC_ALWAYS: 1609 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS; 1610 } 1611} 1612 1613static unsigned si_tex_dim(unsigned res_target, unsigned view_target, 1614 unsigned nr_samples) 1615{ 1616 if (view_target == PIPE_TEXTURE_CUBE || 1617 view_target == PIPE_TEXTURE_CUBE_ARRAY) 1618 res_target = view_target; 1619 1620 switch (res_target) { 1621 default: 1622 case PIPE_TEXTURE_1D: 1623 return V_008F1C_SQ_RSRC_IMG_1D; 1624 case PIPE_TEXTURE_1D_ARRAY: 1625 return V_008F1C_SQ_RSRC_IMG_1D_ARRAY; 1626 case PIPE_TEXTURE_2D: 1627 case PIPE_TEXTURE_RECT: 1628 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA : 1629 V_008F1C_SQ_RSRC_IMG_2D; 1630 case PIPE_TEXTURE_2D_ARRAY: 1631 return nr_samples > 1 ? 
V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : 1632 V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 1633 case PIPE_TEXTURE_3D: 1634 return V_008F1C_SQ_RSRC_IMG_3D; 1635 case PIPE_TEXTURE_CUBE: 1636 case PIPE_TEXTURE_CUBE_ARRAY: 1637 return V_008F1C_SQ_RSRC_IMG_CUBE; 1638 } 1639} 1640 1641/* 1642 * Format support testing 1643 */ 1644 1645static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) 1646{ 1647 return si_translate_texformat(screen, format, util_format_description(format), 1648 util_format_get_first_non_void_channel(format)) != ~0U; 1649} 1650 1651static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen, 1652 const struct util_format_description *desc, 1653 int first_non_void) 1654{ 1655 unsigned type; 1656 int i; 1657 1658 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 1659 return V_008F0C_BUF_DATA_FORMAT_10_11_11; 1660 1661 assert(first_non_void >= 0); 1662 type = desc->channel[first_non_void].type; 1663 1664 if (type == UTIL_FORMAT_TYPE_FIXED) 1665 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1666 1667 if (desc->nr_channels == 4 && 1668 desc->channel[0].size == 10 && 1669 desc->channel[1].size == 10 && 1670 desc->channel[2].size == 10 && 1671 desc->channel[3].size == 2) 1672 return V_008F0C_BUF_DATA_FORMAT_2_10_10_10; 1673 1674 /* See whether the components are of the same size. 
*/ 1675 for (i = 0; i < desc->nr_channels; i++) { 1676 if (desc->channel[first_non_void].size != desc->channel[i].size) 1677 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1678 } 1679 1680 switch (desc->channel[first_non_void].size) { 1681 case 8: 1682 switch (desc->nr_channels) { 1683 case 1: 1684 return V_008F0C_BUF_DATA_FORMAT_8; 1685 case 2: 1686 return V_008F0C_BUF_DATA_FORMAT_8_8; 1687 case 3: 1688 case 4: 1689 return V_008F0C_BUF_DATA_FORMAT_8_8_8_8; 1690 } 1691 break; 1692 case 16: 1693 switch (desc->nr_channels) { 1694 case 1: 1695 return V_008F0C_BUF_DATA_FORMAT_16; 1696 case 2: 1697 return V_008F0C_BUF_DATA_FORMAT_16_16; 1698 case 3: 1699 case 4: 1700 return V_008F0C_BUF_DATA_FORMAT_16_16_16_16; 1701 } 1702 break; 1703 case 32: 1704 /* From the Southern Islands ISA documentation about MTBUF: 1705 * 'Memory reads of data in memory that is 32 or 64 bits do not 1706 * undergo any format conversion.' 1707 */ 1708 if (type != UTIL_FORMAT_TYPE_FLOAT && 1709 !desc->channel[first_non_void].pure_integer) 1710 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1711 1712 switch (desc->nr_channels) { 1713 case 1: 1714 return V_008F0C_BUF_DATA_FORMAT_32; 1715 case 2: 1716 return V_008F0C_BUF_DATA_FORMAT_32_32; 1717 case 3: 1718 return V_008F0C_BUF_DATA_FORMAT_32_32_32; 1719 case 4: 1720 return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 1721 } 1722 break; 1723 } 1724 1725 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1726} 1727 1728static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen, 1729 const struct util_format_description *desc, 1730 int first_non_void) 1731{ 1732 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 1733 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 1734 1735 assert(first_non_void >= 0); 1736 1737 switch (desc->channel[first_non_void].type) { 1738 case UTIL_FORMAT_TYPE_SIGNED: 1739 if (desc->channel[first_non_void].normalized) 1740 return V_008F0C_BUF_NUM_FORMAT_SNORM; 1741 else if (desc->channel[first_non_void].pure_integer) 1742 return 
V_008F0C_BUF_NUM_FORMAT_SINT; 1743 else 1744 return V_008F0C_BUF_NUM_FORMAT_SSCALED; 1745 break; 1746 case UTIL_FORMAT_TYPE_UNSIGNED: 1747 if (desc->channel[first_non_void].normalized) 1748 return V_008F0C_BUF_NUM_FORMAT_UNORM; 1749 else if (desc->channel[first_non_void].pure_integer) 1750 return V_008F0C_BUF_NUM_FORMAT_UINT; 1751 else 1752 return V_008F0C_BUF_NUM_FORMAT_USCALED; 1753 break; 1754 case UTIL_FORMAT_TYPE_FLOAT: 1755 default: 1756 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 1757 } 1758} 1759 1760static bool si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format) 1761{ 1762 const struct util_format_description *desc; 1763 int first_non_void; 1764 unsigned data_format; 1765 1766 desc = util_format_description(format); 1767 first_non_void = util_format_get_first_non_void_channel(format); 1768 data_format = si_translate_buffer_dataformat(screen, desc, first_non_void); 1769 return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID; 1770} 1771 1772static bool si_is_colorbuffer_format_supported(enum pipe_format format) 1773{ 1774 return si_translate_colorformat(format) != V_028C70_COLOR_INVALID && 1775 r600_translate_colorswap(format, false) != ~0U; 1776} 1777 1778static bool si_is_zs_format_supported(enum pipe_format format) 1779{ 1780 return si_translate_dbformat(format) != V_028040_Z_INVALID; 1781} 1782 1783static boolean si_is_format_supported(struct pipe_screen *screen, 1784 enum pipe_format format, 1785 enum pipe_texture_target target, 1786 unsigned sample_count, 1787 unsigned usage) 1788{ 1789 unsigned retval = 0; 1790 1791 if (target >= PIPE_MAX_TEXTURE_TYPES) { 1792 R600_ERR("r600: unsupported texture type %d\n", target); 1793 return false; 1794 } 1795 1796 if (!util_format_is_supported(format, usage)) 1797 return false; 1798 1799 if (sample_count > 1) { 1800 if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE)) 1801 return false; 1802 1803 if (usage & PIPE_BIND_SHADER_IMAGE) 1804 return false; 1805 1806 switch 
(sample_count) { 1807 case 2: 1808 case 4: 1809 case 8: 1810 break; 1811 case 16: 1812 if (format == PIPE_FORMAT_NONE) 1813 return true; 1814 else 1815 return false; 1816 default: 1817 return false; 1818 } 1819 } 1820 1821 if (usage & (PIPE_BIND_SAMPLER_VIEW | 1822 PIPE_BIND_SHADER_IMAGE)) { 1823 if (target == PIPE_BUFFER) { 1824 if (si_is_vertex_format_supported(screen, format)) 1825 retval |= usage & (PIPE_BIND_SAMPLER_VIEW | 1826 PIPE_BIND_SHADER_IMAGE); 1827 } else { 1828 if (si_is_sampler_format_supported(screen, format)) 1829 retval |= usage & (PIPE_BIND_SAMPLER_VIEW | 1830 PIPE_BIND_SHADER_IMAGE); 1831 } 1832 } 1833 1834 if ((usage & (PIPE_BIND_RENDER_TARGET | 1835 PIPE_BIND_DISPLAY_TARGET | 1836 PIPE_BIND_SCANOUT | 1837 PIPE_BIND_SHARED | 1838 PIPE_BIND_BLENDABLE)) && 1839 si_is_colorbuffer_format_supported(format)) { 1840 retval |= usage & 1841 (PIPE_BIND_RENDER_TARGET | 1842 PIPE_BIND_DISPLAY_TARGET | 1843 PIPE_BIND_SCANOUT | 1844 PIPE_BIND_SHARED); 1845 if (!util_format_is_pure_integer(format) && 1846 !util_format_is_depth_or_stencil(format)) 1847 retval |= usage & PIPE_BIND_BLENDABLE; 1848 } 1849 1850 if ((usage & PIPE_BIND_DEPTH_STENCIL) && 1851 si_is_zs_format_supported(format)) { 1852 retval |= PIPE_BIND_DEPTH_STENCIL; 1853 } 1854 1855 if ((usage & PIPE_BIND_VERTEX_BUFFER) && 1856 si_is_vertex_format_supported(screen, format)) { 1857 retval |= PIPE_BIND_VERTEX_BUFFER; 1858 } 1859 1860 if (usage & PIPE_BIND_TRANSFER_READ) 1861 retval |= PIPE_BIND_TRANSFER_READ; 1862 if (usage & PIPE_BIND_TRANSFER_WRITE) 1863 retval |= PIPE_BIND_TRANSFER_WRITE; 1864 1865 if ((usage & PIPE_BIND_LINEAR) && 1866 !util_format_is_compressed(format) && 1867 !(usage & PIPE_BIND_DEPTH_STENCIL)) 1868 retval |= PIPE_BIND_LINEAR; 1869 1870 return retval == usage; 1871} 1872 1873/* 1874 * framebuffer handling 1875 */ 1876 1877static void si_choose_spi_color_formats(struct r600_surface *surf, 1878 unsigned format, unsigned swap, 1879 unsigned ntype, bool is_depth) 1880{ 1881 /* 
Alpha is needed for alpha-to-coverage.
	 * Blending may be with or without alpha.
	 */
	unsigned normal = 0; /* most optimal, may not support blending or export alpha */
	unsigned alpha = 0; /* exports alpha, but may not support blending */
	unsigned blend = 0; /* supports blending, but may not export alpha */
	unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */

	/* Choose the SPI color formats. These are required values for Stoney/RB+.
	 * Other chips have multiple choices, though they are not necessarily better.
	 */
	switch (format) {
	case V_028C70_COLOR_5_6_5:
	case V_028C70_COLOR_1_5_5_5:
	case V_028C70_COLOR_5_5_5_1:
	case V_028C70_COLOR_4_4_4_4:
	case V_028C70_COLOR_10_11_11:
	case V_028C70_COLOR_11_11_10:
	case V_028C70_COLOR_8:
	case V_028C70_COLOR_8_8:
	case V_028C70_COLOR_8_8_8_8:
	case V_028C70_COLOR_10_10_10_2:
	case V_028C70_COLOR_2_10_10_10:
		/* <= 10 bits per channel: one 16-bit export class covers all four cases. */
		if (ntype == V_028C70_NUMBER_UINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
		else if (ntype == V_028C70_NUMBER_SINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
		else
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
		break;

	case V_028C70_COLOR_16:
	case V_028C70_COLOR_16_16:
	case V_028C70_COLOR_16_16_16_16:
		if (ntype == V_028C70_NUMBER_UNORM ||
		    ntype == V_028C70_NUMBER_SNORM) {
			/* UNORM16 and SNORM16 don't support blending */
			if (ntype == V_028C70_NUMBER_UNORM)
				normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR;
			else
				normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR;

			/* Use 32 bits per channel for blending. */
			if (format == V_028C70_COLOR_16) {
				if (swap == V_028C70_SWAP_STD) { /* R */
					blend = V_028714_SPI_SHADER_32_R;
					blend_alpha = V_028714_SPI_SHADER_32_AR;
				} else if (swap == V_028C70_SWAP_ALT_REV) /* A */
					blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
				else
					assert(0);
			} else if (format == V_028C70_COLOR_16_16) {
				if (swap == V_028C70_SWAP_STD) { /* RG */
					blend = V_028714_SPI_SHADER_32_GR;
					blend_alpha = V_028714_SPI_SHADER_32_ABGR;
				} else if (swap == V_028C70_SWAP_ALT) /* RA */
					blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
				else
					assert(0);
			} else /* 16_16_16_16 */
				blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
		} else if (ntype == V_028C70_NUMBER_UINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
		else if (ntype == V_028C70_NUMBER_SINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
		else if (ntype == V_028C70_NUMBER_FLOAT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
		else
			assert(0);
		break;

	case V_028C70_COLOR_32:
		if (swap == V_028C70_SWAP_STD) { /* R */
			blend = normal = V_028714_SPI_SHADER_32_R;
			alpha = blend_alpha = V_028714_SPI_SHADER_32_AR;
		} else if (swap == V_028C70_SWAP_ALT_REV) /* A */
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
		else
			assert(0);
		break;

	case V_028C70_COLOR_32_32:
		if (swap == V_028C70_SWAP_STD) { /* RG */
			blend = normal = V_028714_SPI_SHADER_32_GR;
			alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
		} else if (swap == V_028C70_SWAP_ALT) /* RA */
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
		else
			assert(0);
		break;

	case V_028C70_COLOR_32_32_32_32:
	case V_028C70_COLOR_8_24:
	case V_028C70_COLOR_24_8:
	case V_028C70_COLOR_X24_8_32_FLOAT:
		alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
		break;

	default:
		assert(0);
		return;
	}

	/* The DB->CB copy needs 32_ABGR. */
	if (is_depth)
		alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;

	surf->spi_shader_col_format = normal;
	surf->spi_shader_col_format_alpha = alpha;
	surf->spi_shader_col_format_blend = blend;
	surf->spi_shader_col_format_blend_alpha = blend_alpha;
}

/* Compute the CB_COLOR* register values and the SPI color export formats
 * for a color surface, and cache them in the r600_surface. Done once per
 * surface (guarded by surf->color_initialized in the caller).
 */
static void si_initialize_color_surface(struct si_context *sctx,
					struct r600_surface *surf)
{
	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
	unsigned color_info, color_attrib, color_view;
	unsigned format, swap, ntype, endian;
	const struct util_format_description *desc;
	int i;
	unsigned blend_clamp = 0, blend_bypass = 0;

	color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) |
		     S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer);

	/* Find the first non-VOID channel; it determines the number type below. */
	desc = util_format_description(surf->base.format);
	for (i = 0; i < 4; i++) {
		if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
			break;
		}
	}
	if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
		ntype = V_028C70_NUMBER_FLOAT;
	} else {
		ntype = V_028C70_NUMBER_UNORM;
		if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
			ntype = V_028C70_NUMBER_SRGB;
		else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
			if (desc->channel[i].pure_integer) {
				ntype = V_028C70_NUMBER_SINT;
			} else {
				assert(desc->channel[i].normalized);
				ntype = V_028C70_NUMBER_SNORM;
			}
		} else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
			if (desc->channel[i].pure_integer) {
				ntype = V_028C70_NUMBER_UINT;
			} else {
				assert(desc->channel[i].normalized);
				ntype = V_028C70_NUMBER_UNORM;
			}
		}
	}

	format = si_translate_colorformat(surf->base.format);
	if (format ==
V_028C70_COLOR_INVALID) {
		R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
	}
	assert(format != V_028C70_COLOR_INVALID);
	swap = r600_translate_colorswap(surf->base.format, false);
	endian = si_colorformat_endian_swap(format);

	/* blend clamp should be set for all NORM/SRGB types */
	if (ntype == V_028C70_NUMBER_UNORM ||
	    ntype == V_028C70_NUMBER_SNORM ||
	    ntype == V_028C70_NUMBER_SRGB)
		blend_clamp = 1;

	/* set blend bypass according to docs if SINT/UINT or
	   8/24 COLOR variants */
	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
		blend_clamp = 0;
		blend_bypass = 1;
	}

	/* Remember 8-bit pure-integer formats per surface; they are aggregated
	 * into sctx->framebuffer.color_is_int8 by si_set_framebuffer_state(). */
	if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
	    (format == V_028C70_COLOR_8 ||
	     format == V_028C70_COLOR_8_8 ||
	     format == V_028C70_COLOR_8_8_8_8))
		surf->color_is_int8 = true;

	color_info = S_028C70_FORMAT(format) |
		S_028C70_COMP_SWAP(swap) |
		S_028C70_BLEND_CLAMP(blend_clamp) |
		S_028C70_BLEND_BYPASS(blend_bypass) |
		S_028C70_SIMPLE_FLOAT(1) |
		S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
				    ntype != V_028C70_NUMBER_SNORM &&
				    ntype != V_028C70_NUMBER_SRGB &&
				    format != V_028C70_COLOR_8_24 &&
				    format != V_028C70_COLOR_24_8) |
		S_028C70_NUMBER_TYPE(ntype) |
		S_028C70_ENDIAN(endian);

	/* Intensity is implemented as Red, so treat it that way. */
	color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1 ||
						  util_format_is_intensity(surf->base.format));

	if (rtex->resource.b.b.nr_samples > 1) {
		unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);

		color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
				S_028C74_NUM_FRAGMENTS(log_samples);

		if (rtex->fmask.size) {
			color_info |= S_028C70_COMPRESSION(1);
			unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height);

			if (sctx->b.chip_class == SI) {
				/* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */
				color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
			}
		}
	}

	surf->cb_color_view = color_view;
	surf->cb_color_info = color_info;
	surf->cb_color_attrib = color_attrib;

	if (sctx->b.chip_class >= VI) {
		/* DCC block-size settings: smaller uncompressed blocks are used
		 * for multisampled surfaces with small bytes-per-element. */
		unsigned max_uncompressed_block_size = 2;

		if (rtex->surface.nsamples > 1) {
			if (rtex->surface.bpe == 1)
				max_uncompressed_block_size = 0;
			else if (rtex->surface.bpe == 2)
				max_uncompressed_block_size = 1;
		}

		surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
		                       S_028C78_INDEPENDENT_64B_BLOCKS(1);
	}

	/* This must be set for fast clear to work without FMASK. */
	if (!rtex->fmask.size && sctx->b.chip_class == SI) {
		unsigned bankh = util_logbase2(rtex->surface.bankh);
		surf->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
	}

	/* Determine pixel shader export format */
	si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth);

	surf->color_initialized = true;
}

/* Compute the DB_* register values for a depth/stencil surface and cache
 * them in the r600_surface (guarded by surf->depth_initialized in the caller).
 */
static void si_init_depth_surface(struct si_context *sctx,
				  struct r600_surface *surf)
{
	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
	unsigned level = surf->base.u.tex.level;
	struct radeon_surf_level *levelinfo = &rtex->surface.level[level];
	unsigned format;
	uint32_t z_info, s_info, db_depth_info;
	uint64_t z_offs, s_offs;
	uint32_t db_htile_data_base, db_htile_surface;

	format = si_translate_dbformat(rtex->resource.b.b.format);

	if (format == V_028040_Z_INVALID) {
		R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format);
	}
	assert(format != V_028040_Z_INVALID);

	/* Depth and stencil share the base address; each adds its own
	 * per-level offset. */
	s_offs = z_offs = rtex->resource.gpu_address;
	z_offs += rtex->surface.level[level].offset;
	s_offs += rtex->surface.stencil_level[level].offset;

	db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);

	z_info = S_028040_FORMAT(format);
	if (rtex->resource.b.b.nr_samples > 1) {
		z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples));
	}

	if (rtex->surface.flags & RADEON_SURF_SBUFFER)
		s_info = S_028044_FORMAT(V_028044_STENCIL_8);
	else
		s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);

	if (sctx->b.chip_class >= CIK) {
		/* CIK+: tiling parameters come from the queried tile mode arrays. */
		struct radeon_info *info = &sctx->screen->b.info;
		unsigned index = rtex->surface.tiling_index[level];
		unsigned stencil_index = rtex->surface.stencil_tiling_index[level];
		unsigned macro_index = rtex->surface.macro_tile_index;
		unsigned tile_mode = info->si_tile_mode_array[index];
		unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
		unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];

		db_depth_info |=
			S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
			S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
			S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
			S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
			S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
			S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
		z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
		s_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
	} else {
		/* SI: a single tile mode index per surface selects the tiling. */
		unsigned tile_mode_index = si_tile_mode_index(rtex, level, false);
		z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
		tile_mode_index = si_tile_mode_index(rtex, level, true);
		s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
	}

	/* HiZ aka depth buffer htile */
	/* use htile only for first level */
	if (rtex->htile_buffer && !level) {
		z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
			  S_028040_ALLOW_EXPCLEAR(1);

		if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
			/* Workaround: For a not yet understood reason, the
			 * combination of MSAA, fast stencil clear and stencil
			 * decompress messes with subsequent stencil buffer
			 * uses. Problem was reproduced on Verde, Bonaire,
			 * Tonga, and Carrizo.
			 *
			 * Disabling EXPCLEAR works around the problem.
			 *
			 * Check piglit's arb_texture_multisample-stencil-clear
			 * test if you want to try changing this.
			 */
			if (rtex->resource.b.b.nr_samples <= 1)
				s_info |= S_028044_ALLOW_EXPCLEAR(1);
		} else
			/* Use all of the htile_buffer for depth if there's no stencil. */
			s_info |= S_028044_TILE_STENCIL_DISABLE(1);

		uint64_t va = rtex->htile_buffer->gpu_address;
		db_htile_data_base = va >> 8;
		db_htile_surface = S_028ABC_FULL_CACHE(1);
	} else {
		db_htile_data_base = 0;
		db_htile_surface = 0;
	}

	assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);

	surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
			      S_028008_SLICE_MAX(surf->base.u.tex.last_layer);
	surf->db_htile_data_base = db_htile_data_base;
	surf->db_depth_info = db_depth_info;
	surf->db_z_info = z_info;
	surf->db_stencil_info = s_info;
	surf->db_depth_base = z_offs >> 8;
	surf->db_stencil_base = s_offs >> 8;
	surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) |
			      S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1);
	surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x *
							levelinfo->nblk_y) / 64 - 1);
	surf->db_htile_surface = db_htile_surface;

	surf->depth_initialized = true;
}

/* Drop the per-texture "bound as framebuffer" refcount for every colorbuffer
 * in the given framebuffer state. Counterpart of the p_atomic_inc in
 * si_set_framebuffer_state(). */
static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state)
{
	for (int i = 0; i < state->nr_cbufs; ++i) {
		struct r600_surface *surf = NULL;
		struct r600_texture *rtex;

		if (!state->cbufs[i])
			continue;
		surf = (struct r600_surface*)state->cbufs[i];
		rtex = (struct r600_texture*)surf->base.texture;

		p_atomic_dec(&rtex->framebuffers_bound);
	}
}

/* set_framebuffer_state driver hook: bind a new set of color/depth buffers,
 * flush caches for the outgoing buffers, recompute derived framebuffer state
 * (SPI export formats, sample counts, int8 mask), and dirty dependent atoms.
 */
static void si_set_framebuffer_state(struct pipe_context *ctx,
				     const struct pipe_framebuffer_state *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_constant_buffer constbuf = {0};
	struct r600_surface *surf = NULL;
	struct r600_texture *rtex;
	bool old_cb0_is_integer = sctx->framebuffer.cb0_is_integer;
	bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
	unsigned old_nr_samples =
sctx->framebuffer.nr_samples;
	int i;

	/* Stop DCC statistics gathering on the outgoing colorbuffers. */
	for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
		if (!sctx->framebuffer.state.cbufs[i])
			continue;

		rtex = (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture;
		if (rtex->dcc_gather_statistics)
			vi_separate_dcc_stop_query(ctx, rtex);
	}

	/* Only flush TC when changing the framebuffer state, because
	 * the only client not using TC that can change textures is
	 * the framebuffer.
	 *
	 * Flush all CB and DB caches here because all buffers can be used
	 * for write by both TC (with shader image stores) and CB/DB.
	 */
	sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
			 SI_CONTEXT_INV_GLOBAL_L2 |
			 SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
			 SI_CONTEXT_CS_PARTIAL_FLUSH;

	/* Take the maximum of the old and new count. If the new count is lower,
	 * dirtying is needed to disable the unbound colorbuffers.
	 */
	sctx->framebuffer.dirty_cbufs |=
		(1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
	sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;

	si_dec_framebuffer_counters(&sctx->framebuffer.state);
	util_copy_framebuffer_state(&sctx->framebuffer.state, state);

	/* Reset all derived per-framebuffer state before re-accumulating it
	 * from the new colorbuffers below. */
	sctx->framebuffer.spi_shader_col_format = 0;
	sctx->framebuffer.spi_shader_col_format_alpha = 0;
	sctx->framebuffer.spi_shader_col_format_blend = 0;
	sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
	sctx->framebuffer.color_is_int8 = 0;

	sctx->framebuffer.compressed_cb_mask = 0;
	sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
	sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
	sctx->framebuffer.cb0_is_integer = state->nr_cbufs && state->cbufs[0] &&
				  util_format_is_pure_integer(state->cbufs[0]->format);
	sctx->framebuffer.any_dst_linear = false;

	if (sctx->framebuffer.cb0_is_integer != old_cb0_is_integer)
		si_mark_atom_dirty(sctx, &sctx->db_render_state);

	for (i = 0; i < state->nr_cbufs; i++) {
		if (!state->cbufs[i])
			continue;

		surf = (struct r600_surface*)state->cbufs[i];
		rtex = (struct r600_texture*)surf->base.texture;

		if (!surf->color_initialized) {
			si_initialize_color_surface(sctx, surf);
		}

		/* Pack this surface's SPI export format into the per-target
		 * 4-bit slot of each combined format word. */
		sctx->framebuffer.spi_shader_col_format |=
			surf->spi_shader_col_format << (i * 4);
		sctx->framebuffer.spi_shader_col_format_alpha |=
			surf->spi_shader_col_format_alpha << (i * 4);
		sctx->framebuffer.spi_shader_col_format_blend |=
			surf->spi_shader_col_format_blend << (i * 4);
		sctx->framebuffer.spi_shader_col_format_blend_alpha |=
			surf->spi_shader_col_format_blend_alpha << (i * 4);

		if (surf->color_is_int8)
			sctx->framebuffer.color_is_int8 |= 1 << i;

		if (rtex->fmask.size && rtex->cmask.size) {
			sctx->framebuffer.compressed_cb_mask |= 1 << i;
		}

		if (surf->level_info->mode == RADEON_SURF_MODE_LINEAR_ALIGNED)
			sctx->framebuffer.any_dst_linear = true;

		r600_context_add_resource_size(ctx, surf->base.texture);

		p_atomic_inc(&rtex->framebuffers_bound);

		if (rtex->dcc_gather_statistics) {
			/* Dirty tracking must be enabled for DCC usage analysis. */
			sctx->framebuffer.compressed_cb_mask |= 1 << i;
			vi_separate_dcc_start_query(ctx, rtex);
		}
	}
	/* Set the second SPI format for possible dual-src blending. */
	if (i == 1 && surf) {
		sctx->framebuffer.spi_shader_col_format |=
			surf->spi_shader_col_format << (i * 4);
		sctx->framebuffer.spi_shader_col_format_alpha |=
			surf->spi_shader_col_format_alpha << (i * 4);
		sctx->framebuffer.spi_shader_col_format_blend |=
			surf->spi_shader_col_format_blend << (i * 4);
		sctx->framebuffer.spi_shader_col_format_blend_alpha |=
			surf->spi_shader_col_format_blend_alpha << (i * 4);
	}

	if (state->zsbuf) {
		surf = (struct r600_surface*)state->zsbuf;

		if (!surf->depth_initialized) {
			si_init_depth_surface(sctx, surf);
		}
		r600_context_add_resource_size(ctx, surf->base.texture);
	}

	si_update_poly_offset_state(sctx);
	si_mark_atom_dirty(sctx, &sctx->cb_render_state);
	si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);

	if (sctx->framebuffer.any_dst_linear != old_any_dst_linear)
		si_mark_atom_dirty(sctx, &sctx->msaa_config);

	if (sctx->framebuffer.nr_samples != old_nr_samples) {
		si_mark_atom_dirty(sctx, &sctx->msaa_config);
		si_mark_atom_dirty(sctx, &sctx->db_render_state);

		/* Set sample locations as fragment shader constants. */
		switch (sctx->framebuffer.nr_samples) {
		case 1:
			constbuf.user_buffer = sctx->b.sample_locations_1x;
			break;
		case 2:
			constbuf.user_buffer = sctx->b.sample_locations_2x;
			break;
		case 4:
			constbuf.user_buffer = sctx->b.sample_locations_4x;
			break;
		case 8:
			constbuf.user_buffer = sctx->b.sample_locations_8x;
			break;
		case 16:
			constbuf.user_buffer = sctx->b.sample_locations_16x;
			break;
		default:
			R600_ERR("Requested an invalid number of samples %i.\n",
				 sctx->framebuffer.nr_samples);
			assert(0);
		}
		/* 2 floats (x, y) per sample. */
		constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
		si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf);

		si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
	}

	sctx->need_check_render_feedback = true;
	sctx->do_update_shaders = true;
}

/* Emit the framebuffer register state (CB_COLOR*, DB_*) for all dirty
 * colorbuffers and the depth/stencil buffer to the command stream. */
static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
	unsigned i, nr_cbufs = state->nr_cbufs;
	struct r600_texture *tex = NULL;
	struct r600_surface *cb = NULL;
	unsigned cb_color_info = 0;

	/* Colorbuffers.
 */
	for (i = 0; i < nr_cbufs; i++) {
		unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
		unsigned cb_color_base, cb_color_fmask, cb_color_attrib;
		unsigned cb_color_pitch, cb_color_slice, cb_color_fmask_slice;

		if (!(sctx->framebuffer.dirty_cbufs & (1 << i)))
			continue;

		cb = (struct r600_surface*)state->cbufs[i];
		if (!cb) {
			/* Unbound slot: write an INVALID format to disable the target. */
			radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
					       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
			continue;
		}

		tex = (struct r600_texture *)cb->base.texture;
		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
				      &tex->resource, RADEON_USAGE_READWRITE,
				      tex->surface.nsamples > 1 ?
					      RADEON_PRIO_COLOR_BUFFER_MSAA :
					      RADEON_PRIO_COLOR_BUFFER);

		if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
				tex->cmask_buffer, RADEON_USAGE_READWRITE,
				RADEON_PRIO_CMASK);
		}

		if (tex->dcc_separate_buffer)
			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
						  tex->dcc_separate_buffer,
						  RADEON_USAGE_READWRITE,
						  RADEON_PRIO_DCC);

		/* Compute mutable surface parameters. */
		pitch_tile_max = cb->level_info->nblk_x / 8 - 1;
		slice_tile_max = cb->level_info->nblk_x *
				 cb->level_info->nblk_y / 64 - 1;
		tile_mode_index = si_tile_mode_index(tex, cb->base.u.tex.level, false);

		cb_color_base = (tex->resource.gpu_address + cb->level_info->offset) >> 8;
		cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
		cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
		cb_color_attrib = cb->cb_color_attrib |
				  S_028C74_TILE_MODE_INDEX(tile_mode_index);

		if (tex->fmask.size) {
			if (sctx->b.chip_class >= CIK)
				cb_color_pitch |= S_028C64_FMASK_TILE_MAX(tex->fmask.pitch_in_pixels / 8 - 1);
			cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tex->fmask.tile_mode_index);
			cb_color_fmask = (tex->resource.gpu_address + tex->fmask.offset) >> 8;
			cb_color_fmask_slice = S_028C88_TILE_MAX(tex->fmask.slice_tile_max);
		} else {
			/* This must be set for fast clear to work without FMASK. */
			if (sctx->b.chip_class >= CIK)
				cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
			cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
			cb_color_fmask = cb_color_base;
			cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
		}

		cb_color_info = cb->cb_color_info | tex->cb_color_info;

		if (tex->dcc_offset && cb->level_info->dcc_enabled) {
			/* Keep DCC off when this target is the single-sample
			 * destination (cbuf1) of an MSAA resolve. */
			bool is_msaa_resolve_dst = state->cbufs[0] &&
						   state->cbufs[0]->texture->nr_samples > 1 &&
						   state->cbufs[1] == &cb->base &&
						   state->cbufs[1]->texture->nr_samples <= 1;

			if (!is_msaa_resolve_dst)
				cb_color_info |= S_028C70_DCC_ENABLE(1);
		}

		/* VI+ adds one more register (DCC_BASE) to the sequence. */
		radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
					   sctx->b.chip_class >= VI ? 14 : 13);
		radeon_emit(cs, cb_color_base);		/* R_028C60_CB_COLOR0_BASE */
		radeon_emit(cs, cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
		radeon_emit(cs, cb_color_slice);	/* R_028C68_CB_COLOR0_SLICE */
		radeon_emit(cs, cb->cb_color_view);	/* R_028C6C_CB_COLOR0_VIEW */
		radeon_emit(cs, cb_color_info);		/* R_028C70_CB_COLOR0_INFO */
		radeon_emit(cs, cb_color_attrib);	/* R_028C74_CB_COLOR0_ATTRIB */
		radeon_emit(cs, cb->cb_dcc_control);	/* R_028C78_CB_COLOR0_DCC_CONTROL */
		radeon_emit(cs, tex->cmask.base_address_reg);	/* R_028C7C_CB_COLOR0_CMASK */
		radeon_emit(cs, tex->cmask.slice_tile_max);	/* R_028C80_CB_COLOR0_CMASK_SLICE */
		radeon_emit(cs, cb_color_fmask);		/* R_028C84_CB_COLOR0_FMASK */
		radeon_emit(cs, cb_color_fmask_slice);		/* R_028C88_CB_COLOR0_FMASK_SLICE */
		radeon_emit(cs, tex->color_clear_value[0]);	/* R_028C8C_CB_COLOR0_CLEAR_WORD0 */
		radeon_emit(cs, tex->color_clear_value[1]);	/* R_028C90_CB_COLOR0_CLEAR_WORD1 */

		if (sctx->b.chip_class >= VI) /* R_028C94_CB_COLOR0_DCC_BASE */
			radeon_emit(cs, ((!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
					 tex->dcc_offset +
				         tex->surface.level[cb->base.u.tex.level].dcc_offset) >> 8);
	}
	/* set CB_COLOR1_INFO for possible dual-src blending */
	if (i == 1 && state->cbufs[0] &&
	    sctx->framebuffer.dirty_cbufs & (1 << 0)) {
		radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
				       cb_color_info);
		i++;
	}
	/* Disable the remaining (never bound) color targets. */
	for (; i < 8 ; i++)
		if (sctx->framebuffer.dirty_cbufs & (1 << i))
			radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);

	/* ZS buffer. */
	if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
		struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
		struct r600_texture *rtex = (struct r600_texture*)zb->base.texture;

		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
				      &rtex->resource, RADEON_USAGE_READWRITE,
				      zb->base.texture->nr_samples > 1 ?
					      RADEON_PRIO_DEPTH_BUFFER_MSAA :
					      RADEON_PRIO_DEPTH_BUFFER);

		if (zb->db_htile_data_base) {
			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
					      rtex->htile_buffer, RADEON_USAGE_READWRITE,
					      RADEON_PRIO_HTILE);
		}

		radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
		radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);

		radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9);
		radeon_emit(cs, zb->db_depth_info);	/* R_02803C_DB_DEPTH_INFO */
		radeon_emit(cs, zb->db_z_info |		/* R_028040_DB_Z_INFO */
			    S_028040_ZRANGE_PRECISION(rtex->depth_clear_value != 0));
		radeon_emit(cs, zb->db_stencil_info);	/* R_028044_DB_STENCIL_INFO */
		radeon_emit(cs, zb->db_depth_base);	/* R_028048_DB_Z_READ_BASE */
		radeon_emit(cs, zb->db_stencil_base);	/* R_02804C_DB_STENCIL_READ_BASE */
		radeon_emit(cs, zb->db_depth_base);	/* R_028050_DB_Z_WRITE_BASE */
		radeon_emit(cs, zb->db_stencil_base);	/* R_028054_DB_STENCIL_WRITE_BASE */
		radeon_emit(cs, zb->db_depth_size);	/* R_028058_DB_DEPTH_SIZE */
		radeon_emit(cs, zb->db_depth_slice);	/* R_02805C_DB_DEPTH_SLICE */

		radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2);
		radeon_emit(cs, rtex->stencil_clear_value); /* R_028028_DB_STENCIL_CLEAR */
		radeon_emit(cs, fui(rtex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */

		radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
	} else if (sctx->framebuffer.dirty_zsbuf) {
		/* No ZS buffer bound: write INVALID formats to disable DB. */
		radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
		radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */
		radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */
	}

	/* Framebuffer dimensions. */
	/* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
	radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
			       S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));

	sctx->framebuffer.dirty_cbufs = 0;
	sctx->framebuffer.dirty_zsbuf = false;
}

/* Emit MSAA sample locations (and, on Polaris, the small primitive filter
 * setting that depends on them). */
static void si_emit_msaa_sample_locs(struct si_context *sctx,
				     struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	unsigned nr_samples = sctx->framebuffer.nr_samples;

	/* Smoothing (only possible with nr_samples == 1) uses the same
	 * sample locations as the MSAA it simulates.
	 */
	if (nr_samples <= 1 && sctx->smoothing_enabled)
		nr_samples = SI_NUM_SMOOTH_AA_SAMPLES;

	/* On Polaris, the small primitive filter uses the sample locations
	 * even when MSAA is off, so we need to make sure they're set to 0.
	 */
	if ((nr_samples > 1 || sctx->b.family >= CHIP_POLARIS10) &&
	    (nr_samples != sctx->msaa_sample_locs.nr_samples)) {
		sctx->msaa_sample_locs.nr_samples = nr_samples;
		cayman_emit_msaa_sample_locs(cs, nr_samples);
	}

	if (sctx->b.family >= CHIP_POLARIS10) {
		struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
		unsigned small_prim_filter_cntl =
			S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
			S_028830_LINE_FILTER_DISABLE(1); /* line bug */

		/* The alternative of setting sample locations to 0 would
		 * require a DB flush to avoid Z errors, see
		 * https://bugs.freedesktop.org/show_bug.cgi?id=96908
		 */
		if (sctx->framebuffer.nr_samples > 1 && rs && !rs->multisample_enable)
			small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE;

		radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
				       small_prim_filter_cntl);
	}
}

/* Emit the MSAA configuration registers derived from the current framebuffer
 * sample count, per-sample shading setting and smoothing state. */
static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	unsigned num_tile_pipes = sctx->screen->b.info.num_tile_pipes;
	/* 33% faster rendering to linear color buffers */
	bool dst_is_linear = sctx->framebuffer.any_dst_linear;
	unsigned sc_mode_cntl_1 =
		S_028A4C_WALK_SIZE(dst_is_linear) |
		S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) |
		S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) |
		/* always 1: */
		S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) |
		S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) |
		S_028A4C_TILE_WALK_ORDER_ENABLE(1) |
		S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
		S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
		S_028A4C_FORCE_EOV_REZ_ENABLE(1);

	cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
				sctx->ps_iter_samples,
				sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0,
				sc_mode_cntl_1);
}

/* set_min_samples driver hook: update the number of samples the pixel shader
 * must be run for (ps_iter_samples) and dirty dependent state. */
static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
{
	struct si_context *sctx = (struct si_context *)ctx;

	if (sctx->ps_iter_samples == min_samples)
		return;

	sctx->ps_iter_samples = min_samples;
	sctx->do_update_shaders = true;

	if (sctx->framebuffer.nr_samples > 1)
		si_mark_atom_dirty(sctx, &sctx->msaa_config);
}

/*
 * Samplers
 */

/**
 * Build the sampler view descriptor for a buffer texture.
 * @param state 256-bit descriptor; only the high 128 bits are filled in
 */
void
si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf,
			  enum pipe_format format,
			  unsigned first_element, unsigned last_element,
			  uint32_t *state)
{
	const struct util_format_description *desc;
	int first_non_void;
	uint64_t va;
	unsigned stride;
	unsigned num_records;
	unsigned num_format, data_format;

	desc = util_format_description(format);
	first_non_void = util_format_get_first_non_void_channel(format);
	stride = desc->block.bits / 8;
	va = buf->gpu_address + first_element * stride;
	num_format = si_translate_buffer_numformat(&screen->b.b, desc, first_non_void);
	data_format = si_translate_buffer_dataformat(&screen->b.b, desc, first_non_void);

	/* Clamp the record count to what actually fits in the buffer. */
	num_records = last_element + 1 - first_element;
	num_records = MIN2(num_records, buf->b.b.width0 / stride);

	/* On VI, NUM_RECORDS is expressed in bytes, not elements. */
	if (screen->b.chip_class >= VI)
		num_records *= stride;

	state[4] = va;
	state[5] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
		   S_008F04_STRIDE(stride);
	state[6] = num_records;
	state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
		   S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
		   S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
		   S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
		   S_008F0C_NUM_FORMAT(num_format) |
		   S_008F0C_DATA_FORMAT(data_format);
}

/**
 * Build the sampler view descriptor for a texture.
 */
void
si_make_texture_descriptor(struct si_screen *screen,
			   struct r600_texture *tex,
			   bool sampler,
			   enum pipe_texture_target target,
			   enum pipe_format pipe_format,
			   const unsigned char state_swizzle[4],
			   unsigned first_level, unsigned last_level,
			   unsigned first_layer, unsigned last_layer,
			   unsigned width, unsigned height, unsigned depth,
			   uint32_t *state,
			   uint32_t *fmask_state)
{
	struct pipe_resource *res = &tex->resource.b.b;
	const struct util_format_description *desc;
	unsigned char swizzle[4];
	int first_non_void;
	unsigned num_format, data_format, type;
	uint64_t va;

	desc = util_format_description(pipe_format);

	if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
		/* Depth/stencil views: broadcast the sampled channel
		 * (Y = stencil for the formats below, X = depth otherwise)
		 * before applying the caller's swizzle. */
		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
		const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};

		switch (pipe_format) {
		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
		case PIPE_FORMAT_X24S8_UINT:
		case PIPE_FORMAT_X32_S8X24_UINT:
		case PIPE_FORMAT_X8Z24_UNORM:
			util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
			break;
		default:
			util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
		}
	} else {
		util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
	}

	first_non_void = util_format_get_first_non_void_channel(pipe_format);

	switch (pipe_format) {
	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
		num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
		break;
	default:
		if (first_non_void < 0) {
			if (util_format_is_compressed(pipe_format)) {
				switch (pipe_format) {
				case PIPE_FORMAT_DXT1_SRGB:
				case PIPE_FORMAT_DXT1_SRGBA:
				case PIPE_FORMAT_DXT3_SRGBA:
				case PIPE_FORMAT_DXT5_SRGBA:
				case PIPE_FORMAT_BPTC_SRGBA:
				case PIPE_FORMAT_ETC2_SRGB8:
				case PIPE_FORMAT_ETC2_SRGB8A1:
				case PIPE_FORMAT_ETC2_SRGBA8:
					num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
					break;
				case PIPE_FORMAT_RGTC1_SNORM:
				case PIPE_FORMAT_LATC1_SNORM:
				case PIPE_FORMAT_RGTC2_SNORM:
				case PIPE_FORMAT_LATC2_SNORM:
				case PIPE_FORMAT_ETC2_R11_SNORM:
				case PIPE_FORMAT_ETC2_RG11_SNORM:
				/* implies float, so use SNORM/UNORM to determine
				   whether data is signed or not */
				case PIPE_FORMAT_BPTC_RGB_FLOAT:
					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
					break;
				default:
					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
					break;
				}
			} else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
				num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
			} else {
				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
			}
		} else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
			num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
		} else {
			num_format = V_008F14_IMG_NUM_FORMAT_UNORM;

			switch (desc->channel[first_non_void].type) {
			case UTIL_FORMAT_TYPE_FLOAT:
				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
				break;
			case UTIL_FORMAT_TYPE_SIGNED:
				if (desc->channel[first_non_void].normalized)
					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
				else if (desc->channel[first_non_void].pure_integer)
					num_format = V_008F14_IMG_NUM_FORMAT_SINT;
				else
					num_format = V_008F14_IMG_NUM_FORMAT_SSCALED;
				break;
			case UTIL_FORMAT_TYPE_UNSIGNED:
				if (desc->channel[first_non_void].normalized)
					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
				else if (desc->channel[first_non_void].pure_integer)
					num_format = V_008F14_IMG_NUM_FORMAT_UINT;
				else
					num_format = V_008F14_IMG_NUM_FORMAT_USCALED;
			}
		}
	}

	/* si_translate_texformat returns ~0 on failure. */
	data_format = si_translate_texformat(&screen->b.b, pipe_format, desc, first_non_void);
	if (data_format == ~0) {
		data_format = 0;
	}

	if (!sampler &&
	    (res->target == PIPE_TEXTURE_CUBE ||
	     res->target == PIPE_TEXTURE_CUBE_ARRAY ||
	     res->target == PIPE_TEXTURE_3D)) {
		/* For the purpose of shader images, treat cube maps and 3D
		 * textures as 2D arrays. For 3D textures, the address
		 * calculations for mipmaps are different, so we rely on the
		 * caller to effectively disable mipmaps.
		 */
		type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY;

		assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0));
	} else {
		type = si_tex_dim(res->target, target, res->nr_samples);
	}

	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
	        height = 1;
		depth = res->array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
		   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
		if (sampler || res->target != PIPE_TEXTURE_3D)
			depth = res->array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
		depth = res->array_size / 6;

	state[0] = 0;
	state[1] = (S_008F14_DATA_FORMAT(data_format) |
		    S_008F14_NUM_FORMAT(num_format));
	state[2] = (S_008F18_WIDTH(width - 1) |
		    S_008F18_HEIGHT(height - 1));
	/* MSAA textures store the sample count in the LEVEL fields. */
	state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
		    S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
		    S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
		    S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
		    S_008F1C_BASE_LEVEL(res->nr_samples > 1 ?
					0 : first_level) |
		    S_008F1C_LAST_LEVEL(res->nr_samples > 1 ?
					util_logbase2(res->nr_samples) :
					last_level) |
		    S_008F1C_POW2_PAD(res->last_level > 0) |
		    S_008F1C_TYPE(type));
	state[4] = S_008F20_DEPTH(depth - 1);
	state[5] = (S_008F24_BASE_ARRAY(first_layer) |
		    S_008F24_LAST_ARRAY(last_layer));
	state[6] = 0;
	state[7] = 0;

	if (tex->dcc_offset) {
		unsigned swap = r600_translate_colorswap(pipe_format, false);

		state[6] = S_008F28_ALPHA_IS_ON_MSB(swap <= 1);
	} else {
		/* The last dword is unused by hw. The shader uses it to clear
		 * bits in the first dword of sampler state.
		 */
		if (screen->b.chip_class <= CIK && res->nr_samples <= 1) {
			if (first_level == last_level)
				state[7] = C_008F30_MAX_ANISO_RATIO;
			else
				state[7] = 0xffffffff;
		}
	}

	/* Initialize the sampler view for FMASK. */
	if (tex->fmask.size) {
		uint32_t fmask_format;

		va = tex->resource.gpu_address + tex->fmask.offset;

		switch (res->nr_samples) {
		case 2:
			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
			break;
		case 4:
			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
			break;
		case 8:
			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
			break;
		default:
			assert(0);
			fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
		}

		fmask_state[0] = va >> 8;
		fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
			S_008F14_DATA_FORMAT(fmask_format) |
			S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT);
		fmask_state[2] = S_008F18_WIDTH(width - 1) |
			S_008F18_HEIGHT(height - 1);
		fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
			S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
			S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
			S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
			S_008F1C_TILING_INDEX(tex->fmask.tile_mode_index) |
			S_008F1C_TYPE(si_tex_dim(res->target, target, 0));
		fmask_state[4] = S_008F20_DEPTH(depth - 1) |
S_008F20_PITCH(tex->fmask.pitch_in_pixels - 1); 2922 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer) | 2923 S_008F24_LAST_ARRAY(last_layer); 2924 fmask_state[6] = 0; 2925 fmask_state[7] = 0; 2926 } 2927} 2928 2929/** 2930 * Create a sampler view. 2931 * 2932 * @param ctx context 2933 * @param texture texture 2934 * @param state sampler view template 2935 * @param width0 width0 override (for compressed textures as int) 2936 * @param height0 height0 override (for compressed textures as int) 2937 * @param force_level set the base address to the level (for compressed textures) 2938 */ 2939struct pipe_sampler_view * 2940si_create_sampler_view_custom(struct pipe_context *ctx, 2941 struct pipe_resource *texture, 2942 const struct pipe_sampler_view *state, 2943 unsigned width0, unsigned height0, 2944 unsigned force_level) 2945{ 2946 struct si_context *sctx = (struct si_context*)ctx; 2947 struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view); 2948 struct r600_texture *tmp = (struct r600_texture*)texture; 2949 unsigned base_level, first_level, last_level; 2950 unsigned char state_swizzle[4]; 2951 unsigned height, depth, width; 2952 unsigned last_layer = state->u.tex.last_layer; 2953 enum pipe_format pipe_format; 2954 const struct radeon_surf_level *surflevel; 2955 2956 if (!view) 2957 return NULL; 2958 2959 /* initialize base object */ 2960 view->base = *state; 2961 view->base.texture = NULL; 2962 view->base.reference.count = 1; 2963 view->base.context = ctx; 2964 2965 /* NULL resource, obey swizzle (only ZERO and ONE make sense). 
*/ 2966 if (!texture) { 2967 view->state[3] = S_008F1C_DST_SEL_X(si_map_swizzle(state->swizzle_r)) | 2968 S_008F1C_DST_SEL_Y(si_map_swizzle(state->swizzle_g)) | 2969 S_008F1C_DST_SEL_Z(si_map_swizzle(state->swizzle_b)) | 2970 S_008F1C_DST_SEL_W(si_map_swizzle(state->swizzle_a)) | 2971 S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D); 2972 return &view->base; 2973 } 2974 2975 pipe_resource_reference(&view->base.texture, texture); 2976 2977 if (state->format == PIPE_FORMAT_X24S8_UINT || 2978 state->format == PIPE_FORMAT_S8X24_UINT || 2979 state->format == PIPE_FORMAT_X32_S8X24_UINT || 2980 state->format == PIPE_FORMAT_S8_UINT) 2981 view->is_stencil_sampler = true; 2982 2983 /* Buffer resource. */ 2984 if (texture->target == PIPE_BUFFER) { 2985 si_make_buffer_descriptor(sctx->screen, 2986 (struct r600_resource *)texture, 2987 state->format, 2988 state->u.buf.first_element, 2989 state->u.buf.last_element, 2990 view->state); 2991 2992 LIST_ADDTAIL(&view->list, &sctx->b.texture_buffers); 2993 return &view->base; 2994 } 2995 2996 state_swizzle[0] = state->swizzle_r; 2997 state_swizzle[1] = state->swizzle_g; 2998 state_swizzle[2] = state->swizzle_b; 2999 state_swizzle[3] = state->swizzle_a; 3000 3001 base_level = 0; 3002 first_level = state->u.tex.first_level; 3003 last_level = state->u.tex.last_level; 3004 width = width0; 3005 height = height0; 3006 depth = texture->depth0; 3007 3008 if (force_level) { 3009 assert(force_level == first_level && 3010 force_level == last_level); 3011 base_level = force_level; 3012 first_level = 0; 3013 last_level = 0; 3014 width = u_minify(width, force_level); 3015 height = u_minify(height, force_level); 3016 depth = u_minify(depth, force_level); 3017 } 3018 3019 /* This is not needed if state trackers set last_layer correctly. 
*/ 3020 if (state->target == PIPE_TEXTURE_1D || 3021 state->target == PIPE_TEXTURE_2D || 3022 state->target == PIPE_TEXTURE_RECT || 3023 state->target == PIPE_TEXTURE_CUBE) 3024 last_layer = state->u.tex.first_layer; 3025 3026 /* Texturing with separate depth and stencil. */ 3027 pipe_format = state->format; 3028 3029 /* Depth/stencil texturing sometimes needs separate texture. */ 3030 if (tmp->is_depth && !r600_can_sample_zs(tmp, view->is_stencil_sampler)) { 3031 if (!tmp->flushed_depth_texture && 3032 !r600_init_flushed_depth_texture(ctx, texture, NULL)) { 3033 pipe_resource_reference(&view->base.texture, NULL); 3034 FREE(view); 3035 return NULL; 3036 } 3037 3038 assert(tmp->flushed_depth_texture); 3039 3040 /* Override format for the case where the flushed texture 3041 * contains only Z or only S. 3042 */ 3043 if (tmp->flushed_depth_texture->resource.b.b.format != tmp->resource.b.b.format) 3044 pipe_format = tmp->flushed_depth_texture->resource.b.b.format; 3045 3046 tmp = tmp->flushed_depth_texture; 3047 } 3048 3049 surflevel = tmp->surface.level; 3050 3051 if (tmp->db_compatible) { 3052 switch (pipe_format) { 3053 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 3054 pipe_format = PIPE_FORMAT_Z32_FLOAT; 3055 break; 3056 case PIPE_FORMAT_X8Z24_UNORM: 3057 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 3058 /* Z24 is always stored like this for DB 3059 * compatibility. 
3060 */ 3061 pipe_format = PIPE_FORMAT_Z24X8_UNORM; 3062 break; 3063 case PIPE_FORMAT_X24S8_UINT: 3064 case PIPE_FORMAT_S8X24_UINT: 3065 case PIPE_FORMAT_X32_S8X24_UINT: 3066 pipe_format = PIPE_FORMAT_S8_UINT; 3067 surflevel = tmp->surface.stencil_level; 3068 break; 3069 default:; 3070 } 3071 } 3072 3073 si_make_texture_descriptor(sctx->screen, tmp, true, 3074 state->target, pipe_format, state_swizzle, 3075 first_level, last_level, 3076 state->u.tex.first_layer, last_layer, 3077 width, height, depth, 3078 view->state, view->fmask_state); 3079 3080 view->base_level_info = &surflevel[base_level]; 3081 view->base_level = base_level; 3082 view->block_width = util_format_get_blockwidth(pipe_format); 3083 return &view->base; 3084} 3085 3086static struct pipe_sampler_view * 3087si_create_sampler_view(struct pipe_context *ctx, 3088 struct pipe_resource *texture, 3089 const struct pipe_sampler_view *state) 3090{ 3091 return si_create_sampler_view_custom(ctx, texture, state, 3092 texture ? texture->width0 : 0, 3093 texture ? 
texture->height0 : 0, 0); 3094} 3095 3096static void si_sampler_view_destroy(struct pipe_context *ctx, 3097 struct pipe_sampler_view *state) 3098{ 3099 struct si_sampler_view *view = (struct si_sampler_view *)state; 3100 3101 if (state->texture && state->texture->target == PIPE_BUFFER) 3102 LIST_DELINIT(&view->list); 3103 3104 pipe_resource_reference(&state->texture, NULL); 3105 FREE(view); 3106} 3107 3108static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter) 3109{ 3110 return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER || 3111 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER || 3112 (linear_filter && 3113 (wrap == PIPE_TEX_WRAP_CLAMP || 3114 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP)); 3115} 3116 3117static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state) 3118{ 3119 bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST || 3120 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST; 3121 3122 return (state->border_color.ui[0] || state->border_color.ui[1] || 3123 state->border_color.ui[2] || state->border_color.ui[3]) && 3124 (wrap_mode_uses_border_color(state->wrap_s, linear_filter) || 3125 wrap_mode_uses_border_color(state->wrap_t, linear_filter) || 3126 wrap_mode_uses_border_color(state->wrap_r, linear_filter)); 3127} 3128 3129static void *si_create_sampler_state(struct pipe_context *ctx, 3130 const struct pipe_sampler_state *state) 3131{ 3132 struct si_context *sctx = (struct si_context *)ctx; 3133 struct r600_common_screen *rscreen = sctx->b.screen; 3134 struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state); 3135 unsigned border_color_type, border_color_index = 0; 3136 unsigned max_aniso = rscreen->force_aniso >= 0 ? 
rscreen->force_aniso 3137 : state->max_anisotropy; 3138 unsigned max_aniso_ratio = r600_tex_aniso_filter(max_aniso); 3139 3140 if (!rstate) { 3141 return NULL; 3142 } 3143 3144 if (!sampler_state_needs_border_color(state)) 3145 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; 3146 else if (state->border_color.f[0] == 0 && 3147 state->border_color.f[1] == 0 && 3148 state->border_color.f[2] == 0 && 3149 state->border_color.f[3] == 0) 3150 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; 3151 else if (state->border_color.f[0] == 0 && 3152 state->border_color.f[1] == 0 && 3153 state->border_color.f[2] == 0 && 3154 state->border_color.f[3] == 1) 3155 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK; 3156 else if (state->border_color.f[0] == 1 && 3157 state->border_color.f[1] == 1 && 3158 state->border_color.f[2] == 1 && 3159 state->border_color.f[3] == 1) 3160 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE; 3161 else { 3162 int i; 3163 3164 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER; 3165 3166 /* Check if the border has been uploaded already. */ 3167 for (i = 0; i < sctx->border_color_count; i++) 3168 if (memcmp(&sctx->border_color_table[i], &state->border_color, 3169 sizeof(state->border_color)) == 0) 3170 break; 3171 3172 if (i >= SI_MAX_BORDER_COLORS) { 3173 /* Getting 4096 unique border colors is very unlikely. */ 3174 fprintf(stderr, "radeonsi: The border color table is full. " 3175 "Any new border colors will be just black. " 3176 "Please file a bug.\n"); 3177 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; 3178 } else { 3179 if (i == sctx->border_color_count) { 3180 /* Upload a new border color. 
*/ 3181 memcpy(&sctx->border_color_table[i], &state->border_color, 3182 sizeof(state->border_color)); 3183 util_memcpy_cpu_to_le32(&sctx->border_color_map[i], 3184 &state->border_color, 3185 sizeof(state->border_color)); 3186 sctx->border_color_count++; 3187 } 3188 3189 border_color_index = i; 3190 } 3191 } 3192 3193 rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) | 3194 S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) | 3195 S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) | 3196 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | 3197 S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) | 3198 S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) | 3199 S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) | 3200 S_008F30_COMPAT_MODE(sctx->b.chip_class >= VI)); 3201 rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | 3202 S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8))); 3203 rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | 3204 S_008F38_XY_MAG_FILTER(eg_tex_filter(state->mag_img_filter, max_aniso)) | 3205 S_008F38_XY_MIN_FILTER(eg_tex_filter(state->min_img_filter, max_aniso)) | 3206 S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) | 3207 S_008F38_MIP_POINT_PRECLAMP(1) | 3208 S_008F38_DISABLE_LSB_CEIL(1) | 3209 S_008F38_FILTER_PREC_FIX(1) | 3210 S_008F38_ANISO_OVERRIDE(sctx->b.chip_class >= VI)); 3211 rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) | 3212 S_008F3C_BORDER_COLOR_TYPE(border_color_type); 3213 return rstate; 3214} 3215 3216static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) 3217{ 3218 struct si_context *sctx = (struct si_context *)ctx; 3219 3220 if (sctx->sample_mask.sample_mask == (uint16_t)sample_mask) 3221 return; 3222 3223 sctx->sample_mask.sample_mask = sample_mask; 3224 si_mark_atom_dirty(sctx, &sctx->sample_mask.atom); 3225} 3226 3227static void si_emit_sample_mask(struct si_context *sctx, struct r600_atom 
*atom) 3228{ 3229 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 3230 unsigned mask = sctx->sample_mask.sample_mask; 3231 3232 /* Needed for line and polygon smoothing as well as for the Polaris 3233 * small primitive filter. We expect the state tracker to take care of 3234 * this for us. 3235 */ 3236 assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 || 3237 (mask & 1 && sctx->blitter->running)); 3238 3239 radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); 3240 radeon_emit(cs, mask | (mask << 16)); 3241 radeon_emit(cs, mask | (mask << 16)); 3242} 3243 3244static void si_delete_sampler_state(struct pipe_context *ctx, void *state) 3245{ 3246 free(state); 3247} 3248 3249/* 3250 * Vertex elements & buffers 3251 */ 3252 3253static void *si_create_vertex_elements(struct pipe_context *ctx, 3254 unsigned count, 3255 const struct pipe_vertex_element *elements) 3256{ 3257 struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element); 3258 int i; 3259 3260 assert(count <= SI_MAX_ATTRIBS); 3261 if (!v) 3262 return NULL; 3263 3264 v->count = count; 3265 for (i = 0; i < count; ++i) { 3266 const struct util_format_description *desc; 3267 unsigned data_format, num_format; 3268 int first_non_void; 3269 3270 desc = util_format_description(elements[i].src_format); 3271 first_non_void = util_format_get_first_non_void_channel(elements[i].src_format); 3272 data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); 3273 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); 3274 3275 v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 3276 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 3277 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 3278 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | 3279 S_008F0C_NUM_FORMAT(num_format) | 3280 S_008F0C_DATA_FORMAT(data_format); 3281 v->format_size[i] = desc->block.bits / 8; 3282 } 3283 memcpy(v->elements, elements, sizeof(struct 
pipe_vertex_element) * count); 3284 3285 return v; 3286} 3287 3288static void si_bind_vertex_elements(struct pipe_context *ctx, void *state) 3289{ 3290 struct si_context *sctx = (struct si_context *)ctx; 3291 struct si_vertex_element *v = (struct si_vertex_element*)state; 3292 3293 sctx->vertex_elements = v; 3294 sctx->vertex_buffers_dirty = true; 3295 sctx->do_update_shaders = true; 3296} 3297 3298static void si_delete_vertex_element(struct pipe_context *ctx, void *state) 3299{ 3300 struct si_context *sctx = (struct si_context *)ctx; 3301 3302 if (sctx->vertex_elements == state) 3303 sctx->vertex_elements = NULL; 3304 FREE(state); 3305} 3306 3307static void si_set_vertex_buffers(struct pipe_context *ctx, 3308 unsigned start_slot, unsigned count, 3309 const struct pipe_vertex_buffer *buffers) 3310{ 3311 struct si_context *sctx = (struct si_context *)ctx; 3312 struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot; 3313 int i; 3314 3315 assert(start_slot + count <= ARRAY_SIZE(sctx->vertex_buffer)); 3316 3317 if (buffers) { 3318 for (i = 0; i < count; i++) { 3319 const struct pipe_vertex_buffer *src = buffers + i; 3320 struct pipe_vertex_buffer *dsti = dst + i; 3321 3322 pipe_resource_reference(&dsti->buffer, src->buffer); 3323 dsti->buffer_offset = src->buffer_offset; 3324 dsti->stride = src->stride; 3325 r600_context_add_resource_size(ctx, src->buffer); 3326 } 3327 } else { 3328 for (i = 0; i < count; i++) { 3329 pipe_resource_reference(&dst[i].buffer, NULL); 3330 } 3331 } 3332 sctx->vertex_buffers_dirty = true; 3333} 3334 3335static void si_set_index_buffer(struct pipe_context *ctx, 3336 const struct pipe_index_buffer *ib) 3337{ 3338 struct si_context *sctx = (struct si_context *)ctx; 3339 3340 if (ib) { 3341 pipe_resource_reference(&sctx->index_buffer.buffer, ib->buffer); 3342 memcpy(&sctx->index_buffer, ib, sizeof(*ib)); 3343 r600_context_add_resource_size(ctx, ib->buffer); 3344 } else { 3345 pipe_resource_reference(&sctx->index_buffer.buffer, NULL); 
3346 } 3347} 3348 3349/* 3350 * Misc 3351 */ 3352 3353static void si_set_tess_state(struct pipe_context *ctx, 3354 const float default_outer_level[4], 3355 const float default_inner_level[2]) 3356{ 3357 struct si_context *sctx = (struct si_context *)ctx; 3358 struct pipe_constant_buffer cb; 3359 float array[8]; 3360 3361 memcpy(array, default_outer_level, sizeof(float) * 4); 3362 memcpy(array+4, default_inner_level, sizeof(float) * 2); 3363 3364 cb.buffer = NULL; 3365 cb.user_buffer = NULL; 3366 cb.buffer_size = sizeof(array); 3367 3368 si_upload_const_buffer(sctx, (struct r600_resource**)&cb.buffer, 3369 (void*)array, sizeof(array), 3370 &cb.buffer_offset); 3371 3372 si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb); 3373 pipe_resource_reference(&cb.buffer, NULL); 3374} 3375 3376static void si_texture_barrier(struct pipe_context *ctx) 3377{ 3378 struct si_context *sctx = (struct si_context *)ctx; 3379 3380 sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | 3381 SI_CONTEXT_INV_GLOBAL_L2 | 3382 SI_CONTEXT_FLUSH_AND_INV_CB | 3383 SI_CONTEXT_CS_PARTIAL_FLUSH; 3384} 3385 3386static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) 3387{ 3388 struct si_context *sctx = (struct si_context *)ctx; 3389 3390 /* Subsequent commands must wait for all shader invocations to 3391 * complete. */ 3392 sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | 3393 SI_CONTEXT_CS_PARTIAL_FLUSH; 3394 3395 if (flags & PIPE_BARRIER_CONSTANT_BUFFER) 3396 sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 | 3397 SI_CONTEXT_INV_VMEM_L1; 3398 3399 if (flags & (PIPE_BARRIER_VERTEX_BUFFER | 3400 PIPE_BARRIER_SHADER_BUFFER | 3401 PIPE_BARRIER_TEXTURE | 3402 PIPE_BARRIER_IMAGE | 3403 PIPE_BARRIER_STREAMOUT_BUFFER | 3404 PIPE_BARRIER_GLOBAL_BUFFER)) { 3405 /* As far as I can tell, L1 contents are written back to L2 3406 * automatically at end of shader, but the contents of other 3407 * L1 caches might still be stale. 
*/ 3408 sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1; 3409 } 3410 3411 if (flags & PIPE_BARRIER_INDEX_BUFFER) { 3412 sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1; 3413 3414 /* Indices are read through TC L2 since VI. */ 3415 if (sctx->screen->b.chip_class <= CIK) 3416 sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2; 3417 } 3418 3419 if (flags & PIPE_BARRIER_FRAMEBUFFER) 3420 sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; 3421 3422 if (flags & (PIPE_BARRIER_MAPPED_BUFFER | 3423 PIPE_BARRIER_FRAMEBUFFER | 3424 PIPE_BARRIER_INDIRECT_BUFFER)) { 3425 /* Not sure if INV_GLOBAL_L2 is the best thing here. 3426 * 3427 * We need to make sure that TC L1 & L2 are written back to 3428 * memory, because neither CPU accesses nor CB fetches consider 3429 * TC, but there's no need to invalidate any TC cache lines. */ 3430 sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2; 3431 } 3432} 3433 3434static void *si_create_blend_custom(struct si_context *sctx, unsigned mode) 3435{ 3436 struct pipe_blend_state blend; 3437 3438 memset(&blend, 0, sizeof(blend)); 3439 blend.independent_blend_enable = true; 3440 blend.rt[0].colormask = 0xf; 3441 return si_create_blend_state_mode(&sctx->b.b, &blend, mode); 3442} 3443 3444static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw, 3445 bool include_draw_vbo) 3446{ 3447 si_need_cs_space((struct si_context*)ctx); 3448} 3449 3450static void si_init_config(struct si_context *sctx); 3451 3452void si_init_state_functions(struct si_context *sctx) 3453{ 3454 si_init_external_atom(sctx, &sctx->b.render_cond_atom, &sctx->atoms.s.render_cond); 3455 si_init_external_atom(sctx, &sctx->b.streamout.begin_atom, &sctx->atoms.s.streamout_begin); 3456 si_init_external_atom(sctx, &sctx->b.streamout.enable_atom, &sctx->atoms.s.streamout_enable); 3457 si_init_external_atom(sctx, &sctx->b.scissors.atom, &sctx->atoms.s.scissors); 3458 si_init_external_atom(sctx, &sctx->b.viewports.atom, &sctx->atoms.s.viewports); 3459 3460 si_init_atom(sctx, &sctx->cache_flush, 
&sctx->atoms.s.cache_flush, si_emit_cache_flush); 3461 si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state); 3462 si_init_atom(sctx, &sctx->msaa_sample_locs.atom, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs); 3463 si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state); 3464 si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config); 3465 si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask); 3466 si_init_atom(sctx, &sctx->cb_render_state, &sctx->atoms.s.cb_render_state, si_emit_cb_render_state); 3467 si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color); 3468 si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs); 3469 si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state); 3470 si_init_atom(sctx, &sctx->stencil_ref.atom, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref); 3471 3472 sctx->b.b.create_blend_state = si_create_blend_state; 3473 sctx->b.b.bind_blend_state = si_bind_blend_state; 3474 sctx->b.b.delete_blend_state = si_delete_blend_state; 3475 sctx->b.b.set_blend_color = si_set_blend_color; 3476 3477 sctx->b.b.create_rasterizer_state = si_create_rs_state; 3478 sctx->b.b.bind_rasterizer_state = si_bind_rs_state; 3479 sctx->b.b.delete_rasterizer_state = si_delete_rs_state; 3480 3481 sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state; 3482 sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state; 3483 sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state; 3484 3485 sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx); 3486 sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE); 3487 sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS); 3488 sctx->custom_blend_fastclear = si_create_blend_custom(sctx, 
V_028808_CB_ELIMINATE_FAST_CLEAR); 3489 sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, V_028808_CB_DCC_DECOMPRESS); 3490 3491 sctx->b.b.set_clip_state = si_set_clip_state; 3492 sctx->b.b.set_stencil_ref = si_set_stencil_ref; 3493 3494 sctx->b.b.set_framebuffer_state = si_set_framebuffer_state; 3495 sctx->b.b.get_sample_position = cayman_get_sample_position; 3496 3497 sctx->b.b.create_sampler_state = si_create_sampler_state; 3498 sctx->b.b.delete_sampler_state = si_delete_sampler_state; 3499 3500 sctx->b.b.create_sampler_view = si_create_sampler_view; 3501 sctx->b.b.sampler_view_destroy = si_sampler_view_destroy; 3502 3503 sctx->b.b.set_sample_mask = si_set_sample_mask; 3504 3505 sctx->b.b.create_vertex_elements_state = si_create_vertex_elements; 3506 sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements; 3507 sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element; 3508 sctx->b.b.set_vertex_buffers = si_set_vertex_buffers; 3509 sctx->b.b.set_index_buffer = si_set_index_buffer; 3510 3511 sctx->b.b.texture_barrier = si_texture_barrier; 3512 sctx->b.b.memory_barrier = si_memory_barrier; 3513 sctx->b.b.set_min_samples = si_set_min_samples; 3514 sctx->b.b.set_tess_state = si_set_tess_state; 3515 3516 sctx->b.b.set_active_query_state = si_set_active_query_state; 3517 sctx->b.set_occlusion_query_state = si_set_occlusion_query_state; 3518 sctx->b.need_gfx_cs_space = si_need_gfx_cs_space; 3519 3520 sctx->b.b.draw_vbo = si_draw_vbo; 3521 3522 si_init_config(sctx); 3523} 3524 3525static uint32_t si_get_bo_metadata_word1(struct r600_common_screen *rscreen) 3526{ 3527 return (ATI_VENDOR_ID << 16) | rscreen->info.pci_id; 3528} 3529 3530static void si_query_opaque_metadata(struct r600_common_screen *rscreen, 3531 struct r600_texture *rtex, 3532 struct radeon_bo_metadata *md) 3533{ 3534 struct si_screen *sscreen = (struct si_screen*)rscreen; 3535 struct pipe_resource *res = &rtex->resource.b.b; 3536 static const unsigned char swizzle[] = { 3537 
PIPE_SWIZZLE_X, 3538 PIPE_SWIZZLE_Y, 3539 PIPE_SWIZZLE_Z, 3540 PIPE_SWIZZLE_W 3541 }; 3542 uint32_t desc[8], i; 3543 bool is_array = util_resource_is_array_texture(res); 3544 3545 /* DRM 2.x.x doesn't support this. */ 3546 if (rscreen->info.drm_major != 3) 3547 return; 3548 3549 assert(rtex->dcc_separate_buffer == NULL); 3550 assert(rtex->fmask.size == 0); 3551 3552 /* Metadata image format format version 1: 3553 * [0] = 1 (metadata format identifier) 3554 * [1] = (VENDOR_ID << 16) | PCI_ID 3555 * [2:9] = image descriptor for the whole resource 3556 * [2] is always 0, because the base address is cleared 3557 * [9] is the DCC offset bits [39:8] from the beginning of 3558 * the buffer 3559 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level 3560 */ 3561 3562 md->metadata[0] = 1; /* metadata image format version 1 */ 3563 3564 /* TILE_MODE_INDEX is ambiguous without a PCI ID. */ 3565 md->metadata[1] = si_get_bo_metadata_word1(rscreen); 3566 3567 si_make_texture_descriptor(sscreen, rtex, true, 3568 res->target, res->format, 3569 swizzle, 0, res->last_level, 0, 3570 is_array ? res->array_size - 1 : 0, 3571 res->width0, res->height0, res->depth0, 3572 desc, NULL); 3573 3574 si_set_mutable_tex_desc_fields(rtex, &rtex->surface.level[0], 0, 0, 3575 rtex->surface.blk_w, false, desc); 3576 3577 /* Clear the base address and set the relative DCC offset. */ 3578 desc[0] = 0; 3579 desc[1] &= C_008F14_BASE_ADDRESS_HI; 3580 desc[7] = rtex->dcc_offset >> 8; 3581 3582 /* Dwords [2:9] contain the image descriptor. */ 3583 memcpy(&md->metadata[2], desc, sizeof(desc)); 3584 3585 /* Dwords [10:..] contain the mipmap level offsets. 
*/ 3586 for (i = 0; i <= res->last_level; i++) 3587 md->metadata[10+i] = rtex->surface.level[i].offset >> 8; 3588 3589 md->size_metadata = (11 + res->last_level) * 4; 3590} 3591 3592static void si_apply_opaque_metadata(struct r600_common_screen *rscreen, 3593 struct r600_texture *rtex, 3594 struct radeon_bo_metadata *md) 3595{ 3596 uint32_t *desc = &md->metadata[2]; 3597 3598 if (rscreen->chip_class < VI) 3599 return; 3600 3601 /* Return if DCC is enabled. The texture should be set up with it 3602 * already. 3603 */ 3604 if (md->size_metadata >= 11 * 4 && 3605 md->metadata[0] != 0 && 3606 md->metadata[1] == si_get_bo_metadata_word1(rscreen) && 3607 G_008F28_COMPRESSION_EN(desc[6])) { 3608 assert(rtex->dcc_offset == ((uint64_t)desc[7] << 8)); 3609 return; 3610 } 3611 3612 /* Disable DCC. These are always set by texture_from_handle and must 3613 * be cleared here. 3614 */ 3615 rtex->dcc_offset = 0; 3616} 3617 3618void si_init_screen_state_functions(struct si_screen *sscreen) 3619{ 3620 sscreen->b.b.is_format_supported = si_is_format_supported; 3621 sscreen->b.query_opaque_metadata = si_query_opaque_metadata; 3622 sscreen->b.apply_opaque_metadata = si_apply_opaque_metadata; 3623} 3624 3625static void 3626si_write_harvested_raster_configs(struct si_context *sctx, 3627 struct si_pm4_state *pm4, 3628 unsigned raster_config, 3629 unsigned raster_config_1) 3630{ 3631 unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1); 3632 unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1); 3633 unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask; 3634 unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16); 3635 unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2); 3636 unsigned rb_per_se = num_rb / num_se; 3637 unsigned se_mask[4]; 3638 unsigned se; 3639 3640 se_mask[0] = ((1 << rb_per_se) - 1); 3641 se_mask[1] = (se_mask[0] << rb_per_se); 3642 se_mask[2] = (se_mask[1] << rb_per_se); 3643 se_mask[3] = (se_mask[2] << rb_per_se); 3644 3645 
	/* Restrict each SE's mask to render backends that actually exist
	 * (se_mask[] is presumably the per-shader-engine RB mask computed
	 * above, outside this view — confirm against the function head). */
	se_mask[0] &= rb_mask;
	se_mask[1] &= rb_mask;
	se_mask[2] &= rb_mask;
	se_mask[3] &= rb_mask;

	assert(num_se == 1 || num_se == 2 || num_se == 4);
	assert(sh_per_se == 1 || sh_per_se == 2);
	assert(rb_per_pkr == 1 || rb_per_pkr == 2);

	/* XXX: I can't figure out what the *_XSEL and *_YSEL
	 * fields are for, so I'm leaving them as their default
	 * values. */

	/* Program a per-SE PA_SC_RASTER_CONFIG: start from the golden value
	 * and remap SE/PKR/RB fields away from any harvested (disabled)
	 * units so work is only routed to backends that exist. */
	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		/* idx is the even member of the SE pair this SE belongs to. */
		int idx = (se / 2) * 2;

		/* If one SE of the pair is fully disabled, point SE_MAP at
		 * the surviving one. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= C_028350_SE_MAP;

			if (!se_mask[idx]) {
				raster_config_se |=
					S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |=
					S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* Same idea one level down: remap PKR_MAP if a whole packer
		 * (pair of RBs) in this SE was harvested. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= C_028350_PKR_MAP;

			if (!pkr0_mask) {
				raster_config_se |=
					S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |=
					S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* And one level further: remap the RB map inside each packer
		 * if one of its two RBs was harvested. */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= C_028350_RB_MAP_PKR0;

				if (!rb0_mask) {
					raster_config_se |=
						S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= C_028350_RB_MAP_PKR1;

					if (!rb0_mask) {
						raster_config_se |=
							S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* Select this SE only, then write its raster config.
		 * GRBM_GFX_INDEX has a different offset on SI and CI+ */
		if (sctx->b.chip_class < CIK)
			si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
				       SE_INDEX(se) | SH_BROADCAST_WRITES |
				       INSTANCE_BROADCAST_WRITES);
		else
			si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,
				       S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) |
				       S_030800_INSTANCE_BROADCAST_WRITES(1));
		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se);
	}

	/* Restore broadcast mode so later register writes reach all SEs.
	 * GRBM_GFX_INDEX has a different offset on SI and CI+ */
	if (sctx->b.chip_class < CIK)
		si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
			       SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
			       INSTANCE_BROADCAST_WRITES);
	else {
		si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,
			       S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
			       S_030800_INSTANCE_BROADCAST_WRITES(1));

		/* CI+ only: if an entire SE pair is harvested, remap
		 * SE_PAIR_MAP in RASTER_CONFIG_1 to the surviving pair. */
		if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
				     (!se_mask[2] && !se_mask[3]))) {
			raster_config_1 &= C_028354_SE_PAIR_MAP;

			if (!se_mask[0] && !se_mask[1]) {
				raster_config_1 |=
					S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3);
			} else {
				raster_config_1 |=
					S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0);
			}
		}

		si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
	}
}

/* Build the static "init_config" PM4 state: the set of context registers
 * that never change for the lifetime of the context. The commands are
 * uploaded into an indirect buffer and stored in sctx->init_config for the
 * draw path to emit. On allocation failure the function returns silently
 * and sctx->init_config is left unset. */
static void si_init_config(struct si_context *sctx)
{
	struct si_screen *sscreen = sctx->screen;
	/* The harvesting logic below only handles up to 16 render backends. */
	unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16);
	unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask;
	unsigned raster_config, raster_config_1;
	uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
	int i;

	if (!pm4)
		return;

	/* Enable context register loads and shadowing. */
	si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL);
	si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1));
	si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1));
	si_pm4_cmd_end(pm4, false);

	si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
	si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));

	/* FIXME calculate these values somehow ??? */
	si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
	si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
	si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);

	si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
	si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);

	si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
	si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
	/* PA_CL_ENHANCE only exists at this offset on SI. */
	if (sctx->b.chip_class < CIK)
		si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
			       S_008A14_CLIP_VTX_REORDER_ENA(1));

	si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
	si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);

	si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);

	/* Initialize all 16 viewport depth ranges to [0, 1]. */
	for (i = 0; i < 16; i++) {
		si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, 0);
		si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, fui(1.0));
	}

	/* Per-chip golden values for PA_SC_RASTER_CONFIG(_1). */
	switch (sctx->screen->b.family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		raster_config = 0x2a00126a;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_VERDE:
		raster_config = 0x0000124a;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_OLAND:
		raster_config = 0x00000082;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_HAINAN:
		raster_config = 0x00000000;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_BONAIRE:
		raster_config = 0x16000012;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_HAWAII:
		raster_config = 0x3a00161a;
		raster_config_1 = 0x0000002e;
		break;
	case CHIP_FIJI:
		if (sscreen->b.info.cik_macrotile_mode_array[0] == 0x000000e8) {
			/* old kernels with old tiling config */
			raster_config = 0x16000012;
			raster_config_1 = 0x0000002a;
		} else {
			raster_config = 0x3a00161a;
			raster_config_1 = 0x0000002e;
		}
		break;
	case CHIP_POLARIS10:
		raster_config = 0x16000012;
		raster_config_1 = 0x0000002a;
		break;
	case CHIP_POLARIS11:
		raster_config = 0x16000012;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_TONGA:
		raster_config = 0x16000012;
		raster_config_1 = 0x0000002a;
		break;
	case CHIP_ICELAND:
		if (num_rb == 1)
			raster_config = 0x00000000;
		else
			raster_config = 0x00000002;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_CARRIZO:
		raster_config = 0x00000002;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_KAVERI:
		/* KV should be 0x00000002, but that causes problems with radeon */
		raster_config = 0x00000000; /* 0x00000002 */
		raster_config_1 = 0x00000000;
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
	case CHIP_STONEY:
		raster_config = 0x00000000;
		raster_config_1 = 0x00000000;
		break;
	default:
		fprintf(stderr,
			"radeonsi: Unknown GPU, using 0 for raster_config\n");
		raster_config = 0x00000000;
		raster_config_1 = 0x00000000;
		break;
	}

	/* Always use the default config when all backends are enabled
	 * (or when we failed to determine the enabled backends).
	 */
	if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
			       raster_config);
		if (sctx->b.chip_class >= CIK)
			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1,
				       raster_config_1);
	} else {
		/* Some backends are harvested: rewrite the config per SE. */
		si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1);
	}

	/* Open the window/generic/screen scissors to the 16384x16384 max. */
	si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
	si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
	si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
		       S_028244_BR_X(16384) | S_028244_BR_Y(16384));
	si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
	si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,
		       S_028034_BR_X(16384) | S_028034_BR_Y(16384));

	si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
	si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE,
		       S_028230_ER_TRI(0xA) |
		       S_028230_ER_POINT(0xA) |
		       S_028230_ER_RECT(0xA) |
		       /* Required by DX10_DIAMOND_TEST_ENA: */
		       S_028230_ER_LINE_LR(0x1A) |
		       S_028230_ER_LINE_RL(0x26) |
		       S_028230_ER_LINE_TB(0xA) |
		       S_028230_ER_LINE_BT(0xA));
	/* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
	si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
	si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
	si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
	si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
	si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
	si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE,
		       S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
		       S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE));

	si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
	si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
	si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);

	if (sctx->b.chip_class >= CIK) {
		/* Compute-unit enable masks for the shader stages. */
		si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
		si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
		si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
		si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));

		if (sscreen->b.info.num_good_compute_units /
		    (sscreen->b.info.max_se * sscreen->b.info.max_sh_per_se) <= 4) {
			/* Too few available compute units per SH. Disallowing
			 * VS to run on CU0 could hurt us more than late VS
			 * allocation would help.
			 *
			 * LATE_ALLOC_VS = 2 is the highest safe number.
			 */
			si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
			si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
		} else {
			/* Set LATE_ALLOC_VS == 31. It should be less than
			 * the number of scratch waves. Limitations:
			 * - VS can't execute on CU0.
			 * - If HS writes outputs to LDS, LS can't execute on CU0.
			 */
			si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe));
			si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
		}

		si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
	}

	if (sctx->b.chip_class >= VI) {
		unsigned vgt_tess_distribution;

		si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL,
			       S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
			       S_028424_OVERWRITE_COMBINER_WATERMARK(4));
		/* Polaris and later don't program this register here;
		 * pre-Polaris VI parts use 30. */
		if (sctx->b.family < CHIP_POLARIS10)
			si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
		si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);

		vgt_tess_distribution =
			S_028B50_ACCUM_ISOLINE(32) |
			S_028B50_ACCUM_TRI(11) |
			S_028B50_ACCUM_QUAD(11) |
			S_028B50_DONUT_SPLIT(16);

		/* Testing with Unigine Heaven extreme tessellation yielded best results
		 * with TRAP_SPLIT = 3.
		 */
		if (sctx->b.family == CHIP_FIJI ||
		    sctx->b.family >= CHIP_POLARIS10)
			vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);

		si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution);
	} else {
		si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
		si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
	}

	if (sctx->b.family == CHIP_STONEY)
		si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0);

	/* Border color buffer address: TA_BC_BASE_ADDR takes bits [39:8]
	 * (256-byte aligned); CIK+ also gets the high bits (>> 40). */
	si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
	if (sctx->b.chip_class >= CIK)
		si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);
	si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
		      RADEON_PRIO_BORDER_COLORS);

	/* Upload the commands as an indirect buffer and publish the state. */
	si_pm4_upload_indirect_buffer(sctx, pm4);
	sctx->init_config = pm4;
}