si_state.c revision f235dc08ac1dcde6eff87597914583f5b2b9aa70
1/* 2 * Copyright 2012 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Christian König <christian.koenig@amd.com> 25 */ 26 27#include "si_pipe.h" 28#include "si_shader.h" 29#include "sid.h" 30#include "radeon/r600_cs.h" 31 32#include "util/u_dual_blend.h" 33#include "util/u_format.h" 34#include "util/u_format_s3tc.h" 35#include "util/u_memory.h" 36#include "util/u_pstipple.h" 37#include "util/u_resource.h" 38 39/* Initialize an external atom (owned by ../radeon). */ 40static void 41si_init_external_atom(struct si_context *sctx, struct r600_atom *atom, 42 struct r600_atom **list_elem) 43{ 44 atom->id = list_elem - sctx->atoms.array + 1; 45 *list_elem = atom; 46} 47 48/* Initialize an atom owned by radeonsi. 
*/ 49void si_init_atom(struct si_context *sctx, struct r600_atom *atom, 50 struct r600_atom **list_elem, 51 void (*emit_func)(struct si_context *ctx, struct r600_atom *state)) 52{ 53 atom->emit = (void*)emit_func; 54 atom->id = list_elem - sctx->atoms.array + 1; /* index+1 in the atom array */ 55 *list_elem = atom; 56} 57 58static unsigned si_map_swizzle(unsigned swizzle) 59{ 60 switch (swizzle) { 61 case PIPE_SWIZZLE_Y: 62 return V_008F0C_SQ_SEL_Y; 63 case PIPE_SWIZZLE_Z: 64 return V_008F0C_SQ_SEL_Z; 65 case PIPE_SWIZZLE_W: 66 return V_008F0C_SQ_SEL_W; 67 case PIPE_SWIZZLE_0: 68 return V_008F0C_SQ_SEL_0; 69 case PIPE_SWIZZLE_1: 70 return V_008F0C_SQ_SEL_1; 71 default: /* PIPE_SWIZZLE_X */ 72 return V_008F0C_SQ_SEL_X; 73 } 74} 75 76static uint32_t S_FIXED(float value, uint32_t frac_bits) 77{ 78 return value * (1 << frac_bits); 79} 80 81/* 12.4 fixed-point */ 82static unsigned si_pack_float_12p4(float x) 83{ 84 return x <= 0 ? 0 : 85 x >= 4096 ? 0xffff : x * 16; 86} 87 88/* 89 * Inferred framebuffer and blender state. 90 * 91 * CB_TARGET_MASK is emitted here to avoid a hang with dual source blending 92 * if there is not enough PS outputs. 93 */ 94static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *atom) 95{ 96 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 97 struct si_state_blend *blend = sctx->queued.named.blend; 98 uint32_t cb_target_mask, i; 99 100 /* CB_COLORn_INFO.FORMAT=INVALID disables empty colorbuffer slots. */ 101 if (blend) 102 cb_target_mask = blend->cb_target_mask; 103 else 104 cb_target_mask = 0xffffffff; 105 106 /* Avoid a hang that happens when dual source blending is enabled 107 * but there is not enough color outputs. This is undefined behavior, 108 * so disable color writes completely. 109 * 110 * Reproducible with Unigine Heaven 4.0 and drirc missing. 
111 */ 112 if (blend && blend->dual_src_blend && 113 sctx->ps_shader.cso && 114 (sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3) 115 cb_target_mask = 0; 116 117 radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, cb_target_mask); 118 119 /* STONEY-specific register settings. */ 120 if (sctx->b.family == CHIP_STONEY) { 121 unsigned spi_shader_col_format = 122 sctx->ps_shader.cso ? 123 sctx->ps_shader.current->key.ps.epilog.spi_shader_col_format : 0; 124 unsigned sx_ps_downconvert = 0; 125 unsigned sx_blend_opt_epsilon = 0; 126 unsigned sx_blend_opt_control = 0; 127 128 for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 129 struct r600_surface *surf = 130 (struct r600_surface*)sctx->framebuffer.state.cbufs[i]; 131 unsigned format, swap, spi_format, colormask; 132 bool has_alpha, has_rgb; 133 134 if (!surf) 135 continue; 136 137 format = G_028C70_FORMAT(surf->cb_color_info); 138 swap = G_028C70_COMP_SWAP(surf->cb_color_info); 139 spi_format = (spi_shader_col_format >> (i * 4)) & 0xf; 140 colormask = (cb_target_mask >> (i * 4)) & 0xf; 141 142 /* Set if RGB and A are present. */ 143 has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib); 144 145 if (format == V_028C70_COLOR_8 || 146 format == V_028C70_COLOR_16 || 147 format == V_028C70_COLOR_32) 148 has_rgb = !has_alpha; 149 else 150 has_rgb = true; 151 152 /* Check the colormask and export format. */ 153 if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A))) 154 has_rgb = false; 155 if (!(colormask & PIPE_MASK_A)) 156 has_alpha = false; 157 158 if (spi_format == V_028714_SPI_SHADER_ZERO) { 159 has_rgb = false; 160 has_alpha = false; 161 } 162 163 /* Disable value checking for disabled channels. */ 164 if (!has_rgb) 165 sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4); 166 if (!has_alpha) 167 sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4); 168 169 /* Enable down-conversion for 32bpp and smaller formats. 
*/ 170 switch (format) { 171 case V_028C70_COLOR_8: 172 case V_028C70_COLOR_8_8: 173 case V_028C70_COLOR_8_8_8_8: 174 /* For 1 and 2-channel formats, use the superset thereof. */ 175 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR || 176 spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 177 spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 178 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4); 179 sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4); 180 } 181 break; 182 183 case V_028C70_COLOR_5_6_5: 184 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 185 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4); 186 sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4); 187 } 188 break; 189 190 case V_028C70_COLOR_1_5_5_5: 191 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 192 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4); 193 sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4); 194 } 195 break; 196 197 case V_028C70_COLOR_4_4_4_4: 198 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 199 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4); 200 sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4); 201 } 202 break; 203 204 case V_028C70_COLOR_32: 205 if (swap == V_0280A0_SWAP_STD && 206 spi_format == V_028714_SPI_SHADER_32_R) 207 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); 208 else if (swap == V_0280A0_SWAP_ALT_REV && 209 spi_format == V_028714_SPI_SHADER_32_AR) 210 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4); 211 break; 212 213 case V_028C70_COLOR_16: 214 case V_028C70_COLOR_16_16: 215 /* For 1-channel formats, use the superset thereof. 
*/ 216 if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR || 217 spi_format == V_028714_SPI_SHADER_SNORM16_ABGR || 218 spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 219 spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 220 if (swap == V_0280A0_SWAP_STD || 221 swap == V_0280A0_SWAP_STD_REV) 222 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4); 223 else 224 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4); 225 } 226 break; 227 228 case V_028C70_COLOR_10_11_11: 229 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 230 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4); 231 sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4); 232 } 233 break; 234 235 case V_028C70_COLOR_2_10_10_10: 236 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 237 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4); 238 sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4); 239 } 240 break; 241 } 242 } 243 244 if (sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) { 245 sx_ps_downconvert = 0; 246 sx_blend_opt_epsilon = 0; 247 sx_blend_opt_control = 0; 248 } 249 250 radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3); 251 radeon_emit(cs, sx_ps_downconvert); /* R_028754_SX_PS_DOWNCONVERT */ 252 radeon_emit(cs, sx_blend_opt_epsilon); /* R_028758_SX_BLEND_OPT_EPSILON */ 253 radeon_emit(cs, sx_blend_opt_control); /* R_02875C_SX_BLEND_OPT_CONTROL */ 254 } 255} 256 257/* 258 * Blender functions 259 */ 260 261static uint32_t si_translate_blend_function(int blend_func) 262{ 263 switch (blend_func) { 264 case PIPE_BLEND_ADD: 265 return V_028780_COMB_DST_PLUS_SRC; 266 case PIPE_BLEND_SUBTRACT: 267 return V_028780_COMB_SRC_MINUS_DST; 268 case PIPE_BLEND_REVERSE_SUBTRACT: 269 return V_028780_COMB_DST_MINUS_SRC; 270 case PIPE_BLEND_MIN: 271 return V_028780_COMB_MIN_DST_SRC; 272 case PIPE_BLEND_MAX: 273 return V_028780_COMB_MAX_DST_SRC; 274 default: 275 R600_ERR("Unknown blend function %d\n", blend_func); 276 assert(0); 277 break; 278 } 279 
return 0; 280} 281 282static uint32_t si_translate_blend_factor(int blend_fact) 283{ 284 switch (blend_fact) { 285 case PIPE_BLENDFACTOR_ONE: 286 return V_028780_BLEND_ONE; 287 case PIPE_BLENDFACTOR_SRC_COLOR: 288 return V_028780_BLEND_SRC_COLOR; 289 case PIPE_BLENDFACTOR_SRC_ALPHA: 290 return V_028780_BLEND_SRC_ALPHA; 291 case PIPE_BLENDFACTOR_DST_ALPHA: 292 return V_028780_BLEND_DST_ALPHA; 293 case PIPE_BLENDFACTOR_DST_COLOR: 294 return V_028780_BLEND_DST_COLOR; 295 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 296 return V_028780_BLEND_SRC_ALPHA_SATURATE; 297 case PIPE_BLENDFACTOR_CONST_COLOR: 298 return V_028780_BLEND_CONSTANT_COLOR; 299 case PIPE_BLENDFACTOR_CONST_ALPHA: 300 return V_028780_BLEND_CONSTANT_ALPHA; 301 case PIPE_BLENDFACTOR_ZERO: 302 return V_028780_BLEND_ZERO; 303 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 304 return V_028780_BLEND_ONE_MINUS_SRC_COLOR; 305 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 306 return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; 307 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 308 return V_028780_BLEND_ONE_MINUS_DST_ALPHA; 309 case PIPE_BLENDFACTOR_INV_DST_COLOR: 310 return V_028780_BLEND_ONE_MINUS_DST_COLOR; 311 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 312 return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR; 313 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 314 return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA; 315 case PIPE_BLENDFACTOR_SRC1_COLOR: 316 return V_028780_BLEND_SRC1_COLOR; 317 case PIPE_BLENDFACTOR_SRC1_ALPHA: 318 return V_028780_BLEND_SRC1_ALPHA; 319 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 320 return V_028780_BLEND_INV_SRC1_COLOR; 321 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 322 return V_028780_BLEND_INV_SRC1_ALPHA; 323 default: 324 R600_ERR("Bad blend factor %d not supported!\n", blend_fact); 325 assert(0); 326 break; 327 } 328 return 0; 329} 330 331static uint32_t si_translate_blend_opt_function(int blend_func) 332{ 333 switch (blend_func) { 334 case PIPE_BLEND_ADD: 335 return V_028760_OPT_COMB_ADD; 336 case PIPE_BLEND_SUBTRACT: 337 return 
V_028760_OPT_COMB_SUBTRACT; 338 case PIPE_BLEND_REVERSE_SUBTRACT: 339 return V_028760_OPT_COMB_REVSUBTRACT; 340 case PIPE_BLEND_MIN: 341 return V_028760_OPT_COMB_MIN; 342 case PIPE_BLEND_MAX: 343 return V_028760_OPT_COMB_MAX; 344 default: 345 return V_028760_OPT_COMB_BLEND_DISABLED; 346 } 347} 348 349static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha) 350{ 351 switch (blend_fact) { 352 case PIPE_BLENDFACTOR_ZERO: 353 return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL; 354 case PIPE_BLENDFACTOR_ONE: 355 return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE; 356 case PIPE_BLENDFACTOR_SRC_COLOR: 357 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0 358 : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0; 359 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 360 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1 361 : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1; 362 case PIPE_BLENDFACTOR_SRC_ALPHA: 363 return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0; 364 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 365 return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1; 366 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 367 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE 368 : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 369 default: 370 return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 371 } 372} 373 374/** 375 * Get rid of DST in the blend factors by commuting the operands: 376 * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 377 */ 378static void si_blend_remove_dst(unsigned *func, unsigned *src_factor, 379 unsigned *dst_factor, unsigned expected_dst, 380 unsigned replacement_src) 381{ 382 if (*src_factor == expected_dst && 383 *dst_factor == PIPE_BLENDFACTOR_ZERO) { 384 *src_factor = PIPE_BLENDFACTOR_ZERO; 385 *dst_factor = replacement_src; 386 387 /* Commuting the operands requires reversing subtractions. 
*/ 388 if (*func == PIPE_BLEND_SUBTRACT) 389 *func = PIPE_BLEND_REVERSE_SUBTRACT; 390 else if (*func == PIPE_BLEND_REVERSE_SUBTRACT) 391 *func = PIPE_BLEND_SUBTRACT; 392 } 393} 394 395static bool si_blend_factor_uses_dst(unsigned factor) 396{ 397 return factor == PIPE_BLENDFACTOR_DST_COLOR || 398 factor == PIPE_BLENDFACTOR_DST_ALPHA || 399 factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 400 factor == PIPE_BLENDFACTOR_INV_DST_ALPHA || 401 factor == PIPE_BLENDFACTOR_INV_DST_COLOR; 402} 403 404static void *si_create_blend_state_mode(struct pipe_context *ctx, 405 const struct pipe_blend_state *state, 406 unsigned mode) 407{ 408 struct si_context *sctx = (struct si_context*)ctx; 409 struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend); 410 struct si_pm4_state *pm4 = &blend->pm4; 411 uint32_t sx_mrt_blend_opt[8] = {0}; 412 uint32_t color_control = 0; 413 414 if (!blend) 415 return NULL; 416 417 blend->alpha_to_coverage = state->alpha_to_coverage; 418 blend->alpha_to_one = state->alpha_to_one; 419 blend->dual_src_blend = util_blend_state_is_dual(state, 0); 420 421 if (state->logicop_enable) { 422 color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4)); 423 } else { 424 color_control |= S_028808_ROP3(0xcc); 425 } 426 427 si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 428 S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 429 S_028B70_ALPHA_TO_MASK_OFFSET0(2) | 430 S_028B70_ALPHA_TO_MASK_OFFSET1(2) | 431 S_028B70_ALPHA_TO_MASK_OFFSET2(2) | 432 S_028B70_ALPHA_TO_MASK_OFFSET3(2)); 433 434 if (state->alpha_to_coverage) 435 blend->need_src_alpha_4bit |= 0xf; 436 437 blend->cb_target_mask = 0; 438 for (int i = 0; i < 8; i++) { 439 /* state->rt entries > 0 only written if independent blending */ 440 const int j = state->independent_blend_enable ? 
i : 0; 441 442 unsigned eqRGB = state->rt[j].rgb_func; 443 unsigned srcRGB = state->rt[j].rgb_src_factor; 444 unsigned dstRGB = state->rt[j].rgb_dst_factor; 445 unsigned eqA = state->rt[j].alpha_func; 446 unsigned srcA = state->rt[j].alpha_src_factor; 447 unsigned dstA = state->rt[j].alpha_dst_factor; 448 449 unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt; 450 unsigned blend_cntl = 0; 451 452 sx_mrt_blend_opt[i] = 453 S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | 454 S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED); 455 456 /* Only set dual source blending for MRT0 to avoid a hang. */ 457 if (i >= 1 && blend->dual_src_blend) 458 continue; 459 460 /* Only addition and subtraction equations are supported with 461 * dual source blending. 462 */ 463 if (blend->dual_src_blend && 464 (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX || 465 eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) { 466 assert(!"Unsupported equation for dual source blending"); 467 continue; 468 } 469 470 if (!state->rt[j].colormask) 471 continue; 472 473 /* cb_render_state will disable unused ones */ 474 blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i); 475 476 if (!state->rt[j].blend_enable) { 477 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 478 continue; 479 } 480 481 /* Blending optimizations for Stoney. 482 * These transformations don't change the behavior. 483 * 484 * First, get rid of DST in the blend factors: 485 * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 486 */ 487 si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, 488 PIPE_BLENDFACTOR_DST_COLOR, 489 PIPE_BLENDFACTOR_SRC_COLOR); 490 si_blend_remove_dst(&eqA, &srcA, &dstA, 491 PIPE_BLENDFACTOR_DST_COLOR, 492 PIPE_BLENDFACTOR_SRC_COLOR); 493 si_blend_remove_dst(&eqA, &srcA, &dstA, 494 PIPE_BLENDFACTOR_DST_ALPHA, 495 PIPE_BLENDFACTOR_SRC_ALPHA); 496 497 /* Look up the ideal settings from tables. 
*/ 498 srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false); 499 dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false); 500 srcA_opt = si_translate_blend_opt_factor(srcA, true); 501 dstA_opt = si_translate_blend_opt_factor(dstA, true); 502 503 /* Handle interdependencies. */ 504 if (si_blend_factor_uses_dst(srcRGB)) 505 dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 506 if (si_blend_factor_uses_dst(srcA)) 507 dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 508 509 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE && 510 (dstRGB == PIPE_BLENDFACTOR_ZERO || 511 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 512 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)) 513 dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 514 515 /* Set the final value. */ 516 sx_mrt_blend_opt[i] = 517 S_028760_COLOR_SRC_OPT(srcRGB_opt) | 518 S_028760_COLOR_DST_OPT(dstRGB_opt) | 519 S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) | 520 S_028760_ALPHA_SRC_OPT(srcA_opt) | 521 S_028760_ALPHA_DST_OPT(dstA_opt) | 522 S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA)); 523 524 /* Set blend state. */ 525 blend_cntl |= S_028780_ENABLE(1); 526 blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB)); 527 blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB)); 528 blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB)); 529 530 if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { 531 blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1); 532 blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA)); 533 blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA)); 534 blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA)); 535 } 536 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 537 538 blend->blend_enable_4bit |= 0xfu << (i * 4); 539 540 /* This is only important for formats without alpha. 
*/ 541 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 542 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 543 srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 544 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 545 srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || 546 dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA) 547 blend->need_src_alpha_4bit |= 0xfu << (i * 4); 548 } 549 550 if (blend->cb_target_mask) { 551 color_control |= S_028808_MODE(mode); 552 } else { 553 color_control |= S_028808_MODE(V_028808_CB_DISABLE); 554 } 555 556 if (sctx->b.family == CHIP_STONEY) { 557 for (int i = 0; i < 8; i++) 558 si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4, 559 sx_mrt_blend_opt[i]); 560 561 /* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */ 562 if (blend->dual_src_blend || state->logicop_enable || 563 mode == V_028808_CB_RESOLVE) 564 color_control |= S_028808_DISABLE_DUAL_QUAD(1); 565 } 566 567 si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control); 568 return blend; 569} 570 571static void *si_create_blend_state(struct pipe_context *ctx, 572 const struct pipe_blend_state *state) 573{ 574 return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); 575} 576 577static void si_bind_blend_state(struct pipe_context *ctx, void *state) 578{ 579 struct si_context *sctx = (struct si_context *)ctx; 580 si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state); 581 si_mark_atom_dirty(sctx, &sctx->cb_render_state); 582 sctx->do_update_shaders = true; 583} 584 585static void si_delete_blend_state(struct pipe_context *ctx, void *state) 586{ 587 struct si_context *sctx = (struct si_context *)ctx; 588 si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state); 589} 590 591static void si_set_blend_color(struct pipe_context *ctx, 592 const struct pipe_blend_color *state) 593{ 594 struct si_context *sctx = (struct si_context *)ctx; 595 596 if (memcmp(&sctx->blend_color.state, state, sizeof(*state)) == 0) 597 return; 598 599 sctx->blend_color.state = *state; 
600 si_mark_atom_dirty(sctx, &sctx->blend_color.atom); 601} 602 603static void si_emit_blend_color(struct si_context *sctx, struct r600_atom *atom) 604{ 605 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 606 607 radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4); 608 radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4); 609} 610 611/* 612 * Clipping 613 */ 614 615static void si_set_clip_state(struct pipe_context *ctx, 616 const struct pipe_clip_state *state) 617{ 618 struct si_context *sctx = (struct si_context *)ctx; 619 struct pipe_constant_buffer cb; 620 621 if (memcmp(&sctx->clip_state.state, state, sizeof(*state)) == 0) 622 return; 623 624 sctx->clip_state.state = *state; 625 si_mark_atom_dirty(sctx, &sctx->clip_state.atom); 626 627 cb.buffer = NULL; 628 cb.user_buffer = state->ucp; 629 cb.buffer_offset = 0; 630 cb.buffer_size = 4*4*8; 631 si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb); 632 pipe_resource_reference(&cb.buffer, NULL); 633} 634 635static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom) 636{ 637 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 638 639 radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4); 640 radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4); 641} 642 643#define SIX_BITS 0x3F 644 645static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom) 646{ 647 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 648 struct tgsi_shader_info *info = si_get_vs_info(sctx); 649 unsigned window_space = 650 info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; 651 unsigned clipdist_mask = 652 info->writes_clipvertex ? 
SIX_BITS : info->clipdist_writemask; 653 unsigned total_mask = clipdist_mask | (info->culldist_writemask << info->num_written_clipdistance); 654 655 radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL, 656 S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) | 657 S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) | 658 S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) | 659 S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) | 660 S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) | 661 S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) | 662 S_02881C_VS_OUT_MISC_VEC_ENA(info->writes_psize || 663 info->writes_edgeflag || 664 info->writes_layer || 665 info->writes_viewport_index) | 666 S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1) | 667 (sctx->queued.named.rasterizer->clip_plane_enable & 668 clipdist_mask) | (info->culldist_writemask << 8)); 669 radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL, 670 sctx->queued.named.rasterizer->pa_cl_clip_cntl | 671 (clipdist_mask ? 0 : 672 sctx->queued.named.rasterizer->clip_plane_enable & SIX_BITS) | 673 S_028810_CLIP_DISABLE(window_space)); 674 675 /* reuse needs to be set off if we write oViewport */ 676 radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF, 677 S_028AB4_REUSE_OFF(info->writes_viewport_index)); 678} 679 680/* 681 * inferred state between framebuffer and rasterizer 682 */ 683static void si_update_poly_offset_state(struct si_context *sctx) 684{ 685 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 686 687 if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) 688 return; 689 690 switch (sctx->framebuffer.state.zsbuf->texture->format) { 691 case PIPE_FORMAT_Z16_UNORM: 692 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]); 693 break; 694 default: /* 24-bit */ 695 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]); 696 break; 697 case PIPE_FORMAT_Z32_FLOAT: 698 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 699 si_pm4_bind_state(sctx, poly_offset, 
&rs->pm4_poly_offset[2]); 700 break; 701 } 702} 703 704/* 705 * Rasterizer 706 */ 707 708static uint32_t si_translate_fill(uint32_t func) 709{ 710 switch(func) { 711 case PIPE_POLYGON_MODE_FILL: 712 return V_028814_X_DRAW_TRIANGLES; 713 case PIPE_POLYGON_MODE_LINE: 714 return V_028814_X_DRAW_LINES; 715 case PIPE_POLYGON_MODE_POINT: 716 return V_028814_X_DRAW_POINTS; 717 default: 718 assert(0); 719 return V_028814_X_DRAW_POINTS; 720 } 721} 722 723static void *si_create_rs_state(struct pipe_context *ctx, 724 const struct pipe_rasterizer_state *state) 725{ 726 struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer); 727 struct si_pm4_state *pm4 = &rs->pm4; 728 unsigned tmp, i; 729 float psize_min, psize_max; 730 731 if (!rs) { 732 return NULL; 733 } 734 735 rs->scissor_enable = state->scissor; 736 rs->two_side = state->light_twoside; 737 rs->multisample_enable = state->multisample; 738 rs->force_persample_interp = state->force_persample_interp; 739 rs->clip_plane_enable = state->clip_plane_enable; 740 rs->line_stipple_enable = state->line_stipple_enable; 741 rs->poly_stipple_enable = state->poly_stipple_enable; 742 rs->line_smooth = state->line_smooth; 743 rs->poly_smooth = state->poly_smooth; 744 rs->uses_poly_offset = state->offset_point || state->offset_line || 745 state->offset_tri; 746 rs->clamp_fragment_color = state->clamp_fragment_color; 747 rs->flatshade = state->flatshade; 748 rs->sprite_coord_enable = state->sprite_coord_enable; 749 rs->rasterizer_discard = state->rasterizer_discard; 750 rs->pa_sc_line_stipple = state->line_stipple_enable ? 
751 S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | 752 S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0; 753 rs->pa_cl_clip_cntl = 754 S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) | 755 S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) | 756 S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) | 757 S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) | 758 S_028810_DX_LINEAR_ATTR_CLIP_ENA(1); 759 760 si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0, 761 S_0286D4_FLAT_SHADE_ENA(1) | 762 S_0286D4_PNT_SPRITE_ENA(1) | 763 S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | 764 S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) | 765 S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) | 766 S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) | 767 S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT)); 768 769 /* point size 12.4 fixed point */ 770 tmp = (unsigned)(state->point_size * 8.0); 771 si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp)); 772 773 if (state->point_size_per_vertex) { 774 psize_min = util_get_min_point_size(state); 775 psize_max = 8192; 776 } else { 777 /* Force the point size to be as if the vertex output was disabled. */ 778 psize_min = state->point_size; 779 psize_max = state->point_size; 780 } 781 /* Divide by two, because 0.5 = 1 pixel. 
*/ 782 si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX, 783 S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) | 784 S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2))); 785 786 tmp = (unsigned)state->line_width * 8; 787 si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp)); 788 si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0, 789 S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | 790 S_028A48_MSAA_ENABLE(state->multisample || 791 state->poly_smooth || 792 state->line_smooth) | 793 S_028A48_VPORT_SCISSOR_ENABLE(1)); 794 795 si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL, 796 S_028BE4_PIX_CENTER(state->half_pixel_center) | 797 S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH)); 798 799 si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp)); 800 si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL, 801 S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) | 802 S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | 803 S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | 804 S_028814_FACE(!state->front_ccw) | 805 S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) | 806 S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) | 807 S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) | 808 S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL || 809 state->fill_back != PIPE_POLYGON_MODE_FILL) | 810 S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) | 811 S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back))); 812 si_pm4_set_reg(pm4, R_00B130_SPI_SHADER_USER_DATA_VS_0 + 813 SI_SGPR_VS_STATE_BITS * 4, state->clamp_vertex_color); 814 815 /* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. 
*/ 816 for (i = 0; i < 3; i++) { 817 struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i]; 818 float offset_units = state->offset_units; 819 float offset_scale = state->offset_scale * 16.0f; 820 uint32_t pa_su_poly_offset_db_fmt_cntl = 0; 821 822 if (!state->offset_units_unscaled) { 823 switch (i) { 824 case 0: /* 16-bit zbuffer */ 825 offset_units *= 4.0f; 826 pa_su_poly_offset_db_fmt_cntl = 827 S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16); 828 break; 829 case 1: /* 24-bit zbuffer */ 830 offset_units *= 2.0f; 831 pa_su_poly_offset_db_fmt_cntl = 832 S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24); 833 break; 834 case 2: /* 32-bit zbuffer */ 835 offset_units *= 1.0f; 836 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | 837 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); 838 break; 839 } 840 } 841 842 si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 843 fui(offset_scale)); 844 si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, 845 fui(offset_units)); 846 si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, 847 fui(offset_scale)); 848 si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, 849 fui(offset_units)); 850 si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 851 pa_su_poly_offset_db_fmt_cntl); 852 } 853 854 return rs; 855} 856 857static void si_bind_rs_state(struct pipe_context *ctx, void *state) 858{ 859 struct si_context *sctx = (struct si_context *)ctx; 860 struct si_state_rasterizer *old_rs = 861 (struct si_state_rasterizer*)sctx->queued.named.rasterizer; 862 struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; 863 864 if (!state) 865 return; 866 867 if (sctx->framebuffer.nr_samples > 1 && 868 (!old_rs || old_rs->multisample_enable != rs->multisample_enable)) { 869 si_mark_atom_dirty(sctx, &sctx->db_render_state); 870 871 if (sctx->b.family >= CHIP_POLARIS10) 872 si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom); 873 } 874 875 r600_set_scissor_enable(&sctx->b, rs->scissor_enable); 876 
877 si_pm4_bind_state(sctx, rasterizer, rs); 878 si_update_poly_offset_state(sctx); 879 880 si_mark_atom_dirty(sctx, &sctx->clip_regs); 881 sctx->do_update_shaders = true; 882} 883 884static void si_delete_rs_state(struct pipe_context *ctx, void *state) 885{ 886 struct si_context *sctx = (struct si_context *)ctx; 887 888 if (sctx->queued.named.rasterizer == state) 889 si_pm4_bind_state(sctx, poly_offset, NULL); 890 si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state); 891} 892 893/* 894 * infeered state between dsa and stencil ref 895 */ 896static void si_emit_stencil_ref(struct si_context *sctx, struct r600_atom *atom) 897{ 898 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 899 struct pipe_stencil_ref *ref = &sctx->stencil_ref.state; 900 struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part; 901 902 radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2); 903 radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) | 904 S_028430_STENCILMASK(dsa->valuemask[0]) | 905 S_028430_STENCILWRITEMASK(dsa->writemask[0]) | 906 S_028430_STENCILOPVAL(1)); 907 radeon_emit(cs, S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) | 908 S_028434_STENCILMASK_BF(dsa->valuemask[1]) | 909 S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) | 910 S_028434_STENCILOPVAL_BF(1)); 911} 912 913static void si_set_stencil_ref(struct pipe_context *ctx, 914 const struct pipe_stencil_ref *state) 915{ 916 struct si_context *sctx = (struct si_context *)ctx; 917 918 if (memcmp(&sctx->stencil_ref.state, state, sizeof(*state)) == 0) 919 return; 920 921 sctx->stencil_ref.state = *state; 922 si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom); 923} 924 925 926/* 927 * DSA 928 */ 929 930static uint32_t si_translate_stencil_op(int s_op) 931{ 932 switch (s_op) { 933 case PIPE_STENCIL_OP_KEEP: 934 return V_02842C_STENCIL_KEEP; 935 case PIPE_STENCIL_OP_ZERO: 936 return V_02842C_STENCIL_ZERO; 937 case PIPE_STENCIL_OP_REPLACE: 938 return V_02842C_STENCIL_REPLACE_TEST; 939 case 
PIPE_STENCIL_OP_INCR: 940 return V_02842C_STENCIL_ADD_CLAMP; 941 case PIPE_STENCIL_OP_DECR: 942 return V_02842C_STENCIL_SUB_CLAMP; 943 case PIPE_STENCIL_OP_INCR_WRAP: 944 return V_02842C_STENCIL_ADD_WRAP; 945 case PIPE_STENCIL_OP_DECR_WRAP: 946 return V_02842C_STENCIL_SUB_WRAP; 947 case PIPE_STENCIL_OP_INVERT: 948 return V_02842C_STENCIL_INVERT; 949 default: 950 R600_ERR("Unknown stencil op %d", s_op); 951 assert(0); 952 break; 953 } 954 return 0; 955} 956 957static void *si_create_dsa_state(struct pipe_context *ctx, 958 const struct pipe_depth_stencil_alpha_state *state) 959{ 960 struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa); 961 struct si_pm4_state *pm4 = &dsa->pm4; 962 unsigned db_depth_control; 963 uint32_t db_stencil_control = 0; 964 965 if (!dsa) { 966 return NULL; 967 } 968 969 dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask; 970 dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask; 971 dsa->stencil_ref.writemask[0] = state->stencil[0].writemask; 972 dsa->stencil_ref.writemask[1] = state->stencil[1].writemask; 973 974 db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | 975 S_028800_Z_WRITE_ENABLE(state->depth.writemask) | 976 S_028800_ZFUNC(state->depth.func) | 977 S_028800_DEPTH_BOUNDS_ENABLE(state->depth.bounds_test); 978 979 /* stencil */ 980 if (state->stencil[0].enabled) { 981 db_depth_control |= S_028800_STENCIL_ENABLE(1); 982 db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func); 983 db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op)); 984 db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op)); 985 db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op)); 986 987 if (state->stencil[1].enabled) { 988 db_depth_control |= S_028800_BACKFACE_ENABLE(1); 989 db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func); 990 db_stencil_control |= 
S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op)); 991 db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op)); 992 db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op)); 993 } 994 } 995 996 /* alpha */ 997 if (state->alpha.enabled) { 998 dsa->alpha_func = state->alpha.func; 999 1000 si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 + 1001 SI_SGPR_ALPHA_REF * 4, fui(state->alpha.ref_value)); 1002 } else { 1003 dsa->alpha_func = PIPE_FUNC_ALWAYS; 1004 } 1005 1006 si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control); 1007 si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); 1008 if (state->depth.bounds_test) { 1009 si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min)); 1010 si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max)); 1011 } 1012 1013 return dsa; 1014} 1015 1016static void si_bind_dsa_state(struct pipe_context *ctx, void *state) 1017{ 1018 struct si_context *sctx = (struct si_context *)ctx; 1019 struct si_state_dsa *dsa = state; 1020 1021 if (!state) 1022 return; 1023 1024 si_pm4_bind_state(sctx, dsa, dsa); 1025 1026 if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part, 1027 sizeof(struct si_dsa_stencil_ref_part)) != 0) { 1028 sctx->stencil_ref.dsa_part = dsa->stencil_ref; 1029 si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom); 1030 } 1031 sctx->do_update_shaders = true; 1032} 1033 1034static void si_delete_dsa_state(struct pipe_context *ctx, void *state) 1035{ 1036 struct si_context *sctx = (struct si_context *)ctx; 1037 si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state); 1038} 1039 1040static void *si_create_db_flush_dsa(struct si_context *sctx) 1041{ 1042 struct pipe_depth_stencil_alpha_state dsa = {}; 1043 1044 return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa); 1045} 1046 1047/* DB RENDER STATE */ 1048 1049static void 
si_set_active_query_state(struct pipe_context *ctx, boolean enable) 1050{ 1051 struct si_context *sctx = (struct si_context*)ctx; 1052 1053 /* Pipeline stat & streamout queries. */ 1054 if (enable) { 1055 sctx->b.flags &= ~R600_CONTEXT_STOP_PIPELINE_STATS; 1056 sctx->b.flags |= R600_CONTEXT_START_PIPELINE_STATS; 1057 } else { 1058 sctx->b.flags &= ~R600_CONTEXT_START_PIPELINE_STATS; 1059 sctx->b.flags |= R600_CONTEXT_STOP_PIPELINE_STATS; 1060 } 1061 1062 /* Occlusion queries. */ 1063 if (sctx->occlusion_queries_disabled != !enable) { 1064 sctx->occlusion_queries_disabled = !enable; 1065 si_mark_atom_dirty(sctx, &sctx->db_render_state); 1066 } 1067} 1068 1069static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable) 1070{ 1071 struct si_context *sctx = (struct si_context*)ctx; 1072 1073 si_mark_atom_dirty(sctx, &sctx->db_render_state); 1074} 1075 1076static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state) 1077{ 1078 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 1079 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 1080 unsigned db_shader_control; 1081 1082 radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2); 1083 1084 /* DB_RENDER_CONTROL */ 1085 if (sctx->dbcb_depth_copy_enabled || 1086 sctx->dbcb_stencil_copy_enabled) { 1087 radeon_emit(cs, 1088 S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) | 1089 S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) | 1090 S_028000_COPY_CENTROID(1) | 1091 S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample)); 1092 } else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) { 1093 radeon_emit(cs, 1094 S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) | 1095 S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace)); 1096 } else { 1097 radeon_emit(cs, 1098 S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) | 1099 S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear)); 1100 } 1101 1102 /* DB_COUNT_CONTROL (occlusion queries) 
*/ 1103 if (sctx->b.num_occlusion_queries > 0 && 1104 !sctx->occlusion_queries_disabled) { 1105 bool perfect = sctx->b.num_perfect_occlusion_queries > 0; 1106 1107 if (sctx->b.chip_class >= CIK) { 1108 radeon_emit(cs, 1109 S_028004_PERFECT_ZPASS_COUNTS(perfect) | 1110 S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) | 1111 S_028004_ZPASS_ENABLE(1) | 1112 S_028004_SLICE_EVEN_ENABLE(1) | 1113 S_028004_SLICE_ODD_ENABLE(1)); 1114 } else { 1115 radeon_emit(cs, 1116 S_028004_PERFECT_ZPASS_COUNTS(perfect) | 1117 S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples)); 1118 } 1119 } else { 1120 /* Disable occlusion queries. */ 1121 if (sctx->b.chip_class >= CIK) { 1122 radeon_emit(cs, 0); 1123 } else { 1124 radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1)); 1125 } 1126 } 1127 1128 /* DB_RENDER_OVERRIDE2 */ 1129 radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 1130 S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) | 1131 S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) | 1132 S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4)); 1133 1134 db_shader_control = S_02880C_ALPHA_TO_MASK_DISABLE(sctx->framebuffer.cb0_is_integer) | 1135 sctx->ps_db_shader_control; 1136 1137 /* Bug workaround for smoothing (overrasterization) on SI. */ 1138 if (sctx->b.chip_class == SI && sctx->smoothing_enabled) { 1139 db_shader_control &= C_02880C_Z_ORDER; 1140 db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z); 1141 } 1142 1143 /* Disable the gl_SampleMask fragment shader output if MSAA is disabled. 
*/ 1144 if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable)) 1145 db_shader_control &= C_02880C_MASK_EXPORT_ENABLE; 1146 1147 if (sctx->b.family == CHIP_STONEY && 1148 sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) 1149 db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1); 1150 1151 radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, 1152 db_shader_control); 1153} 1154 1155/* 1156 * format translation 1157 */ 1158static uint32_t si_translate_colorformat(enum pipe_format format) 1159{ 1160 const struct util_format_description *desc = util_format_description(format); 1161 1162#define HAS_SIZE(x,y,z,w) \ 1163 (desc->channel[0].size == (x) && desc->channel[1].size == (y) && \ 1164 desc->channel[2].size == (z) && desc->channel[3].size == (w)) 1165 1166 if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */ 1167 return V_028C70_COLOR_10_11_11; 1168 1169 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) 1170 return V_028C70_COLOR_INVALID; 1171 1172 /* hw cannot support mixed formats (except depth/stencil, since 1173 * stencil is not written to). 
*/ 1174 if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) 1175 return V_028C70_COLOR_INVALID; 1176 1177 switch (desc->nr_channels) { 1178 case 1: 1179 switch (desc->channel[0].size) { 1180 case 8: 1181 return V_028C70_COLOR_8; 1182 case 16: 1183 return V_028C70_COLOR_16; 1184 case 32: 1185 return V_028C70_COLOR_32; 1186 } 1187 break; 1188 case 2: 1189 if (desc->channel[0].size == desc->channel[1].size) { 1190 switch (desc->channel[0].size) { 1191 case 8: 1192 return V_028C70_COLOR_8_8; 1193 case 16: 1194 return V_028C70_COLOR_16_16; 1195 case 32: 1196 return V_028C70_COLOR_32_32; 1197 } 1198 } else if (HAS_SIZE(8,24,0,0)) { 1199 return V_028C70_COLOR_24_8; 1200 } else if (HAS_SIZE(24,8,0,0)) { 1201 return V_028C70_COLOR_8_24; 1202 } 1203 break; 1204 case 3: 1205 if (HAS_SIZE(5,6,5,0)) { 1206 return V_028C70_COLOR_5_6_5; 1207 } else if (HAS_SIZE(32,8,24,0)) { 1208 return V_028C70_COLOR_X24_8_32_FLOAT; 1209 } 1210 break; 1211 case 4: 1212 if (desc->channel[0].size == desc->channel[1].size && 1213 desc->channel[0].size == desc->channel[2].size && 1214 desc->channel[0].size == desc->channel[3].size) { 1215 switch (desc->channel[0].size) { 1216 case 4: 1217 return V_028C70_COLOR_4_4_4_4; 1218 case 8: 1219 return V_028C70_COLOR_8_8_8_8; 1220 case 16: 1221 return V_028C70_COLOR_16_16_16_16; 1222 case 32: 1223 return V_028C70_COLOR_32_32_32_32; 1224 } 1225 } else if (HAS_SIZE(5,5,5,1)) { 1226 return V_028C70_COLOR_1_5_5_5; 1227 } else if (HAS_SIZE(10,10,10,2)) { 1228 return V_028C70_COLOR_2_10_10_10; 1229 } 1230 break; 1231 } 1232 return V_028C70_COLOR_INVALID; 1233} 1234 1235static uint32_t si_colorformat_endian_swap(uint32_t colorformat) 1236{ 1237 if (SI_BIG_ENDIAN) { 1238 switch(colorformat) { 1239 /* 8-bit buffers. */ 1240 case V_028C70_COLOR_8: 1241 return V_028C70_ENDIAN_NONE; 1242 1243 /* 16-bit buffers. 
*/ 1244 case V_028C70_COLOR_5_6_5: 1245 case V_028C70_COLOR_1_5_5_5: 1246 case V_028C70_COLOR_4_4_4_4: 1247 case V_028C70_COLOR_16: 1248 case V_028C70_COLOR_8_8: 1249 return V_028C70_ENDIAN_8IN16; 1250 1251 /* 32-bit buffers. */ 1252 case V_028C70_COLOR_8_8_8_8: 1253 case V_028C70_COLOR_2_10_10_10: 1254 case V_028C70_COLOR_8_24: 1255 case V_028C70_COLOR_24_8: 1256 case V_028C70_COLOR_16_16: 1257 return V_028C70_ENDIAN_8IN32; 1258 1259 /* 64-bit buffers. */ 1260 case V_028C70_COLOR_16_16_16_16: 1261 return V_028C70_ENDIAN_8IN16; 1262 1263 case V_028C70_COLOR_32_32: 1264 return V_028C70_ENDIAN_8IN32; 1265 1266 /* 128-bit buffers. */ 1267 case V_028C70_COLOR_32_32_32_32: 1268 return V_028C70_ENDIAN_8IN32; 1269 default: 1270 return V_028C70_ENDIAN_NONE; /* Unsupported. */ 1271 } 1272 } else { 1273 return V_028C70_ENDIAN_NONE; 1274 } 1275} 1276 1277static uint32_t si_translate_dbformat(enum pipe_format format) 1278{ 1279 switch (format) { 1280 case PIPE_FORMAT_Z16_UNORM: 1281 return V_028040_Z_16; 1282 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1283 case PIPE_FORMAT_X8Z24_UNORM: 1284 case PIPE_FORMAT_Z24X8_UNORM: 1285 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1286 return V_028040_Z_24; /* deprecated on SI */ 1287 case PIPE_FORMAT_Z32_FLOAT: 1288 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1289 return V_028040_Z_32_FLOAT; 1290 default: 1291 return V_028040_Z_INVALID; 1292 } 1293} 1294 1295/* 1296 * Texture translation 1297 */ 1298 1299static uint32_t si_translate_texformat(struct pipe_screen *screen, 1300 enum pipe_format format, 1301 const struct util_format_description *desc, 1302 int first_non_void) 1303{ 1304 struct si_screen *sscreen = (struct si_screen*)screen; 1305 bool enable_compressed_formats = (sscreen->b.info.drm_major == 2 && 1306 sscreen->b.info.drm_minor >= 31) || 1307 sscreen->b.info.drm_major == 3; 1308 bool uniform = true; 1309 int i; 1310 1311 /* Colorspace (return non-RGB formats directly). 
*/ 1312 switch (desc->colorspace) { 1313 /* Depth stencil formats */ 1314 case UTIL_FORMAT_COLORSPACE_ZS: 1315 switch (format) { 1316 case PIPE_FORMAT_Z16_UNORM: 1317 return V_008F14_IMG_DATA_FORMAT_16; 1318 case PIPE_FORMAT_X24S8_UINT: 1319 case PIPE_FORMAT_Z24X8_UNORM: 1320 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1321 return V_008F14_IMG_DATA_FORMAT_8_24; 1322 case PIPE_FORMAT_X8Z24_UNORM: 1323 case PIPE_FORMAT_S8X24_UINT: 1324 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1325 return V_008F14_IMG_DATA_FORMAT_24_8; 1326 case PIPE_FORMAT_S8_UINT: 1327 return V_008F14_IMG_DATA_FORMAT_8; 1328 case PIPE_FORMAT_Z32_FLOAT: 1329 return V_008F14_IMG_DATA_FORMAT_32; 1330 case PIPE_FORMAT_X32_S8X24_UINT: 1331 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1332 return V_008F14_IMG_DATA_FORMAT_X24_8_32; 1333 default: 1334 goto out_unknown; 1335 } 1336 1337 case UTIL_FORMAT_COLORSPACE_YUV: 1338 goto out_unknown; /* TODO */ 1339 1340 case UTIL_FORMAT_COLORSPACE_SRGB: 1341 if (desc->nr_channels != 4 && desc->nr_channels != 1) 1342 goto out_unknown; 1343 break; 1344 1345 default: 1346 break; 1347 } 1348 1349 if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { 1350 if (!enable_compressed_formats) 1351 goto out_unknown; 1352 1353 switch (format) { 1354 case PIPE_FORMAT_RGTC1_SNORM: 1355 case PIPE_FORMAT_LATC1_SNORM: 1356 case PIPE_FORMAT_RGTC1_UNORM: 1357 case PIPE_FORMAT_LATC1_UNORM: 1358 return V_008F14_IMG_DATA_FORMAT_BC4; 1359 case PIPE_FORMAT_RGTC2_SNORM: 1360 case PIPE_FORMAT_LATC2_SNORM: 1361 case PIPE_FORMAT_RGTC2_UNORM: 1362 case PIPE_FORMAT_LATC2_UNORM: 1363 return V_008F14_IMG_DATA_FORMAT_BC5; 1364 default: 1365 goto out_unknown; 1366 } 1367 } 1368 1369 if (desc->layout == UTIL_FORMAT_LAYOUT_ETC && 1370 sscreen->b.family == CHIP_STONEY) { 1371 switch (format) { 1372 case PIPE_FORMAT_ETC1_RGB8: 1373 case PIPE_FORMAT_ETC2_RGB8: 1374 case PIPE_FORMAT_ETC2_SRGB8: 1375 return V_008F14_IMG_DATA_FORMAT_ETC2_RGB; 1376 case PIPE_FORMAT_ETC2_RGB8A1: 1377 case PIPE_FORMAT_ETC2_SRGB8A1: 1378 return 
V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1; 1379 case PIPE_FORMAT_ETC2_RGBA8: 1380 case PIPE_FORMAT_ETC2_SRGBA8: 1381 return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA; 1382 case PIPE_FORMAT_ETC2_R11_UNORM: 1383 case PIPE_FORMAT_ETC2_R11_SNORM: 1384 return V_008F14_IMG_DATA_FORMAT_ETC2_R; 1385 case PIPE_FORMAT_ETC2_RG11_UNORM: 1386 case PIPE_FORMAT_ETC2_RG11_SNORM: 1387 return V_008F14_IMG_DATA_FORMAT_ETC2_RG; 1388 default: 1389 goto out_unknown; 1390 } 1391 } 1392 1393 if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { 1394 if (!enable_compressed_formats) 1395 goto out_unknown; 1396 1397 switch (format) { 1398 case PIPE_FORMAT_BPTC_RGBA_UNORM: 1399 case PIPE_FORMAT_BPTC_SRGBA: 1400 return V_008F14_IMG_DATA_FORMAT_BC7; 1401 case PIPE_FORMAT_BPTC_RGB_FLOAT: 1402 case PIPE_FORMAT_BPTC_RGB_UFLOAT: 1403 return V_008F14_IMG_DATA_FORMAT_BC6; 1404 default: 1405 goto out_unknown; 1406 } 1407 } 1408 1409 if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 1410 switch (format) { 1411 case PIPE_FORMAT_R8G8_B8G8_UNORM: 1412 case PIPE_FORMAT_G8R8_B8R8_UNORM: 1413 return V_008F14_IMG_DATA_FORMAT_GB_GR; 1414 case PIPE_FORMAT_G8R8_G8B8_UNORM: 1415 case PIPE_FORMAT_R8G8_R8B8_UNORM: 1416 return V_008F14_IMG_DATA_FORMAT_BG_RG; 1417 default: 1418 goto out_unknown; 1419 } 1420 } 1421 1422 if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { 1423 if (!enable_compressed_formats) 1424 goto out_unknown; 1425 1426 if (!util_format_s3tc_enabled) { 1427 goto out_unknown; 1428 } 1429 1430 switch (format) { 1431 case PIPE_FORMAT_DXT1_RGB: 1432 case PIPE_FORMAT_DXT1_RGBA: 1433 case PIPE_FORMAT_DXT1_SRGB: 1434 case PIPE_FORMAT_DXT1_SRGBA: 1435 return V_008F14_IMG_DATA_FORMAT_BC1; 1436 case PIPE_FORMAT_DXT3_RGBA: 1437 case PIPE_FORMAT_DXT3_SRGBA: 1438 return V_008F14_IMG_DATA_FORMAT_BC2; 1439 case PIPE_FORMAT_DXT5_RGBA: 1440 case PIPE_FORMAT_DXT5_SRGBA: 1441 return V_008F14_IMG_DATA_FORMAT_BC3; 1442 default: 1443 goto out_unknown; 1444 } 1445 } 1446 1447 if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) { 1448 return 
V_008F14_IMG_DATA_FORMAT_5_9_9_9; 1449 } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) { 1450 return V_008F14_IMG_DATA_FORMAT_10_11_11; 1451 } 1452 1453 /* R8G8Bx_SNORM - TODO CxV8U8 */ 1454 1455 /* hw cannot support mixed formats (except depth/stencil, since only 1456 * depth is read).*/ 1457 if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) 1458 goto out_unknown; 1459 1460 /* See whether the components are of the same size. */ 1461 for (i = 1; i < desc->nr_channels; i++) { 1462 uniform = uniform && desc->channel[0].size == desc->channel[i].size; 1463 } 1464 1465 /* Non-uniform formats. */ 1466 if (!uniform) { 1467 switch(desc->nr_channels) { 1468 case 3: 1469 if (desc->channel[0].size == 5 && 1470 desc->channel[1].size == 6 && 1471 desc->channel[2].size == 5) { 1472 return V_008F14_IMG_DATA_FORMAT_5_6_5; 1473 } 1474 goto out_unknown; 1475 case 4: 1476 if (desc->channel[0].size == 5 && 1477 desc->channel[1].size == 5 && 1478 desc->channel[2].size == 5 && 1479 desc->channel[3].size == 1) { 1480 return V_008F14_IMG_DATA_FORMAT_1_5_5_5; 1481 } 1482 if (desc->channel[0].size == 10 && 1483 desc->channel[1].size == 10 && 1484 desc->channel[2].size == 10 && 1485 desc->channel[3].size == 2) { 1486 return V_008F14_IMG_DATA_FORMAT_2_10_10_10; 1487 } 1488 goto out_unknown; 1489 } 1490 goto out_unknown; 1491 } 1492 1493 if (first_non_void < 0 || first_non_void > 3) 1494 goto out_unknown; 1495 1496 /* uniform formats */ 1497 switch (desc->channel[first_non_void].size) { 1498 case 4: 1499 switch (desc->nr_channels) { 1500#if 0 /* Not supported for render targets */ 1501 case 2: 1502 return V_008F14_IMG_DATA_FORMAT_4_4; 1503#endif 1504 case 4: 1505 return V_008F14_IMG_DATA_FORMAT_4_4_4_4; 1506 } 1507 break; 1508 case 8: 1509 switch (desc->nr_channels) { 1510 case 1: 1511 return V_008F14_IMG_DATA_FORMAT_8; 1512 case 2: 1513 return V_008F14_IMG_DATA_FORMAT_8_8; 1514 case 4: 1515 return V_008F14_IMG_DATA_FORMAT_8_8_8_8; 1516 } 1517 break; 1518 case 16: 1519 
switch (desc->nr_channels) { 1520 case 1: 1521 return V_008F14_IMG_DATA_FORMAT_16; 1522 case 2: 1523 return V_008F14_IMG_DATA_FORMAT_16_16; 1524 case 4: 1525 return V_008F14_IMG_DATA_FORMAT_16_16_16_16; 1526 } 1527 break; 1528 case 32: 1529 switch (desc->nr_channels) { 1530 case 1: 1531 return V_008F14_IMG_DATA_FORMAT_32; 1532 case 2: 1533 return V_008F14_IMG_DATA_FORMAT_32_32; 1534#if 0 /* Not supported for render targets */ 1535 case 3: 1536 return V_008F14_IMG_DATA_FORMAT_32_32_32; 1537#endif 1538 case 4: 1539 return V_008F14_IMG_DATA_FORMAT_32_32_32_32; 1540 } 1541 } 1542 1543out_unknown: 1544 /* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */ 1545 return ~0; 1546} 1547 1548static unsigned si_tex_wrap(unsigned wrap) 1549{ 1550 switch (wrap) { 1551 default: 1552 case PIPE_TEX_WRAP_REPEAT: 1553 return V_008F30_SQ_TEX_WRAP; 1554 case PIPE_TEX_WRAP_CLAMP: 1555 return V_008F30_SQ_TEX_CLAMP_HALF_BORDER; 1556 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 1557 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL; 1558 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 1559 return V_008F30_SQ_TEX_CLAMP_BORDER; 1560 case PIPE_TEX_WRAP_MIRROR_REPEAT: 1561 return V_008F30_SQ_TEX_MIRROR; 1562 case PIPE_TEX_WRAP_MIRROR_CLAMP: 1563 return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER; 1564 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 1565 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; 1566 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 1567 return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER; 1568 } 1569} 1570 1571static unsigned si_tex_mipfilter(unsigned filter) 1572{ 1573 switch (filter) { 1574 case PIPE_TEX_MIPFILTER_NEAREST: 1575 return V_008F38_SQ_TEX_Z_FILTER_POINT; 1576 case PIPE_TEX_MIPFILTER_LINEAR: 1577 return V_008F38_SQ_TEX_Z_FILTER_LINEAR; 1578 default: 1579 case PIPE_TEX_MIPFILTER_NONE: 1580 return V_008F38_SQ_TEX_Z_FILTER_NONE; 1581 } 1582} 1583 1584static unsigned si_tex_compare(unsigned compare) 1585{ 1586 switch (compare) { 1587 default: 1588 case PIPE_FUNC_NEVER: 1589 return 
V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER; 1590 case PIPE_FUNC_LESS: 1591 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS; 1592 case PIPE_FUNC_EQUAL: 1593 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL; 1594 case PIPE_FUNC_LEQUAL: 1595 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; 1596 case PIPE_FUNC_GREATER: 1597 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER; 1598 case PIPE_FUNC_NOTEQUAL: 1599 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; 1600 case PIPE_FUNC_GEQUAL: 1601 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; 1602 case PIPE_FUNC_ALWAYS: 1603 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS; 1604 } 1605} 1606 1607static unsigned si_tex_dim(unsigned res_target, unsigned view_target, 1608 unsigned nr_samples) 1609{ 1610 if (view_target == PIPE_TEXTURE_CUBE || 1611 view_target == PIPE_TEXTURE_CUBE_ARRAY) 1612 res_target = view_target; 1613 1614 switch (res_target) { 1615 default: 1616 case PIPE_TEXTURE_1D: 1617 return V_008F1C_SQ_RSRC_IMG_1D; 1618 case PIPE_TEXTURE_1D_ARRAY: 1619 return V_008F1C_SQ_RSRC_IMG_1D_ARRAY; 1620 case PIPE_TEXTURE_2D: 1621 case PIPE_TEXTURE_RECT: 1622 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA : 1623 V_008F1C_SQ_RSRC_IMG_2D; 1624 case PIPE_TEXTURE_2D_ARRAY: 1625 return nr_samples > 1 ? 
V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : 1626 V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 1627 case PIPE_TEXTURE_3D: 1628 return V_008F1C_SQ_RSRC_IMG_3D; 1629 case PIPE_TEXTURE_CUBE: 1630 case PIPE_TEXTURE_CUBE_ARRAY: 1631 return V_008F1C_SQ_RSRC_IMG_CUBE; 1632 } 1633} 1634 1635/* 1636 * Format support testing 1637 */ 1638 1639static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) 1640{ 1641 return si_translate_texformat(screen, format, util_format_description(format), 1642 util_format_get_first_non_void_channel(format)) != ~0U; 1643} 1644 1645static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen, 1646 const struct util_format_description *desc, 1647 int first_non_void) 1648{ 1649 unsigned type; 1650 int i; 1651 1652 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 1653 return V_008F0C_BUF_DATA_FORMAT_10_11_11; 1654 1655 assert(first_non_void >= 0); 1656 type = desc->channel[first_non_void].type; 1657 1658 if (type == UTIL_FORMAT_TYPE_FIXED) 1659 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1660 1661 if (desc->nr_channels == 4 && 1662 desc->channel[0].size == 10 && 1663 desc->channel[1].size == 10 && 1664 desc->channel[2].size == 10 && 1665 desc->channel[3].size == 2) 1666 return V_008F0C_BUF_DATA_FORMAT_2_10_10_10; 1667 1668 /* See whether the components are of the same size. 
*/ 1669 for (i = 0; i < desc->nr_channels; i++) { 1670 if (desc->channel[first_non_void].size != desc->channel[i].size) 1671 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1672 } 1673 1674 switch (desc->channel[first_non_void].size) { 1675 case 8: 1676 switch (desc->nr_channels) { 1677 case 1: 1678 return V_008F0C_BUF_DATA_FORMAT_8; 1679 case 2: 1680 return V_008F0C_BUF_DATA_FORMAT_8_8; 1681 case 3: 1682 case 4: 1683 return V_008F0C_BUF_DATA_FORMAT_8_8_8_8; 1684 } 1685 break; 1686 case 16: 1687 switch (desc->nr_channels) { 1688 case 1: 1689 return V_008F0C_BUF_DATA_FORMAT_16; 1690 case 2: 1691 return V_008F0C_BUF_DATA_FORMAT_16_16; 1692 case 3: 1693 case 4: 1694 return V_008F0C_BUF_DATA_FORMAT_16_16_16_16; 1695 } 1696 break; 1697 case 32: 1698 /* From the Southern Islands ISA documentation about MTBUF: 1699 * 'Memory reads of data in memory that is 32 or 64 bits do not 1700 * undergo any format conversion.' 1701 */ 1702 if (type != UTIL_FORMAT_TYPE_FLOAT && 1703 !desc->channel[first_non_void].pure_integer) 1704 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1705 1706 switch (desc->nr_channels) { 1707 case 1: 1708 return V_008F0C_BUF_DATA_FORMAT_32; 1709 case 2: 1710 return V_008F0C_BUF_DATA_FORMAT_32_32; 1711 case 3: 1712 return V_008F0C_BUF_DATA_FORMAT_32_32_32; 1713 case 4: 1714 return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 1715 } 1716 break; 1717 } 1718 1719 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1720} 1721 1722static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen, 1723 const struct util_format_description *desc, 1724 int first_non_void) 1725{ 1726 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 1727 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 1728 1729 assert(first_non_void >= 0); 1730 1731 switch (desc->channel[first_non_void].type) { 1732 case UTIL_FORMAT_TYPE_SIGNED: 1733 if (desc->channel[first_non_void].normalized) 1734 return V_008F0C_BUF_NUM_FORMAT_SNORM; 1735 else if (desc->channel[first_non_void].pure_integer) 1736 return 
V_008F0C_BUF_NUM_FORMAT_SINT; 1737 else 1738 return V_008F0C_BUF_NUM_FORMAT_SSCALED; 1739 break; 1740 case UTIL_FORMAT_TYPE_UNSIGNED: 1741 if (desc->channel[first_non_void].normalized) 1742 return V_008F0C_BUF_NUM_FORMAT_UNORM; 1743 else if (desc->channel[first_non_void].pure_integer) 1744 return V_008F0C_BUF_NUM_FORMAT_UINT; 1745 else 1746 return V_008F0C_BUF_NUM_FORMAT_USCALED; 1747 break; 1748 case UTIL_FORMAT_TYPE_FLOAT: 1749 default: 1750 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 1751 } 1752} 1753 1754static bool si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format) 1755{ 1756 const struct util_format_description *desc; 1757 int first_non_void; 1758 unsigned data_format; 1759 1760 desc = util_format_description(format); 1761 first_non_void = util_format_get_first_non_void_channel(format); 1762 data_format = si_translate_buffer_dataformat(screen, desc, first_non_void); 1763 return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID; 1764} 1765 1766static bool si_is_colorbuffer_format_supported(enum pipe_format format) 1767{ 1768 return si_translate_colorformat(format) != V_028C70_COLOR_INVALID && 1769 r600_translate_colorswap(format, false) != ~0U; 1770} 1771 1772static bool si_is_zs_format_supported(enum pipe_format format) 1773{ 1774 return si_translate_dbformat(format) != V_028040_Z_INVALID; 1775} 1776 1777static boolean si_is_format_supported(struct pipe_screen *screen, 1778 enum pipe_format format, 1779 enum pipe_texture_target target, 1780 unsigned sample_count, 1781 unsigned usage) 1782{ 1783 unsigned retval = 0; 1784 1785 if (target >= PIPE_MAX_TEXTURE_TYPES) { 1786 R600_ERR("r600: unsupported texture type %d\n", target); 1787 return false; 1788 } 1789 1790 if (!util_format_is_supported(format, usage)) 1791 return false; 1792 1793 if (sample_count > 1) { 1794 if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE)) 1795 return false; 1796 1797 if (usage & PIPE_BIND_SHADER_IMAGE) 1798 return false; 1799 1800 switch 
(sample_count) { 1801 case 2: 1802 case 4: 1803 case 8: 1804 break; 1805 case 16: 1806 if (format == PIPE_FORMAT_NONE) 1807 return true; 1808 else 1809 return false; 1810 default: 1811 return false; 1812 } 1813 } 1814 1815 if (usage & (PIPE_BIND_SAMPLER_VIEW | 1816 PIPE_BIND_SHADER_IMAGE)) { 1817 if (target == PIPE_BUFFER) { 1818 if (si_is_vertex_format_supported(screen, format)) 1819 retval |= usage & (PIPE_BIND_SAMPLER_VIEW | 1820 PIPE_BIND_SHADER_IMAGE); 1821 } else { 1822 if (si_is_sampler_format_supported(screen, format)) 1823 retval |= usage & (PIPE_BIND_SAMPLER_VIEW | 1824 PIPE_BIND_SHADER_IMAGE); 1825 } 1826 } 1827 1828 if ((usage & (PIPE_BIND_RENDER_TARGET | 1829 PIPE_BIND_DISPLAY_TARGET | 1830 PIPE_BIND_SCANOUT | 1831 PIPE_BIND_SHARED | 1832 PIPE_BIND_BLENDABLE)) && 1833 si_is_colorbuffer_format_supported(format)) { 1834 retval |= usage & 1835 (PIPE_BIND_RENDER_TARGET | 1836 PIPE_BIND_DISPLAY_TARGET | 1837 PIPE_BIND_SCANOUT | 1838 PIPE_BIND_SHARED); 1839 if (!util_format_is_pure_integer(format) && 1840 !util_format_is_depth_or_stencil(format)) 1841 retval |= usage & PIPE_BIND_BLENDABLE; 1842 } 1843 1844 if ((usage & PIPE_BIND_DEPTH_STENCIL) && 1845 si_is_zs_format_supported(format)) { 1846 retval |= PIPE_BIND_DEPTH_STENCIL; 1847 } 1848 1849 if ((usage & PIPE_BIND_VERTEX_BUFFER) && 1850 si_is_vertex_format_supported(screen, format)) { 1851 retval |= PIPE_BIND_VERTEX_BUFFER; 1852 } 1853 1854 if (usage & PIPE_BIND_TRANSFER_READ) 1855 retval |= PIPE_BIND_TRANSFER_READ; 1856 if (usage & PIPE_BIND_TRANSFER_WRITE) 1857 retval |= PIPE_BIND_TRANSFER_WRITE; 1858 1859 if ((usage & PIPE_BIND_LINEAR) && 1860 !util_format_is_compressed(format) && 1861 !(usage & PIPE_BIND_DEPTH_STENCIL)) 1862 retval |= PIPE_BIND_LINEAR; 1863 1864 return retval == usage; 1865} 1866 1867/* 1868 * framebuffer handling 1869 */ 1870 1871static void si_choose_spi_color_formats(struct r600_surface *surf, 1872 unsigned format, unsigned swap, 1873 unsigned ntype, bool is_depth) 1874{ 1875 /* 
Alpha is needed for alpha-to-coverage. 1876 * Blending may be with or without alpha. 1877 */ 1878 unsigned normal = 0; /* most optimal, may not support blending or export alpha */ 1879 unsigned alpha = 0; /* exports alpha, but may not support blending */ 1880 unsigned blend = 0; /* supports blending, but may not export alpha */ 1881 unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */ 1882 1883 /* Choose the SPI color formats. These are required values for Stoney/RB+. 1884 * Other chips have multiple choices, though they are not necessarily better. 1885 */ 1886 switch (format) { 1887 case V_028C70_COLOR_5_6_5: 1888 case V_028C70_COLOR_1_5_5_5: 1889 case V_028C70_COLOR_5_5_5_1: 1890 case V_028C70_COLOR_4_4_4_4: 1891 case V_028C70_COLOR_10_11_11: 1892 case V_028C70_COLOR_11_11_10: 1893 case V_028C70_COLOR_8: 1894 case V_028C70_COLOR_8_8: 1895 case V_028C70_COLOR_8_8_8_8: 1896 case V_028C70_COLOR_10_10_10_2: 1897 case V_028C70_COLOR_2_10_10_10: 1898 if (ntype == V_028C70_NUMBER_UINT) 1899 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR; 1900 else if (ntype == V_028C70_NUMBER_SINT) 1901 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR; 1902 else 1903 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR; 1904 break; 1905 1906 case V_028C70_COLOR_16: 1907 case V_028C70_COLOR_16_16: 1908 case V_028C70_COLOR_16_16_16_16: 1909 if (ntype == V_028C70_NUMBER_UNORM || 1910 ntype == V_028C70_NUMBER_SNORM) { 1911 /* UNORM16 and SNORM16 don't support blending */ 1912 if (ntype == V_028C70_NUMBER_UNORM) 1913 normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR; 1914 else 1915 normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR; 1916 1917 /* Use 32 bits per channel for blending. 
*/ 1918 if (format == V_028C70_COLOR_16) { 1919 if (swap == V_028C70_SWAP_STD) { /* R */ 1920 blend = V_028714_SPI_SHADER_32_R; 1921 blend_alpha = V_028714_SPI_SHADER_32_AR; 1922 } else if (swap == V_028C70_SWAP_ALT_REV) /* A */ 1923 blend = blend_alpha = V_028714_SPI_SHADER_32_AR; 1924 else 1925 assert(0); 1926 } else if (format == V_028C70_COLOR_16_16) { 1927 if (swap == V_028C70_SWAP_STD) { /* RG */ 1928 blend = V_028714_SPI_SHADER_32_GR; 1929 blend_alpha = V_028714_SPI_SHADER_32_ABGR; 1930 } else if (swap == V_028C70_SWAP_ALT) /* RA */ 1931 blend = blend_alpha = V_028714_SPI_SHADER_32_AR; 1932 else 1933 assert(0); 1934 } else /* 16_16_16_16 */ 1935 blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR; 1936 } else if (ntype == V_028C70_NUMBER_UINT) 1937 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR; 1938 else if (ntype == V_028C70_NUMBER_SINT) 1939 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR; 1940 else if (ntype == V_028C70_NUMBER_FLOAT) 1941 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR; 1942 else 1943 assert(0); 1944 break; 1945 1946 case V_028C70_COLOR_32: 1947 if (swap == V_028C70_SWAP_STD) { /* R */ 1948 blend = normal = V_028714_SPI_SHADER_32_R; 1949 alpha = blend_alpha = V_028714_SPI_SHADER_32_AR; 1950 } else if (swap == V_028C70_SWAP_ALT_REV) /* A */ 1951 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR; 1952 else 1953 assert(0); 1954 break; 1955 1956 case V_028C70_COLOR_32_32: 1957 if (swap == V_028C70_SWAP_STD) { /* RG */ 1958 blend = normal = V_028714_SPI_SHADER_32_GR; 1959 alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR; 1960 } else if (swap == V_028C70_SWAP_ALT) /* RA */ 1961 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR; 1962 else 1963 assert(0); 1964 break; 1965 1966 case V_028C70_COLOR_32_32_32_32: 1967 case V_028C70_COLOR_8_24: 1968 case V_028C70_COLOR_24_8: 1969 case V_028C70_COLOR_X24_8_32_FLOAT: 1970 alpha = blend = blend_alpha = 
normal = V_028714_SPI_SHADER_32_ABGR; 1971 break; 1972 1973 default: 1974 assert(0); 1975 return; 1976 } 1977 1978 /* The DB->CB copy needs 32_ABGR. */ 1979 if (is_depth) 1980 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR; 1981 1982 surf->spi_shader_col_format = normal; 1983 surf->spi_shader_col_format_alpha = alpha; 1984 surf->spi_shader_col_format_blend = blend; 1985 surf->spi_shader_col_format_blend_alpha = blend_alpha; 1986} 1987 1988static void si_initialize_color_surface(struct si_context *sctx, 1989 struct r600_surface *surf) 1990{ 1991 struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; 1992 unsigned color_info, color_attrib, color_view; 1993 unsigned format, swap, ntype, endian; 1994 const struct util_format_description *desc; 1995 int i; 1996 unsigned blend_clamp = 0, blend_bypass = 0; 1997 1998 color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) | 1999 S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer); 2000 2001 desc = util_format_description(surf->base.format); 2002 for (i = 0; i < 4; i++) { 2003 if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { 2004 break; 2005 } 2006 } 2007 if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) { 2008 ntype = V_028C70_NUMBER_FLOAT; 2009 } else { 2010 ntype = V_028C70_NUMBER_UNORM; 2011 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) 2012 ntype = V_028C70_NUMBER_SRGB; 2013 else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 2014 if (desc->channel[i].pure_integer) { 2015 ntype = V_028C70_NUMBER_SINT; 2016 } else { 2017 assert(desc->channel[i].normalized); 2018 ntype = V_028C70_NUMBER_SNORM; 2019 } 2020 } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { 2021 if (desc->channel[i].pure_integer) { 2022 ntype = V_028C70_NUMBER_UINT; 2023 } else { 2024 assert(desc->channel[i].normalized); 2025 ntype = V_028C70_NUMBER_UNORM; 2026 } 2027 } 2028 } 2029 2030 format = si_translate_colorformat(surf->base.format); 2031 if (format == 
V_028C70_COLOR_INVALID) { 2032 R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format); 2033 } 2034 assert(format != V_028C70_COLOR_INVALID); 2035 swap = r600_translate_colorswap(surf->base.format, false); 2036 endian = si_colorformat_endian_swap(format); 2037 2038 /* blend clamp should be set for all NORM/SRGB types */ 2039 if (ntype == V_028C70_NUMBER_UNORM || 2040 ntype == V_028C70_NUMBER_SNORM || 2041 ntype == V_028C70_NUMBER_SRGB) 2042 blend_clamp = 1; 2043 2044 /* set blend bypass according to docs if SINT/UINT or 2045 8/24 COLOR variants */ 2046 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT || 2047 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 || 2048 format == V_028C70_COLOR_X24_8_32_FLOAT) { 2049 blend_clamp = 0; 2050 blend_bypass = 1; 2051 } 2052 2053 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) && 2054 (format == V_028C70_COLOR_8 || 2055 format == V_028C70_COLOR_8_8 || 2056 format == V_028C70_COLOR_8_8_8_8)) 2057 surf->color_is_int8 = true; 2058 2059 color_info = S_028C70_FORMAT(format) | 2060 S_028C70_COMP_SWAP(swap) | 2061 S_028C70_BLEND_CLAMP(blend_clamp) | 2062 S_028C70_BLEND_BYPASS(blend_bypass) | 2063 S_028C70_SIMPLE_FLOAT(1) | 2064 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && 2065 ntype != V_028C70_NUMBER_SNORM && 2066 ntype != V_028C70_NUMBER_SRGB && 2067 format != V_028C70_COLOR_8_24 && 2068 format != V_028C70_COLOR_24_8) | 2069 S_028C70_NUMBER_TYPE(ntype) | 2070 S_028C70_ENDIAN(endian); 2071 2072 /* Intensity is implemented as Red, so treat it that way. 
*/ 2073 color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1 || 2074 util_format_is_intensity(surf->base.format)); 2075 2076 if (rtex->resource.b.b.nr_samples > 1) { 2077 unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples); 2078 2079 color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | 2080 S_028C74_NUM_FRAGMENTS(log_samples); 2081 2082 if (rtex->fmask.size) { 2083 color_info |= S_028C70_COMPRESSION(1); 2084 unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height); 2085 2086 if (sctx->b.chip_class == SI) { 2087 /* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */ 2088 color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); 2089 } 2090 } 2091 } 2092 2093 surf->cb_color_view = color_view; 2094 surf->cb_color_info = color_info; 2095 surf->cb_color_attrib = color_attrib; 2096 2097 if (sctx->b.chip_class >= VI) { 2098 unsigned max_uncompressed_block_size = 2; 2099 2100 if (rtex->surface.nsamples > 1) { 2101 if (rtex->surface.bpe == 1) 2102 max_uncompressed_block_size = 0; 2103 else if (rtex->surface.bpe == 2) 2104 max_uncompressed_block_size = 1; 2105 } 2106 2107 surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) | 2108 S_028C78_INDEPENDENT_64B_BLOCKS(1); 2109 } 2110 2111 /* This must be set for fast clear to work without FMASK. 
*/ 2112 if (!rtex->fmask.size && sctx->b.chip_class == SI) { 2113 unsigned bankh = util_logbase2(rtex->surface.bankh); 2114 surf->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh); 2115 } 2116 2117 /* Determine pixel shader export format */ 2118 si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth); 2119 2120 surf->color_initialized = true; 2121} 2122 2123static void si_init_depth_surface(struct si_context *sctx, 2124 struct r600_surface *surf) 2125{ 2126 struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; 2127 unsigned level = surf->base.u.tex.level; 2128 struct radeon_surf_level *levelinfo = &rtex->surface.level[level]; 2129 unsigned format; 2130 uint32_t z_info, s_info, db_depth_info; 2131 uint64_t z_offs, s_offs; 2132 uint32_t db_htile_data_base, db_htile_surface; 2133 2134 format = si_translate_dbformat(rtex->resource.b.b.format); 2135 2136 if (format == V_028040_Z_INVALID) { 2137 R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format); 2138 } 2139 assert(format != V_028040_Z_INVALID); 2140 2141 s_offs = z_offs = rtex->resource.gpu_address; 2142 z_offs += rtex->surface.level[level].offset; 2143 s_offs += rtex->surface.stencil_level[level].offset; 2144 2145 db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1); 2146 2147 z_info = S_028040_FORMAT(format); 2148 if (rtex->resource.b.b.nr_samples > 1) { 2149 z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples)); 2150 } 2151 2152 if (rtex->surface.flags & RADEON_SURF_SBUFFER) 2153 s_info = S_028044_FORMAT(V_028044_STENCIL_8); 2154 else 2155 s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID); 2156 2157 if (sctx->b.chip_class >= CIK) { 2158 struct radeon_info *info = &sctx->screen->b.info; 2159 unsigned index = rtex->surface.tiling_index[level]; 2160 unsigned stencil_index = rtex->surface.stencil_tiling_index[level]; 2161 unsigned macro_index = rtex->surface.macro_tile_index; 2162 unsigned tile_mode = info->si_tile_mode_array[index]; 2163 
		unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
		unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];

		/* CIK+: tiling parameters come from the mode arrays queried
		 * from the kernel, not from per-level tile mode indices. */
		db_depth_info |=
			S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
			S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
			S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
			S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
			S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
			S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
		z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
		s_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
	} else {
		/* SI: depth and stencil each get their own tile mode index. */
		unsigned tile_mode_index = si_tile_mode_index(rtex, level, false);
		z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
		tile_mode_index = si_tile_mode_index(rtex, level, true);
		s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
	}

	/* HiZ aka depth buffer htile */
	/* use htile only for first level */
	if (rtex->htile_buffer && !level) {
		z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
			  S_028040_ALLOW_EXPCLEAR(1);

		if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
			/* Workaround: For a not yet understood reason, the
			 * combination of MSAA, fast stencil clear and stencil
			 * decompress messes with subsequent stencil buffer
			 * uses. Problem was reproduced on Verde, Bonaire,
			 * Tonga, and Carrizo.
			 *
			 * Disabling EXPCLEAR works around the problem.
			 *
			 * Check piglit's arb_texture_multisample-stencil-clear
			 * test if you want to try changing this.
			 */
			if (rtex->resource.b.b.nr_samples <= 1)
				s_info |= S_028044_ALLOW_EXPCLEAR(1);
		} else
			/* Use all of the htile_buffer for depth if there's no stencil.
			 */
			s_info |= S_028044_TILE_STENCIL_DISABLE(1);

		uint64_t va = rtex->htile_buffer->gpu_address;
		db_htile_data_base = va >> 8;
		db_htile_surface = S_028ABC_FULL_CACHE(1);
	} else {
		/* No HTILE: leave the base/surface registers cleared. */
		db_htile_data_base = 0;
		db_htile_surface = 0;
	}

	/* The depth size registers below encode tile counts; both block
	 * dimensions must be tile (8x8) aligned. */
	assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);

	surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
			      S_028008_SLICE_MAX(surf->base.u.tex.last_layer);
	surf->db_htile_data_base = db_htile_data_base;
	surf->db_depth_info = db_depth_info;
	surf->db_z_info = z_info;
	surf->db_stencil_info = s_info;
	surf->db_depth_base = z_offs >> 8;
	surf->db_stencil_base = s_offs >> 8;
	surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) |
			      S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1);
	surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x *
							levelinfo->nblk_y) / 64 - 1);
	surf->db_htile_surface = db_htile_surface;

	surf->depth_initialized = true;
}

/* Drop the framebuffers_bound reference of every colorbuffer texture in
 * *state (counterpart of the p_atomic_inc in si_set_framebuffer_state). */
static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state)
{
	for (int i = 0; i < state->nr_cbufs; ++i) {
		struct r600_surface *surf = NULL;
		struct r600_texture *rtex;

		if (!state->cbufs[i])
			continue;
		surf = (struct r600_surface*)state->cbufs[i];
		rtex = (struct r600_texture*)surf->base.texture;

		p_atomic_dec(&rtex->framebuffers_bound);
	}
}

/* pipe_context::set_framebuffer_state: bind a new framebuffer, flush the
 * caches that can alias it, lazily initialize the bound surfaces, and dirty
 * every atom whose derived state depends on the framebuffer. */
static void si_set_framebuffer_state(struct pipe_context *ctx,
				     const struct pipe_framebuffer_state *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_constant_buffer constbuf = {0};
	struct r600_surface *surf = NULL;
	struct r600_texture *rtex;
	bool old_cb0_is_integer = sctx->framebuffer.cb0_is_integer;
	bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
	unsigned old_nr_samples = sctx->framebuffer.nr_samples;
	int i;

	/* Stop the separate-DCC statistics queries of the outgoing buffers. */
	for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
		if (!sctx->framebuffer.state.cbufs[i])
			continue;

		rtex = (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture;
		if (rtex->dcc_gather_statistics)
			vi_separate_dcc_stop_query(ctx, rtex);
	}

	/* Only flush TC when changing the framebuffer state, because
	 * the only client not using TC that can change textures is
	 * the framebuffer.
	 *
	 * Flush all CB and DB caches here because all buffers can be used
	 * for write by both TC (with shader image stores) and CB/DB.
	 */
	sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
			 SI_CONTEXT_INV_GLOBAL_L2 |
			 SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
			 SI_CONTEXT_CS_PARTIAL_FLUSH;

	/* Take the maximum of the old and new count. If the new count is lower,
	 * dirtying is needed to disable the unbound colorbuffers.
	 */
	sctx->framebuffer.dirty_cbufs |=
		(1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
	sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;

	si_dec_framebuffer_counters(&sctx->framebuffer.state);
	util_copy_framebuffer_state(&sctx->framebuffer.state, state);

	/* Reset all derived per-framebuffer state before re-accumulating it
	 * from the new colorbuffers below. */
	sctx->framebuffer.spi_shader_col_format = 0;
	sctx->framebuffer.spi_shader_col_format_alpha = 0;
	sctx->framebuffer.spi_shader_col_format_blend = 0;
	sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
	sctx->framebuffer.color_is_int8 = 0;

	sctx->framebuffer.compressed_cb_mask = 0;
	sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
	sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
	sctx->framebuffer.cb0_is_integer = state->nr_cbufs && state->cbufs[0] &&
				  util_format_is_pure_integer(state->cbufs[0]->format);
	sctx->framebuffer.any_dst_linear = false;

	if (sctx->framebuffer.cb0_is_integer != old_cb0_is_integer)
		si_mark_atom_dirty(sctx, &sctx->db_render_state);

	for (i = 0; i < state->nr_cbufs; i++) {
		if (!state->cbufs[i])
			continue;

		surf = (struct r600_surface*)state->cbufs[i];
		rtex = (struct r600_texture*)surf->base.texture;

		if (!surf->color_initialized) {
			si_initialize_color_surface(sctx, surf);
		}

		/* Each colorbuffer contributes a 4-bit SPI export format. */
		sctx->framebuffer.spi_shader_col_format |=
			surf->spi_shader_col_format << (i * 4);
		sctx->framebuffer.spi_shader_col_format_alpha |=
			surf->spi_shader_col_format_alpha << (i * 4);
		sctx->framebuffer.spi_shader_col_format_blend |=
			surf->spi_shader_col_format_blend << (i * 4);
		sctx->framebuffer.spi_shader_col_format_blend_alpha |=
			surf->spi_shader_col_format_blend_alpha << (i * 4);

		if (surf->color_is_int8)
			sctx->framebuffer.color_is_int8 |= 1 << i;

		if (rtex->fmask.size && rtex->cmask.size) {
			sctx->framebuffer.compressed_cb_mask |= 1 << i;
		}

		if (surf->level_info->mode == RADEON_SURF_MODE_LINEAR_ALIGNED)
			sctx->framebuffer.any_dst_linear = true;

		r600_context_add_resource_size(ctx, surf->base.texture);

		p_atomic_inc(&rtex->framebuffers_bound);

		if (rtex->dcc_gather_statistics) {
			/* Dirty tracking must be enabled for DCC usage analysis.
*/ 2343 sctx->framebuffer.compressed_cb_mask |= 1 << i; 2344 vi_separate_dcc_start_query(ctx, rtex); 2345 } 2346 } 2347 2348 if (state->zsbuf) { 2349 surf = (struct r600_surface*)state->zsbuf; 2350 2351 if (!surf->depth_initialized) { 2352 si_init_depth_surface(sctx, surf); 2353 } 2354 r600_context_add_resource_size(ctx, surf->base.texture); 2355 } 2356 2357 si_update_poly_offset_state(sctx); 2358 si_mark_atom_dirty(sctx, &sctx->cb_render_state); 2359 si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); 2360 2361 if (sctx->framebuffer.any_dst_linear != old_any_dst_linear) 2362 si_mark_atom_dirty(sctx, &sctx->msaa_config); 2363 2364 if (sctx->framebuffer.nr_samples != old_nr_samples) { 2365 si_mark_atom_dirty(sctx, &sctx->msaa_config); 2366 si_mark_atom_dirty(sctx, &sctx->db_render_state); 2367 2368 /* Set sample locations as fragment shader constants. */ 2369 switch (sctx->framebuffer.nr_samples) { 2370 case 1: 2371 constbuf.user_buffer = sctx->b.sample_locations_1x; 2372 break; 2373 case 2: 2374 constbuf.user_buffer = sctx->b.sample_locations_2x; 2375 break; 2376 case 4: 2377 constbuf.user_buffer = sctx->b.sample_locations_4x; 2378 break; 2379 case 8: 2380 constbuf.user_buffer = sctx->b.sample_locations_8x; 2381 break; 2382 case 16: 2383 constbuf.user_buffer = sctx->b.sample_locations_16x; 2384 break; 2385 default: 2386 R600_ERR("Requested an invalid number of samples %i.\n", 2387 sctx->framebuffer.nr_samples); 2388 assert(0); 2389 } 2390 constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; 2391 si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf); 2392 2393 si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom); 2394 } 2395 2396 sctx->need_check_render_feedback = true; 2397 sctx->do_update_shaders = true; 2398} 2399 2400static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom) 2401{ 2402 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 2403 struct pipe_framebuffer_state *state = &sctx->framebuffer.state; 2404 unsigned i, 
nr_cbufs = state->nr_cbufs; 2405 struct r600_texture *tex = NULL; 2406 struct r600_surface *cb = NULL; 2407 unsigned cb_color_info = 0; 2408 2409 /* Colorbuffers. */ 2410 for (i = 0; i < nr_cbufs; i++) { 2411 unsigned pitch_tile_max, slice_tile_max, tile_mode_index; 2412 unsigned cb_color_base, cb_color_fmask, cb_color_attrib; 2413 unsigned cb_color_pitch, cb_color_slice, cb_color_fmask_slice; 2414 2415 if (!(sctx->framebuffer.dirty_cbufs & (1 << i))) 2416 continue; 2417 2418 cb = (struct r600_surface*)state->cbufs[i]; 2419 if (!cb) { 2420 radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 2421 S_028C70_FORMAT(V_028C70_COLOR_INVALID)); 2422 continue; 2423 } 2424 2425 tex = (struct r600_texture *)cb->base.texture; 2426 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2427 &tex->resource, RADEON_USAGE_READWRITE, 2428 tex->surface.nsamples > 1 ? 2429 RADEON_PRIO_COLOR_BUFFER_MSAA : 2430 RADEON_PRIO_COLOR_BUFFER); 2431 2432 if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) { 2433 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2434 tex->cmask_buffer, RADEON_USAGE_READWRITE, 2435 RADEON_PRIO_CMASK); 2436 } 2437 2438 if (tex->dcc_separate_buffer) 2439 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2440 tex->dcc_separate_buffer, 2441 RADEON_USAGE_READWRITE, 2442 RADEON_PRIO_DCC); 2443 2444 /* Compute mutable surface parameters. 
*/ 2445 pitch_tile_max = cb->level_info->nblk_x / 8 - 1; 2446 slice_tile_max = cb->level_info->nblk_x * 2447 cb->level_info->nblk_y / 64 - 1; 2448 tile_mode_index = si_tile_mode_index(tex, cb->base.u.tex.level, false); 2449 2450 cb_color_base = (tex->resource.gpu_address + cb->level_info->offset) >> 8; 2451 cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max); 2452 cb_color_slice = S_028C68_TILE_MAX(slice_tile_max); 2453 cb_color_attrib = cb->cb_color_attrib | 2454 S_028C74_TILE_MODE_INDEX(tile_mode_index); 2455 2456 if (tex->fmask.size) { 2457 if (sctx->b.chip_class >= CIK) 2458 cb_color_pitch |= S_028C64_FMASK_TILE_MAX(tex->fmask.pitch_in_pixels / 8 - 1); 2459 cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tex->fmask.tile_mode_index); 2460 cb_color_fmask = (tex->resource.gpu_address + tex->fmask.offset) >> 8; 2461 cb_color_fmask_slice = S_028C88_TILE_MAX(tex->fmask.slice_tile_max); 2462 } else { 2463 /* This must be set for fast clear to work without FMASK. */ 2464 if (sctx->b.chip_class >= CIK) 2465 cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max); 2466 cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index); 2467 cb_color_fmask = cb_color_base; 2468 cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max); 2469 } 2470 2471 cb_color_info = cb->cb_color_info | tex->cb_color_info; 2472 2473 if (tex->dcc_offset && cb->level_info->dcc_enabled) { 2474 bool is_msaa_resolve_dst = state->cbufs[0] && 2475 state->cbufs[0]->texture->nr_samples > 1 && 2476 state->cbufs[1] == &cb->base && 2477 state->cbufs[1]->texture->nr_samples <= 1; 2478 2479 if (!is_msaa_resolve_dst) 2480 cb_color_info |= S_028C70_DCC_ENABLE(1); 2481 } 2482 2483 radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 2484 sctx->b.chip_class >= VI ? 
14 : 13); 2485 radeon_emit(cs, cb_color_base); /* R_028C60_CB_COLOR0_BASE */ 2486 radeon_emit(cs, cb_color_pitch); /* R_028C64_CB_COLOR0_PITCH */ 2487 radeon_emit(cs, cb_color_slice); /* R_028C68_CB_COLOR0_SLICE */ 2488 radeon_emit(cs, cb->cb_color_view); /* R_028C6C_CB_COLOR0_VIEW */ 2489 radeon_emit(cs, cb_color_info); /* R_028C70_CB_COLOR0_INFO */ 2490 radeon_emit(cs, cb_color_attrib); /* R_028C74_CB_COLOR0_ATTRIB */ 2491 radeon_emit(cs, cb->cb_dcc_control); /* R_028C78_CB_COLOR0_DCC_CONTROL */ 2492 radeon_emit(cs, tex->cmask.base_address_reg); /* R_028C7C_CB_COLOR0_CMASK */ 2493 radeon_emit(cs, tex->cmask.slice_tile_max); /* R_028C80_CB_COLOR0_CMASK_SLICE */ 2494 radeon_emit(cs, cb_color_fmask); /* R_028C84_CB_COLOR0_FMASK */ 2495 radeon_emit(cs, cb_color_fmask_slice); /* R_028C88_CB_COLOR0_FMASK_SLICE */ 2496 radeon_emit(cs, tex->color_clear_value[0]); /* R_028C8C_CB_COLOR0_CLEAR_WORD0 */ 2497 radeon_emit(cs, tex->color_clear_value[1]); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */ 2498 2499 if (sctx->b.chip_class >= VI) /* R_028C94_CB_COLOR0_DCC_BASE */ 2500 radeon_emit(cs, ((!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) + 2501 tex->dcc_offset + 2502 tex->surface.level[cb->base.u.tex.level].dcc_offset) >> 8); 2503 } 2504 for (; i < 8 ; i++) 2505 if (sctx->framebuffer.dirty_cbufs & (1 << i)) 2506 radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); 2507 2508 /* ZS buffer. */ 2509 if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) { 2510 struct r600_surface *zb = (struct r600_surface*)state->zsbuf; 2511 struct r600_texture *rtex = (struct r600_texture*)zb->base.texture; 2512 2513 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2514 &rtex->resource, RADEON_USAGE_READWRITE, 2515 zb->base.texture->nr_samples > 1 ? 
2516 RADEON_PRIO_DEPTH_BUFFER_MSAA : 2517 RADEON_PRIO_DEPTH_BUFFER); 2518 2519 if (zb->db_htile_data_base) { 2520 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2521 rtex->htile_buffer, RADEON_USAGE_READWRITE, 2522 RADEON_PRIO_HTILE); 2523 } 2524 2525 radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view); 2526 radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base); 2527 2528 radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9); 2529 radeon_emit(cs, zb->db_depth_info); /* R_02803C_DB_DEPTH_INFO */ 2530 radeon_emit(cs, zb->db_z_info | /* R_028040_DB_Z_INFO */ 2531 S_028040_ZRANGE_PRECISION(rtex->depth_clear_value != 0)); 2532 radeon_emit(cs, zb->db_stencil_info); /* R_028044_DB_STENCIL_INFO */ 2533 radeon_emit(cs, zb->db_depth_base); /* R_028048_DB_Z_READ_BASE */ 2534 radeon_emit(cs, zb->db_stencil_base); /* R_02804C_DB_STENCIL_READ_BASE */ 2535 radeon_emit(cs, zb->db_depth_base); /* R_028050_DB_Z_WRITE_BASE */ 2536 radeon_emit(cs, zb->db_stencil_base); /* R_028054_DB_STENCIL_WRITE_BASE */ 2537 radeon_emit(cs, zb->db_depth_size); /* R_028058_DB_DEPTH_SIZE */ 2538 radeon_emit(cs, zb->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */ 2539 2540 radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2); 2541 radeon_emit(cs, rtex->stencil_clear_value); /* R_028028_DB_STENCIL_CLEAR */ 2542 radeon_emit(cs, fui(rtex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */ 2543 2544 radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface); 2545 } else if (sctx->framebuffer.dirty_zsbuf) { 2546 radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2); 2547 radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */ 2548 radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */ 2549 } 2550 2551 /* Framebuffer dimensions. 
*/ 2552 /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */ 2553 radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR, 2554 S_028208_BR_X(state->width) | S_028208_BR_Y(state->height)); 2555 2556 sctx->framebuffer.dirty_cbufs = 0; 2557 sctx->framebuffer.dirty_zsbuf = false; 2558} 2559 2560static void si_emit_msaa_sample_locs(struct si_context *sctx, 2561 struct r600_atom *atom) 2562{ 2563 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 2564 unsigned nr_samples = sctx->framebuffer.nr_samples; 2565 2566 /* Smoothing (only possible with nr_samples == 1) uses the same 2567 * sample locations as the MSAA it simulates. 2568 */ 2569 if (nr_samples <= 1 && sctx->smoothing_enabled) 2570 nr_samples = SI_NUM_SMOOTH_AA_SAMPLES; 2571 2572 /* On Polaris, the small primitive filter uses the sample locations 2573 * even when MSAA is off, so we need to make sure they're set to 0. 2574 */ 2575 if ((nr_samples > 1 || sctx->b.family >= CHIP_POLARIS10) && 2576 (nr_samples != sctx->msaa_sample_locs.nr_samples)) { 2577 sctx->msaa_sample_locs.nr_samples = nr_samples; 2578 cayman_emit_msaa_sample_locs(cs, nr_samples); 2579 } 2580 2581 if (sctx->b.family >= CHIP_POLARIS10) { 2582 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 2583 unsigned small_prim_filter_cntl = 2584 S_028830_SMALL_PRIM_FILTER_ENABLE(1) | 2585 S_028830_LINE_FILTER_DISABLE(1); /* line bug */ 2586 2587 /* The alternative of setting sample locations to 0 would 2588 * require a DB flush to avoid Z errors, see 2589 * https://bugs.freedesktop.org/show_bug.cgi?id=96908 2590 */ 2591 if (sctx->framebuffer.nr_samples > 1 && rs && !rs->multisample_enable) 2592 small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE; 2593 2594 radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, 2595 small_prim_filter_cntl); 2596 } 2597} 2598 2599static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom) 2600{ 2601 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 2602 unsigned 
num_tile_pipes = sctx->screen->b.info.num_tile_pipes; 2603 /* 33% faster rendering to linear color buffers */ 2604 bool dst_is_linear = sctx->framebuffer.any_dst_linear; 2605 unsigned sc_mode_cntl_1 = 2606 S_028A4C_WALK_SIZE(dst_is_linear) | 2607 S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) | 2608 S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) | 2609 /* always 1: */ 2610 S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) | 2611 S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) | 2612 S_028A4C_TILE_WALK_ORDER_ENABLE(1) | 2613 S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) | 2614 S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | 2615 S_028A4C_FORCE_EOV_REZ_ENABLE(1); 2616 2617 cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples, 2618 sctx->ps_iter_samples, 2619 sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0, 2620 sc_mode_cntl_1); 2621} 2622 2623static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples) 2624{ 2625 struct si_context *sctx = (struct si_context *)ctx; 2626 2627 if (sctx->ps_iter_samples == min_samples) 2628 return; 2629 2630 sctx->ps_iter_samples = min_samples; 2631 sctx->do_update_shaders = true; 2632 2633 if (sctx->framebuffer.nr_samples > 1) 2634 si_mark_atom_dirty(sctx, &sctx->msaa_config); 2635} 2636 2637/* 2638 * Samplers 2639 */ 2640 2641/** 2642 * Build the sampler view descriptor for a buffer texture. 
2643 * @param state 256-bit descriptor; only the high 128 bits are filled in 2644 */ 2645void 2646si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf, 2647 enum pipe_format format, 2648 unsigned offset, unsigned size, 2649 uint32_t *state) 2650{ 2651 const struct util_format_description *desc; 2652 int first_non_void; 2653 uint64_t va; 2654 unsigned stride; 2655 unsigned num_records; 2656 unsigned num_format, data_format; 2657 2658 desc = util_format_description(format); 2659 first_non_void = util_format_get_first_non_void_channel(format); 2660 stride = desc->block.bits / 8; 2661 va = buf->gpu_address + offset; 2662 num_format = si_translate_buffer_numformat(&screen->b.b, desc, first_non_void); 2663 data_format = si_translate_buffer_dataformat(&screen->b.b, desc, first_non_void); 2664 2665 num_records = size / stride; 2666 num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride); 2667 2668 if (screen->b.chip_class >= VI) 2669 num_records *= stride; 2670 2671 state[4] = va; 2672 state[5] = S_008F04_BASE_ADDRESS_HI(va >> 32) | 2673 S_008F04_STRIDE(stride); 2674 state[6] = num_records; 2675 state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 2676 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 2677 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 2678 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | 2679 S_008F0C_NUM_FORMAT(num_format) | 2680 S_008F0C_DATA_FORMAT(data_format); 2681} 2682 2683/** 2684 * Build the sampler view descriptor for a texture. 
 */
void
si_make_texture_descriptor(struct si_screen *screen,
			   struct r600_texture *tex,
			   bool sampler,
			   enum pipe_texture_target target,
			   enum pipe_format pipe_format,
			   const unsigned char state_swizzle[4],
			   unsigned first_level, unsigned last_level,
			   unsigned first_layer, unsigned last_layer,
			   unsigned width, unsigned height, unsigned depth,
			   uint32_t *state,
			   uint32_t *fmask_state)
{
	struct pipe_resource *res = &tex->resource.b.b;
	const struct util_format_description *desc;
	unsigned char swizzle[4];
	int first_non_void;
	unsigned num_format, data_format, type;
	uint64_t va;

	desc = util_format_description(pipe_format);

	/* Depth/stencil views sample one channel and replicate it: the
	 * formats listed below read channel Y, everything else channel X. */
	if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
		const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};

		switch (pipe_format) {
		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
		case PIPE_FORMAT_X24S8_UINT:
		case PIPE_FORMAT_X32_S8X24_UINT:
		case PIPE_FORMAT_X8Z24_UNORM:
			util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
			break;
		default:
			util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
		}
	} else {
		util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
	}

	first_non_void = util_format_get_first_non_void_channel(pipe_format);

	/* Derive the hardware IMG_NUM_FORMAT from the pipe format. */
	switch (pipe_format) {
	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
		num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
		break;
	default:
		if (first_non_void < 0) {
			/* All channels are void: compressed or subsampled. */
			if (util_format_is_compressed(pipe_format)) {
				switch (pipe_format) {
				case PIPE_FORMAT_DXT1_SRGB:
				case PIPE_FORMAT_DXT1_SRGBA:
				case PIPE_FORMAT_DXT3_SRGBA:
				case PIPE_FORMAT_DXT5_SRGBA:
				case PIPE_FORMAT_BPTC_SRGBA:
				case PIPE_FORMAT_ETC2_SRGB8:
				case PIPE_FORMAT_ETC2_SRGB8A1:
				case PIPE_FORMAT_ETC2_SRGBA8:
					num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
					break;
				case PIPE_FORMAT_RGTC1_SNORM:
				case PIPE_FORMAT_LATC1_SNORM:
				case PIPE_FORMAT_RGTC2_SNORM:
				case PIPE_FORMAT_LATC2_SNORM:
				case PIPE_FORMAT_ETC2_R11_SNORM:
				case PIPE_FORMAT_ETC2_RG11_SNORM:
				/* implies float, so use SNORM/UNORM to determine
				   whether data is signed or not */
				case PIPE_FORMAT_BPTC_RGB_FLOAT:
					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
					break;
				default:
					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
					break;
				}
			} else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
				num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
			} else {
				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
			}
		} else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
			num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
		} else {
			num_format = V_008F14_IMG_NUM_FORMAT_UNORM;

			switch (desc->channel[first_non_void].type) {
			case UTIL_FORMAT_TYPE_FLOAT:
				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
				break;
			case UTIL_FORMAT_TYPE_SIGNED:
				if (desc->channel[first_non_void].normalized)
					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
				else if (desc->channel[first_non_void].pure_integer)
					num_format = V_008F14_IMG_NUM_FORMAT_SINT;
				else
					num_format = V_008F14_IMG_NUM_FORMAT_SSCALED;
				break;
			case UTIL_FORMAT_TYPE_UNSIGNED:
				if (desc->channel[first_non_void].normalized)
					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
				else if (desc->channel[first_non_void].pure_integer)
					num_format = V_008F14_IMG_NUM_FORMAT_UINT;
				else
					num_format = V_008F14_IMG_NUM_FORMAT_USCALED;
			}
		}
	}

	data_format = si_translate_texformat(&screen->b.b, pipe_format, desc, first_non_void);
	if (data_format == ~0) {
		data_format = 0;
	}

	if (!sampler &&
	    (res->target == PIPE_TEXTURE_CUBE ||
	     res->target == PIPE_TEXTURE_CUBE_ARRAY ||
	     res->target == PIPE_TEXTURE_3D)) {
		/* For the purpose of shader images, treat cube maps and 3D
		 * textures as 2D arrays. For 3D textures, the address
		 * calculations for mipmaps are different, so we rely on the
		 * caller to effectively disable mipmaps.
		 */
		type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY;

		assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0));
	} else {
		type = si_tex_dim(res->target, target, res->nr_samples);
	}

	/* Array-type views override height/depth with the layer range. */
	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
	        height = 1;
		depth = res->array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
		   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
		if (sampler || res->target != PIPE_TEXTURE_3D)
			depth = res->array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
		depth = res->array_size / 6;

	/* Pack the 256-bit image descriptor; MSAA resources encode the
	 * sample count in the BASE/LAST_LEVEL fields. */
	state[0] = 0;
	state[1] = (S_008F14_DATA_FORMAT(data_format) |
		    S_008F14_NUM_FORMAT(num_format));
	state[2] = (S_008F18_WIDTH(width - 1) |
		    S_008F18_HEIGHT(height - 1));
	state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
		    S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
		    S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
		    S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
		    S_008F1C_BASE_LEVEL(res->nr_samples > 1 ?
					0 : first_level) |
		    S_008F1C_LAST_LEVEL(res->nr_samples > 1 ?
					util_logbase2(res->nr_samples) :
					last_level) |
		    S_008F1C_POW2_PAD(res->last_level > 0) |
		    S_008F1C_TYPE(type));
	state[4] = S_008F20_DEPTH(depth - 1);
	state[5] = (S_008F24_BASE_ARRAY(first_layer) |
		    S_008F24_LAST_ARRAY(last_layer));
	state[6] = 0;
	state[7] = 0;

	if (tex->dcc_offset) {
		unsigned swap = r600_translate_colorswap(pipe_format, false);

		state[6] = S_008F28_ALPHA_IS_ON_MSB(swap <= 1);
	} else {
		/* The last dword is unused by hw. The shader uses it to clear
		 * bits in the first dword of sampler state.
		 */
		if (screen->b.chip_class <= CIK && res->nr_samples <= 1) {
			if (first_level == last_level)
				state[7] = C_008F30_MAX_ANISO_RATIO;
			else
				state[7] = 0xffffffff;
		}
	}

	/* Initialize the sampler view for FMASK. */
	if (tex->fmask.size) {
		uint32_t fmask_format;

		va = tex->resource.gpu_address + tex->fmask.offset;

		switch (res->nr_samples) {
		case 2:
			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
			break;
		case 4:
			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
			break;
		case 8:
			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
			break;
		default:
			assert(0);
			fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
		}

		fmask_state[0] = va >> 8;
		fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
				 S_008F14_DATA_FORMAT(fmask_format) |
				 S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT);
		fmask_state[2] = S_008F18_WIDTH(width - 1) |
				 S_008F18_HEIGHT(height - 1);
		fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
				 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
				 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
				 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
				 S_008F1C_TILING_INDEX(tex->fmask.tile_mode_index) |
				 S_008F1C_TYPE(si_tex_dim(res->target, target, 0));
		fmask_state[4] = S_008F20_DEPTH(depth - 1) |
				 S_008F20_PITCH(tex->fmask.pitch_in_pixels - 1);
		fmask_state[5] = S_008F24_BASE_ARRAY(first_layer) |
				 S_008F24_LAST_ARRAY(last_layer);
		fmask_state[6] = 0;
		fmask_state[7] = 0;
	}
}

/**
 * Create a sampler view.
2907 * 2908 * @param ctx context 2909 * @param texture texture 2910 * @param state sampler view template 2911 * @param width0 width0 override (for compressed textures as int) 2912 * @param height0 height0 override (for compressed textures as int) 2913 * @param force_level set the base address to the level (for compressed textures) 2914 */ 2915struct pipe_sampler_view * 2916si_create_sampler_view_custom(struct pipe_context *ctx, 2917 struct pipe_resource *texture, 2918 const struct pipe_sampler_view *state, 2919 unsigned width0, unsigned height0, 2920 unsigned force_level) 2921{ 2922 struct si_context *sctx = (struct si_context*)ctx; 2923 struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view); 2924 struct r600_texture *tmp = (struct r600_texture*)texture; 2925 unsigned base_level, first_level, last_level; 2926 unsigned char state_swizzle[4]; 2927 unsigned height, depth, width; 2928 unsigned last_layer = state->u.tex.last_layer; 2929 enum pipe_format pipe_format; 2930 const struct radeon_surf_level *surflevel; 2931 2932 if (!view) 2933 return NULL; 2934 2935 /* initialize base object */ 2936 view->base = *state; 2937 view->base.texture = NULL; 2938 view->base.reference.count = 1; 2939 view->base.context = ctx; 2940 2941 /* NULL resource, obey swizzle (only ZERO and ONE make sense). */ 2942 if (!texture) { 2943 view->state[3] = S_008F1C_DST_SEL_X(si_map_swizzle(state->swizzle_r)) | 2944 S_008F1C_DST_SEL_Y(si_map_swizzle(state->swizzle_g)) | 2945 S_008F1C_DST_SEL_Z(si_map_swizzle(state->swizzle_b)) | 2946 S_008F1C_DST_SEL_W(si_map_swizzle(state->swizzle_a)) | 2947 S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D); 2948 return &view->base; 2949 } 2950 2951 pipe_resource_reference(&view->base.texture, texture); 2952 2953 if (state->format == PIPE_FORMAT_X24S8_UINT || 2954 state->format == PIPE_FORMAT_S8X24_UINT || 2955 state->format == PIPE_FORMAT_X32_S8X24_UINT || 2956 state->format == PIPE_FORMAT_S8_UINT) 2957 view->is_stencil_sampler = true; 2958 2959 /* Buffer resource. 
*/ 2960 if (texture->target == PIPE_BUFFER) { 2961 si_make_buffer_descriptor(sctx->screen, 2962 (struct r600_resource *)texture, 2963 state->format, 2964 state->u.buf.offset, 2965 state->u.buf.size, 2966 view->state); 2967 2968 LIST_ADDTAIL(&view->list, &sctx->b.texture_buffers); 2969 return &view->base; 2970 } 2971 2972 state_swizzle[0] = state->swizzle_r; 2973 state_swizzle[1] = state->swizzle_g; 2974 state_swizzle[2] = state->swizzle_b; 2975 state_swizzle[3] = state->swizzle_a; 2976 2977 base_level = 0; 2978 first_level = state->u.tex.first_level; 2979 last_level = state->u.tex.last_level; 2980 width = width0; 2981 height = height0; 2982 depth = texture->depth0; 2983 2984 if (force_level) { 2985 assert(force_level == first_level && 2986 force_level == last_level); 2987 base_level = force_level; 2988 first_level = 0; 2989 last_level = 0; 2990 width = u_minify(width, force_level); 2991 height = u_minify(height, force_level); 2992 depth = u_minify(depth, force_level); 2993 } 2994 2995 /* This is not needed if state trackers set last_layer correctly. */ 2996 if (state->target == PIPE_TEXTURE_1D || 2997 state->target == PIPE_TEXTURE_2D || 2998 state->target == PIPE_TEXTURE_RECT || 2999 state->target == PIPE_TEXTURE_CUBE) 3000 last_layer = state->u.tex.first_layer; 3001 3002 /* Texturing with separate depth and stencil. */ 3003 pipe_format = state->format; 3004 3005 /* Depth/stencil texturing sometimes needs separate texture. */ 3006 if (tmp->is_depth && !r600_can_sample_zs(tmp, view->is_stencil_sampler)) { 3007 if (!tmp->flushed_depth_texture && 3008 !r600_init_flushed_depth_texture(ctx, texture, NULL)) { 3009 pipe_resource_reference(&view->base.texture, NULL); 3010 FREE(view); 3011 return NULL; 3012 } 3013 3014 assert(tmp->flushed_depth_texture); 3015 3016 /* Override format for the case where the flushed texture 3017 * contains only Z or only S. 
3018 */ 3019 if (tmp->flushed_depth_texture->resource.b.b.format != tmp->resource.b.b.format) 3020 pipe_format = tmp->flushed_depth_texture->resource.b.b.format; 3021 3022 tmp = tmp->flushed_depth_texture; 3023 } 3024 3025 surflevel = tmp->surface.level; 3026 3027 if (tmp->db_compatible) { 3028 switch (pipe_format) { 3029 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 3030 pipe_format = PIPE_FORMAT_Z32_FLOAT; 3031 break; 3032 case PIPE_FORMAT_X8Z24_UNORM: 3033 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 3034 /* Z24 is always stored like this for DB 3035 * compatibility. 3036 */ 3037 pipe_format = PIPE_FORMAT_Z24X8_UNORM; 3038 break; 3039 case PIPE_FORMAT_X24S8_UINT: 3040 case PIPE_FORMAT_S8X24_UINT: 3041 case PIPE_FORMAT_X32_S8X24_UINT: 3042 pipe_format = PIPE_FORMAT_S8_UINT; 3043 surflevel = tmp->surface.stencil_level; 3044 break; 3045 default:; 3046 } 3047 } 3048 3049 si_make_texture_descriptor(sctx->screen, tmp, true, 3050 state->target, pipe_format, state_swizzle, 3051 first_level, last_level, 3052 state->u.tex.first_layer, last_layer, 3053 width, height, depth, 3054 view->state, view->fmask_state); 3055 3056 view->base_level_info = &surflevel[base_level]; 3057 view->base_level = base_level; 3058 view->block_width = util_format_get_blockwidth(pipe_format); 3059 return &view->base; 3060} 3061 3062static struct pipe_sampler_view * 3063si_create_sampler_view(struct pipe_context *ctx, 3064 struct pipe_resource *texture, 3065 const struct pipe_sampler_view *state) 3066{ 3067 return si_create_sampler_view_custom(ctx, texture, state, 3068 texture ? texture->width0 : 0, 3069 texture ? 
texture->height0 : 0, 0); 3070} 3071 3072static void si_sampler_view_destroy(struct pipe_context *ctx, 3073 struct pipe_sampler_view *state) 3074{ 3075 struct si_sampler_view *view = (struct si_sampler_view *)state; 3076 3077 if (state->texture && state->texture->target == PIPE_BUFFER) 3078 LIST_DELINIT(&view->list); 3079 3080 pipe_resource_reference(&state->texture, NULL); 3081 FREE(view); 3082} 3083 3084static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter) 3085{ 3086 return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER || 3087 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER || 3088 (linear_filter && 3089 (wrap == PIPE_TEX_WRAP_CLAMP || 3090 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP)); 3091} 3092 3093static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state) 3094{ 3095 bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST || 3096 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST; 3097 3098 return (state->border_color.ui[0] || state->border_color.ui[1] || 3099 state->border_color.ui[2] || state->border_color.ui[3]) && 3100 (wrap_mode_uses_border_color(state->wrap_s, linear_filter) || 3101 wrap_mode_uses_border_color(state->wrap_t, linear_filter) || 3102 wrap_mode_uses_border_color(state->wrap_r, linear_filter)); 3103} 3104 3105static void *si_create_sampler_state(struct pipe_context *ctx, 3106 const struct pipe_sampler_state *state) 3107{ 3108 struct si_context *sctx = (struct si_context *)ctx; 3109 struct r600_common_screen *rscreen = sctx->b.screen; 3110 struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state); 3111 unsigned border_color_type, border_color_index = 0; 3112 unsigned max_aniso = rscreen->force_aniso >= 0 ? 
rscreen->force_aniso 3113 : state->max_anisotropy; 3114 unsigned max_aniso_ratio = r600_tex_aniso_filter(max_aniso); 3115 3116 if (!rstate) { 3117 return NULL; 3118 } 3119 3120 if (!sampler_state_needs_border_color(state)) 3121 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; 3122 else if (state->border_color.f[0] == 0 && 3123 state->border_color.f[1] == 0 && 3124 state->border_color.f[2] == 0 && 3125 state->border_color.f[3] == 0) 3126 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; 3127 else if (state->border_color.f[0] == 0 && 3128 state->border_color.f[1] == 0 && 3129 state->border_color.f[2] == 0 && 3130 state->border_color.f[3] == 1) 3131 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK; 3132 else if (state->border_color.f[0] == 1 && 3133 state->border_color.f[1] == 1 && 3134 state->border_color.f[2] == 1 && 3135 state->border_color.f[3] == 1) 3136 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE; 3137 else { 3138 int i; 3139 3140 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER; 3141 3142 /* Check if the border has been uploaded already. */ 3143 for (i = 0; i < sctx->border_color_count; i++) 3144 if (memcmp(&sctx->border_color_table[i], &state->border_color, 3145 sizeof(state->border_color)) == 0) 3146 break; 3147 3148 if (i >= SI_MAX_BORDER_COLORS) { 3149 /* Getting 4096 unique border colors is very unlikely. */ 3150 fprintf(stderr, "radeonsi: The border color table is full. " 3151 "Any new border colors will be just black. " 3152 "Please file a bug.\n"); 3153 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; 3154 } else { 3155 if (i == sctx->border_color_count) { 3156 /* Upload a new border color. 
*/ 3157 memcpy(&sctx->border_color_table[i], &state->border_color, 3158 sizeof(state->border_color)); 3159 util_memcpy_cpu_to_le32(&sctx->border_color_map[i], 3160 &state->border_color, 3161 sizeof(state->border_color)); 3162 sctx->border_color_count++; 3163 } 3164 3165 border_color_index = i; 3166 } 3167 } 3168 3169 rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) | 3170 S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) | 3171 S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) | 3172 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | 3173 S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) | 3174 S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) | 3175 S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) | 3176 S_008F30_COMPAT_MODE(sctx->b.chip_class >= VI)); 3177 rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | 3178 S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8))); 3179 rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | 3180 S_008F38_XY_MAG_FILTER(eg_tex_filter(state->mag_img_filter, max_aniso)) | 3181 S_008F38_XY_MIN_FILTER(eg_tex_filter(state->min_img_filter, max_aniso)) | 3182 S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) | 3183 S_008F38_MIP_POINT_PRECLAMP(1) | 3184 S_008F38_DISABLE_LSB_CEIL(1) | 3185 S_008F38_FILTER_PREC_FIX(1) | 3186 S_008F38_ANISO_OVERRIDE(sctx->b.chip_class >= VI)); 3187 rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) | 3188 S_008F3C_BORDER_COLOR_TYPE(border_color_type); 3189 return rstate; 3190} 3191 3192static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) 3193{ 3194 struct si_context *sctx = (struct si_context *)ctx; 3195 3196 if (sctx->sample_mask.sample_mask == (uint16_t)sample_mask) 3197 return; 3198 3199 sctx->sample_mask.sample_mask = sample_mask; 3200 si_mark_atom_dirty(sctx, &sctx->sample_mask.atom); 3201} 3202 3203static void si_emit_sample_mask(struct si_context *sctx, struct r600_atom 
*atom) 3204{ 3205 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 3206 unsigned mask = sctx->sample_mask.sample_mask; 3207 3208 /* Needed for line and polygon smoothing as well as for the Polaris 3209 * small primitive filter. We expect the state tracker to take care of 3210 * this for us. 3211 */ 3212 assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 || 3213 (mask & 1 && sctx->blitter->running)); 3214 3215 radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); 3216 radeon_emit(cs, mask | (mask << 16)); 3217 radeon_emit(cs, mask | (mask << 16)); 3218} 3219 3220static void si_delete_sampler_state(struct pipe_context *ctx, void *state) 3221{ 3222 free(state); 3223} 3224 3225/* 3226 * Vertex elements & buffers 3227 */ 3228 3229static void *si_create_vertex_elements(struct pipe_context *ctx, 3230 unsigned count, 3231 const struct pipe_vertex_element *elements) 3232{ 3233 struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element); 3234 int i; 3235 3236 assert(count <= SI_MAX_ATTRIBS); 3237 if (!v) 3238 return NULL; 3239 3240 v->count = count; 3241 for (i = 0; i < count; ++i) { 3242 const struct util_format_description *desc; 3243 unsigned data_format, num_format; 3244 int first_non_void; 3245 3246 desc = util_format_description(elements[i].src_format); 3247 first_non_void = util_format_get_first_non_void_channel(elements[i].src_format); 3248 data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); 3249 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); 3250 3251 v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 3252 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 3253 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 3254 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | 3255 S_008F0C_NUM_FORMAT(num_format) | 3256 S_008F0C_DATA_FORMAT(data_format); 3257 v->format_size[i] = desc->block.bits / 8; 3258 } 3259 memcpy(v->elements, elements, sizeof(struct 
pipe_vertex_element) * count); 3260 3261 return v; 3262} 3263 3264static void si_bind_vertex_elements(struct pipe_context *ctx, void *state) 3265{ 3266 struct si_context *sctx = (struct si_context *)ctx; 3267 struct si_vertex_element *v = (struct si_vertex_element*)state; 3268 3269 sctx->vertex_elements = v; 3270 sctx->vertex_buffers_dirty = true; 3271 sctx->do_update_shaders = true; 3272} 3273 3274static void si_delete_vertex_element(struct pipe_context *ctx, void *state) 3275{ 3276 struct si_context *sctx = (struct si_context *)ctx; 3277 3278 if (sctx->vertex_elements == state) 3279 sctx->vertex_elements = NULL; 3280 FREE(state); 3281} 3282 3283static void si_set_vertex_buffers(struct pipe_context *ctx, 3284 unsigned start_slot, unsigned count, 3285 const struct pipe_vertex_buffer *buffers) 3286{ 3287 struct si_context *sctx = (struct si_context *)ctx; 3288 struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot; 3289 int i; 3290 3291 assert(start_slot + count <= ARRAY_SIZE(sctx->vertex_buffer)); 3292 3293 if (buffers) { 3294 for (i = 0; i < count; i++) { 3295 const struct pipe_vertex_buffer *src = buffers + i; 3296 struct pipe_vertex_buffer *dsti = dst + i; 3297 3298 pipe_resource_reference(&dsti->buffer, src->buffer); 3299 dsti->buffer_offset = src->buffer_offset; 3300 dsti->stride = src->stride; 3301 r600_context_add_resource_size(ctx, src->buffer); 3302 } 3303 } else { 3304 for (i = 0; i < count; i++) { 3305 pipe_resource_reference(&dst[i].buffer, NULL); 3306 } 3307 } 3308 sctx->vertex_buffers_dirty = true; 3309} 3310 3311static void si_set_index_buffer(struct pipe_context *ctx, 3312 const struct pipe_index_buffer *ib) 3313{ 3314 struct si_context *sctx = (struct si_context *)ctx; 3315 3316 if (ib) { 3317 pipe_resource_reference(&sctx->index_buffer.buffer, ib->buffer); 3318 memcpy(&sctx->index_buffer, ib, sizeof(*ib)); 3319 r600_context_add_resource_size(ctx, ib->buffer); 3320 } else { 3321 pipe_resource_reference(&sctx->index_buffer.buffer, NULL); 
3322 } 3323} 3324 3325/* 3326 * Misc 3327 */ 3328 3329static void si_set_tess_state(struct pipe_context *ctx, 3330 const float default_outer_level[4], 3331 const float default_inner_level[2]) 3332{ 3333 struct si_context *sctx = (struct si_context *)ctx; 3334 struct pipe_constant_buffer cb; 3335 float array[8]; 3336 3337 memcpy(array, default_outer_level, sizeof(float) * 4); 3338 memcpy(array+4, default_inner_level, sizeof(float) * 2); 3339 3340 cb.buffer = NULL; 3341 cb.user_buffer = NULL; 3342 cb.buffer_size = sizeof(array); 3343 3344 si_upload_const_buffer(sctx, (struct r600_resource**)&cb.buffer, 3345 (void*)array, sizeof(array), 3346 &cb.buffer_offset); 3347 3348 si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb); 3349 pipe_resource_reference(&cb.buffer, NULL); 3350} 3351 3352static void si_texture_barrier(struct pipe_context *ctx) 3353{ 3354 struct si_context *sctx = (struct si_context *)ctx; 3355 3356 sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | 3357 SI_CONTEXT_INV_GLOBAL_L2 | 3358 SI_CONTEXT_FLUSH_AND_INV_CB | 3359 SI_CONTEXT_CS_PARTIAL_FLUSH; 3360} 3361 3362static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) 3363{ 3364 struct si_context *sctx = (struct si_context *)ctx; 3365 3366 /* Subsequent commands must wait for all shader invocations to 3367 * complete. */ 3368 sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | 3369 SI_CONTEXT_CS_PARTIAL_FLUSH; 3370 3371 if (flags & PIPE_BARRIER_CONSTANT_BUFFER) 3372 sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 | 3373 SI_CONTEXT_INV_VMEM_L1; 3374 3375 if (flags & (PIPE_BARRIER_VERTEX_BUFFER | 3376 PIPE_BARRIER_SHADER_BUFFER | 3377 PIPE_BARRIER_TEXTURE | 3378 PIPE_BARRIER_IMAGE | 3379 PIPE_BARRIER_STREAMOUT_BUFFER | 3380 PIPE_BARRIER_GLOBAL_BUFFER)) { 3381 /* As far as I can tell, L1 contents are written back to L2 3382 * automatically at end of shader, but the contents of other 3383 * L1 caches might still be stale. 
*/ 3384 sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1; 3385 } 3386 3387 if (flags & PIPE_BARRIER_INDEX_BUFFER) { 3388 sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1; 3389 3390 /* Indices are read through TC L2 since VI. */ 3391 if (sctx->screen->b.chip_class <= CIK) 3392 sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2; 3393 } 3394 3395 if (flags & PIPE_BARRIER_FRAMEBUFFER) 3396 sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; 3397 3398 if (flags & (PIPE_BARRIER_MAPPED_BUFFER | 3399 PIPE_BARRIER_FRAMEBUFFER | 3400 PIPE_BARRIER_INDIRECT_BUFFER)) { 3401 /* Not sure if INV_GLOBAL_L2 is the best thing here. 3402 * 3403 * We need to make sure that TC L1 & L2 are written back to 3404 * memory, because neither CPU accesses nor CB fetches consider 3405 * TC, but there's no need to invalidate any TC cache lines. */ 3406 sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2; 3407 } 3408} 3409 3410static void *si_create_blend_custom(struct si_context *sctx, unsigned mode) 3411{ 3412 struct pipe_blend_state blend; 3413 3414 memset(&blend, 0, sizeof(blend)); 3415 blend.independent_blend_enable = true; 3416 blend.rt[0].colormask = 0xf; 3417 return si_create_blend_state_mode(&sctx->b.b, &blend, mode); 3418} 3419 3420static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw, 3421 bool include_draw_vbo) 3422{ 3423 si_need_cs_space((struct si_context*)ctx); 3424} 3425 3426static void si_init_config(struct si_context *sctx); 3427 3428void si_init_state_functions(struct si_context *sctx) 3429{ 3430 si_init_external_atom(sctx, &sctx->b.render_cond_atom, &sctx->atoms.s.render_cond); 3431 si_init_external_atom(sctx, &sctx->b.streamout.begin_atom, &sctx->atoms.s.streamout_begin); 3432 si_init_external_atom(sctx, &sctx->b.streamout.enable_atom, &sctx->atoms.s.streamout_enable); 3433 si_init_external_atom(sctx, &sctx->b.scissors.atom, &sctx->atoms.s.scissors); 3434 si_init_external_atom(sctx, &sctx->b.viewports.atom, &sctx->atoms.s.viewports); 3435 3436 si_init_atom(sctx, &sctx->cache_flush, 
&sctx->atoms.s.cache_flush, si_emit_cache_flush); 3437 si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state); 3438 si_init_atom(sctx, &sctx->msaa_sample_locs.atom, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs); 3439 si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state); 3440 si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config); 3441 si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask); 3442 si_init_atom(sctx, &sctx->cb_render_state, &sctx->atoms.s.cb_render_state, si_emit_cb_render_state); 3443 si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color); 3444 si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs); 3445 si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state); 3446 si_init_atom(sctx, &sctx->stencil_ref.atom, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref); 3447 3448 sctx->b.b.create_blend_state = si_create_blend_state; 3449 sctx->b.b.bind_blend_state = si_bind_blend_state; 3450 sctx->b.b.delete_blend_state = si_delete_blend_state; 3451 sctx->b.b.set_blend_color = si_set_blend_color; 3452 3453 sctx->b.b.create_rasterizer_state = si_create_rs_state; 3454 sctx->b.b.bind_rasterizer_state = si_bind_rs_state; 3455 sctx->b.b.delete_rasterizer_state = si_delete_rs_state; 3456 3457 sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state; 3458 sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state; 3459 sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state; 3460 3461 sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx); 3462 sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE); 3463 sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS); 3464 sctx->custom_blend_fastclear = si_create_blend_custom(sctx, 
V_028808_CB_ELIMINATE_FAST_CLEAR); 3465 sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, V_028808_CB_DCC_DECOMPRESS); 3466 3467 sctx->b.b.set_clip_state = si_set_clip_state; 3468 sctx->b.b.set_stencil_ref = si_set_stencil_ref; 3469 3470 sctx->b.b.set_framebuffer_state = si_set_framebuffer_state; 3471 sctx->b.b.get_sample_position = cayman_get_sample_position; 3472 3473 sctx->b.b.create_sampler_state = si_create_sampler_state; 3474 sctx->b.b.delete_sampler_state = si_delete_sampler_state; 3475 3476 sctx->b.b.create_sampler_view = si_create_sampler_view; 3477 sctx->b.b.sampler_view_destroy = si_sampler_view_destroy; 3478 3479 sctx->b.b.set_sample_mask = si_set_sample_mask; 3480 3481 sctx->b.b.create_vertex_elements_state = si_create_vertex_elements; 3482 sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements; 3483 sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element; 3484 sctx->b.b.set_vertex_buffers = si_set_vertex_buffers; 3485 sctx->b.b.set_index_buffer = si_set_index_buffer; 3486 3487 sctx->b.b.texture_barrier = si_texture_barrier; 3488 sctx->b.b.memory_barrier = si_memory_barrier; 3489 sctx->b.b.set_min_samples = si_set_min_samples; 3490 sctx->b.b.set_tess_state = si_set_tess_state; 3491 3492 sctx->b.b.set_active_query_state = si_set_active_query_state; 3493 sctx->b.set_occlusion_query_state = si_set_occlusion_query_state; 3494 sctx->b.need_gfx_cs_space = si_need_gfx_cs_space; 3495 3496 sctx->b.b.draw_vbo = si_draw_vbo; 3497 3498 si_init_config(sctx); 3499} 3500 3501static uint32_t si_get_bo_metadata_word1(struct r600_common_screen *rscreen) 3502{ 3503 return (ATI_VENDOR_ID << 16) | rscreen->info.pci_id; 3504} 3505 3506static void si_query_opaque_metadata(struct r600_common_screen *rscreen, 3507 struct r600_texture *rtex, 3508 struct radeon_bo_metadata *md) 3509{ 3510 struct si_screen *sscreen = (struct si_screen*)rscreen; 3511 struct pipe_resource *res = &rtex->resource.b.b; 3512 static const unsigned char swizzle[] = { 3513 
PIPE_SWIZZLE_X, 3514 PIPE_SWIZZLE_Y, 3515 PIPE_SWIZZLE_Z, 3516 PIPE_SWIZZLE_W 3517 }; 3518 uint32_t desc[8], i; 3519 bool is_array = util_resource_is_array_texture(res); 3520 3521 /* DRM 2.x.x doesn't support this. */ 3522 if (rscreen->info.drm_major != 3) 3523 return; 3524 3525 assert(rtex->dcc_separate_buffer == NULL); 3526 assert(rtex->fmask.size == 0); 3527 3528 /* Metadata image format format version 1: 3529 * [0] = 1 (metadata format identifier) 3530 * [1] = (VENDOR_ID << 16) | PCI_ID 3531 * [2:9] = image descriptor for the whole resource 3532 * [2] is always 0, because the base address is cleared 3533 * [9] is the DCC offset bits [39:8] from the beginning of 3534 * the buffer 3535 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level 3536 */ 3537 3538 md->metadata[0] = 1; /* metadata image format version 1 */ 3539 3540 /* TILE_MODE_INDEX is ambiguous without a PCI ID. */ 3541 md->metadata[1] = si_get_bo_metadata_word1(rscreen); 3542 3543 si_make_texture_descriptor(sscreen, rtex, true, 3544 res->target, res->format, 3545 swizzle, 0, res->last_level, 0, 3546 is_array ? res->array_size - 1 : 0, 3547 res->width0, res->height0, res->depth0, 3548 desc, NULL); 3549 3550 si_set_mutable_tex_desc_fields(rtex, &rtex->surface.level[0], 0, 0, 3551 rtex->surface.blk_w, false, desc); 3552 3553 /* Clear the base address and set the relative DCC offset. */ 3554 desc[0] = 0; 3555 desc[1] &= C_008F14_BASE_ADDRESS_HI; 3556 desc[7] = rtex->dcc_offset >> 8; 3557 3558 /* Dwords [2:9] contain the image descriptor. */ 3559 memcpy(&md->metadata[2], desc, sizeof(desc)); 3560 3561 /* Dwords [10:..] contain the mipmap level offsets. 
*/ 3562 for (i = 0; i <= res->last_level; i++) 3563 md->metadata[10+i] = rtex->surface.level[i].offset >> 8; 3564 3565 md->size_metadata = (11 + res->last_level) * 4; 3566} 3567 3568static void si_apply_opaque_metadata(struct r600_common_screen *rscreen, 3569 struct r600_texture *rtex, 3570 struct radeon_bo_metadata *md) 3571{ 3572 uint32_t *desc = &md->metadata[2]; 3573 3574 if (rscreen->chip_class < VI) 3575 return; 3576 3577 /* Return if DCC is enabled. The texture should be set up with it 3578 * already. 3579 */ 3580 if (md->size_metadata >= 11 * 4 && 3581 md->metadata[0] != 0 && 3582 md->metadata[1] == si_get_bo_metadata_word1(rscreen) && 3583 G_008F28_COMPRESSION_EN(desc[6])) { 3584 assert(rtex->dcc_offset == ((uint64_t)desc[7] << 8)); 3585 return; 3586 } 3587 3588 /* Disable DCC. These are always set by texture_from_handle and must 3589 * be cleared here. 3590 */ 3591 rtex->dcc_offset = 0; 3592} 3593 3594void si_init_screen_state_functions(struct si_screen *sscreen) 3595{ 3596 sscreen->b.b.is_format_supported = si_is_format_supported; 3597 sscreen->b.query_opaque_metadata = si_query_opaque_metadata; 3598 sscreen->b.apply_opaque_metadata = si_apply_opaque_metadata; 3599} 3600 3601static void 3602si_write_harvested_raster_configs(struct si_context *sctx, 3603 struct si_pm4_state *pm4, 3604 unsigned raster_config, 3605 unsigned raster_config_1) 3606{ 3607 unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1); 3608 unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1); 3609 unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask; 3610 unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16); 3611 unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2); 3612 unsigned rb_per_se = num_rb / num_se; 3613 unsigned se_mask[4]; 3614 unsigned se; 3615 3616 se_mask[0] = ((1 << rb_per_se) - 1); 3617 se_mask[1] = (se_mask[0] << rb_per_se); 3618 se_mask[2] = (se_mask[1] << rb_per_se); 3619 se_mask[3] = (se_mask[2] << rb_per_se); 3620 3621 
se_mask[0] &= rb_mask; 3622 se_mask[1] &= rb_mask; 3623 se_mask[2] &= rb_mask; 3624 se_mask[3] &= rb_mask; 3625 3626 assert(num_se == 1 || num_se == 2 || num_se == 4); 3627 assert(sh_per_se == 1 || sh_per_se == 2); 3628 assert(rb_per_pkr == 1 || rb_per_pkr == 2); 3629 3630 /* XXX: I can't figure out what the *_XSEL and *_YSEL 3631 * fields are for, so I'm leaving them as their default 3632 * values. */ 3633 3634 for (se = 0; se < num_se; se++) { 3635 unsigned raster_config_se = raster_config; 3636 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); 3637 unsigned pkr1_mask = pkr0_mask << rb_per_pkr; 3638 int idx = (se / 2) * 2; 3639 3640 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { 3641 raster_config_se &= C_028350_SE_MAP; 3642 3643 if (!se_mask[idx]) { 3644 raster_config_se |= 3645 S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3); 3646 } else { 3647 raster_config_se |= 3648 S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0); 3649 } 3650 } 3651 3652 pkr0_mask &= rb_mask; 3653 pkr1_mask &= rb_mask; 3654 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { 3655 raster_config_se &= C_028350_PKR_MAP; 3656 3657 if (!pkr0_mask) { 3658 raster_config_se |= 3659 S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3); 3660 } else { 3661 raster_config_se |= 3662 S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0); 3663 } 3664 } 3665 3666 if (rb_per_se >= 2) { 3667 unsigned rb0_mask = 1 << (se * rb_per_se); 3668 unsigned rb1_mask = rb0_mask << 1; 3669 3670 rb0_mask &= rb_mask; 3671 rb1_mask &= rb_mask; 3672 if (!rb0_mask || !rb1_mask) { 3673 raster_config_se &= C_028350_RB_MAP_PKR0; 3674 3675 if (!rb0_mask) { 3676 raster_config_se |= 3677 S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3); 3678 } else { 3679 raster_config_se |= 3680 S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0); 3681 } 3682 } 3683 3684 if (rb_per_se > 2) { 3685 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); 3686 rb1_mask = rb0_mask << 1; 3687 rb0_mask &= rb_mask; 3688 rb1_mask &= 
rb_mask; 3689 if (!rb0_mask || !rb1_mask) { 3690 raster_config_se &= C_028350_RB_MAP_PKR1; 3691 3692 if (!rb0_mask) { 3693 raster_config_se |= 3694 S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3); 3695 } else { 3696 raster_config_se |= 3697 S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0); 3698 } 3699 } 3700 } 3701 } 3702 3703 /* GRBM_GFX_INDEX has a different offset on SI and CI+ */ 3704 if (sctx->b.chip_class < CIK) 3705 si_pm4_set_reg(pm4, GRBM_GFX_INDEX, 3706 SE_INDEX(se) | SH_BROADCAST_WRITES | 3707 INSTANCE_BROADCAST_WRITES); 3708 else 3709 si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX, 3710 S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) | 3711 S_030800_INSTANCE_BROADCAST_WRITES(1)); 3712 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se); 3713 } 3714 3715 /* GRBM_GFX_INDEX has a different offset on SI and CI+ */ 3716 if (sctx->b.chip_class < CIK) 3717 si_pm4_set_reg(pm4, GRBM_GFX_INDEX, 3718 SE_BROADCAST_WRITES | SH_BROADCAST_WRITES | 3719 INSTANCE_BROADCAST_WRITES); 3720 else { 3721 si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX, 3722 S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | 3723 S_030800_INSTANCE_BROADCAST_WRITES(1)); 3724 3725 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || 3726 (!se_mask[2] && !se_mask[3]))) { 3727 raster_config_1 &= C_028354_SE_PAIR_MAP; 3728 3729 if (!se_mask[0] && !se_mask[1]) { 3730 raster_config_1 |= 3731 S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3); 3732 } else { 3733 raster_config_1 |= 3734 S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0); 3735 } 3736 } 3737 3738 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); 3739 } 3740} 3741 3742static void si_init_config(struct si_context *sctx) 3743{ 3744 struct si_screen *sscreen = sctx->screen; 3745 unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16); 3746 unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask; 3747 unsigned raster_config, raster_config_1; 3748 
	/* NOTE(review): interior of si_init_config(), continuing after the
	 * raster_config declarations above.  Builds the one-time init state
	 * (pm4) that is stored in sctx->init_config. */
	uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
	int i;

	/* Allocation failure: silently skip init state; sctx->init_config
	 * stays NULL. */
	if (!pm4)
		return;

	/* CONTEXT_CONTROL: enable register state loads and shadowing. */
	si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL);
	si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1));
	si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1));
	si_pm4_cmd_end(pm4, false);

	/* Tessellation level bounds (floats written as raw bits via fui()). */
	si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
	si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));

	/* FIXME calculate these values somehow ??? */
	si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
	si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
	si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);

	si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
	si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);

	si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
	si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
	/* PA_CL_ENHANCE only needs programming on SI; CIK+ presumably uses
	 * a different default/offset — register is not written there. */
	if (sctx->b.chip_class < CIK)
		si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
			       S_008A14_CLIP_VTX_REORDER_ENA(1));

	si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
	si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);

	si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);

	/* Initialize all 16 viewports to the full [0, 1] depth range. */
	for (i = 0; i < 16; i++) {
		si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, 0);
		si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, fui(1.0));
	}

	/* Per-family golden values for PA_SC_RASTER_CONFIG(_1).  These
	 * encode the render-backend mapping for a fully-enabled chip; the
	 * harvested path below recomputes them when RBs are disabled. */
	switch (sctx->screen->b.family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		raster_config = 0x2a00126a;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_VERDE:
		raster_config = 0x0000124a;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_OLAND:
		raster_config = 0x00000082;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_HAINAN:
		raster_config = 0x00000000;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_BONAIRE:
		raster_config = 0x16000012;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_HAWAII:
		raster_config = 0x3a00161a;
		raster_config_1 = 0x0000002e;
		break;
	case CHIP_FIJI:
		if (sscreen->b.info.cik_macrotile_mode_array[0] == 0x000000e8) {
			/* old kernels with old tiling config */
			raster_config = 0x16000012;
			raster_config_1 = 0x0000002a;
		} else {
			raster_config = 0x3a00161a;
			raster_config_1 = 0x0000002e;
		}
		break;
	case CHIP_POLARIS10:
		raster_config = 0x16000012;
		raster_config_1 = 0x0000002a;
		break;
	case CHIP_POLARIS11:
		raster_config = 0x16000012;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_TONGA:
		raster_config = 0x16000012;
		raster_config_1 = 0x0000002a;
		break;
	case CHIP_ICELAND:
		if (num_rb == 1)
			raster_config = 0x00000000;
		else
			raster_config = 0x00000002;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_CARRIZO:
		raster_config = 0x00000002;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_KAVERI:
		/* KV should be 0x00000002, but that causes problems with radeon */
		raster_config = 0x00000000; /* 0x00000002 */
		raster_config_1 = 0x00000000;
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
	case CHIP_STONEY:
		raster_config = 0x00000000;
		raster_config_1 = 0x00000000;
		break;
	default:
		fprintf(stderr,
			"radeonsi: Unknown GPU, using 0 for raster_config\n");
		raster_config = 0x00000000;
		raster_config_1 = 0x00000000;
		break;
	}

	/* Always use the default config when all backends are enabled
	 * (or when we failed to determine the enabled backends).
	 */
	if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
			       raster_config);
		/* RASTER_CONFIG_1 only exists on CIK+. */
		if (sctx->b.chip_class >= CIK)
			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1,
				       raster_config_1);
	} else {
		/* Some render backends are harvested: derive a per-SE config. */
		si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1);
	}

	/* Default scissors: disable window offsets and open the generic and
	 * screen scissors to the maximum 16384x16384 extent. */
	si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
	si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
	si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
		       S_028244_BR_X(16384) | S_028244_BR_Y(16384));
	si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
	si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,
		       S_028034_BR_X(16384) | S_028034_BR_Y(16384));

	si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
	si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE,
		       S_028230_ER_TRI(0xA) |
		       S_028230_ER_POINT(0xA) |
		       S_028230_ER_RECT(0xA) |
		       /* Required by DX10_DIAMOND_TEST_ENA: */
		       S_028230_ER_LINE_LR(0x1A) |
		       S_028230_ER_LINE_RL(0x26) |
		       S_028230_ER_LINE_TB(0xA) |
		       S_028230_ER_LINE_BT(0xA));
	/* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
	si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
	si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
	si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
	si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
	si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
	si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE,
		       S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
		       S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE));

	/* Widest possible index range; actual clamping is done per draw. */
	si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
	si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
	si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);

	if (sctx->b.chip_class >= CIK) {
		/* Enable all CUs for the fixed-function shader stages;
		 * the VS CU mask is chosen below based on CU count. */
		si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
		si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
		si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
		si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));

		if (sscreen->b.info.num_good_compute_units /
		    (sscreen->b.info.max_se * sscreen->b.info.max_sh_per_se) <= 4) {
			/* Too few available compute units per SH. Disallowing
			 * VS to run on CU0 could hurt us more than late VS
			 * allocation would help.
			 *
			 * LATE_ALLOC_VS = 2 is the highest safe number.
			 */
			si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
			si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
		} else {
			/* Set LATE_ALLOC_VS == 31. It should be less than
			 * the number of scratch waves. Limitations:
			 * - VS can't execute on CU0.
			 * - If HS writes outputs to LDS, LS can't execute on CU0.
			 */
			si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe));
			si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
		}

		si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
	}

	if (sctx->b.chip_class >= VI) {
		unsigned vgt_tess_distribution;

		/* DCC overwrite combiner settings (VI+ only feature). */
		si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL,
			       S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
			       S_028424_OVERWRITE_COMBINER_WATERMARK(4));
		/* Polaris10+ presumably has a fixed/implicit reuse block
		 * size — the register is skipped there; verify against HW
		 * docs if extending. */
		if (sctx->b.family < CHIP_POLARIS10)
			si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
		si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);

		vgt_tess_distribution =
			S_028B50_ACCUM_ISOLINE(32) |
			S_028B50_ACCUM_TRI(11) |
			S_028B50_ACCUM_QUAD(11) |
			S_028B50_DONUT_SPLIT(16);

		/* Testing with Unigine Heaven extreme tesselation yielded best results
		 * with TRAP_SPLIT = 3.
		 */
		if (sctx->b.family == CHIP_FIJI ||
		    sctx->b.family >= CHIP_POLARIS10)
			vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);

		si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution);
	} else {
		/* Pre-VI defaults for vertex reuse / dealloc. */
		si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
		si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
	}

	if (sctx->b.family == CHIP_STONEY)
		si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0);

	/* Bind the border color buffer; the high address bits register
	 * only exists on CIK+. */
	si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
	if (sctx->b.chip_class >= CIK)
		si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);
	si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
		      RADEON_PRIO_BORDER_COLORS);

	/* Upload the register writes as an indirect buffer and remember the
	 * state so it can be (re-)emitted at the start of each IB. */
	si_pm4_upload_indirect_buffer(sctx, pm4);
	sctx->init_config = pm4;
}