si_state.c revision 5bcfbf91e53e4f66310fc4ab63a7caf931a1e45c
1/* 2 * Copyright 2012 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Christian König <christian.koenig@amd.com> 25 */ 26 27#include "si_pipe.h" 28#include "si_shader.h" 29#include "sid.h" 30#include "radeon/r600_cs.h" 31 32#include "util/u_dual_blend.h" 33#include "util/u_format.h" 34#include "util/u_format_s3tc.h" 35#include "util/u_memory.h" 36#include "util/u_pstipple.h" 37#include "util/u_resource.h" 38 39/* Initialize an external atom (owned by ../radeon). */ 40static void 41si_init_external_atom(struct si_context *sctx, struct r600_atom *atom, 42 struct r600_atom **list_elem) 43{ 44 atom->id = list_elem - sctx->atoms.array + 1; 45 *list_elem = atom; 46} 47 48/* Initialize an atom owned by radeonsi. 
*/ 49void si_init_atom(struct si_context *sctx, struct r600_atom *atom, 50 struct r600_atom **list_elem, 51 void (*emit_func)(struct si_context *ctx, struct r600_atom *state)) 52{ 53 atom->emit = (void*)emit_func; 54 atom->id = list_elem - sctx->atoms.array + 1; /* index+1 in the atom array */ 55 *list_elem = atom; 56} 57 58static unsigned si_map_swizzle(unsigned swizzle) 59{ 60 switch (swizzle) { 61 case PIPE_SWIZZLE_Y: 62 return V_008F0C_SQ_SEL_Y; 63 case PIPE_SWIZZLE_Z: 64 return V_008F0C_SQ_SEL_Z; 65 case PIPE_SWIZZLE_W: 66 return V_008F0C_SQ_SEL_W; 67 case PIPE_SWIZZLE_0: 68 return V_008F0C_SQ_SEL_0; 69 case PIPE_SWIZZLE_1: 70 return V_008F0C_SQ_SEL_1; 71 default: /* PIPE_SWIZZLE_X */ 72 return V_008F0C_SQ_SEL_X; 73 } 74} 75 76static uint32_t S_FIXED(float value, uint32_t frac_bits) 77{ 78 return value * (1 << frac_bits); 79} 80 81/* 12.4 fixed-point */ 82static unsigned si_pack_float_12p4(float x) 83{ 84 return x <= 0 ? 0 : 85 x >= 4096 ? 0xffff : x * 16; 86} 87 88/* 89 * Inferred framebuffer and blender state. 90 * 91 * One of the reasons CB_TARGET_MASK must be derived from the framebuffer state 92 * is that: 93 * - The blend state mask is 0xf most of the time. 94 * - The COLOR1 format isn't INVALID because of possible dual-source blending, 95 * so COLOR1 is enabled pretty much all the time. 96 * So CB_TARGET_MASK is the only register that can disable COLOR1. 97 * 98 * Another reason is to avoid a hang with dual source blending. 
99 */ 100static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *atom) 101{ 102 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 103 struct si_state_blend *blend = sctx->queued.named.blend; 104 uint32_t cb_target_mask = 0, i; 105 106 for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) 107 if (sctx->framebuffer.state.cbufs[i]) 108 cb_target_mask |= 0xf << (4*i); 109 110 if (blend) 111 cb_target_mask &= blend->cb_target_mask; 112 113 /* Avoid a hang that happens when dual source blending is enabled 114 * but there is not enough color outputs. This is undefined behavior, 115 * so disable color writes completely. 116 * 117 * Reproducible with Unigine Heaven 4.0 and drirc missing. 118 */ 119 if (blend && blend->dual_src_blend && 120 sctx->ps_shader.cso && 121 (sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3) 122 cb_target_mask = 0; 123 124 radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, cb_target_mask); 125 126 /* STONEY-specific register settings. */ 127 if (sctx->b.family == CHIP_STONEY) { 128 unsigned spi_shader_col_format = 129 sctx->ps_shader.cso ? 130 sctx->ps_shader.current->key.ps.epilog.spi_shader_col_format : 0; 131 unsigned sx_ps_downconvert = 0; 132 unsigned sx_blend_opt_epsilon = 0; 133 unsigned sx_blend_opt_control = 0; 134 135 for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 136 struct r600_surface *surf = 137 (struct r600_surface*)sctx->framebuffer.state.cbufs[i]; 138 unsigned format, swap, spi_format, colormask; 139 bool has_alpha, has_rgb; 140 141 if (!surf) 142 continue; 143 144 format = G_028C70_FORMAT(surf->cb_color_info); 145 swap = G_028C70_COMP_SWAP(surf->cb_color_info); 146 spi_format = (spi_shader_col_format >> (i * 4)) & 0xf; 147 colormask = (cb_target_mask >> (i * 4)) & 0xf; 148 149 /* Set if RGB and A are present. 
*/ 150 has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib); 151 152 if (format == V_028C70_COLOR_8 || 153 format == V_028C70_COLOR_16 || 154 format == V_028C70_COLOR_32) 155 has_rgb = !has_alpha; 156 else 157 has_rgb = true; 158 159 /* Check the colormask and export format. */ 160 if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A))) 161 has_rgb = false; 162 if (!(colormask & PIPE_MASK_A)) 163 has_alpha = false; 164 165 if (spi_format == V_028714_SPI_SHADER_ZERO) { 166 has_rgb = false; 167 has_alpha = false; 168 } 169 170 /* Disable value checking for disabled channels. */ 171 if (!has_rgb) 172 sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4); 173 if (!has_alpha) 174 sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4); 175 176 /* Enable down-conversion for 32bpp and smaller formats. */ 177 switch (format) { 178 case V_028C70_COLOR_8: 179 case V_028C70_COLOR_8_8: 180 case V_028C70_COLOR_8_8_8_8: 181 /* For 1 and 2-channel formats, use the superset thereof. 
*/ 182 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR || 183 spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 184 spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 185 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4); 186 sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4); 187 } 188 break; 189 190 case V_028C70_COLOR_5_6_5: 191 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 192 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4); 193 sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4); 194 } 195 break; 196 197 case V_028C70_COLOR_1_5_5_5: 198 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 199 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4); 200 sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4); 201 } 202 break; 203 204 case V_028C70_COLOR_4_4_4_4: 205 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 206 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4); 207 sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4); 208 } 209 break; 210 211 case V_028C70_COLOR_32: 212 if (swap == V_0280A0_SWAP_STD && 213 spi_format == V_028714_SPI_SHADER_32_R) 214 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); 215 else if (swap == V_0280A0_SWAP_ALT_REV && 216 spi_format == V_028714_SPI_SHADER_32_AR) 217 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4); 218 break; 219 220 case V_028C70_COLOR_16: 221 case V_028C70_COLOR_16_16: 222 /* For 1-channel formats, use the superset thereof. 
*/ 223 if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR || 224 spi_format == V_028714_SPI_SHADER_SNORM16_ABGR || 225 spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 226 spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 227 if (swap == V_0280A0_SWAP_STD || 228 swap == V_0280A0_SWAP_STD_REV) 229 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4); 230 else 231 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4); 232 } 233 break; 234 235 case V_028C70_COLOR_10_11_11: 236 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 237 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4); 238 sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4); 239 } 240 break; 241 242 case V_028C70_COLOR_2_10_10_10: 243 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 244 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4); 245 sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4); 246 } 247 break; 248 } 249 } 250 251 if (sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) { 252 sx_ps_downconvert = 0; 253 sx_blend_opt_epsilon = 0; 254 sx_blend_opt_control = 0; 255 } 256 257 radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3); 258 radeon_emit(cs, sx_ps_downconvert); /* R_028754_SX_PS_DOWNCONVERT */ 259 radeon_emit(cs, sx_blend_opt_epsilon); /* R_028758_SX_BLEND_OPT_EPSILON */ 260 radeon_emit(cs, sx_blend_opt_control); /* R_02875C_SX_BLEND_OPT_CONTROL */ 261 } 262} 263 264/* 265 * Blender functions 266 */ 267 268static uint32_t si_translate_blend_function(int blend_func) 269{ 270 switch (blend_func) { 271 case PIPE_BLEND_ADD: 272 return V_028780_COMB_DST_PLUS_SRC; 273 case PIPE_BLEND_SUBTRACT: 274 return V_028780_COMB_SRC_MINUS_DST; 275 case PIPE_BLEND_REVERSE_SUBTRACT: 276 return V_028780_COMB_DST_MINUS_SRC; 277 case PIPE_BLEND_MIN: 278 return V_028780_COMB_MIN_DST_SRC; 279 case PIPE_BLEND_MAX: 280 return V_028780_COMB_MAX_DST_SRC; 281 default: 282 R600_ERR("Unknown blend function %d\n", blend_func); 283 assert(0); 284 break; 285 } 286 
return 0; 287} 288 289static uint32_t si_translate_blend_factor(int blend_fact) 290{ 291 switch (blend_fact) { 292 case PIPE_BLENDFACTOR_ONE: 293 return V_028780_BLEND_ONE; 294 case PIPE_BLENDFACTOR_SRC_COLOR: 295 return V_028780_BLEND_SRC_COLOR; 296 case PIPE_BLENDFACTOR_SRC_ALPHA: 297 return V_028780_BLEND_SRC_ALPHA; 298 case PIPE_BLENDFACTOR_DST_ALPHA: 299 return V_028780_BLEND_DST_ALPHA; 300 case PIPE_BLENDFACTOR_DST_COLOR: 301 return V_028780_BLEND_DST_COLOR; 302 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 303 return V_028780_BLEND_SRC_ALPHA_SATURATE; 304 case PIPE_BLENDFACTOR_CONST_COLOR: 305 return V_028780_BLEND_CONSTANT_COLOR; 306 case PIPE_BLENDFACTOR_CONST_ALPHA: 307 return V_028780_BLEND_CONSTANT_ALPHA; 308 case PIPE_BLENDFACTOR_ZERO: 309 return V_028780_BLEND_ZERO; 310 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 311 return V_028780_BLEND_ONE_MINUS_SRC_COLOR; 312 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 313 return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; 314 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 315 return V_028780_BLEND_ONE_MINUS_DST_ALPHA; 316 case PIPE_BLENDFACTOR_INV_DST_COLOR: 317 return V_028780_BLEND_ONE_MINUS_DST_COLOR; 318 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 319 return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR; 320 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 321 return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA; 322 case PIPE_BLENDFACTOR_SRC1_COLOR: 323 return V_028780_BLEND_SRC1_COLOR; 324 case PIPE_BLENDFACTOR_SRC1_ALPHA: 325 return V_028780_BLEND_SRC1_ALPHA; 326 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 327 return V_028780_BLEND_INV_SRC1_COLOR; 328 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 329 return V_028780_BLEND_INV_SRC1_ALPHA; 330 default: 331 R600_ERR("Bad blend factor %d not supported!\n", blend_fact); 332 assert(0); 333 break; 334 } 335 return 0; 336} 337 338static uint32_t si_translate_blend_opt_function(int blend_func) 339{ 340 switch (blend_func) { 341 case PIPE_BLEND_ADD: 342 return V_028760_OPT_COMB_ADD; 343 case PIPE_BLEND_SUBTRACT: 344 return 
V_028760_OPT_COMB_SUBTRACT; 345 case PIPE_BLEND_REVERSE_SUBTRACT: 346 return V_028760_OPT_COMB_REVSUBTRACT; 347 case PIPE_BLEND_MIN: 348 return V_028760_OPT_COMB_MIN; 349 case PIPE_BLEND_MAX: 350 return V_028760_OPT_COMB_MAX; 351 default: 352 return V_028760_OPT_COMB_BLEND_DISABLED; 353 } 354} 355 356static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha) 357{ 358 switch (blend_fact) { 359 case PIPE_BLENDFACTOR_ZERO: 360 return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL; 361 case PIPE_BLENDFACTOR_ONE: 362 return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE; 363 case PIPE_BLENDFACTOR_SRC_COLOR: 364 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0 365 : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0; 366 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 367 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1 368 : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1; 369 case PIPE_BLENDFACTOR_SRC_ALPHA: 370 return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0; 371 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 372 return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1; 373 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 374 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE 375 : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 376 default: 377 return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 378 } 379} 380 381/** 382 * Get rid of DST in the blend factors by commuting the operands: 383 * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 384 */ 385static void si_blend_remove_dst(unsigned *func, unsigned *src_factor, 386 unsigned *dst_factor, unsigned expected_dst, 387 unsigned replacement_src) 388{ 389 if (*src_factor == expected_dst && 390 *dst_factor == PIPE_BLENDFACTOR_ZERO) { 391 *src_factor = PIPE_BLENDFACTOR_ZERO; 392 *dst_factor = replacement_src; 393 394 /* Commuting the operands requires reversing subtractions. 
*/ 395 if (*func == PIPE_BLEND_SUBTRACT) 396 *func = PIPE_BLEND_REVERSE_SUBTRACT; 397 else if (*func == PIPE_BLEND_REVERSE_SUBTRACT) 398 *func = PIPE_BLEND_SUBTRACT; 399 } 400} 401 402static bool si_blend_factor_uses_dst(unsigned factor) 403{ 404 return factor == PIPE_BLENDFACTOR_DST_COLOR || 405 factor == PIPE_BLENDFACTOR_DST_ALPHA || 406 factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 407 factor == PIPE_BLENDFACTOR_INV_DST_ALPHA || 408 factor == PIPE_BLENDFACTOR_INV_DST_COLOR; 409} 410 411static void *si_create_blend_state_mode(struct pipe_context *ctx, 412 const struct pipe_blend_state *state, 413 unsigned mode) 414{ 415 struct si_context *sctx = (struct si_context*)ctx; 416 struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend); 417 struct si_pm4_state *pm4 = &blend->pm4; 418 uint32_t sx_mrt_blend_opt[8] = {0}; 419 uint32_t color_control = 0; 420 421 if (!blend) 422 return NULL; 423 424 blend->alpha_to_coverage = state->alpha_to_coverage; 425 blend->alpha_to_one = state->alpha_to_one; 426 blend->dual_src_blend = util_blend_state_is_dual(state, 0); 427 428 if (state->logicop_enable) { 429 color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4)); 430 } else { 431 color_control |= S_028808_ROP3(0xcc); 432 } 433 434 si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 435 S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 436 S_028B70_ALPHA_TO_MASK_OFFSET0(2) | 437 S_028B70_ALPHA_TO_MASK_OFFSET1(2) | 438 S_028B70_ALPHA_TO_MASK_OFFSET2(2) | 439 S_028B70_ALPHA_TO_MASK_OFFSET3(2)); 440 441 if (state->alpha_to_coverage) 442 blend->need_src_alpha_4bit |= 0xf; 443 444 blend->cb_target_mask = 0; 445 for (int i = 0; i < 8; i++) { 446 /* state->rt entries > 0 only written if independent blending */ 447 const int j = state->independent_blend_enable ? 
i : 0; 448 449 unsigned eqRGB = state->rt[j].rgb_func; 450 unsigned srcRGB = state->rt[j].rgb_src_factor; 451 unsigned dstRGB = state->rt[j].rgb_dst_factor; 452 unsigned eqA = state->rt[j].alpha_func; 453 unsigned srcA = state->rt[j].alpha_src_factor; 454 unsigned dstA = state->rt[j].alpha_dst_factor; 455 456 unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt; 457 unsigned blend_cntl = 0; 458 459 sx_mrt_blend_opt[i] = 460 S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | 461 S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED); 462 463 if (!state->rt[j].colormask) 464 continue; 465 466 /* cb_render_state will disable unused ones */ 467 blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i); 468 469 if (!state->rt[j].blend_enable) { 470 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 471 continue; 472 } 473 474 /* Blending optimizations for Stoney. 475 * These transformations don't change the behavior. 476 * 477 * First, get rid of DST in the blend factors: 478 * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 479 */ 480 si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, 481 PIPE_BLENDFACTOR_DST_COLOR, 482 PIPE_BLENDFACTOR_SRC_COLOR); 483 si_blend_remove_dst(&eqA, &srcA, &dstA, 484 PIPE_BLENDFACTOR_DST_COLOR, 485 PIPE_BLENDFACTOR_SRC_COLOR); 486 si_blend_remove_dst(&eqA, &srcA, &dstA, 487 PIPE_BLENDFACTOR_DST_ALPHA, 488 PIPE_BLENDFACTOR_SRC_ALPHA); 489 490 /* Look up the ideal settings from tables. */ 491 srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false); 492 dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false); 493 srcA_opt = si_translate_blend_opt_factor(srcA, true); 494 dstA_opt = si_translate_blend_opt_factor(dstA, true); 495 496 /* Handle interdependencies. 
*/ 497 if (si_blend_factor_uses_dst(srcRGB)) 498 dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 499 if (si_blend_factor_uses_dst(srcA)) 500 dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 501 502 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE && 503 (dstRGB == PIPE_BLENDFACTOR_ZERO || 504 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 505 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)) 506 dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 507 508 /* Set the final value. */ 509 sx_mrt_blend_opt[i] = 510 S_028760_COLOR_SRC_OPT(srcRGB_opt) | 511 S_028760_COLOR_DST_OPT(dstRGB_opt) | 512 S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) | 513 S_028760_ALPHA_SRC_OPT(srcA_opt) | 514 S_028760_ALPHA_DST_OPT(dstA_opt) | 515 S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA)); 516 517 /* Set blend state. */ 518 blend_cntl |= S_028780_ENABLE(1); 519 blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB)); 520 blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB)); 521 blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB)); 522 523 if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { 524 blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1); 525 blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA)); 526 blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA)); 527 blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA)); 528 } 529 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 530 531 blend->blend_enable_4bit |= 0xfu << (i * 4); 532 533 /* This is only important for formats without alpha. 
*/ 534 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 535 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 536 srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 537 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 538 srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || 539 dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA) 540 blend->need_src_alpha_4bit |= 0xfu << (i * 4); 541 } 542 543 if (blend->cb_target_mask) { 544 color_control |= S_028808_MODE(mode); 545 } else { 546 color_control |= S_028808_MODE(V_028808_CB_DISABLE); 547 } 548 549 if (sctx->b.family == CHIP_STONEY) { 550 for (int i = 0; i < 8; i++) 551 si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4, 552 sx_mrt_blend_opt[i]); 553 554 /* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */ 555 if (blend->dual_src_blend || state->logicop_enable || 556 mode == V_028808_CB_RESOLVE) 557 color_control |= S_028808_DISABLE_DUAL_QUAD(1); 558 } 559 560 si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control); 561 return blend; 562} 563 564static void *si_create_blend_state(struct pipe_context *ctx, 565 const struct pipe_blend_state *state) 566{ 567 return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); 568} 569 570static void si_bind_blend_state(struct pipe_context *ctx, void *state) 571{ 572 struct si_context *sctx = (struct si_context *)ctx; 573 si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state); 574 si_mark_atom_dirty(sctx, &sctx->cb_render_state); 575} 576 577static void si_delete_blend_state(struct pipe_context *ctx, void *state) 578{ 579 struct si_context *sctx = (struct si_context *)ctx; 580 si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state); 581} 582 583static void si_set_blend_color(struct pipe_context *ctx, 584 const struct pipe_blend_color *state) 585{ 586 struct si_context *sctx = (struct si_context *)ctx; 587 588 if (memcmp(&sctx->blend_color.state, state, sizeof(*state)) == 0) 589 return; 590 591 sctx->blend_color.state = *state; 592 si_mark_atom_dirty(sctx, 
&sctx->blend_color.atom); 593} 594 595static void si_emit_blend_color(struct si_context *sctx, struct r600_atom *atom) 596{ 597 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 598 599 radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4); 600 radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4); 601} 602 603/* 604 * Clipping 605 */ 606 607static void si_set_clip_state(struct pipe_context *ctx, 608 const struct pipe_clip_state *state) 609{ 610 struct si_context *sctx = (struct si_context *)ctx; 611 struct pipe_constant_buffer cb; 612 613 if (memcmp(&sctx->clip_state.state, state, sizeof(*state)) == 0) 614 return; 615 616 sctx->clip_state.state = *state; 617 si_mark_atom_dirty(sctx, &sctx->clip_state.atom); 618 619 cb.buffer = NULL; 620 cb.user_buffer = state->ucp; 621 cb.buffer_offset = 0; 622 cb.buffer_size = 4*4*8; 623 si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb); 624 pipe_resource_reference(&cb.buffer, NULL); 625} 626 627static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom) 628{ 629 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 630 631 radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4); 632 radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4); 633} 634 635#define SIX_BITS 0x3F 636 637static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom) 638{ 639 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 640 struct tgsi_shader_info *info = si_get_vs_info(sctx); 641 unsigned window_space = 642 info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; 643 unsigned clipdist_mask = 644 info->writes_clipvertex ? 
SIX_BITS : info->clipdist_writemask; 645 646 radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL, 647 S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) | 648 S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) | 649 S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) | 650 S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) | 651 S_02881C_VS_OUT_CCDIST0_VEC_ENA((clipdist_mask & 0x0F) != 0) | 652 S_02881C_VS_OUT_CCDIST1_VEC_ENA((clipdist_mask & 0xF0) != 0) | 653 S_02881C_VS_OUT_MISC_VEC_ENA(info->writes_psize || 654 info->writes_edgeflag || 655 info->writes_layer || 656 info->writes_viewport_index) | 657 S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1) | 658 (sctx->queued.named.rasterizer->clip_plane_enable & 659 clipdist_mask)); 660 radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL, 661 sctx->queued.named.rasterizer->pa_cl_clip_cntl | 662 (clipdist_mask ? 0 : 663 sctx->queued.named.rasterizer->clip_plane_enable & SIX_BITS) | 664 S_028810_CLIP_DISABLE(window_space)); 665 666 /* reuse needs to be set off if we write oViewport */ 667 radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF, 668 S_028AB4_REUSE_OFF(info->writes_viewport_index)); 669} 670 671/* 672 * inferred state between framebuffer and rasterizer 673 */ 674static void si_update_poly_offset_state(struct si_context *sctx) 675{ 676 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 677 678 if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) 679 return; 680 681 switch (sctx->framebuffer.state.zsbuf->texture->format) { 682 case PIPE_FORMAT_Z16_UNORM: 683 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]); 684 break; 685 default: /* 24-bit */ 686 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]); 687 break; 688 case PIPE_FORMAT_Z32_FLOAT: 689 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 690 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]); 691 break; 692 } 693} 694 695/* 696 * Rasterizer 697 */ 698 699static uint32_t si_translate_fill(uint32_t func) 700{ 
701 switch(func) { 702 case PIPE_POLYGON_MODE_FILL: 703 return V_028814_X_DRAW_TRIANGLES; 704 case PIPE_POLYGON_MODE_LINE: 705 return V_028814_X_DRAW_LINES; 706 case PIPE_POLYGON_MODE_POINT: 707 return V_028814_X_DRAW_POINTS; 708 default: 709 assert(0); 710 return V_028814_X_DRAW_POINTS; 711 } 712} 713 714static void *si_create_rs_state(struct pipe_context *ctx, 715 const struct pipe_rasterizer_state *state) 716{ 717 struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer); 718 struct si_pm4_state *pm4 = &rs->pm4; 719 unsigned tmp, i; 720 float psize_min, psize_max; 721 722 if (!rs) { 723 return NULL; 724 } 725 726 rs->scissor_enable = state->scissor; 727 rs->two_side = state->light_twoside; 728 rs->multisample_enable = state->multisample; 729 rs->force_persample_interp = state->force_persample_interp; 730 rs->clip_plane_enable = state->clip_plane_enable; 731 rs->line_stipple_enable = state->line_stipple_enable; 732 rs->poly_stipple_enable = state->poly_stipple_enable; 733 rs->line_smooth = state->line_smooth; 734 rs->poly_smooth = state->poly_smooth; 735 rs->uses_poly_offset = state->offset_point || state->offset_line || 736 state->offset_tri; 737 rs->clamp_fragment_color = state->clamp_fragment_color; 738 rs->flatshade = state->flatshade; 739 rs->sprite_coord_enable = state->sprite_coord_enable; 740 rs->rasterizer_discard = state->rasterizer_discard; 741 rs->pa_sc_line_stipple = state->line_stipple_enable ? 
742 S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | 743 S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0; 744 rs->pa_cl_clip_cntl = 745 S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) | 746 S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) | 747 S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) | 748 S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) | 749 S_028810_DX_LINEAR_ATTR_CLIP_ENA(1); 750 751 si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0, 752 S_0286D4_FLAT_SHADE_ENA(1) | 753 S_0286D4_PNT_SPRITE_ENA(1) | 754 S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | 755 S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) | 756 S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) | 757 S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) | 758 S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT)); 759 760 /* point size 12.4 fixed point */ 761 tmp = (unsigned)(state->point_size * 8.0); 762 si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp)); 763 764 if (state->point_size_per_vertex) { 765 psize_min = util_get_min_point_size(state); 766 psize_max = 8192; 767 } else { 768 /* Force the point size to be as if the vertex output was disabled. */ 769 psize_min = state->point_size; 770 psize_max = state->point_size; 771 } 772 /* Divide by two, because 0.5 = 1 pixel. 
*/ 773 si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX, 774 S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) | 775 S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2))); 776 777 tmp = (unsigned)state->line_width * 8; 778 si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp)); 779 si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0, 780 S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | 781 S_028A48_MSAA_ENABLE(state->multisample || 782 state->poly_smooth || 783 state->line_smooth) | 784 S_028A48_VPORT_SCISSOR_ENABLE(1)); 785 786 si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL, 787 S_028BE4_PIX_CENTER(state->half_pixel_center) | 788 S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH)); 789 790 si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp)); 791 si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL, 792 S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) | 793 S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | 794 S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | 795 S_028814_FACE(!state->front_ccw) | 796 S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) | 797 S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) | 798 S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) | 799 S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL || 800 state->fill_back != PIPE_POLYGON_MODE_FILL) | 801 S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) | 802 S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back))); 803 si_pm4_set_reg(pm4, R_00B130_SPI_SHADER_USER_DATA_VS_0 + 804 SI_SGPR_VS_STATE_BITS * 4, state->clamp_vertex_color); 805 806 /* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. 
*/ 807 for (i = 0; i < 3; i++) { 808 struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i]; 809 float offset_units = state->offset_units; 810 float offset_scale = state->offset_scale * 16.0f; 811 uint32_t pa_su_poly_offset_db_fmt_cntl = 0; 812 813 if (!state->offset_units_unscaled) { 814 switch (i) { 815 case 0: /* 16-bit zbuffer */ 816 offset_units *= 4.0f; 817 pa_su_poly_offset_db_fmt_cntl = 818 S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16); 819 break; 820 case 1: /* 24-bit zbuffer */ 821 offset_units *= 2.0f; 822 pa_su_poly_offset_db_fmt_cntl = 823 S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24); 824 break; 825 case 2: /* 32-bit zbuffer */ 826 offset_units *= 1.0f; 827 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | 828 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); 829 break; 830 } 831 } 832 833 si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 834 fui(offset_scale)); 835 si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, 836 fui(offset_units)); 837 si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, 838 fui(offset_scale)); 839 si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, 840 fui(offset_units)); 841 si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 842 pa_su_poly_offset_db_fmt_cntl); 843 } 844 845 return rs; 846} 847 848static void si_bind_rs_state(struct pipe_context *ctx, void *state) 849{ 850 struct si_context *sctx = (struct si_context *)ctx; 851 struct si_state_rasterizer *old_rs = 852 (struct si_state_rasterizer*)sctx->queued.named.rasterizer; 853 struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; 854 855 if (!state) 856 return; 857 858 if (sctx->framebuffer.nr_samples > 1 && 859 (!old_rs || old_rs->multisample_enable != rs->multisample_enable)) { 860 si_mark_atom_dirty(sctx, &sctx->db_render_state); 861 862 if (sctx->b.family >= CHIP_POLARIS10) 863 si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom); 864 } 865 866 r600_set_scissor_enable(&sctx->b, rs->scissor_enable); 867 
868 si_pm4_bind_state(sctx, rasterizer, rs); 869 si_update_poly_offset_state(sctx); 870 871 si_mark_atom_dirty(sctx, &sctx->clip_regs); 872} 873 874static void si_delete_rs_state(struct pipe_context *ctx, void *state) 875{ 876 struct si_context *sctx = (struct si_context *)ctx; 877 878 if (sctx->queued.named.rasterizer == state) 879 si_pm4_bind_state(sctx, poly_offset, NULL); 880 si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state); 881} 882 883/* 884 * infeered state between dsa and stencil ref 885 */ 886static void si_emit_stencil_ref(struct si_context *sctx, struct r600_atom *atom) 887{ 888 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 889 struct pipe_stencil_ref *ref = &sctx->stencil_ref.state; 890 struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part; 891 892 radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2); 893 radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) | 894 S_028430_STENCILMASK(dsa->valuemask[0]) | 895 S_028430_STENCILWRITEMASK(dsa->writemask[0]) | 896 S_028430_STENCILOPVAL(1)); 897 radeon_emit(cs, S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) | 898 S_028434_STENCILMASK_BF(dsa->valuemask[1]) | 899 S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) | 900 S_028434_STENCILOPVAL_BF(1)); 901} 902 903static void si_set_stencil_ref(struct pipe_context *ctx, 904 const struct pipe_stencil_ref *state) 905{ 906 struct si_context *sctx = (struct si_context *)ctx; 907 908 if (memcmp(&sctx->stencil_ref.state, state, sizeof(*state)) == 0) 909 return; 910 911 sctx->stencil_ref.state = *state; 912 si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom); 913} 914 915 916/* 917 * DSA 918 */ 919 920static uint32_t si_translate_stencil_op(int s_op) 921{ 922 switch (s_op) { 923 case PIPE_STENCIL_OP_KEEP: 924 return V_02842C_STENCIL_KEEP; 925 case PIPE_STENCIL_OP_ZERO: 926 return V_02842C_STENCIL_ZERO; 927 case PIPE_STENCIL_OP_REPLACE: 928 return V_02842C_STENCIL_REPLACE_TEST; 929 case PIPE_STENCIL_OP_INCR: 930 return 
V_02842C_STENCIL_ADD_CLAMP; 931 case PIPE_STENCIL_OP_DECR: 932 return V_02842C_STENCIL_SUB_CLAMP; 933 case PIPE_STENCIL_OP_INCR_WRAP: 934 return V_02842C_STENCIL_ADD_WRAP; 935 case PIPE_STENCIL_OP_DECR_WRAP: 936 return V_02842C_STENCIL_SUB_WRAP; 937 case PIPE_STENCIL_OP_INVERT: 938 return V_02842C_STENCIL_INVERT; 939 default: 940 R600_ERR("Unknown stencil op %d", s_op); 941 assert(0); 942 break; 943 } 944 return 0; 945} 946 947static void *si_create_dsa_state(struct pipe_context *ctx, 948 const struct pipe_depth_stencil_alpha_state *state) 949{ 950 struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa); 951 struct si_pm4_state *pm4 = &dsa->pm4; 952 unsigned db_depth_control; 953 uint32_t db_stencil_control = 0; 954 955 if (!dsa) { 956 return NULL; 957 } 958 959 dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask; 960 dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask; 961 dsa->stencil_ref.writemask[0] = state->stencil[0].writemask; 962 dsa->stencil_ref.writemask[1] = state->stencil[1].writemask; 963 964 db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | 965 S_028800_Z_WRITE_ENABLE(state->depth.writemask) | 966 S_028800_ZFUNC(state->depth.func) | 967 S_028800_DEPTH_BOUNDS_ENABLE(state->depth.bounds_test); 968 969 /* stencil */ 970 if (state->stencil[0].enabled) { 971 db_depth_control |= S_028800_STENCIL_ENABLE(1); 972 db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func); 973 db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op)); 974 db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op)); 975 db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op)); 976 977 if (state->stencil[1].enabled) { 978 db_depth_control |= S_028800_BACKFACE_ENABLE(1); 979 db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func); 980 db_stencil_control |= 
S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op)); 981 db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op)); 982 db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op)); 983 } 984 } 985 986 /* alpha */ 987 if (state->alpha.enabled) { 988 dsa->alpha_func = state->alpha.func; 989 990 si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 + 991 SI_SGPR_ALPHA_REF * 4, fui(state->alpha.ref_value)); 992 } else { 993 dsa->alpha_func = PIPE_FUNC_ALWAYS; 994 } 995 996 si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control); 997 si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); 998 if (state->depth.bounds_test) { 999 si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min)); 1000 si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max)); 1001 } 1002 1003 return dsa; 1004} 1005 1006static void si_bind_dsa_state(struct pipe_context *ctx, void *state) 1007{ 1008 struct si_context *sctx = (struct si_context *)ctx; 1009 struct si_state_dsa *dsa = state; 1010 1011 if (!state) 1012 return; 1013 1014 si_pm4_bind_state(sctx, dsa, dsa); 1015 1016 if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part, 1017 sizeof(struct si_dsa_stencil_ref_part)) != 0) { 1018 sctx->stencil_ref.dsa_part = dsa->stencil_ref; 1019 si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom); 1020 } 1021} 1022 1023static void si_delete_dsa_state(struct pipe_context *ctx, void *state) 1024{ 1025 struct si_context *sctx = (struct si_context *)ctx; 1026 si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state); 1027} 1028 1029static void *si_create_db_flush_dsa(struct si_context *sctx) 1030{ 1031 struct pipe_depth_stencil_alpha_state dsa = {}; 1032 1033 return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa); 1034} 1035 1036/* DB RENDER STATE */ 1037 1038static void si_set_active_query_state(struct pipe_context *ctx, 
boolean enable) 1039{ 1040 struct si_context *sctx = (struct si_context*)ctx; 1041 1042 /* Pipeline stat & streamout queries. */ 1043 if (enable) { 1044 sctx->b.flags &= ~R600_CONTEXT_STOP_PIPELINE_STATS; 1045 sctx->b.flags |= R600_CONTEXT_START_PIPELINE_STATS; 1046 } else { 1047 sctx->b.flags &= ~R600_CONTEXT_START_PIPELINE_STATS; 1048 sctx->b.flags |= R600_CONTEXT_STOP_PIPELINE_STATS; 1049 } 1050 1051 /* Occlusion queries. */ 1052 if (sctx->occlusion_queries_disabled != !enable) { 1053 sctx->occlusion_queries_disabled = !enable; 1054 si_mark_atom_dirty(sctx, &sctx->db_render_state); 1055 } 1056} 1057 1058static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable) 1059{ 1060 struct si_context *sctx = (struct si_context*)ctx; 1061 1062 si_mark_atom_dirty(sctx, &sctx->db_render_state); 1063} 1064 1065static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state) 1066{ 1067 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 1068 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 1069 unsigned db_shader_control; 1070 1071 radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2); 1072 1073 /* DB_RENDER_CONTROL */ 1074 if (sctx->dbcb_depth_copy_enabled || 1075 sctx->dbcb_stencil_copy_enabled) { 1076 radeon_emit(cs, 1077 S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) | 1078 S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) | 1079 S_028000_COPY_CENTROID(1) | 1080 S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample)); 1081 } else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) { 1082 radeon_emit(cs, 1083 S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) | 1084 S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace)); 1085 } else { 1086 radeon_emit(cs, 1087 S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) | 1088 S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear)); 1089 } 1090 1091 /* DB_COUNT_CONTROL (occlusion queries) */ 1092 if (sctx->b.num_occlusion_queries > 0 && 1093 
!sctx->occlusion_queries_disabled) { 1094 bool perfect = sctx->b.num_perfect_occlusion_queries > 0; 1095 1096 if (sctx->b.chip_class >= CIK) { 1097 radeon_emit(cs, 1098 S_028004_PERFECT_ZPASS_COUNTS(perfect) | 1099 S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) | 1100 S_028004_ZPASS_ENABLE(1) | 1101 S_028004_SLICE_EVEN_ENABLE(1) | 1102 S_028004_SLICE_ODD_ENABLE(1)); 1103 } else { 1104 radeon_emit(cs, 1105 S_028004_PERFECT_ZPASS_COUNTS(perfect) | 1106 S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples)); 1107 } 1108 } else { 1109 /* Disable occlusion queries. */ 1110 if (sctx->b.chip_class >= CIK) { 1111 radeon_emit(cs, 0); 1112 } else { 1113 radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1)); 1114 } 1115 } 1116 1117 /* DB_RENDER_OVERRIDE2 */ 1118 radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 1119 S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) | 1120 S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) | 1121 S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4)); 1122 1123 db_shader_control = S_02880C_ALPHA_TO_MASK_DISABLE(sctx->framebuffer.cb0_is_integer) | 1124 sctx->ps_db_shader_control; 1125 1126 /* Bug workaround for smoothing (overrasterization) on SI. */ 1127 if (sctx->b.chip_class == SI && sctx->smoothing_enabled) { 1128 db_shader_control &= C_02880C_Z_ORDER; 1129 db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z); 1130 } 1131 1132 /* Disable the gl_SampleMask fragment shader output if MSAA is disabled. 
*/ 1133 if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable)) 1134 db_shader_control &= C_02880C_MASK_EXPORT_ENABLE; 1135 1136 if (sctx->b.family == CHIP_STONEY && 1137 sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) 1138 db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1); 1139 1140 radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, 1141 db_shader_control); 1142} 1143 1144/* 1145 * format translation 1146 */ 1147static uint32_t si_translate_colorformat(enum pipe_format format) 1148{ 1149 const struct util_format_description *desc = util_format_description(format); 1150 1151#define HAS_SIZE(x,y,z,w) \ 1152 (desc->channel[0].size == (x) && desc->channel[1].size == (y) && \ 1153 desc->channel[2].size == (z) && desc->channel[3].size == (w)) 1154 1155 if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */ 1156 return V_028C70_COLOR_10_11_11; 1157 1158 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) 1159 return V_028C70_COLOR_INVALID; 1160 1161 /* hw cannot support mixed formats (except depth/stencil, since 1162 * stencil is not written to). 
*/ 1163 if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) 1164 return V_028C70_COLOR_INVALID; 1165 1166 switch (desc->nr_channels) { 1167 case 1: 1168 switch (desc->channel[0].size) { 1169 case 8: 1170 return V_028C70_COLOR_8; 1171 case 16: 1172 return V_028C70_COLOR_16; 1173 case 32: 1174 return V_028C70_COLOR_32; 1175 } 1176 break; 1177 case 2: 1178 if (desc->channel[0].size == desc->channel[1].size) { 1179 switch (desc->channel[0].size) { 1180 case 8: 1181 return V_028C70_COLOR_8_8; 1182 case 16: 1183 return V_028C70_COLOR_16_16; 1184 case 32: 1185 return V_028C70_COLOR_32_32; 1186 } 1187 } else if (HAS_SIZE(8,24,0,0)) { 1188 return V_028C70_COLOR_24_8; 1189 } else if (HAS_SIZE(24,8,0,0)) { 1190 return V_028C70_COLOR_8_24; 1191 } 1192 break; 1193 case 3: 1194 if (HAS_SIZE(5,6,5,0)) { 1195 return V_028C70_COLOR_5_6_5; 1196 } else if (HAS_SIZE(32,8,24,0)) { 1197 return V_028C70_COLOR_X24_8_32_FLOAT; 1198 } 1199 break; 1200 case 4: 1201 if (desc->channel[0].size == desc->channel[1].size && 1202 desc->channel[0].size == desc->channel[2].size && 1203 desc->channel[0].size == desc->channel[3].size) { 1204 switch (desc->channel[0].size) { 1205 case 4: 1206 return V_028C70_COLOR_4_4_4_4; 1207 case 8: 1208 return V_028C70_COLOR_8_8_8_8; 1209 case 16: 1210 return V_028C70_COLOR_16_16_16_16; 1211 case 32: 1212 return V_028C70_COLOR_32_32_32_32; 1213 } 1214 } else if (HAS_SIZE(5,5,5,1)) { 1215 return V_028C70_COLOR_1_5_5_5; 1216 } else if (HAS_SIZE(10,10,10,2)) { 1217 return V_028C70_COLOR_2_10_10_10; 1218 } 1219 break; 1220 } 1221 return V_028C70_COLOR_INVALID; 1222} 1223 1224static uint32_t si_colorformat_endian_swap(uint32_t colorformat) 1225{ 1226 if (SI_BIG_ENDIAN) { 1227 switch(colorformat) { 1228 /* 8-bit buffers. */ 1229 case V_028C70_COLOR_8: 1230 return V_028C70_ENDIAN_NONE; 1231 1232 /* 16-bit buffers. 
*/ 1233 case V_028C70_COLOR_5_6_5: 1234 case V_028C70_COLOR_1_5_5_5: 1235 case V_028C70_COLOR_4_4_4_4: 1236 case V_028C70_COLOR_16: 1237 case V_028C70_COLOR_8_8: 1238 return V_028C70_ENDIAN_8IN16; 1239 1240 /* 32-bit buffers. */ 1241 case V_028C70_COLOR_8_8_8_8: 1242 case V_028C70_COLOR_2_10_10_10: 1243 case V_028C70_COLOR_8_24: 1244 case V_028C70_COLOR_24_8: 1245 case V_028C70_COLOR_16_16: 1246 return V_028C70_ENDIAN_8IN32; 1247 1248 /* 64-bit buffers. */ 1249 case V_028C70_COLOR_16_16_16_16: 1250 return V_028C70_ENDIAN_8IN16; 1251 1252 case V_028C70_COLOR_32_32: 1253 return V_028C70_ENDIAN_8IN32; 1254 1255 /* 128-bit buffers. */ 1256 case V_028C70_COLOR_32_32_32_32: 1257 return V_028C70_ENDIAN_8IN32; 1258 default: 1259 return V_028C70_ENDIAN_NONE; /* Unsupported. */ 1260 } 1261 } else { 1262 return V_028C70_ENDIAN_NONE; 1263 } 1264} 1265 1266static uint32_t si_translate_dbformat(enum pipe_format format) 1267{ 1268 switch (format) { 1269 case PIPE_FORMAT_Z16_UNORM: 1270 return V_028040_Z_16; 1271 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1272 case PIPE_FORMAT_X8Z24_UNORM: 1273 case PIPE_FORMAT_Z24X8_UNORM: 1274 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1275 return V_028040_Z_24; /* deprecated on SI */ 1276 case PIPE_FORMAT_Z32_FLOAT: 1277 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1278 return V_028040_Z_32_FLOAT; 1279 default: 1280 return V_028040_Z_INVALID; 1281 } 1282} 1283 1284/* 1285 * Texture translation 1286 */ 1287 1288static uint32_t si_translate_texformat(struct pipe_screen *screen, 1289 enum pipe_format format, 1290 const struct util_format_description *desc, 1291 int first_non_void) 1292{ 1293 struct si_screen *sscreen = (struct si_screen*)screen; 1294 bool enable_compressed_formats = (sscreen->b.info.drm_major == 2 && 1295 sscreen->b.info.drm_minor >= 31) || 1296 sscreen->b.info.drm_major == 3; 1297 bool uniform = true; 1298 int i; 1299 1300 /* Colorspace (return non-RGB formats directly). 
*/ 1301 switch (desc->colorspace) { 1302 /* Depth stencil formats */ 1303 case UTIL_FORMAT_COLORSPACE_ZS: 1304 switch (format) { 1305 case PIPE_FORMAT_Z16_UNORM: 1306 return V_008F14_IMG_DATA_FORMAT_16; 1307 case PIPE_FORMAT_X24S8_UINT: 1308 case PIPE_FORMAT_Z24X8_UNORM: 1309 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1310 return V_008F14_IMG_DATA_FORMAT_8_24; 1311 case PIPE_FORMAT_X8Z24_UNORM: 1312 case PIPE_FORMAT_S8X24_UINT: 1313 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1314 return V_008F14_IMG_DATA_FORMAT_24_8; 1315 case PIPE_FORMAT_S8_UINT: 1316 return V_008F14_IMG_DATA_FORMAT_8; 1317 case PIPE_FORMAT_Z32_FLOAT: 1318 return V_008F14_IMG_DATA_FORMAT_32; 1319 case PIPE_FORMAT_X32_S8X24_UINT: 1320 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1321 return V_008F14_IMG_DATA_FORMAT_X24_8_32; 1322 default: 1323 goto out_unknown; 1324 } 1325 1326 case UTIL_FORMAT_COLORSPACE_YUV: 1327 goto out_unknown; /* TODO */ 1328 1329 case UTIL_FORMAT_COLORSPACE_SRGB: 1330 if (desc->nr_channels != 4 && desc->nr_channels != 1) 1331 goto out_unknown; 1332 break; 1333 1334 default: 1335 break; 1336 } 1337 1338 if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { 1339 if (!enable_compressed_formats) 1340 goto out_unknown; 1341 1342 switch (format) { 1343 case PIPE_FORMAT_RGTC1_SNORM: 1344 case PIPE_FORMAT_LATC1_SNORM: 1345 case PIPE_FORMAT_RGTC1_UNORM: 1346 case PIPE_FORMAT_LATC1_UNORM: 1347 return V_008F14_IMG_DATA_FORMAT_BC4; 1348 case PIPE_FORMAT_RGTC2_SNORM: 1349 case PIPE_FORMAT_LATC2_SNORM: 1350 case PIPE_FORMAT_RGTC2_UNORM: 1351 case PIPE_FORMAT_LATC2_UNORM: 1352 return V_008F14_IMG_DATA_FORMAT_BC5; 1353 default: 1354 goto out_unknown; 1355 } 1356 } 1357 1358 if (desc->layout == UTIL_FORMAT_LAYOUT_ETC && 1359 sscreen->b.family == CHIP_STONEY) { 1360 switch (format) { 1361 case PIPE_FORMAT_ETC1_RGB8: 1362 case PIPE_FORMAT_ETC2_RGB8: 1363 case PIPE_FORMAT_ETC2_SRGB8: 1364 return V_008F14_IMG_DATA_FORMAT_ETC2_RGB; 1365 case PIPE_FORMAT_ETC2_RGB8A1: 1366 case PIPE_FORMAT_ETC2_SRGB8A1: 1367 return 
V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1; 1368 case PIPE_FORMAT_ETC2_RGBA8: 1369 case PIPE_FORMAT_ETC2_SRGBA8: 1370 return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA; 1371 case PIPE_FORMAT_ETC2_R11_UNORM: 1372 case PIPE_FORMAT_ETC2_R11_SNORM: 1373 return V_008F14_IMG_DATA_FORMAT_ETC2_R; 1374 case PIPE_FORMAT_ETC2_RG11_UNORM: 1375 case PIPE_FORMAT_ETC2_RG11_SNORM: 1376 return V_008F14_IMG_DATA_FORMAT_ETC2_RG; 1377 default: 1378 goto out_unknown; 1379 } 1380 } 1381 1382 if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { 1383 if (!enable_compressed_formats) 1384 goto out_unknown; 1385 1386 switch (format) { 1387 case PIPE_FORMAT_BPTC_RGBA_UNORM: 1388 case PIPE_FORMAT_BPTC_SRGBA: 1389 return V_008F14_IMG_DATA_FORMAT_BC7; 1390 case PIPE_FORMAT_BPTC_RGB_FLOAT: 1391 case PIPE_FORMAT_BPTC_RGB_UFLOAT: 1392 return V_008F14_IMG_DATA_FORMAT_BC6; 1393 default: 1394 goto out_unknown; 1395 } 1396 } 1397 1398 if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 1399 switch (format) { 1400 case PIPE_FORMAT_R8G8_B8G8_UNORM: 1401 case PIPE_FORMAT_G8R8_B8R8_UNORM: 1402 return V_008F14_IMG_DATA_FORMAT_GB_GR; 1403 case PIPE_FORMAT_G8R8_G8B8_UNORM: 1404 case PIPE_FORMAT_R8G8_R8B8_UNORM: 1405 return V_008F14_IMG_DATA_FORMAT_BG_RG; 1406 default: 1407 goto out_unknown; 1408 } 1409 } 1410 1411 if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { 1412 if (!enable_compressed_formats) 1413 goto out_unknown; 1414 1415 if (!util_format_s3tc_enabled) { 1416 goto out_unknown; 1417 } 1418 1419 switch (format) { 1420 case PIPE_FORMAT_DXT1_RGB: 1421 case PIPE_FORMAT_DXT1_RGBA: 1422 case PIPE_FORMAT_DXT1_SRGB: 1423 case PIPE_FORMAT_DXT1_SRGBA: 1424 return V_008F14_IMG_DATA_FORMAT_BC1; 1425 case PIPE_FORMAT_DXT3_RGBA: 1426 case PIPE_FORMAT_DXT3_SRGBA: 1427 return V_008F14_IMG_DATA_FORMAT_BC2; 1428 case PIPE_FORMAT_DXT5_RGBA: 1429 case PIPE_FORMAT_DXT5_SRGBA: 1430 return V_008F14_IMG_DATA_FORMAT_BC3; 1431 default: 1432 goto out_unknown; 1433 } 1434 } 1435 1436 if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) { 1437 return 
V_008F14_IMG_DATA_FORMAT_5_9_9_9; 1438 } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) { 1439 return V_008F14_IMG_DATA_FORMAT_10_11_11; 1440 } 1441 1442 /* R8G8Bx_SNORM - TODO CxV8U8 */ 1443 1444 /* hw cannot support mixed formats (except depth/stencil, since only 1445 * depth is read).*/ 1446 if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) 1447 goto out_unknown; 1448 1449 /* See whether the components are of the same size. */ 1450 for (i = 1; i < desc->nr_channels; i++) { 1451 uniform = uniform && desc->channel[0].size == desc->channel[i].size; 1452 } 1453 1454 /* Non-uniform formats. */ 1455 if (!uniform) { 1456 switch(desc->nr_channels) { 1457 case 3: 1458 if (desc->channel[0].size == 5 && 1459 desc->channel[1].size == 6 && 1460 desc->channel[2].size == 5) { 1461 return V_008F14_IMG_DATA_FORMAT_5_6_5; 1462 } 1463 goto out_unknown; 1464 case 4: 1465 if (desc->channel[0].size == 5 && 1466 desc->channel[1].size == 5 && 1467 desc->channel[2].size == 5 && 1468 desc->channel[3].size == 1) { 1469 return V_008F14_IMG_DATA_FORMAT_1_5_5_5; 1470 } 1471 if (desc->channel[0].size == 10 && 1472 desc->channel[1].size == 10 && 1473 desc->channel[2].size == 10 && 1474 desc->channel[3].size == 2) { 1475 return V_008F14_IMG_DATA_FORMAT_2_10_10_10; 1476 } 1477 goto out_unknown; 1478 } 1479 goto out_unknown; 1480 } 1481 1482 if (first_non_void < 0 || first_non_void > 3) 1483 goto out_unknown; 1484 1485 /* uniform formats */ 1486 switch (desc->channel[first_non_void].size) { 1487 case 4: 1488 switch (desc->nr_channels) { 1489#if 0 /* Not supported for render targets */ 1490 case 2: 1491 return V_008F14_IMG_DATA_FORMAT_4_4; 1492#endif 1493 case 4: 1494 return V_008F14_IMG_DATA_FORMAT_4_4_4_4; 1495 } 1496 break; 1497 case 8: 1498 switch (desc->nr_channels) { 1499 case 1: 1500 return V_008F14_IMG_DATA_FORMAT_8; 1501 case 2: 1502 return V_008F14_IMG_DATA_FORMAT_8_8; 1503 case 4: 1504 return V_008F14_IMG_DATA_FORMAT_8_8_8_8; 1505 } 1506 break; 1507 case 16: 1508 
switch (desc->nr_channels) { 1509 case 1: 1510 return V_008F14_IMG_DATA_FORMAT_16; 1511 case 2: 1512 return V_008F14_IMG_DATA_FORMAT_16_16; 1513 case 4: 1514 return V_008F14_IMG_DATA_FORMAT_16_16_16_16; 1515 } 1516 break; 1517 case 32: 1518 switch (desc->nr_channels) { 1519 case 1: 1520 return V_008F14_IMG_DATA_FORMAT_32; 1521 case 2: 1522 return V_008F14_IMG_DATA_FORMAT_32_32; 1523#if 0 /* Not supported for render targets */ 1524 case 3: 1525 return V_008F14_IMG_DATA_FORMAT_32_32_32; 1526#endif 1527 case 4: 1528 return V_008F14_IMG_DATA_FORMAT_32_32_32_32; 1529 } 1530 } 1531 1532out_unknown: 1533 /* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */ 1534 return ~0; 1535} 1536 1537static unsigned si_tex_wrap(unsigned wrap) 1538{ 1539 switch (wrap) { 1540 default: 1541 case PIPE_TEX_WRAP_REPEAT: 1542 return V_008F30_SQ_TEX_WRAP; 1543 case PIPE_TEX_WRAP_CLAMP: 1544 return V_008F30_SQ_TEX_CLAMP_HALF_BORDER; 1545 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 1546 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL; 1547 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 1548 return V_008F30_SQ_TEX_CLAMP_BORDER; 1549 case PIPE_TEX_WRAP_MIRROR_REPEAT: 1550 return V_008F30_SQ_TEX_MIRROR; 1551 case PIPE_TEX_WRAP_MIRROR_CLAMP: 1552 return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER; 1553 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 1554 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; 1555 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 1556 return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER; 1557 } 1558} 1559 1560static unsigned si_tex_mipfilter(unsigned filter) 1561{ 1562 switch (filter) { 1563 case PIPE_TEX_MIPFILTER_NEAREST: 1564 return V_008F38_SQ_TEX_Z_FILTER_POINT; 1565 case PIPE_TEX_MIPFILTER_LINEAR: 1566 return V_008F38_SQ_TEX_Z_FILTER_LINEAR; 1567 default: 1568 case PIPE_TEX_MIPFILTER_NONE: 1569 return V_008F38_SQ_TEX_Z_FILTER_NONE; 1570 } 1571} 1572 1573static unsigned si_tex_compare(unsigned compare) 1574{ 1575 switch (compare) { 1576 default: 1577 case PIPE_FUNC_NEVER: 1578 return 
V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER; 1579 case PIPE_FUNC_LESS: 1580 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS; 1581 case PIPE_FUNC_EQUAL: 1582 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL; 1583 case PIPE_FUNC_LEQUAL: 1584 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; 1585 case PIPE_FUNC_GREATER: 1586 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER; 1587 case PIPE_FUNC_NOTEQUAL: 1588 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; 1589 case PIPE_FUNC_GEQUAL: 1590 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; 1591 case PIPE_FUNC_ALWAYS: 1592 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS; 1593 } 1594} 1595 1596static unsigned si_tex_dim(unsigned res_target, unsigned view_target, 1597 unsigned nr_samples) 1598{ 1599 if (view_target == PIPE_TEXTURE_CUBE || 1600 view_target == PIPE_TEXTURE_CUBE_ARRAY) 1601 res_target = view_target; 1602 1603 switch (res_target) { 1604 default: 1605 case PIPE_TEXTURE_1D: 1606 return V_008F1C_SQ_RSRC_IMG_1D; 1607 case PIPE_TEXTURE_1D_ARRAY: 1608 return V_008F1C_SQ_RSRC_IMG_1D_ARRAY; 1609 case PIPE_TEXTURE_2D: 1610 case PIPE_TEXTURE_RECT: 1611 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA : 1612 V_008F1C_SQ_RSRC_IMG_2D; 1613 case PIPE_TEXTURE_2D_ARRAY: 1614 return nr_samples > 1 ? 
V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : 1615 V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 1616 case PIPE_TEXTURE_3D: 1617 return V_008F1C_SQ_RSRC_IMG_3D; 1618 case PIPE_TEXTURE_CUBE: 1619 case PIPE_TEXTURE_CUBE_ARRAY: 1620 return V_008F1C_SQ_RSRC_IMG_CUBE; 1621 } 1622} 1623 1624/* 1625 * Format support testing 1626 */ 1627 1628static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) 1629{ 1630 return si_translate_texformat(screen, format, util_format_description(format), 1631 util_format_get_first_non_void_channel(format)) != ~0U; 1632} 1633 1634static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen, 1635 const struct util_format_description *desc, 1636 int first_non_void) 1637{ 1638 unsigned type; 1639 int i; 1640 1641 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 1642 return V_008F0C_BUF_DATA_FORMAT_10_11_11; 1643 1644 assert(first_non_void >= 0); 1645 type = desc->channel[first_non_void].type; 1646 1647 if (type == UTIL_FORMAT_TYPE_FIXED) 1648 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1649 1650 if (desc->nr_channels == 4 && 1651 desc->channel[0].size == 10 && 1652 desc->channel[1].size == 10 && 1653 desc->channel[2].size == 10 && 1654 desc->channel[3].size == 2) 1655 return V_008F0C_BUF_DATA_FORMAT_2_10_10_10; 1656 1657 /* See whether the components are of the same size. 
*/ 1658 for (i = 0; i < desc->nr_channels; i++) { 1659 if (desc->channel[first_non_void].size != desc->channel[i].size) 1660 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1661 } 1662 1663 switch (desc->channel[first_non_void].size) { 1664 case 8: 1665 switch (desc->nr_channels) { 1666 case 1: 1667 return V_008F0C_BUF_DATA_FORMAT_8; 1668 case 2: 1669 return V_008F0C_BUF_DATA_FORMAT_8_8; 1670 case 3: 1671 case 4: 1672 return V_008F0C_BUF_DATA_FORMAT_8_8_8_8; 1673 } 1674 break; 1675 case 16: 1676 switch (desc->nr_channels) { 1677 case 1: 1678 return V_008F0C_BUF_DATA_FORMAT_16; 1679 case 2: 1680 return V_008F0C_BUF_DATA_FORMAT_16_16; 1681 case 3: 1682 case 4: 1683 return V_008F0C_BUF_DATA_FORMAT_16_16_16_16; 1684 } 1685 break; 1686 case 32: 1687 /* From the Southern Islands ISA documentation about MTBUF: 1688 * 'Memory reads of data in memory that is 32 or 64 bits do not 1689 * undergo any format conversion.' 1690 */ 1691 if (type != UTIL_FORMAT_TYPE_FLOAT && 1692 !desc->channel[first_non_void].pure_integer) 1693 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1694 1695 switch (desc->nr_channels) { 1696 case 1: 1697 return V_008F0C_BUF_DATA_FORMAT_32; 1698 case 2: 1699 return V_008F0C_BUF_DATA_FORMAT_32_32; 1700 case 3: 1701 return V_008F0C_BUF_DATA_FORMAT_32_32_32; 1702 case 4: 1703 return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 1704 } 1705 break; 1706 } 1707 1708 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1709} 1710 1711static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen, 1712 const struct util_format_description *desc, 1713 int first_non_void) 1714{ 1715 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 1716 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 1717 1718 assert(first_non_void >= 0); 1719 1720 switch (desc->channel[first_non_void].type) { 1721 case UTIL_FORMAT_TYPE_SIGNED: 1722 if (desc->channel[first_non_void].normalized) 1723 return V_008F0C_BUF_NUM_FORMAT_SNORM; 1724 else if (desc->channel[first_non_void].pure_integer) 1725 return 
V_008F0C_BUF_NUM_FORMAT_SINT; 1726 else 1727 return V_008F0C_BUF_NUM_FORMAT_SSCALED; 1728 break; 1729 case UTIL_FORMAT_TYPE_UNSIGNED: 1730 if (desc->channel[first_non_void].normalized) 1731 return V_008F0C_BUF_NUM_FORMAT_UNORM; 1732 else if (desc->channel[first_non_void].pure_integer) 1733 return V_008F0C_BUF_NUM_FORMAT_UINT; 1734 else 1735 return V_008F0C_BUF_NUM_FORMAT_USCALED; 1736 break; 1737 case UTIL_FORMAT_TYPE_FLOAT: 1738 default: 1739 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 1740 } 1741} 1742 1743static bool si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format) 1744{ 1745 const struct util_format_description *desc; 1746 int first_non_void; 1747 unsigned data_format; 1748 1749 desc = util_format_description(format); 1750 first_non_void = util_format_get_first_non_void_channel(format); 1751 data_format = si_translate_buffer_dataformat(screen, desc, first_non_void); 1752 return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID; 1753} 1754 1755static bool si_is_colorbuffer_format_supported(enum pipe_format format) 1756{ 1757 return si_translate_colorformat(format) != V_028C70_COLOR_INVALID && 1758 r600_translate_colorswap(format, false) != ~0U; 1759} 1760 1761static bool si_is_zs_format_supported(enum pipe_format format) 1762{ 1763 return si_translate_dbformat(format) != V_028040_Z_INVALID; 1764} 1765 1766static boolean si_is_format_supported(struct pipe_screen *screen, 1767 enum pipe_format format, 1768 enum pipe_texture_target target, 1769 unsigned sample_count, 1770 unsigned usage) 1771{ 1772 unsigned retval = 0; 1773 1774 if (target >= PIPE_MAX_TEXTURE_TYPES) { 1775 R600_ERR("r600: unsupported texture type %d\n", target); 1776 return false; 1777 } 1778 1779 if (!util_format_is_supported(format, usage)) 1780 return false; 1781 1782 if (sample_count > 1) { 1783 if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE)) 1784 return false; 1785 1786 if (usage & PIPE_BIND_SHADER_IMAGE) 1787 return false; 1788 1789 switch 
(sample_count) { 1790 case 2: 1791 case 4: 1792 case 8: 1793 break; 1794 case 16: 1795 if (format == PIPE_FORMAT_NONE) 1796 return true; 1797 else 1798 return false; 1799 default: 1800 return false; 1801 } 1802 } 1803 1804 if (usage & (PIPE_BIND_SAMPLER_VIEW | 1805 PIPE_BIND_SHADER_IMAGE)) { 1806 if (target == PIPE_BUFFER) { 1807 if (si_is_vertex_format_supported(screen, format)) 1808 retval |= usage & (PIPE_BIND_SAMPLER_VIEW | 1809 PIPE_BIND_SHADER_IMAGE); 1810 } else { 1811 if (si_is_sampler_format_supported(screen, format)) 1812 retval |= usage & (PIPE_BIND_SAMPLER_VIEW | 1813 PIPE_BIND_SHADER_IMAGE); 1814 } 1815 } 1816 1817 if ((usage & (PIPE_BIND_RENDER_TARGET | 1818 PIPE_BIND_DISPLAY_TARGET | 1819 PIPE_BIND_SCANOUT | 1820 PIPE_BIND_SHARED | 1821 PIPE_BIND_BLENDABLE)) && 1822 si_is_colorbuffer_format_supported(format)) { 1823 retval |= usage & 1824 (PIPE_BIND_RENDER_TARGET | 1825 PIPE_BIND_DISPLAY_TARGET | 1826 PIPE_BIND_SCANOUT | 1827 PIPE_BIND_SHARED); 1828 if (!util_format_is_pure_integer(format) && 1829 !util_format_is_depth_or_stencil(format)) 1830 retval |= usage & PIPE_BIND_BLENDABLE; 1831 } 1832 1833 if ((usage & PIPE_BIND_DEPTH_STENCIL) && 1834 si_is_zs_format_supported(format)) { 1835 retval |= PIPE_BIND_DEPTH_STENCIL; 1836 } 1837 1838 if ((usage & PIPE_BIND_VERTEX_BUFFER) && 1839 si_is_vertex_format_supported(screen, format)) { 1840 retval |= PIPE_BIND_VERTEX_BUFFER; 1841 } 1842 1843 if (usage & PIPE_BIND_TRANSFER_READ) 1844 retval |= PIPE_BIND_TRANSFER_READ; 1845 if (usage & PIPE_BIND_TRANSFER_WRITE) 1846 retval |= PIPE_BIND_TRANSFER_WRITE; 1847 1848 if ((usage & PIPE_BIND_LINEAR) && 1849 !util_format_is_compressed(format) && 1850 !(usage & PIPE_BIND_DEPTH_STENCIL)) 1851 retval |= PIPE_BIND_LINEAR; 1852 1853 return retval == usage; 1854} 1855 1856/* 1857 * framebuffer handling 1858 */ 1859 1860static void si_choose_spi_color_formats(struct r600_surface *surf, 1861 unsigned format, unsigned swap, 1862 unsigned ntype, bool is_depth) 1863{ 1864 /* 
Alpha is needed for alpha-to-coverage. 1865 * Blending may be with or without alpha. 1866 */ 1867 unsigned normal = 0; /* most optimal, may not support blending or export alpha */ 1868 unsigned alpha = 0; /* exports alpha, but may not support blending */ 1869 unsigned blend = 0; /* supports blending, but may not export alpha */ 1870 unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */ 1871 1872 /* Choose the SPI color formats. These are required values for Stoney/RB+. 1873 * Other chips have multiple choices, though they are not necessarily better. 1874 */ 1875 switch (format) { 1876 case V_028C70_COLOR_5_6_5: 1877 case V_028C70_COLOR_1_5_5_5: 1878 case V_028C70_COLOR_5_5_5_1: 1879 case V_028C70_COLOR_4_4_4_4: 1880 case V_028C70_COLOR_10_11_11: 1881 case V_028C70_COLOR_11_11_10: 1882 case V_028C70_COLOR_8: 1883 case V_028C70_COLOR_8_8: 1884 case V_028C70_COLOR_8_8_8_8: 1885 case V_028C70_COLOR_10_10_10_2: 1886 case V_028C70_COLOR_2_10_10_10: 1887 if (ntype == V_028C70_NUMBER_UINT) 1888 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR; 1889 else if (ntype == V_028C70_NUMBER_SINT) 1890 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR; 1891 else 1892 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR; 1893 break; 1894 1895 case V_028C70_COLOR_16: 1896 case V_028C70_COLOR_16_16: 1897 case V_028C70_COLOR_16_16_16_16: 1898 if (ntype == V_028C70_NUMBER_UNORM || 1899 ntype == V_028C70_NUMBER_SNORM) { 1900 /* UNORM16 and SNORM16 don't support blending */ 1901 if (ntype == V_028C70_NUMBER_UNORM) 1902 normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR; 1903 else 1904 normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR; 1905 1906 /* Use 32 bits per channel for blending. 
*/ 1907 if (format == V_028C70_COLOR_16) { 1908 if (swap == V_028C70_SWAP_STD) { /* R */ 1909 blend = V_028714_SPI_SHADER_32_R; 1910 blend_alpha = V_028714_SPI_SHADER_32_AR; 1911 } else if (swap == V_028C70_SWAP_ALT_REV) /* A */ 1912 blend = blend_alpha = V_028714_SPI_SHADER_32_AR; 1913 else 1914 assert(0); 1915 } else if (format == V_028C70_COLOR_16_16) { 1916 if (swap == V_028C70_SWAP_STD) { /* RG */ 1917 blend = V_028714_SPI_SHADER_32_GR; 1918 blend_alpha = V_028714_SPI_SHADER_32_ABGR; 1919 } else if (swap == V_028C70_SWAP_ALT) /* RA */ 1920 blend = blend_alpha = V_028714_SPI_SHADER_32_AR; 1921 else 1922 assert(0); 1923 } else /* 16_16_16_16 */ 1924 blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR; 1925 } else if (ntype == V_028C70_NUMBER_UINT) 1926 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR; 1927 else if (ntype == V_028C70_NUMBER_SINT) 1928 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR; 1929 else if (ntype == V_028C70_NUMBER_FLOAT) 1930 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR; 1931 else 1932 assert(0); 1933 break; 1934 1935 case V_028C70_COLOR_32: 1936 if (swap == V_028C70_SWAP_STD) { /* R */ 1937 blend = normal = V_028714_SPI_SHADER_32_R; 1938 alpha = blend_alpha = V_028714_SPI_SHADER_32_AR; 1939 } else if (swap == V_028C70_SWAP_ALT_REV) /* A */ 1940 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR; 1941 else 1942 assert(0); 1943 break; 1944 1945 case V_028C70_COLOR_32_32: 1946 if (swap == V_028C70_SWAP_STD) { /* RG */ 1947 blend = normal = V_028714_SPI_SHADER_32_GR; 1948 alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR; 1949 } else if (swap == V_028C70_SWAP_ALT) /* RA */ 1950 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR; 1951 else 1952 assert(0); 1953 break; 1954 1955 case V_028C70_COLOR_32_32_32_32: 1956 case V_028C70_COLOR_8_24: 1957 case V_028C70_COLOR_24_8: 1958 case V_028C70_COLOR_X24_8_32_FLOAT: 1959 alpha = blend = blend_alpha = 
normal = V_028714_SPI_SHADER_32_ABGR; 1960 break; 1961 1962 default: 1963 assert(0); 1964 return; 1965 } 1966 1967 /* The DB->CB copy needs 32_ABGR. */ 1968 if (is_depth) 1969 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR; 1970 1971 surf->spi_shader_col_format = normal; 1972 surf->spi_shader_col_format_alpha = alpha; 1973 surf->spi_shader_col_format_blend = blend; 1974 surf->spi_shader_col_format_blend_alpha = blend_alpha; 1975}

/**
 * Compute the colorbuffer (CB) register values for a color surface and
 * cache them in the surface.
 *
 * The hardware number type is derived from the first non-void channel of
 * the surface format; the color format, component swap and endian swap are
 * translated from the pipe format. The results are stored in
 * surf->cb_color_view, surf->cb_color_info, surf->cb_color_attrib and, on
 * VI and newer, surf->cb_dcc_control. The SPI export formats are chosen via
 * si_choose_spi_color_formats() and surf->color_initialized is set at the
 * end.
 *
 * @param sctx  context; only the chip class is read here
 * @param surf  surface to initialize; surf->base must already be filled in
 */
static void si_initialize_color_surface(struct si_context *sctx,
					struct r600_surface *surf)
{
	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
	unsigned color_info, color_attrib, color_view;
	unsigned format, swap, ntype, endian;
	const struct util_format_description *desc;
	int i;
	unsigned blend_clamp = 0, blend_bypass = 0;

	color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) |
		     S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer);

	/* Find the first non-void channel; i == 4 means there is none. */
	desc = util_format_description(surf->base.format);
	for (i = 0; i < 4; i++) {
		if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
			break;
		}
	}
	if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
		ntype = V_028C70_NUMBER_FLOAT;
	} else {
		ntype = V_028C70_NUMBER_UNORM;
		if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
			ntype = V_028C70_NUMBER_SRGB;
		else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
			if (desc->channel[i].pure_integer) {
				ntype = V_028C70_NUMBER_SINT;
			} else {
				assert(desc->channel[i].normalized);
				ntype = V_028C70_NUMBER_SNORM;
			}
		} else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
			if (desc->channel[i].pure_integer) {
				ntype = V_028C70_NUMBER_UINT;
			} else {
				assert(desc->channel[i].normalized);
				ntype = V_028C70_NUMBER_UNORM;
			}
		}
	}

	format = si_translate_colorformat(surf->base.format);
	if (format == V_028C70_COLOR_INVALID) {
		R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
	}
	assert(format != V_028C70_COLOR_INVALID);
	swap = r600_translate_colorswap(surf->base.format, false);
	endian = si_colorformat_endian_swap(format);

	/* blend clamp should be set for all NORM/SRGB types */
	if (ntype == V_028C70_NUMBER_UNORM ||
	    ntype == V_028C70_NUMBER_SNORM ||
	    ntype == V_028C70_NUMBER_SRGB)
		blend_clamp = 1;

	/* set blend bypass according to docs if SINT/UINT or
	   8/24 COLOR variants */
	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
		blend_clamp = 0;
		blend_bypass = 1;
	}

	/* Remember 8-bit-per-channel pure integer formats. */
	if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
	    (format == V_028C70_COLOR_8 ||
	     format == V_028C70_COLOR_8_8 ||
	     format == V_028C70_COLOR_8_8_8_8))
		surf->color_is_int8 = true;

	color_info = S_028C70_FORMAT(format) |
		S_028C70_COMP_SWAP(swap) |
		S_028C70_BLEND_CLAMP(blend_clamp) |
		S_028C70_BLEND_BYPASS(blend_bypass) |
		S_028C70_NUMBER_TYPE(ntype) |
		S_028C70_ENDIAN(endian);

	/* Intensity is implemented as Red, so treat it that way. */
	color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1 ||
						  util_format_is_intensity(surf->base.format));

	if (rtex->resource.b.b.nr_samples > 1) {
		unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);

		color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
				S_028C74_NUM_FRAGMENTS(log_samples);

		if (rtex->fmask.size) {
			color_info |= S_028C70_COMPRESSION(1);
			unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height);

			if (sctx->b.chip_class == SI) {
				/* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */
				color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
			}
		}
	}

	surf->cb_color_view = color_view;
	surf->cb_color_info = color_info;
	surf->cb_color_attrib = color_attrib;

	if (sctx->b.chip_class >= VI) {
		unsigned max_uncompressed_block_size = 2;

		/* Multisampled surfaces with 1 or 2 bytes per element use a
		 * smaller value; everything else keeps the default of 2.
		 */
		if (rtex->surface.nsamples > 1) {
			if (rtex->surface.bpe == 1)
				max_uncompressed_block_size = 0;
			else if (rtex->surface.bpe == 2)
				max_uncompressed_block_size = 1;
		}

		surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
				       S_028C78_INDEPENDENT_64B_BLOCKS(1);
	}

	/* This must be set for fast clear to work without FMASK. */
	if (!rtex->fmask.size && sctx->b.chip_class == SI) {
		unsigned bankh = util_logbase2(rtex->surface.bankh);
		surf->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
	}

	/* Determine pixel shader export format */
	si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth);

	surf->color_initialized = true;
}

2106static void si_init_depth_surface(struct si_context *sctx, 2107 struct r600_surface *surf) 2108{ 2109 struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; 2110 unsigned level = surf->base.u.tex.level; 2111 struct radeon_surf_level *levelinfo = &rtex->surface.level[level]; 2112 unsigned format; 2113 uint32_t z_info, s_info, db_depth_info; 2114 uint64_t z_offs, s_offs; 2115 uint32_t db_htile_data_base, db_htile_surface; 2116 2117 format = si_translate_dbformat(rtex->resource.b.b.format); 2118 2119 if (format == V_028040_Z_INVALID) { 2120 R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format); 2121 } 2122 assert(format != V_028040_Z_INVALID); 2123 2124 s_offs = z_offs = rtex->resource.gpu_address; 2125 z_offs += rtex->surface.level[level].offset; 2126 s_offs += rtex->surface.stencil_level[level].offset; 2127 2128 db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1); 2129 2130 z_info = S_028040_FORMAT(format); 2131 if (rtex->resource.b.b.nr_samples > 1) { 2132 z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples)); 2133 } 2134 2135 if (rtex->surface.flags & RADEON_SURF_SBUFFER) 2136 s_info = S_028044_FORMAT(V_028044_STENCIL_8); 2137 else 2138 s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID); 2139 2140 if (sctx->b.chip_class >= CIK) { 2141 struct radeon_info *info = &sctx->screen->b.info; 2142 unsigned index = rtex->surface.tiling_index[level]; 2143 unsigned stencil_index = rtex->surface.stencil_tiling_index[level]; 2144 unsigned macro_index = rtex->surface.macro_tile_index; 2145 unsigned tile_mode = info->si_tile_mode_array[index]; 2146 
unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index]; 2147 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index]; 2148 2149 db_depth_info |= 2150 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) | 2151 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) | 2152 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) | 2153 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) | 2154 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) | 2155 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode)); 2156 z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode)); 2157 s_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode)); 2158 } else { 2159 unsigned tile_mode_index = si_tile_mode_index(rtex, level, false); 2160 z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index); 2161 tile_mode_index = si_tile_mode_index(rtex, level, true); 2162 s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index); 2163 } 2164 2165 /* HiZ aka depth buffer htile */ 2166 /* use htile only for first level */ 2167 if (rtex->htile_buffer && !level) { 2168 z_info |= S_028040_TILE_SURFACE_ENABLE(1) | 2169 S_028040_ALLOW_EXPCLEAR(1); 2170 2171 if (rtex->surface.flags & RADEON_SURF_SBUFFER) { 2172 /* Workaround: For a not yet understood reason, the 2173 * combination of MSAA, fast stencil clear and stencil 2174 * decompress messes with subsequent stencil buffer 2175 * uses. Problem was reproduced on Verde, Bonaire, 2176 * Tonga, and Carrizo. 2177 * 2178 * Disabling EXPCLEAR works around the problem. 2179 * 2180 * Check piglit's arb_texture_multisample-stencil-clear 2181 * test if you want to try changing this. 2182 */ 2183 if (rtex->resource.b.b.nr_samples <= 1) 2184 s_info |= S_028044_ALLOW_EXPCLEAR(1); 2185 } else 2186 /* Use all of the htile_buffer for depth if there's no stencil. 
*/ 2187 s_info |= S_028044_TILE_STENCIL_DISABLE(1); 2188 2189 uint64_t va = rtex->htile_buffer->gpu_address; 2190 db_htile_data_base = va >> 8; 2191 db_htile_surface = S_028ABC_FULL_CACHE(1); 2192 } else { 2193 db_htile_data_base = 0; 2194 db_htile_surface = 0; 2195 } 2196 2197 assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0); 2198 2199 surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) | 2200 S_028008_SLICE_MAX(surf->base.u.tex.last_layer); 2201 surf->db_htile_data_base = db_htile_data_base; 2202 surf->db_depth_info = db_depth_info; 2203 surf->db_z_info = z_info; 2204 surf->db_stencil_info = s_info; 2205 surf->db_depth_base = z_offs >> 8; 2206 surf->db_stencil_base = s_offs >> 8; 2207 surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) | 2208 S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1); 2209 surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x * 2210 levelinfo->nblk_y) / 64 - 1); 2211 surf->db_htile_surface = db_htile_surface; 2212 2213 surf->depth_initialized = true; 2214} 2215 2216static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state) 2217{ 2218 for (int i = 0; i < state->nr_cbufs; ++i) { 2219 struct r600_surface *surf = NULL; 2220 struct r600_texture *rtex; 2221 2222 if (!state->cbufs[i]) 2223 continue; 2224 surf = (struct r600_surface*)state->cbufs[i]; 2225 rtex = (struct r600_texture*)surf->base.texture; 2226 2227 p_atomic_dec(&rtex->framebuffers_bound); 2228 } 2229} 2230 2231static void si_set_framebuffer_state(struct pipe_context *ctx, 2232 const struct pipe_framebuffer_state *state) 2233{ 2234 struct si_context *sctx = (struct si_context *)ctx; 2235 struct pipe_constant_buffer constbuf = {0}; 2236 struct r600_surface *surf = NULL; 2237 struct r600_texture *rtex; 2238 bool old_cb0_is_integer = sctx->framebuffer.cb0_is_integer; 2239 bool old_any_dst_linear = sctx->framebuffer.any_dst_linear; 2240 unsigned old_nr_samples = 
sctx->framebuffer.nr_samples; 2241 int i; 2242 2243 for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 2244 if (!sctx->framebuffer.state.cbufs[i]) 2245 continue; 2246 2247 rtex = (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture; 2248 if (rtex->dcc_gather_statistics) 2249 vi_separate_dcc_stop_query(ctx, rtex); 2250 } 2251 2252 /* Only flush TC when changing the framebuffer state, because 2253 * the only client not using TC that can change textures is 2254 * the framebuffer. 2255 * 2256 * Flush all CB and DB caches here because all buffers can be used 2257 * for write by both TC (with shader image stores) and CB/DB. 2258 */ 2259 sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | 2260 SI_CONTEXT_INV_GLOBAL_L2 | 2261 SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER | 2262 SI_CONTEXT_CS_PARTIAL_FLUSH; 2263 2264 /* Take the maximum of the old and new count. If the new count is lower, 2265 * dirtying is needed to disable the unbound colorbuffers. 2266 */ 2267 sctx->framebuffer.dirty_cbufs |= 2268 (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1; 2269 sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf; 2270 2271 si_dec_framebuffer_counters(&sctx->framebuffer.state); 2272 util_copy_framebuffer_state(&sctx->framebuffer.state, state); 2273 2274 sctx->framebuffer.spi_shader_col_format = 0; 2275 sctx->framebuffer.spi_shader_col_format_alpha = 0; 2276 sctx->framebuffer.spi_shader_col_format_blend = 0; 2277 sctx->framebuffer.spi_shader_col_format_blend_alpha = 0; 2278 sctx->framebuffer.color_is_int8 = 0; 2279 2280 sctx->framebuffer.compressed_cb_mask = 0; 2281 sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); 2282 sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples); 2283 sctx->framebuffer.cb0_is_integer = state->nr_cbufs && state->cbufs[0] && 2284 util_format_is_pure_integer(state->cbufs[0]->format); 2285 sctx->framebuffer.any_dst_linear = false; 2286 2287 if 
(sctx->framebuffer.cb0_is_integer != old_cb0_is_integer) 2288 si_mark_atom_dirty(sctx, &sctx->db_render_state); 2289 2290 for (i = 0; i < state->nr_cbufs; i++) { 2291 if (!state->cbufs[i]) 2292 continue; 2293 2294 surf = (struct r600_surface*)state->cbufs[i]; 2295 rtex = (struct r600_texture*)surf->base.texture; 2296 2297 if (!surf->color_initialized) { 2298 si_initialize_color_surface(sctx, surf); 2299 } 2300 2301 sctx->framebuffer.spi_shader_col_format |= 2302 surf->spi_shader_col_format << (i * 4); 2303 sctx->framebuffer.spi_shader_col_format_alpha |= 2304 surf->spi_shader_col_format_alpha << (i * 4); 2305 sctx->framebuffer.spi_shader_col_format_blend |= 2306 surf->spi_shader_col_format_blend << (i * 4); 2307 sctx->framebuffer.spi_shader_col_format_blend_alpha |= 2308 surf->spi_shader_col_format_blend_alpha << (i * 4); 2309 2310 if (surf->color_is_int8) 2311 sctx->framebuffer.color_is_int8 |= 1 << i; 2312 2313 if (rtex->fmask.size && rtex->cmask.size) { 2314 sctx->framebuffer.compressed_cb_mask |= 1 << i; 2315 } 2316 2317 if (surf->level_info->mode == RADEON_SURF_MODE_LINEAR_ALIGNED) 2318 sctx->framebuffer.any_dst_linear = true; 2319 2320 r600_context_add_resource_size(ctx, surf->base.texture); 2321 2322 p_atomic_inc(&rtex->framebuffers_bound); 2323 2324 if (rtex->dcc_gather_statistics) { 2325 /* Dirty tracking must be enabled for DCC usage analysis. */ 2326 sctx->framebuffer.compressed_cb_mask |= 1 << i; 2327 vi_separate_dcc_start_query(ctx, rtex); 2328 } 2329 } 2330 /* Set the second SPI format for possible dual-src blending. 
*/ 2331 if (i == 1 && surf) { 2332 sctx->framebuffer.spi_shader_col_format |= 2333 surf->spi_shader_col_format << (i * 4); 2334 sctx->framebuffer.spi_shader_col_format_alpha |= 2335 surf->spi_shader_col_format_alpha << (i * 4); 2336 sctx->framebuffer.spi_shader_col_format_blend |= 2337 surf->spi_shader_col_format_blend << (i * 4); 2338 sctx->framebuffer.spi_shader_col_format_blend_alpha |= 2339 surf->spi_shader_col_format_blend_alpha << (i * 4); 2340 } 2341 2342 if (state->zsbuf) { 2343 surf = (struct r600_surface*)state->zsbuf; 2344 2345 if (!surf->depth_initialized) { 2346 si_init_depth_surface(sctx, surf); 2347 } 2348 r600_context_add_resource_size(ctx, surf->base.texture); 2349 } 2350 2351 si_update_poly_offset_state(sctx); 2352 si_mark_atom_dirty(sctx, &sctx->cb_render_state); 2353 si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); 2354 2355 if (sctx->framebuffer.any_dst_linear != old_any_dst_linear) 2356 si_mark_atom_dirty(sctx, &sctx->msaa_config); 2357 2358 if (sctx->framebuffer.nr_samples != old_nr_samples) { 2359 si_mark_atom_dirty(sctx, &sctx->msaa_config); 2360 si_mark_atom_dirty(sctx, &sctx->db_render_state); 2361 2362 /* Set sample locations as fragment shader constants. 
*/ 2363 switch (sctx->framebuffer.nr_samples) { 2364 case 1: 2365 constbuf.user_buffer = sctx->b.sample_locations_1x; 2366 break; 2367 case 2: 2368 constbuf.user_buffer = sctx->b.sample_locations_2x; 2369 break; 2370 case 4: 2371 constbuf.user_buffer = sctx->b.sample_locations_4x; 2372 break; 2373 case 8: 2374 constbuf.user_buffer = sctx->b.sample_locations_8x; 2375 break; 2376 case 16: 2377 constbuf.user_buffer = sctx->b.sample_locations_16x; 2378 break; 2379 default: 2380 R600_ERR("Requested an invalid number of samples %i.\n", 2381 sctx->framebuffer.nr_samples); 2382 assert(0); 2383 } 2384 constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; 2385 si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf); 2386 2387 si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom); 2388 } 2389 2390 sctx->need_check_render_feedback = true; 2391} 2392 2393static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom) 2394{ 2395 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 2396 struct pipe_framebuffer_state *state = &sctx->framebuffer.state; 2397 unsigned i, nr_cbufs = state->nr_cbufs; 2398 struct r600_texture *tex = NULL; 2399 struct r600_surface *cb = NULL; 2400 unsigned cb_color_info = 0; 2401 2402 /* Colorbuffers. */ 2403 for (i = 0; i < nr_cbufs; i++) { 2404 unsigned pitch_tile_max, slice_tile_max, tile_mode_index; 2405 unsigned cb_color_base, cb_color_fmask, cb_color_attrib; 2406 unsigned cb_color_pitch, cb_color_slice, cb_color_fmask_slice; 2407 2408 if (!(sctx->framebuffer.dirty_cbufs & (1 << i))) 2409 continue; 2410 2411 cb = (struct r600_surface*)state->cbufs[i]; 2412 if (!cb) { 2413 radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 2414 S_028C70_FORMAT(V_028C70_COLOR_INVALID)); 2415 continue; 2416 } 2417 2418 tex = (struct r600_texture *)cb->base.texture; 2419 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2420 &tex->resource, RADEON_USAGE_READWRITE, 2421 tex->surface.nsamples > 1 ? 
2422 RADEON_PRIO_COLOR_BUFFER_MSAA : 2423 RADEON_PRIO_COLOR_BUFFER); 2424 2425 if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) { 2426 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2427 tex->cmask_buffer, RADEON_USAGE_READWRITE, 2428 RADEON_PRIO_CMASK); 2429 } 2430 2431 if (tex->dcc_separate_buffer) 2432 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2433 tex->dcc_separate_buffer, 2434 RADEON_USAGE_READWRITE, 2435 RADEON_PRIO_DCC); 2436 2437 /* Compute mutable surface parameters. */ 2438 pitch_tile_max = cb->level_info->nblk_x / 8 - 1; 2439 slice_tile_max = cb->level_info->nblk_x * 2440 cb->level_info->nblk_y / 64 - 1; 2441 tile_mode_index = si_tile_mode_index(tex, cb->base.u.tex.level, false); 2442 2443 cb_color_base = (tex->resource.gpu_address + cb->level_info->offset) >> 8; 2444 cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max); 2445 cb_color_slice = S_028C68_TILE_MAX(slice_tile_max); 2446 cb_color_attrib = cb->cb_color_attrib | 2447 S_028C74_TILE_MODE_INDEX(tile_mode_index); 2448 2449 if (tex->fmask.size) { 2450 if (sctx->b.chip_class >= CIK) 2451 cb_color_pitch |= S_028C64_FMASK_TILE_MAX(tex->fmask.pitch_in_pixels / 8 - 1); 2452 cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tex->fmask.tile_mode_index); 2453 cb_color_fmask = (tex->resource.gpu_address + tex->fmask.offset) >> 8; 2454 cb_color_fmask_slice = S_028C88_TILE_MAX(tex->fmask.slice_tile_max); 2455 } else { 2456 /* This must be set for fast clear to work without FMASK. 
*/ 2457 if (sctx->b.chip_class >= CIK) 2458 cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max); 2459 cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index); 2460 cb_color_fmask = cb_color_base; 2461 cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max); 2462 } 2463 2464 cb_color_info = cb->cb_color_info | tex->cb_color_info; 2465 2466 if (tex->dcc_offset && cb->level_info->dcc_enabled) { 2467 bool is_msaa_resolve_dst = state->cbufs[0] && 2468 state->cbufs[0]->texture->nr_samples > 1 && 2469 state->cbufs[1] == &cb->base && 2470 state->cbufs[1]->texture->nr_samples <= 1; 2471 2472 if (!is_msaa_resolve_dst) 2473 cb_color_info |= S_028C70_DCC_ENABLE(1); 2474 } 2475 2476 radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 2477 sctx->b.chip_class >= VI ? 14 : 13); 2478 radeon_emit(cs, cb_color_base); /* R_028C60_CB_COLOR0_BASE */ 2479 radeon_emit(cs, cb_color_pitch); /* R_028C64_CB_COLOR0_PITCH */ 2480 radeon_emit(cs, cb_color_slice); /* R_028C68_CB_COLOR0_SLICE */ 2481 radeon_emit(cs, cb->cb_color_view); /* R_028C6C_CB_COLOR0_VIEW */ 2482 radeon_emit(cs, cb_color_info); /* R_028C70_CB_COLOR0_INFO */ 2483 radeon_emit(cs, cb_color_attrib); /* R_028C74_CB_COLOR0_ATTRIB */ 2484 radeon_emit(cs, cb->cb_dcc_control); /* R_028C78_CB_COLOR0_DCC_CONTROL */ 2485 radeon_emit(cs, tex->cmask.base_address_reg); /* R_028C7C_CB_COLOR0_CMASK */ 2486 radeon_emit(cs, tex->cmask.slice_tile_max); /* R_028C80_CB_COLOR0_CMASK_SLICE */ 2487 radeon_emit(cs, cb_color_fmask); /* R_028C84_CB_COLOR0_FMASK */ 2488 radeon_emit(cs, cb_color_fmask_slice); /* R_028C88_CB_COLOR0_FMASK_SLICE */ 2489 radeon_emit(cs, tex->color_clear_value[0]); /* R_028C8C_CB_COLOR0_CLEAR_WORD0 */ 2490 radeon_emit(cs, tex->color_clear_value[1]); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */ 2491 2492 if (sctx->b.chip_class >= VI) /* R_028C94_CB_COLOR0_DCC_BASE */ 2493 radeon_emit(cs, ((!tex->dcc_separate_buffer ? 
tex->resource.gpu_address : 0) + 2494 tex->dcc_offset + 2495 tex->surface.level[cb->base.u.tex.level].dcc_offset) >> 8); 2496 } 2497 /* set CB_COLOR1_INFO for possible dual-src blending */ 2498 if (i == 1 && state->cbufs[0] && 2499 sctx->framebuffer.dirty_cbufs & (1 << 0)) { 2500 radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C, 2501 cb_color_info); 2502 i++; 2503 } 2504 for (; i < 8 ; i++) 2505 if (sctx->framebuffer.dirty_cbufs & (1 << i)) 2506 radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); 2507 2508 /* ZS buffer. */ 2509 if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) { 2510 struct r600_surface *zb = (struct r600_surface*)state->zsbuf; 2511 struct r600_texture *rtex = (struct r600_texture*)zb->base.texture; 2512 2513 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2514 &rtex->resource, RADEON_USAGE_READWRITE, 2515 zb->base.texture->nr_samples > 1 ? 2516 RADEON_PRIO_DEPTH_BUFFER_MSAA : 2517 RADEON_PRIO_DEPTH_BUFFER); 2518 2519 if (zb->db_htile_data_base) { 2520 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2521 rtex->htile_buffer, RADEON_USAGE_READWRITE, 2522 RADEON_PRIO_HTILE); 2523 } 2524 2525 radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view); 2526 radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base); 2527 2528 radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9); 2529 radeon_emit(cs, zb->db_depth_info); /* R_02803C_DB_DEPTH_INFO */ 2530 radeon_emit(cs, zb->db_z_info | /* R_028040_DB_Z_INFO */ 2531 S_028040_ZRANGE_PRECISION(rtex->depth_clear_value != 0)); 2532 radeon_emit(cs, zb->db_stencil_info); /* R_028044_DB_STENCIL_INFO */ 2533 radeon_emit(cs, zb->db_depth_base); /* R_028048_DB_Z_READ_BASE */ 2534 radeon_emit(cs, zb->db_stencil_base); /* R_02804C_DB_STENCIL_READ_BASE */ 2535 radeon_emit(cs, zb->db_depth_base); /* R_028050_DB_Z_WRITE_BASE */ 2536 radeon_emit(cs, zb->db_stencil_base); /* R_028054_DB_STENCIL_WRITE_BASE */ 2537 radeon_emit(cs, zb->db_depth_size); /* 
R_028058_DB_DEPTH_SIZE */ 2538 radeon_emit(cs, zb->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */ 2539 2540 radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2); 2541 radeon_emit(cs, rtex->stencil_clear_value); /* R_028028_DB_STENCIL_CLEAR */ 2542 radeon_emit(cs, fui(rtex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */ 2543 2544 radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface); 2545 } else if (sctx->framebuffer.dirty_zsbuf) { 2546 radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2); 2547 radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */ 2548 radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */ 2549 } 2550 2551 /* Framebuffer dimensions. */ 2552 /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */ 2553 radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR, 2554 S_028208_BR_X(state->width) | S_028208_BR_Y(state->height)); 2555 2556 sctx->framebuffer.dirty_cbufs = 0; 2557 sctx->framebuffer.dirty_zsbuf = false; 2558}

/**
 * Emit the MSAA sample locations when the effective sample count differs
 * from the one emitted last (tracked in sctx->msaa_sample_locs.nr_samples).
 *
 * @param sctx  context
 * @param atom  state atom being emitted (unused here)
 */
static void si_emit_msaa_sample_locs(struct si_context *sctx,
				     struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	unsigned nr_samples = sctx->framebuffer.nr_samples;

	/* Smoothing (only possible with nr_samples == 1) uses the same
	 * sample locations as the MSAA it simulates.
	 */
	if (nr_samples <= 1 && sctx->smoothing_enabled)
		nr_samples = SI_NUM_SMOOTH_AA_SAMPLES;

	/* The small primitive filter on Polaris requires explicitly setting
	 * sample locations to 0 when MSAA is disabled.
	 */
	if (sctx->b.family >= CHIP_POLARIS10) {
		struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;

		if (!sctx->smoothing_enabled &&
		    rs && !rs->multisample_enable)
			nr_samples = 1;
	}

	/* Before Polaris the single-sample case is never emitted. */
	if ((nr_samples > 1 || sctx->b.family >= CHIP_POLARIS10) &&
	    (nr_samples != sctx->msaa_sample_locs.nr_samples)) {
		sctx->msaa_sample_locs.nr_samples = nr_samples;
		cayman_emit_msaa_sample_locs(cs, nr_samples);
	}
}

/**
 * Emit the MSAA configuration.
 *
 * Builds the sc_mode_cntl_1 value (the walk settings depend on whether any
 * bound destination is linear and on the number of tile pipes) and passes
 * it to cayman_emit_msaa_config() together with the framebuffer sample
 * count, the per-sample shading iteration count and the smoothing sample
 * count.
 *
 * @param sctx  context
 * @param atom  state atom being emitted (unused here)
 */
static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	unsigned num_tile_pipes = sctx->screen->b.info.num_tile_pipes;
	/* 33% faster rendering to linear color buffers */
	bool dst_is_linear = sctx->framebuffer.any_dst_linear;
	unsigned sc_mode_cntl_1 =
		S_028A4C_WALK_SIZE(dst_is_linear) |
		S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) |
		S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) |
		/* always 1: */
		S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) |
		S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) |
		S_028A4C_TILE_WALK_ORDER_ENABLE(1) |
		S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
		S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
		S_028A4C_FORCE_EOV_REZ_ENABLE(1);

	cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
				sctx->ps_iter_samples,
				sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0,
				sc_mode_cntl_1);
}

/**
 * Record a new minimum sample count (stored as sctx->ps_iter_samples).
 *
 * Does nothing when the value is unchanged. The MSAA config atom is only
 * marked dirty when the bound framebuffer is multisampled.
 *
 * @param ctx          context (a struct si_context)
 * @param min_samples  new minimum sample count
 */
static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
{
	struct si_context *sctx = (struct si_context *)ctx;

	if (sctx->ps_iter_samples == min_samples)
		return;

	sctx->ps_iter_samples = min_samples;

	if (sctx->framebuffer.nr_samples > 1)
		si_mark_atom_dirty(sctx, &sctx->msaa_config);
}

/*
 * Samplers
 */

/**
 * Build the sampler view descriptor for a buffer texture.
 *
 * The element stride is taken from the format's block size in bytes. The
 * record count is the requested element range clamped to the number of
 * whole elements in the buffer (width0 / stride); on VI and newer it is
 * multiplied by the stride before being stored.
 *
 * @param screen         screen, used for format translation and chip class
 * @param buf            buffer resource backing the view
 * @param format         element format
 * @param first_element  index of the first element in the view
 * @param last_element   index of the last element in the view (inclusive)
 * @param state          256-bit descriptor; only the high 128 bits
 *                       (state[4..7]) are filled in
 */
void
si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf,
			  enum pipe_format format,
			  unsigned first_element, unsigned last_element,
			  uint32_t *state)
{
	const struct util_format_description *desc;
	int first_non_void;
	uint64_t va;
	unsigned stride;
	unsigned num_records;
	unsigned num_format, data_format;

	desc = util_format_description(format);
	first_non_void = util_format_get_first_non_void_channel(format);
	stride = desc->block.bits / 8;
	va = buf->gpu_address + first_element * stride;
	num_format = si_translate_buffer_numformat(&screen->b.b, desc, first_non_void);
	data_format = si_translate_buffer_dataformat(&screen->b.b, desc, first_non_void);

	num_records = last_element + 1 - first_element;
	num_records = MIN2(num_records, buf->b.b.width0 / stride);

	if (screen->b.chip_class >= VI)
		num_records *= stride;

	/* Low 32 address bits go in state[4], the rest in BASE_ADDRESS_HI. */
	state[4] = va;
	state[5] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
		   S_008F04_STRIDE(stride);
	state[6] = num_records;
	state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
		   S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
		   S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
		   S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
		   S_008F0C_NUM_FORMAT(num_format) |
		   S_008F0C_DATA_FORMAT(data_format);
}

2673/** 2674 * Build the sampler view descriptor for a texture. 
2675 */ 2676void 2677si_make_texture_descriptor(struct si_screen *screen, 2678 struct r600_texture *tex, 2679 bool sampler, 2680 enum pipe_texture_target target, 2681 enum pipe_format pipe_format, 2682 const unsigned char state_swizzle[4], 2683 unsigned first_level, unsigned last_level, 2684 unsigned first_layer, unsigned last_layer, 2685 unsigned width, unsigned height, unsigned depth, 2686 uint32_t *state, 2687 uint32_t *fmask_state) 2688{ 2689 struct pipe_resource *res = &tex->resource.b.b; 2690 const struct util_format_description *desc; 2691 unsigned char swizzle[4]; 2692 int first_non_void; 2693 unsigned num_format, data_format, type; 2694 uint64_t va; 2695 2696 desc = util_format_description(pipe_format); 2697 2698 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 2699 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; 2700 const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1}; 2701 2702 switch (pipe_format) { 2703 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2704 case PIPE_FORMAT_X24S8_UINT: 2705 case PIPE_FORMAT_X32_S8X24_UINT: 2706 case PIPE_FORMAT_X8Z24_UNORM: 2707 util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); 2708 break; 2709 default: 2710 util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle); 2711 } 2712 } else { 2713 util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle); 2714 } 2715 2716 first_non_void = util_format_get_first_non_void_channel(pipe_format); 2717 2718 switch (pipe_format) { 2719 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2720 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2721 break; 2722 default: 2723 if (first_non_void < 0) { 2724 if (util_format_is_compressed(pipe_format)) { 2725 switch (pipe_format) { 2726 case PIPE_FORMAT_DXT1_SRGB: 2727 case PIPE_FORMAT_DXT1_SRGBA: 2728 case PIPE_FORMAT_DXT3_SRGBA: 2729 case PIPE_FORMAT_DXT5_SRGBA: 2730 case PIPE_FORMAT_BPTC_SRGBA: 2731 case PIPE_FORMAT_ETC2_SRGB8: 2732 case PIPE_FORMAT_ETC2_SRGB8A1: 2733 case PIPE_FORMAT_ETC2_SRGBA8: 2734 num_format = 
V_008F14_IMG_NUM_FORMAT_SRGB; 2735 break; 2736 case PIPE_FORMAT_RGTC1_SNORM: 2737 case PIPE_FORMAT_LATC1_SNORM: 2738 case PIPE_FORMAT_RGTC2_SNORM: 2739 case PIPE_FORMAT_LATC2_SNORM: 2740 case PIPE_FORMAT_ETC2_R11_SNORM: 2741 case PIPE_FORMAT_ETC2_RG11_SNORM: 2742 /* implies float, so use SNORM/UNORM to determine 2743 whether data is signed or not */ 2744 case PIPE_FORMAT_BPTC_RGB_FLOAT: 2745 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 2746 break; 2747 default: 2748 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2749 break; 2750 } 2751 } else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 2752 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2753 } else { 2754 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 2755 } 2756 } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { 2757 num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 2758 } else { 2759 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2760 2761 switch (desc->channel[first_non_void].type) { 2762 case UTIL_FORMAT_TYPE_FLOAT: 2763 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 2764 break; 2765 case UTIL_FORMAT_TYPE_SIGNED: 2766 if (desc->channel[first_non_void].normalized) 2767 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 2768 else if (desc->channel[first_non_void].pure_integer) 2769 num_format = V_008F14_IMG_NUM_FORMAT_SINT; 2770 else 2771 num_format = V_008F14_IMG_NUM_FORMAT_SSCALED; 2772 break; 2773 case UTIL_FORMAT_TYPE_UNSIGNED: 2774 if (desc->channel[first_non_void].normalized) 2775 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2776 else if (desc->channel[first_non_void].pure_integer) 2777 num_format = V_008F14_IMG_NUM_FORMAT_UINT; 2778 else 2779 num_format = V_008F14_IMG_NUM_FORMAT_USCALED; 2780 } 2781 } 2782 } 2783 2784 data_format = si_translate_texformat(&screen->b.b, pipe_format, desc, first_non_void); 2785 if (data_format == ~0) { 2786 data_format = 0; 2787 } 2788 2789 if (!sampler && 2790 (res->target == PIPE_TEXTURE_CUBE || 2791 res->target == PIPE_TEXTURE_CUBE_ARRAY || 2792 res->target == PIPE_TEXTURE_3D)) { 
2793 /* For the purpose of shader images, treat cube maps and 3D 2794 * textures as 2D arrays. For 3D textures, the address 2795 * calculations for mipmaps are different, so we rely on the 2796 * caller to effectively disable mipmaps. 2797 */ 2798 type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 2799 2800 assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0)); 2801 } else { 2802 type = si_tex_dim(res->target, target, res->nr_samples); 2803 } 2804 2805 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { 2806 height = 1; 2807 depth = res->array_size; 2808 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || 2809 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) { 2810 if (sampler || res->target != PIPE_TEXTURE_3D) 2811 depth = res->array_size; 2812 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE) 2813 depth = res->array_size / 6; 2814 2815 state[0] = 0; 2816 state[1] = (S_008F14_DATA_FORMAT(data_format) | 2817 S_008F14_NUM_FORMAT(num_format)); 2818 state[2] = (S_008F18_WIDTH(width - 1) | 2819 S_008F18_HEIGHT(height - 1)); 2820 state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) | 2821 S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | 2822 S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | 2823 S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) | 2824 S_008F1C_BASE_LEVEL(res->nr_samples > 1 ? 2825 0 : first_level) | 2826 S_008F1C_LAST_LEVEL(res->nr_samples > 1 ? 2827 util_logbase2(res->nr_samples) : 2828 last_level) | 2829 S_008F1C_POW2_PAD(res->last_level > 0) | 2830 S_008F1C_TYPE(type)); 2831 state[4] = S_008F20_DEPTH(depth - 1); 2832 state[5] = (S_008F24_BASE_ARRAY(first_layer) | 2833 S_008F24_LAST_ARRAY(last_layer)); 2834 state[6] = 0; 2835 state[7] = 0; 2836 2837 if (tex->dcc_offset) { 2838 unsigned swap = r600_translate_colorswap(pipe_format, false); 2839 2840 state[6] = S_008F28_ALPHA_IS_ON_MSB(swap <= 1); 2841 } else { 2842 /* The last dword is unused by hw. The shader uses it to clear 2843 * bits in the first dword of sampler state. 
2844 */ 2845 if (screen->b.chip_class <= CIK && res->nr_samples <= 1) { 2846 if (first_level == last_level) 2847 state[7] = C_008F30_MAX_ANISO_RATIO; 2848 else 2849 state[7] = 0xffffffff; 2850 } 2851 } 2852 2853 /* Initialize the sampler view for FMASK. */ 2854 if (tex->fmask.size) { 2855 uint32_t fmask_format; 2856 2857 va = tex->resource.gpu_address + tex->fmask.offset; 2858 2859 switch (res->nr_samples) { 2860 case 2: 2861 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2; 2862 break; 2863 case 4: 2864 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4; 2865 break; 2866 case 8: 2867 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8; 2868 break; 2869 default: 2870 assert(0); 2871 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID; 2872 } 2873 2874 fmask_state[0] = va >> 8; 2875 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | 2876 S_008F14_DATA_FORMAT(fmask_format) | 2877 S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT); 2878 fmask_state[2] = S_008F18_WIDTH(width - 1) | 2879 S_008F18_HEIGHT(height - 1); 2880 fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | 2881 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | 2882 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | 2883 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | 2884 S_008F1C_TILING_INDEX(tex->fmask.tile_mode_index) | 2885 S_008F1C_TYPE(si_tex_dim(res->target, target, 0)); 2886 fmask_state[4] = S_008F20_DEPTH(depth - 1) | 2887 S_008F20_PITCH(tex->fmask.pitch_in_pixels - 1); 2888 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer) | 2889 S_008F24_LAST_ARRAY(last_layer); 2890 fmask_state[6] = 0; 2891 fmask_state[7] = 0; 2892 } 2893} 2894 2895/** 2896 * Create a sampler view. 
2897 * 2898 * @param ctx context 2899 * @param texture texture 2900 * @param state sampler view template 2901 * @param width0 width0 override (for compressed textures as int) 2902 * @param height0 height0 override (for compressed textures as int) 2903 * @param force_level set the base address to the level (for compressed textures) 2904 */ 2905struct pipe_sampler_view * 2906si_create_sampler_view_custom(struct pipe_context *ctx, 2907 struct pipe_resource *texture, 2908 const struct pipe_sampler_view *state, 2909 unsigned width0, unsigned height0, 2910 unsigned force_level) 2911{ 2912 struct si_context *sctx = (struct si_context*)ctx; 2913 struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view); 2914 struct r600_texture *tmp = (struct r600_texture*)texture; 2915 unsigned base_level, first_level, last_level; 2916 unsigned char state_swizzle[4]; 2917 unsigned height, depth, width; 2918 unsigned last_layer = state->u.tex.last_layer; 2919 enum pipe_format pipe_format; 2920 const struct radeon_surf_level *surflevel; 2921 2922 if (!view) 2923 return NULL; 2924 2925 /* initialize base object */ 2926 view->base = *state; 2927 view->base.texture = NULL; 2928 view->base.reference.count = 1; 2929 view->base.context = ctx; 2930 2931 /* NULL resource, obey swizzle (only ZERO and ONE make sense). */ 2932 if (!texture) { 2933 view->state[3] = S_008F1C_DST_SEL_X(si_map_swizzle(state->swizzle_r)) | 2934 S_008F1C_DST_SEL_Y(si_map_swizzle(state->swizzle_g)) | 2935 S_008F1C_DST_SEL_Z(si_map_swizzle(state->swizzle_b)) | 2936 S_008F1C_DST_SEL_W(si_map_swizzle(state->swizzle_a)) | 2937 S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D); 2938 return &view->base; 2939 } 2940 2941 pipe_resource_reference(&view->base.texture, texture); 2942 2943 if (state->format == PIPE_FORMAT_X24S8_UINT || 2944 state->format == PIPE_FORMAT_S8X24_UINT || 2945 state->format == PIPE_FORMAT_X32_S8X24_UINT || 2946 state->format == PIPE_FORMAT_S8_UINT) 2947 view->is_stencil_sampler = true; 2948 2949 /* Buffer resource. 
*/ 2950 if (texture->target == PIPE_BUFFER) { 2951 si_make_buffer_descriptor(sctx->screen, 2952 (struct r600_resource *)texture, 2953 state->format, 2954 state->u.buf.first_element, 2955 state->u.buf.last_element, 2956 view->state); 2957 2958 LIST_ADDTAIL(&view->list, &sctx->b.texture_buffers); 2959 return &view->base; 2960 } 2961 2962 state_swizzle[0] = state->swizzle_r; 2963 state_swizzle[1] = state->swizzle_g; 2964 state_swizzle[2] = state->swizzle_b; 2965 state_swizzle[3] = state->swizzle_a; 2966 2967 base_level = 0; 2968 first_level = state->u.tex.first_level; 2969 last_level = state->u.tex.last_level; 2970 width = width0; 2971 height = height0; 2972 depth = texture->depth0; 2973 2974 if (force_level) { 2975 assert(force_level == first_level && 2976 force_level == last_level); 2977 base_level = force_level; 2978 first_level = 0; 2979 last_level = 0; 2980 width = u_minify(width, force_level); 2981 height = u_minify(height, force_level); 2982 depth = u_minify(depth, force_level); 2983 } 2984 2985 /* This is not needed if state trackers set last_layer correctly. */ 2986 if (state->target == PIPE_TEXTURE_1D || 2987 state->target == PIPE_TEXTURE_2D || 2988 state->target == PIPE_TEXTURE_RECT || 2989 state->target == PIPE_TEXTURE_CUBE) 2990 last_layer = state->u.tex.first_layer; 2991 2992 /* Texturing with separate depth and stencil. */ 2993 pipe_format = state->format; 2994 2995 /* Depth/stencil texturing sometimes needs separate texture. */ 2996 if (tmp->is_depth && !r600_can_sample_zs(tmp, view->is_stencil_sampler)) { 2997 if (!tmp->flushed_depth_texture && 2998 !r600_init_flushed_depth_texture(ctx, texture, NULL)) { 2999 pipe_resource_reference(&view->base.texture, NULL); 3000 FREE(view); 3001 return NULL; 3002 } 3003 3004 /* Override format for the case where the flushed texture 3005 * contains only Z or only S. 
3006 */ 3007 if (tmp->flushed_depth_texture->resource.b.b.format != tmp->resource.b.b.format) 3008 pipe_format = tmp->flushed_depth_texture->resource.b.b.format; 3009 3010 tmp = tmp->flushed_depth_texture; 3011 } 3012 3013 surflevel = tmp->surface.level; 3014 3015 if (tmp->db_compatible) { 3016 switch (pipe_format) { 3017 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 3018 pipe_format = PIPE_FORMAT_Z32_FLOAT; 3019 break; 3020 case PIPE_FORMAT_X8Z24_UNORM: 3021 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 3022 /* Z24 is always stored like this for DB 3023 * compatibility. 3024 */ 3025 pipe_format = PIPE_FORMAT_Z24X8_UNORM; 3026 break; 3027 case PIPE_FORMAT_X24S8_UINT: 3028 case PIPE_FORMAT_S8X24_UINT: 3029 case PIPE_FORMAT_X32_S8X24_UINT: 3030 pipe_format = PIPE_FORMAT_S8_UINT; 3031 surflevel = tmp->surface.stencil_level; 3032 break; 3033 default:; 3034 } 3035 } 3036 3037 si_make_texture_descriptor(sctx->screen, tmp, true, 3038 state->target, pipe_format, state_swizzle, 3039 first_level, last_level, 3040 state->u.tex.first_layer, last_layer, 3041 width, height, depth, 3042 view->state, view->fmask_state); 3043 3044 view->base_level_info = &surflevel[base_level]; 3045 view->base_level = base_level; 3046 view->block_width = util_format_get_blockwidth(pipe_format); 3047 return &view->base; 3048} 3049 3050static struct pipe_sampler_view * 3051si_create_sampler_view(struct pipe_context *ctx, 3052 struct pipe_resource *texture, 3053 const struct pipe_sampler_view *state) 3054{ 3055 return si_create_sampler_view_custom(ctx, texture, state, 3056 texture ? texture->width0 : 0, 3057 texture ? 
texture->height0 : 0, 0); 3058} 3059 3060static void si_sampler_view_destroy(struct pipe_context *ctx, 3061 struct pipe_sampler_view *state) 3062{ 3063 struct si_sampler_view *view = (struct si_sampler_view *)state; 3064 3065 if (state->texture && state->texture->target == PIPE_BUFFER) 3066 LIST_DELINIT(&view->list); 3067 3068 pipe_resource_reference(&state->texture, NULL); 3069 FREE(view); 3070} 3071 3072static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter) 3073{ 3074 return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER || 3075 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER || 3076 (linear_filter && 3077 (wrap == PIPE_TEX_WRAP_CLAMP || 3078 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP)); 3079} 3080 3081static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state) 3082{ 3083 bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST || 3084 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST; 3085 3086 return (state->border_color.ui[0] || state->border_color.ui[1] || 3087 state->border_color.ui[2] || state->border_color.ui[3]) && 3088 (wrap_mode_uses_border_color(state->wrap_s, linear_filter) || 3089 wrap_mode_uses_border_color(state->wrap_t, linear_filter) || 3090 wrap_mode_uses_border_color(state->wrap_r, linear_filter)); 3091} 3092 3093static void *si_create_sampler_state(struct pipe_context *ctx, 3094 const struct pipe_sampler_state *state) 3095{ 3096 struct si_context *sctx = (struct si_context *)ctx; 3097 struct r600_common_screen *rscreen = sctx->b.screen; 3098 struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state); 3099 unsigned border_color_type, border_color_index = 0; 3100 unsigned max_aniso = rscreen->force_aniso >= 0 ? 
rscreen->force_aniso 3101 : state->max_anisotropy; 3102 unsigned max_aniso_ratio = r600_tex_aniso_filter(max_aniso); 3103 3104 if (!rstate) { 3105 return NULL; 3106 } 3107 3108 if (!sampler_state_needs_border_color(state)) 3109 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; 3110 else if (state->border_color.f[0] == 0 && 3111 state->border_color.f[1] == 0 && 3112 state->border_color.f[2] == 0 && 3113 state->border_color.f[3] == 0) 3114 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; 3115 else if (state->border_color.f[0] == 0 && 3116 state->border_color.f[1] == 0 && 3117 state->border_color.f[2] == 0 && 3118 state->border_color.f[3] == 1) 3119 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK; 3120 else if (state->border_color.f[0] == 1 && 3121 state->border_color.f[1] == 1 && 3122 state->border_color.f[2] == 1 && 3123 state->border_color.f[3] == 1) 3124 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE; 3125 else { 3126 int i; 3127 3128 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER; 3129 3130 /* Check if the border has been uploaded already. */ 3131 for (i = 0; i < sctx->border_color_count; i++) 3132 if (memcmp(&sctx->border_color_table[i], &state->border_color, 3133 sizeof(state->border_color)) == 0) 3134 break; 3135 3136 if (i >= SI_MAX_BORDER_COLORS) { 3137 /* Getting 4096 unique border colors is very unlikely. */ 3138 fprintf(stderr, "radeonsi: The border color table is full. " 3139 "Any new border colors will be just black. " 3140 "Please file a bug.\n"); 3141 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; 3142 } else { 3143 if (i == sctx->border_color_count) { 3144 /* Upload a new border color. 
*/ 3145 memcpy(&sctx->border_color_table[i], &state->border_color, 3146 sizeof(state->border_color)); 3147 util_memcpy_cpu_to_le32(&sctx->border_color_map[i], 3148 &state->border_color, 3149 sizeof(state->border_color)); 3150 sctx->border_color_count++; 3151 } 3152 3153 border_color_index = i; 3154 } 3155 } 3156 3157 rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) | 3158 S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) | 3159 S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) | 3160 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | 3161 S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) | 3162 S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) | 3163 S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) | 3164 S_008F30_COMPAT_MODE(sctx->b.chip_class >= VI)); 3165 rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | 3166 S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8))); 3167 rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | 3168 S_008F38_XY_MAG_FILTER(eg_tex_filter(state->mag_img_filter, max_aniso)) | 3169 S_008F38_XY_MIN_FILTER(eg_tex_filter(state->min_img_filter, max_aniso)) | 3170 S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) | 3171 S_008F38_MIP_POINT_PRECLAMP(1) | 3172 S_008F38_DISABLE_LSB_CEIL(1) | 3173 S_008F38_FILTER_PREC_FIX(1) | 3174 S_008F38_ANISO_OVERRIDE(sctx->b.chip_class >= VI)); 3175 rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) | 3176 S_008F3C_BORDER_COLOR_TYPE(border_color_type); 3177 return rstate; 3178} 3179 3180static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) 3181{ 3182 struct si_context *sctx = (struct si_context *)ctx; 3183 3184 if (sctx->sample_mask.sample_mask == (uint16_t)sample_mask) 3185 return; 3186 3187 sctx->sample_mask.sample_mask = sample_mask; 3188 si_mark_atom_dirty(sctx, &sctx->sample_mask.atom); 3189} 3190 3191static void si_emit_sample_mask(struct si_context *sctx, struct r600_atom 
*atom) 3192{ 3193 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 3194 unsigned mask = sctx->sample_mask.sample_mask; 3195 3196 /* Needed for line and polygon smoothing as well as for the Polaris 3197 * small primitive filter. We expect the state tracker to take care of 3198 * this for us. 3199 */ 3200 assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1); 3201 3202 radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); 3203 radeon_emit(cs, mask | (mask << 16)); 3204 radeon_emit(cs, mask | (mask << 16)); 3205} 3206 3207static void si_delete_sampler_state(struct pipe_context *ctx, void *state) 3208{ 3209 free(state); 3210} 3211 3212/* 3213 * Vertex elements & buffers 3214 */ 3215 3216static void *si_create_vertex_elements(struct pipe_context *ctx, 3217 unsigned count, 3218 const struct pipe_vertex_element *elements) 3219{ 3220 struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element); 3221 int i; 3222 3223 assert(count <= SI_MAX_ATTRIBS); 3224 if (!v) 3225 return NULL; 3226 3227 v->count = count; 3228 for (i = 0; i < count; ++i) { 3229 const struct util_format_description *desc; 3230 unsigned data_format, num_format; 3231 int first_non_void; 3232 3233 desc = util_format_description(elements[i].src_format); 3234 first_non_void = util_format_get_first_non_void_channel(elements[i].src_format); 3235 data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); 3236 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); 3237 3238 v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 3239 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 3240 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 3241 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | 3242 S_008F0C_NUM_FORMAT(num_format) | 3243 S_008F0C_DATA_FORMAT(data_format); 3244 v->format_size[i] = desc->block.bits / 8; 3245 } 3246 memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count); 3247 3248 return v; 
3249} 3250 3251static void si_bind_vertex_elements(struct pipe_context *ctx, void *state) 3252{ 3253 struct si_context *sctx = (struct si_context *)ctx; 3254 struct si_vertex_element *v = (struct si_vertex_element*)state; 3255 3256 sctx->vertex_elements = v; 3257 sctx->vertex_buffers_dirty = true; 3258} 3259 3260static void si_delete_vertex_element(struct pipe_context *ctx, void *state) 3261{ 3262 struct si_context *sctx = (struct si_context *)ctx; 3263 3264 if (sctx->vertex_elements == state) 3265 sctx->vertex_elements = NULL; 3266 FREE(state); 3267} 3268 3269static void si_set_vertex_buffers(struct pipe_context *ctx, 3270 unsigned start_slot, unsigned count, 3271 const struct pipe_vertex_buffer *buffers) 3272{ 3273 struct si_context *sctx = (struct si_context *)ctx; 3274 struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot; 3275 int i; 3276 3277 assert(start_slot + count <= ARRAY_SIZE(sctx->vertex_buffer)); 3278 3279 if (buffers) { 3280 for (i = 0; i < count; i++) { 3281 const struct pipe_vertex_buffer *src = buffers + i; 3282 struct pipe_vertex_buffer *dsti = dst + i; 3283 3284 pipe_resource_reference(&dsti->buffer, src->buffer); 3285 dsti->buffer_offset = src->buffer_offset; 3286 dsti->stride = src->stride; 3287 r600_context_add_resource_size(ctx, src->buffer); 3288 } 3289 } else { 3290 for (i = 0; i < count; i++) { 3291 pipe_resource_reference(&dst[i].buffer, NULL); 3292 } 3293 } 3294 sctx->vertex_buffers_dirty = true; 3295} 3296 3297static void si_set_index_buffer(struct pipe_context *ctx, 3298 const struct pipe_index_buffer *ib) 3299{ 3300 struct si_context *sctx = (struct si_context *)ctx; 3301 3302 if (ib) { 3303 pipe_resource_reference(&sctx->index_buffer.buffer, ib->buffer); 3304 memcpy(&sctx->index_buffer, ib, sizeof(*ib)); 3305 r600_context_add_resource_size(ctx, ib->buffer); 3306 } else { 3307 pipe_resource_reference(&sctx->index_buffer.buffer, NULL); 3308 } 3309} 3310 3311/* 3312 * Misc 3313 */ 3314 3315static void 
si_set_tess_state(struct pipe_context *ctx, 3316 const float default_outer_level[4], 3317 const float default_inner_level[2]) 3318{ 3319 struct si_context *sctx = (struct si_context *)ctx; 3320 struct pipe_constant_buffer cb; 3321 float array[8]; 3322 3323 memcpy(array, default_outer_level, sizeof(float) * 4); 3324 memcpy(array+4, default_inner_level, sizeof(float) * 2); 3325 3326 cb.buffer = NULL; 3327 cb.user_buffer = NULL; 3328 cb.buffer_size = sizeof(array); 3329 3330 si_upload_const_buffer(sctx, (struct r600_resource**)&cb.buffer, 3331 (void*)array, sizeof(array), 3332 &cb.buffer_offset); 3333 3334 si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb); 3335 pipe_resource_reference(&cb.buffer, NULL); 3336} 3337 3338static void si_texture_barrier(struct pipe_context *ctx) 3339{ 3340 struct si_context *sctx = (struct si_context *)ctx; 3341 3342 sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | 3343 SI_CONTEXT_INV_GLOBAL_L2 | 3344 SI_CONTEXT_FLUSH_AND_INV_CB | 3345 SI_CONTEXT_CS_PARTIAL_FLUSH; 3346} 3347 3348static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) 3349{ 3350 struct si_context *sctx = (struct si_context *)ctx; 3351 3352 /* Subsequent commands must wait for all shader invocations to 3353 * complete. */ 3354 sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | 3355 SI_CONTEXT_CS_PARTIAL_FLUSH; 3356 3357 if (flags & PIPE_BARRIER_CONSTANT_BUFFER) 3358 sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 | 3359 SI_CONTEXT_INV_VMEM_L1; 3360 3361 if (flags & (PIPE_BARRIER_VERTEX_BUFFER | 3362 PIPE_BARRIER_SHADER_BUFFER | 3363 PIPE_BARRIER_TEXTURE | 3364 PIPE_BARRIER_IMAGE | 3365 PIPE_BARRIER_STREAMOUT_BUFFER | 3366 PIPE_BARRIER_GLOBAL_BUFFER)) { 3367 /* As far as I can tell, L1 contents are written back to L2 3368 * automatically at end of shader, but the contents of other 3369 * L1 caches might still be stale. 
*/ 3370 sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1; 3371 } 3372 3373 if (flags & PIPE_BARRIER_INDEX_BUFFER) { 3374 sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1; 3375 3376 /* Indices are read through TC L2 since VI. */ 3377 if (sctx->screen->b.chip_class <= CIK) 3378 sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2; 3379 } 3380 3381 if (flags & PIPE_BARRIER_FRAMEBUFFER) 3382 sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; 3383 3384 if (flags & (PIPE_BARRIER_MAPPED_BUFFER | 3385 PIPE_BARRIER_FRAMEBUFFER | 3386 PIPE_BARRIER_INDIRECT_BUFFER)) { 3387 /* Not sure if INV_GLOBAL_L2 is the best thing here. 3388 * 3389 * We need to make sure that TC L1 & L2 are written back to 3390 * memory, because neither CPU accesses nor CB fetches consider 3391 * TC, but there's no need to invalidate any TC cache lines. */ 3392 sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2; 3393 } 3394} 3395 3396static void *si_create_blend_custom(struct si_context *sctx, unsigned mode) 3397{ 3398 struct pipe_blend_state blend; 3399 3400 memset(&blend, 0, sizeof(blend)); 3401 blend.independent_blend_enable = true; 3402 blend.rt[0].colormask = 0xf; 3403 return si_create_blend_state_mode(&sctx->b.b, &blend, mode); 3404} 3405 3406static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw, 3407 bool include_draw_vbo) 3408{ 3409 si_need_cs_space((struct si_context*)ctx); 3410} 3411 3412static void si_init_config(struct si_context *sctx); 3413 3414void si_init_state_functions(struct si_context *sctx) 3415{ 3416 si_init_external_atom(sctx, &sctx->b.render_cond_atom, &sctx->atoms.s.render_cond); 3417 si_init_external_atom(sctx, &sctx->b.streamout.begin_atom, &sctx->atoms.s.streamout_begin); 3418 si_init_external_atom(sctx, &sctx->b.streamout.enable_atom, &sctx->atoms.s.streamout_enable); 3419 si_init_external_atom(sctx, &sctx->b.scissors.atom, &sctx->atoms.s.scissors); 3420 si_init_external_atom(sctx, &sctx->b.viewports.atom, &sctx->atoms.s.viewports); 3421 3422 si_init_atom(sctx, &sctx->cache_flush, 
&sctx->atoms.s.cache_flush, si_emit_cache_flush); 3423 si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state); 3424 si_init_atom(sctx, &sctx->msaa_sample_locs.atom, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs); 3425 si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state); 3426 si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config); 3427 si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask); 3428 si_init_atom(sctx, &sctx->cb_render_state, &sctx->atoms.s.cb_render_state, si_emit_cb_render_state); 3429 si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color); 3430 si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs); 3431 si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state); 3432 si_init_atom(sctx, &sctx->stencil_ref.atom, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref); 3433 3434 sctx->b.b.create_blend_state = si_create_blend_state; 3435 sctx->b.b.bind_blend_state = si_bind_blend_state; 3436 sctx->b.b.delete_blend_state = si_delete_blend_state; 3437 sctx->b.b.set_blend_color = si_set_blend_color; 3438 3439 sctx->b.b.create_rasterizer_state = si_create_rs_state; 3440 sctx->b.b.bind_rasterizer_state = si_bind_rs_state; 3441 sctx->b.b.delete_rasterizer_state = si_delete_rs_state; 3442 3443 sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state; 3444 sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state; 3445 sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state; 3446 3447 sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx); 3448 sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE); 3449 sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS); 3450 sctx->custom_blend_fastclear = si_create_blend_custom(sctx, 
V_028808_CB_ELIMINATE_FAST_CLEAR); 3451 sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, V_028808_CB_DCC_DECOMPRESS); 3452 3453 sctx->b.b.set_clip_state = si_set_clip_state; 3454 sctx->b.b.set_stencil_ref = si_set_stencil_ref; 3455 3456 sctx->b.b.set_framebuffer_state = si_set_framebuffer_state; 3457 sctx->b.b.get_sample_position = cayman_get_sample_position; 3458 3459 sctx->b.b.create_sampler_state = si_create_sampler_state; 3460 sctx->b.b.delete_sampler_state = si_delete_sampler_state; 3461 3462 sctx->b.b.create_sampler_view = si_create_sampler_view; 3463 sctx->b.b.sampler_view_destroy = si_sampler_view_destroy; 3464 3465 sctx->b.b.set_sample_mask = si_set_sample_mask; 3466 3467 sctx->b.b.create_vertex_elements_state = si_create_vertex_elements; 3468 sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements; 3469 sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element; 3470 sctx->b.b.set_vertex_buffers = si_set_vertex_buffers; 3471 sctx->b.b.set_index_buffer = si_set_index_buffer; 3472 3473 sctx->b.b.texture_barrier = si_texture_barrier; 3474 sctx->b.b.memory_barrier = si_memory_barrier; 3475 sctx->b.b.set_min_samples = si_set_min_samples; 3476 sctx->b.b.set_tess_state = si_set_tess_state; 3477 3478 sctx->b.b.set_active_query_state = si_set_active_query_state; 3479 sctx->b.set_occlusion_query_state = si_set_occlusion_query_state; 3480 sctx->b.need_gfx_cs_space = si_need_gfx_cs_space; 3481 3482 sctx->b.b.draw_vbo = si_draw_vbo; 3483 3484 si_init_config(sctx); 3485} 3486 3487static uint32_t si_get_bo_metadata_word1(struct r600_common_screen *rscreen) 3488{ 3489 return (ATI_VENDOR_ID << 16) | rscreen->info.pci_id; 3490} 3491 3492static void si_query_opaque_metadata(struct r600_common_screen *rscreen, 3493 struct r600_texture *rtex, 3494 struct radeon_bo_metadata *md) 3495{ 3496 struct si_screen *sscreen = (struct si_screen*)rscreen; 3497 struct pipe_resource *res = &rtex->resource.b.b; 3498 static const unsigned char swizzle[] = { 3499 
PIPE_SWIZZLE_X, 3500 PIPE_SWIZZLE_Y, 3501 PIPE_SWIZZLE_Z, 3502 PIPE_SWIZZLE_W 3503 }; 3504 uint32_t desc[8], i; 3505 bool is_array = util_resource_is_array_texture(res); 3506 3507 /* DRM 2.x.x doesn't support this. */ 3508 if (rscreen->info.drm_major != 3) 3509 return; 3510 3511 assert(rtex->dcc_separate_buffer == NULL); 3512 assert(rtex->fmask.size == 0); 3513 3514 /* Metadata image format format version 1: 3515 * [0] = 1 (metadata format identifier) 3516 * [1] = (VENDOR_ID << 16) | PCI_ID 3517 * [2:9] = image descriptor for the whole resource 3518 * [2] is always 0, because the base address is cleared 3519 * [9] is the DCC offset bits [39:8] from the beginning of 3520 * the buffer 3521 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level 3522 */ 3523 3524 md->metadata[0] = 1; /* metadata image format version 1 */ 3525 3526 /* TILE_MODE_INDEX is ambiguous without a PCI ID. */ 3527 md->metadata[1] = si_get_bo_metadata_word1(rscreen); 3528 3529 si_make_texture_descriptor(sscreen, rtex, true, 3530 res->target, res->format, 3531 swizzle, 0, res->last_level, 0, 3532 is_array ? res->array_size - 1 : 0, 3533 res->width0, res->height0, res->depth0, 3534 desc, NULL); 3535 3536 si_set_mutable_tex_desc_fields(rtex, &rtex->surface.level[0], 0, 0, 3537 rtex->surface.blk_w, false, desc); 3538 3539 /* Clear the base address and set the relative DCC offset. */ 3540 desc[0] = 0; 3541 desc[1] &= C_008F14_BASE_ADDRESS_HI; 3542 desc[7] = rtex->dcc_offset >> 8; 3543 3544 /* Dwords [2:9] contain the image descriptor. */ 3545 memcpy(&md->metadata[2], desc, sizeof(desc)); 3546 3547 /* Dwords [10:..] contain the mipmap level offsets. 
*/ 3548 for (i = 0; i <= res->last_level; i++) 3549 md->metadata[10+i] = rtex->surface.level[i].offset >> 8; 3550 3551 md->size_metadata = (11 + res->last_level) * 4; 3552} 3553 3554static void si_apply_opaque_metadata(struct r600_common_screen *rscreen, 3555 struct r600_texture *rtex, 3556 struct radeon_bo_metadata *md) 3557{ 3558 uint32_t *desc = &md->metadata[2]; 3559 3560 if (rscreen->chip_class < VI) 3561 return; 3562 3563 /* Return if DCC is enabled. The texture should be set up with it 3564 * already. 3565 */ 3566 if (md->size_metadata >= 11 * 4 && 3567 md->metadata[0] != 0 && 3568 md->metadata[1] == si_get_bo_metadata_word1(rscreen) && 3569 G_008F28_COMPRESSION_EN(desc[6])) { 3570 assert(rtex->dcc_offset == ((uint64_t)desc[7] << 8)); 3571 return; 3572 } 3573 3574 /* Disable DCC. These are always set by texture_from_handle and must 3575 * be cleared here. 3576 */ 3577 rtex->dcc_offset = 0; 3578} 3579 3580void si_init_screen_state_functions(struct si_screen *sscreen) 3581{ 3582 sscreen->b.b.is_format_supported = si_is_format_supported; 3583 sscreen->b.query_opaque_metadata = si_query_opaque_metadata; 3584 sscreen->b.apply_opaque_metadata = si_apply_opaque_metadata; 3585} 3586 3587static void 3588si_write_harvested_raster_configs(struct si_context *sctx, 3589 struct si_pm4_state *pm4, 3590 unsigned raster_config, 3591 unsigned raster_config_1) 3592{ 3593 unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1); 3594 unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1); 3595 unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask; 3596 unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16); 3597 unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2); 3598 unsigned rb_per_se = num_rb / num_se; 3599 unsigned se_mask[4]; 3600 unsigned se; 3601 3602 se_mask[0] = ((1 << rb_per_se) - 1); 3603 se_mask[1] = (se_mask[0] << rb_per_se); 3604 se_mask[2] = (se_mask[1] << rb_per_se); 3605 se_mask[3] = (se_mask[2] << rb_per_se); 3606 3607 
se_mask[0] &= rb_mask; 3608 se_mask[1] &= rb_mask; 3609 se_mask[2] &= rb_mask; 3610 se_mask[3] &= rb_mask; 3611 3612 assert(num_se == 1 || num_se == 2 || num_se == 4); 3613 assert(sh_per_se == 1 || sh_per_se == 2); 3614 assert(rb_per_pkr == 1 || rb_per_pkr == 2); 3615 3616 /* XXX: I can't figure out what the *_XSEL and *_YSEL 3617 * fields are for, so I'm leaving them as their default 3618 * values. */ 3619 3620 for (se = 0; se < num_se; se++) { 3621 unsigned raster_config_se = raster_config; 3622 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); 3623 unsigned pkr1_mask = pkr0_mask << rb_per_pkr; 3624 int idx = (se / 2) * 2; 3625 3626 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { 3627 raster_config_se &= C_028350_SE_MAP; 3628 3629 if (!se_mask[idx]) { 3630 raster_config_se |= 3631 S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3); 3632 } else { 3633 raster_config_se |= 3634 S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0); 3635 } 3636 } 3637 3638 pkr0_mask &= rb_mask; 3639 pkr1_mask &= rb_mask; 3640 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { 3641 raster_config_se &= C_028350_PKR_MAP; 3642 3643 if (!pkr0_mask) { 3644 raster_config_se |= 3645 S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3); 3646 } else { 3647 raster_config_se |= 3648 S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0); 3649 } 3650 } 3651 3652 if (rb_per_se >= 2) { 3653 unsigned rb0_mask = 1 << (se * rb_per_se); 3654 unsigned rb1_mask = rb0_mask << 1; 3655 3656 rb0_mask &= rb_mask; 3657 rb1_mask &= rb_mask; 3658 if (!rb0_mask || !rb1_mask) { 3659 raster_config_se &= C_028350_RB_MAP_PKR0; 3660 3661 if (!rb0_mask) { 3662 raster_config_se |= 3663 S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3); 3664 } else { 3665 raster_config_se |= 3666 S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0); 3667 } 3668 } 3669 3670 if (rb_per_se > 2) { 3671 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); 3672 rb1_mask = rb0_mask << 1; 3673 rb0_mask &= rb_mask; 3674 rb1_mask &= 
rb_mask; 3675 if (!rb0_mask || !rb1_mask) { 3676 raster_config_se &= C_028350_RB_MAP_PKR1; 3677 3678 if (!rb0_mask) { 3679 raster_config_se |= 3680 S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3); 3681 } else { 3682 raster_config_se |= 3683 S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0); 3684 } 3685 } 3686 } 3687 } 3688 3689 /* GRBM_GFX_INDEX has a different offset on SI and CI+ */ 3690 if (sctx->b.chip_class < CIK) 3691 si_pm4_set_reg(pm4, GRBM_GFX_INDEX, 3692 SE_INDEX(se) | SH_BROADCAST_WRITES | 3693 INSTANCE_BROADCAST_WRITES); 3694 else 3695 si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX, 3696 S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) | 3697 S_030800_INSTANCE_BROADCAST_WRITES(1)); 3698 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se); 3699 } 3700 3701 /* GRBM_GFX_INDEX has a different offset on SI and CI+ */ 3702 if (sctx->b.chip_class < CIK) 3703 si_pm4_set_reg(pm4, GRBM_GFX_INDEX, 3704 SE_BROADCAST_WRITES | SH_BROADCAST_WRITES | 3705 INSTANCE_BROADCAST_WRITES); 3706 else { 3707 si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX, 3708 S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | 3709 S_030800_INSTANCE_BROADCAST_WRITES(1)); 3710 3711 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || 3712 (!se_mask[2] && !se_mask[3]))) { 3713 raster_config_1 &= C_028354_SE_PAIR_MAP; 3714 3715 if (!se_mask[0] && !se_mask[1]) { 3716 raster_config_1 |= 3717 S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3); 3718 } else { 3719 raster_config_1 |= 3720 S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0); 3721 } 3722 } 3723 3724 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); 3725 } 3726} 3727 3728static void si_init_config(struct si_context *sctx) 3729{ 3730 struct si_screen *sscreen = sctx->screen; 3731 unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16); 3732 unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask; 3733 unsigned raster_config, raster_config_1; 3734 
uint64_t border_color_va = sctx->border_color_buffer->gpu_address; 3735 struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); 3736 int i; 3737 3738 if (!pm4) 3739 return; 3740 3741 si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL); 3742 si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1)); 3743 si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1)); 3744 si_pm4_cmd_end(pm4, false); 3745 3746 si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); 3747 si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); 3748 3749 /* FIXME calculate these values somehow ??? */ 3750 si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES); 3751 si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40); 3752 si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2); 3753 3754 si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0); 3755 si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); 3756 3757 si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); 3758 si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0); 3759 if (sctx->b.chip_class < CIK) 3760 si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | 3761 S_008A14_CLIP_VTX_REORDER_ENA(1)); 3762 3763 si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210); 3764 si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98); 3765 3766 si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0); 3767 3768 for (i = 0; i < 16; i++) { 3769 si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, 0); 3770 si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, fui(1.0)); 3771 } 3772 3773 switch (sctx->screen->b.family) { 3774 case CHIP_TAHITI: 3775 case CHIP_PITCAIRN: 3776 raster_config = 0x2a00126a; 3777 raster_config_1 = 0x00000000; 3778 break; 3779 case CHIP_VERDE: 3780 raster_config = 0x0000124a; 3781 raster_config_1 = 0x00000000; 3782 break; 3783 case CHIP_OLAND: 3784 raster_config = 0x00000082; 3785 raster_config_1 = 0x00000000; 3786 break; 3787 case CHIP_HAINAN: 3788 raster_config = 0x00000000; 
3789 raster_config_1 = 0x00000000; 3790 break; 3791 case CHIP_BONAIRE: 3792 raster_config = 0x16000012; 3793 raster_config_1 = 0x00000000; 3794 break; 3795 case CHIP_HAWAII: 3796 raster_config = 0x3a00161a; 3797 raster_config_1 = 0x0000002e; 3798 break; 3799 case CHIP_FIJI: 3800 if (sscreen->b.info.cik_macrotile_mode_array[0] == 0x000000e8) { 3801 /* old kernels with old tiling config */ 3802 raster_config = 0x16000012; 3803 raster_config_1 = 0x0000002a; 3804 } else { 3805 raster_config = 0x3a00161a; 3806 raster_config_1 = 0x0000002e; 3807 } 3808 break; 3809 case CHIP_POLARIS10: 3810 raster_config = 0x16000012; 3811 raster_config_1 = 0x0000002a; 3812 break; 3813 case CHIP_POLARIS11: 3814 raster_config = 0x16000012; 3815 raster_config_1 = 0x00000000; 3816 break; 3817 case CHIP_TONGA: 3818 raster_config = 0x16000012; 3819 raster_config_1 = 0x0000002a; 3820 break; 3821 case CHIP_ICELAND: 3822 if (num_rb == 1) 3823 raster_config = 0x00000000; 3824 else 3825 raster_config = 0x00000002; 3826 raster_config_1 = 0x00000000; 3827 break; 3828 case CHIP_CARRIZO: 3829 raster_config = 0x00000002; 3830 raster_config_1 = 0x00000000; 3831 break; 3832 case CHIP_KAVERI: 3833 /* KV should be 0x00000002, but that causes problems with radeon */ 3834 raster_config = 0x00000000; /* 0x00000002 */ 3835 raster_config_1 = 0x00000000; 3836 break; 3837 case CHIP_KABINI: 3838 case CHIP_MULLINS: 3839 case CHIP_STONEY: 3840 raster_config = 0x00000000; 3841 raster_config_1 = 0x00000000; 3842 break; 3843 default: 3844 fprintf(stderr, 3845 "radeonsi: Unknown GPU, using 0 for raster_config\n"); 3846 raster_config = 0x00000000; 3847 raster_config_1 = 0x00000000; 3848 break; 3849 } 3850 3851 /* Always use the default config when all backends are enabled 3852 * (or when we failed to determine the enabled backends). 
3853 */ 3854 if (!rb_mask || util_bitcount(rb_mask) >= num_rb) { 3855 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 3856 raster_config); 3857 if (sctx->b.chip_class >= CIK) 3858 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 3859 raster_config_1); 3860 } else { 3861 si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1); 3862 } 3863 3864 si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); 3865 si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); 3866 si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR, 3867 S_028244_BR_X(16384) | S_028244_BR_Y(16384)); 3868 si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0); 3869 si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR, 3870 S_028034_BR_X(16384) | S_028034_BR_Y(16384)); 3871 3872 si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); 3873 si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 3874 S_028230_ER_TRI(0xA) | 3875 S_028230_ER_POINT(0xA) | 3876 S_028230_ER_RECT(0xA) | 3877 /* Required by DX10_DIAMOND_TEST_ENA: */ 3878 S_028230_ER_LINE_LR(0x1A) | 3879 S_028230_ER_LINE_RL(0x26) | 3880 S_028230_ER_LINE_TB(0xA) | 3881 S_028230_ER_LINE_BT(0xA)); 3882 /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */ 3883 si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); 3884 si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0); 3885 si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); 3886 si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); 3887 si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0); 3888 si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 3889 S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | 3890 S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE)); 3891 3892 si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0); 3893 si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0); 3894 si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0); 3895 3896 if (sctx->b.chip_class >= CIK) { 
3897 si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff)); 3898 si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0); 3899 si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff)); 3900 si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff)); 3901 3902 if (sscreen->b.info.num_good_compute_units / 3903 (sscreen->b.info.max_se * sscreen->b.info.max_sh_per_se) <= 4) { 3904 /* Too few available compute units per SH. Disallowing 3905 * VS to run on CU0 could hurt us more than late VS 3906 * allocation would help. 3907 * 3908 * LATE_ALLOC_VS = 2 is the highest safe number. 3909 */ 3910 si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff)); 3911 si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2)); 3912 } else { 3913 /* Set LATE_ALLOC_VS == 31. It should be less than 3914 * the number of scratch waves. Limitations: 3915 * - VS can't execute on CU0. 3916 * - If HS writes outputs to LDS, LS can't execute on CU0. 3917 */ 3918 si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe)); 3919 si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31)); 3920 } 3921 3922 si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff)); 3923 } 3924 3925 if (sctx->b.chip_class >= VI) { 3926 unsigned vgt_tess_distribution; 3927 3928 si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL, 3929 S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) | 3930 S_028424_OVERWRITE_COMBINER_WATERMARK(4)); 3931 if (sctx->b.family < CHIP_POLARIS10) 3932 si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30); 3933 si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32); 3934 3935 vgt_tess_distribution = 3936 S_028B50_ACCUM_ISOLINE(32) | 3937 S_028B50_ACCUM_TRI(11) | 3938 S_028B50_ACCUM_QUAD(11) | 3939 S_028B50_DONUT_SPLIT(16); 3940 3941 /* Testing with Unigine Heaven extreme tesselation yielded best results 3942 * with TRAP_SPLIT = 3. 
3943 */ 3944 if (sctx->b.family == CHIP_FIJI || 3945 sctx->b.family >= CHIP_POLARIS10) 3946 vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3); 3947 3948 si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution); 3949 } else { 3950 si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); 3951 si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16); 3952 } 3953 3954 if (sctx->b.family == CHIP_STONEY) 3955 si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0); 3956 3957 if (sctx->b.family >= CHIP_POLARIS10) 3958 si_pm4_set_reg(pm4, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, 3959 S_028830_SMALL_PRIM_FILTER_ENABLE(1) | 3960 S_028830_LINE_FILTER_DISABLE(1)); /* line bug */ 3961 3962 si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); 3963 if (sctx->b.chip_class >= CIK) 3964 si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40); 3965 si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ, 3966 RADEON_PRIO_BORDER_COLORS); 3967 3968 si_pm4_upload_indirect_buffer(sctx, pm4); 3969 sctx->init_config = pm4; 3970} 3971