si_state.c revision 1e864d73799cfbcb29c4f22722b908bc39643347
1/* 2 * Copyright 2012 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Christian König <christian.koenig@amd.com> 25 */ 26 27#include "si_pipe.h" 28#include "si_shader.h" 29#include "sid.h" 30#include "radeon/r600_cs.h" 31 32#include "util/u_dual_blend.h" 33#include "util/u_format.h" 34#include "util/u_format_s3tc.h" 35#include "util/u_memory.h" 36#include "util/u_pstipple.h" 37 38/* Initialize an external atom (owned by ../radeon). */ 39static void 40si_init_external_atom(struct si_context *sctx, struct r600_atom *atom, 41 struct r600_atom **list_elem) 42{ 43 atom->id = list_elem - sctx->atoms.array + 1; 44 *list_elem = atom; 45} 46 47/* Initialize an atom owned by radeonsi. 
*/ 48void si_init_atom(struct si_context *sctx, struct r600_atom *atom, 49 struct r600_atom **list_elem, 50 void (*emit_func)(struct si_context *ctx, struct r600_atom *state)) 51{ 52 atom->emit = (void*)emit_func; 53 atom->id = list_elem - sctx->atoms.array + 1; /* index+1 in the atom array */ 54 *list_elem = atom; 55} 56 57unsigned si_array_mode(unsigned mode) 58{ 59 switch (mode) { 60 case RADEON_SURF_MODE_LINEAR_ALIGNED: 61 return V_009910_ARRAY_LINEAR_ALIGNED; 62 case RADEON_SURF_MODE_1D: 63 return V_009910_ARRAY_1D_TILED_THIN1; 64 case RADEON_SURF_MODE_2D: 65 return V_009910_ARRAY_2D_TILED_THIN1; 66 default: 67 case RADEON_SURF_MODE_LINEAR: 68 return V_009910_ARRAY_LINEAR_GENERAL; 69 } 70} 71 72uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex) 73{ 74 if (sscreen->b.chip_class >= CIK && 75 sscreen->b.info.cik_macrotile_mode_array_valid) { 76 unsigned index, tileb; 77 78 tileb = 8 * 8 * tex->surface.bpe; 79 tileb = MIN2(tex->surface.tile_split, tileb); 80 81 for (index = 0; tileb > 64; index++) { 82 tileb >>= 1; 83 } 84 assert(index < 16); 85 86 return (sscreen->b.info.cik_macrotile_mode_array[index] >> 6) & 0x3; 87 } 88 89 if (sscreen->b.chip_class == SI && 90 sscreen->b.info.si_tile_mode_array_valid) { 91 /* Don't use stencil_tiling_index, because num_banks is always 92 * read from the depth mode. */ 93 unsigned tile_mode_index = tex->surface.tiling_index[0]; 94 assert(tile_mode_index < 32); 95 96 return G_009910_NUM_BANKS(sscreen->b.info.si_tile_mode_array[tile_mode_index]); 97 } 98 99 /* The old way. 
*/ 100 switch (sscreen->b.tiling_info.num_banks) { 101 case 2: 102 return V_02803C_ADDR_SURF_2_BANK; 103 case 4: 104 return V_02803C_ADDR_SURF_4_BANK; 105 case 8: 106 default: 107 return V_02803C_ADDR_SURF_8_BANK; 108 case 16: 109 return V_02803C_ADDR_SURF_16_BANK; 110 } 111} 112 113unsigned cik_tile_split(unsigned tile_split) 114{ 115 switch (tile_split) { 116 case 64: 117 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_64B; 118 break; 119 case 128: 120 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_128B; 121 break; 122 case 256: 123 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_256B; 124 break; 125 case 512: 126 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_512B; 127 break; 128 default: 129 case 1024: 130 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_1KB; 131 break; 132 case 2048: 133 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_2KB; 134 break; 135 case 4096: 136 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_4KB; 137 break; 138 } 139 return tile_split; 140} 141 142unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect) 143{ 144 switch (macro_tile_aspect) { 145 default: 146 case 1: 147 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_1; 148 break; 149 case 2: 150 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_2; 151 break; 152 case 4: 153 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_4; 154 break; 155 case 8: 156 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_8; 157 break; 158 } 159 return macro_tile_aspect; 160} 161 162unsigned cik_bank_wh(unsigned bankwh) 163{ 164 switch (bankwh) { 165 default: 166 case 1: 167 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_1; 168 break; 169 case 2: 170 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_2; 171 break; 172 case 4: 173 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_4; 174 break; 175 case 8: 176 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_8; 177 break; 178 } 179 return bankwh; 180} 181 182unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode) 183{ 184 if (sscreen->b.info.si_tile_mode_array_valid) { 185 uint32_t gb_tile_mode = 
sscreen->b.info.si_tile_mode_array[tile_mode]; 186 187 return G_009910_PIPE_CONFIG(gb_tile_mode); 188 } 189 190 /* This is probably broken for a lot of chips, but it's only used 191 * if the kernel cannot return the tile mode array for CIK. */ 192 switch (sscreen->b.info.num_tile_pipes) { 193 case 16: 194 return V_02803C_X_ADDR_SURF_P16_32X32_16X16; 195 case 8: 196 return V_02803C_X_ADDR_SURF_P8_32X32_16X16; 197 case 4: 198 default: 199 if (sscreen->b.info.num_render_backends == 4) 200 return V_02803C_X_ADDR_SURF_P4_16X16; 201 else 202 return V_02803C_X_ADDR_SURF_P4_8X16; 203 case 2: 204 return V_02803C_ADDR_SURF_P2; 205 } 206} 207 208static unsigned si_map_swizzle(unsigned swizzle) 209{ 210 switch (swizzle) { 211 case UTIL_FORMAT_SWIZZLE_Y: 212 return V_008F0C_SQ_SEL_Y; 213 case UTIL_FORMAT_SWIZZLE_Z: 214 return V_008F0C_SQ_SEL_Z; 215 case UTIL_FORMAT_SWIZZLE_W: 216 return V_008F0C_SQ_SEL_W; 217 case UTIL_FORMAT_SWIZZLE_0: 218 return V_008F0C_SQ_SEL_0; 219 case UTIL_FORMAT_SWIZZLE_1: 220 return V_008F0C_SQ_SEL_1; 221 default: /* UTIL_FORMAT_SWIZZLE_X */ 222 return V_008F0C_SQ_SEL_X; 223 } 224} 225 226static uint32_t S_FIXED(float value, uint32_t frac_bits) 227{ 228 return value * (1 << frac_bits); 229} 230 231/* 12.4 fixed-point */ 232static unsigned si_pack_float_12p4(float x) 233{ 234 return x <= 0 ? 0 : 235 x >= 4096 ? 0xffff : x * 16; 236} 237 238/* 239 * Inferred framebuffer and blender state. 240 * 241 * One of the reasons CB_TARGET_MASK must be derived from the framebuffer state 242 * is that: 243 * - The blend state mask is 0xf most of the time. 244 * - The COLOR1 format isn't INVALID because of possible dual-source blending, 245 * so COLOR1 is enabled pretty much all the time. 246 * So CB_TARGET_MASK is the only register that can disable COLOR1. 247 * 248 * Another reason is to avoid a hang with dual source blending. 
249 */ 250static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *atom) 251{ 252 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 253 struct si_state_blend *blend = sctx->queued.named.blend; 254 uint32_t cb_target_mask = 0, i; 255 256 for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) 257 if (sctx->framebuffer.state.cbufs[i]) 258 cb_target_mask |= 0xf << (4*i); 259 260 if (blend) 261 cb_target_mask &= blend->cb_target_mask; 262 263 /* Avoid a hang that happens when dual source blending is enabled 264 * but there is not enough color outputs. This is undefined behavior, 265 * so disable color writes completely. 266 * 267 * Reproducible with Unigine Heaven 4.0 and drirc missing. 268 */ 269 if (blend && blend->dual_src_blend && 270 sctx->ps_shader.cso && 271 (sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3) 272 cb_target_mask = 0; 273 274 radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, cb_target_mask); 275 276 /* STONEY-specific register settings. */ 277 if (sctx->b.family == CHIP_STONEY) { 278 unsigned spi_shader_col_format = 279 sctx->ps_shader.cso ? 280 sctx->ps_shader.current->key.ps.spi_shader_col_format : 0; 281 unsigned sx_ps_downconvert = 0; 282 unsigned sx_blend_opt_epsilon = 0; 283 unsigned sx_blend_opt_control = 0; 284 285 for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 286 struct r600_surface *surf = 287 (struct r600_surface*)sctx->framebuffer.state.cbufs[i]; 288 unsigned format, swap, spi_format, colormask; 289 bool has_alpha, has_rgb; 290 291 if (!surf) 292 continue; 293 294 format = G_028C70_FORMAT(surf->cb_color_info); 295 swap = G_028C70_COMP_SWAP(surf->cb_color_info); 296 spi_format = (spi_shader_col_format >> (i * 4)) & 0xf; 297 colormask = (cb_target_mask >> (i * 4)) & 0xf; 298 299 /* Set if RGB and A are present. 
*/ 300 has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib); 301 302 if (format == V_028C70_COLOR_8 || 303 format == V_028C70_COLOR_16 || 304 format == V_028C70_COLOR_32) 305 has_rgb = !has_alpha; 306 else 307 has_rgb = true; 308 309 /* Check the colormask and export format. */ 310 if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A))) 311 has_rgb = false; 312 if (!(colormask & PIPE_MASK_A)) 313 has_alpha = false; 314 315 if (spi_format == V_028714_SPI_SHADER_ZERO) { 316 has_rgb = false; 317 has_alpha = false; 318 } 319 320 /* Disable value checking for disabled channels. */ 321 if (!has_rgb) 322 sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4); 323 if (!has_alpha) 324 sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4); 325 326 /* Enable down-conversion for 32bpp and smaller formats. */ 327 switch (format) { 328 case V_028C70_COLOR_8: 329 case V_028C70_COLOR_8_8: 330 case V_028C70_COLOR_8_8_8_8: 331 /* For 1 and 2-channel formats, use the superset thereof. 
*/ 332 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR || 333 spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 334 spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 335 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4); 336 sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4); 337 } 338 break; 339 340 case V_028C70_COLOR_5_6_5: 341 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 342 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4); 343 sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4); 344 } 345 break; 346 347 case V_028C70_COLOR_1_5_5_5: 348 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 349 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4); 350 sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4); 351 } 352 break; 353 354 case V_028C70_COLOR_4_4_4_4: 355 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 356 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4); 357 sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4); 358 } 359 break; 360 361 case V_028C70_COLOR_32: 362 if (swap == V_0280A0_SWAP_STD && 363 spi_format == V_028714_SPI_SHADER_32_R) 364 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); 365 else if (swap == V_0280A0_SWAP_ALT_REV && 366 spi_format == V_028714_SPI_SHADER_32_AR) 367 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4); 368 break; 369 370 case V_028C70_COLOR_16: 371 case V_028C70_COLOR_16_16: 372 /* For 1-channel formats, use the superset thereof. 
*/ 373 if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR || 374 spi_format == V_028714_SPI_SHADER_SNORM16_ABGR || 375 spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 376 spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 377 if (swap == V_0280A0_SWAP_STD || 378 swap == V_0280A0_SWAP_STD_REV) 379 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4); 380 else 381 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4); 382 } 383 break; 384 385 case V_028C70_COLOR_10_11_11: 386 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 387 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4); 388 sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4); 389 } 390 break; 391 392 case V_028C70_COLOR_2_10_10_10: 393 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 394 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4); 395 sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4); 396 } 397 break; 398 } 399 } 400 401 if (sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) { 402 sx_ps_downconvert = 0; 403 sx_blend_opt_epsilon = 0; 404 sx_blend_opt_control = 0; 405 } 406 407 radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3); 408 radeon_emit(cs, sx_ps_downconvert); /* R_028754_SX_PS_DOWNCONVERT */ 409 radeon_emit(cs, sx_blend_opt_epsilon); /* R_028758_SX_BLEND_OPT_EPSILON */ 410 radeon_emit(cs, sx_blend_opt_control); /* R_02875C_SX_BLEND_OPT_CONTROL */ 411 } 412} 413 414/* 415 * Blender functions 416 */ 417 418static uint32_t si_translate_blend_function(int blend_func) 419{ 420 switch (blend_func) { 421 case PIPE_BLEND_ADD: 422 return V_028780_COMB_DST_PLUS_SRC; 423 case PIPE_BLEND_SUBTRACT: 424 return V_028780_COMB_SRC_MINUS_DST; 425 case PIPE_BLEND_REVERSE_SUBTRACT: 426 return V_028780_COMB_DST_MINUS_SRC; 427 case PIPE_BLEND_MIN: 428 return V_028780_COMB_MIN_DST_SRC; 429 case PIPE_BLEND_MAX: 430 return V_028780_COMB_MAX_DST_SRC; 431 default: 432 R600_ERR("Unknown blend function %d\n", blend_func); 433 assert(0); 434 break; 435 } 436 
return 0; 437} 438 439static uint32_t si_translate_blend_factor(int blend_fact) 440{ 441 switch (blend_fact) { 442 case PIPE_BLENDFACTOR_ONE: 443 return V_028780_BLEND_ONE; 444 case PIPE_BLENDFACTOR_SRC_COLOR: 445 return V_028780_BLEND_SRC_COLOR; 446 case PIPE_BLENDFACTOR_SRC_ALPHA: 447 return V_028780_BLEND_SRC_ALPHA; 448 case PIPE_BLENDFACTOR_DST_ALPHA: 449 return V_028780_BLEND_DST_ALPHA; 450 case PIPE_BLENDFACTOR_DST_COLOR: 451 return V_028780_BLEND_DST_COLOR; 452 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 453 return V_028780_BLEND_SRC_ALPHA_SATURATE; 454 case PIPE_BLENDFACTOR_CONST_COLOR: 455 return V_028780_BLEND_CONSTANT_COLOR; 456 case PIPE_BLENDFACTOR_CONST_ALPHA: 457 return V_028780_BLEND_CONSTANT_ALPHA; 458 case PIPE_BLENDFACTOR_ZERO: 459 return V_028780_BLEND_ZERO; 460 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 461 return V_028780_BLEND_ONE_MINUS_SRC_COLOR; 462 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 463 return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; 464 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 465 return V_028780_BLEND_ONE_MINUS_DST_ALPHA; 466 case PIPE_BLENDFACTOR_INV_DST_COLOR: 467 return V_028780_BLEND_ONE_MINUS_DST_COLOR; 468 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 469 return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR; 470 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 471 return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA; 472 case PIPE_BLENDFACTOR_SRC1_COLOR: 473 return V_028780_BLEND_SRC1_COLOR; 474 case PIPE_BLENDFACTOR_SRC1_ALPHA: 475 return V_028780_BLEND_SRC1_ALPHA; 476 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 477 return V_028780_BLEND_INV_SRC1_COLOR; 478 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 479 return V_028780_BLEND_INV_SRC1_ALPHA; 480 default: 481 R600_ERR("Bad blend factor %d not supported!\n", blend_fact); 482 assert(0); 483 break; 484 } 485 return 0; 486} 487 488static uint32_t si_translate_blend_opt_function(int blend_func) 489{ 490 switch (blend_func) { 491 case PIPE_BLEND_ADD: 492 return V_028760_OPT_COMB_ADD; 493 case PIPE_BLEND_SUBTRACT: 494 return 
V_028760_OPT_COMB_SUBTRACT; 495 case PIPE_BLEND_REVERSE_SUBTRACT: 496 return V_028760_OPT_COMB_REVSUBTRACT; 497 case PIPE_BLEND_MIN: 498 return V_028760_OPT_COMB_MIN; 499 case PIPE_BLEND_MAX: 500 return V_028760_OPT_COMB_MAX; 501 default: 502 return V_028760_OPT_COMB_BLEND_DISABLED; 503 } 504} 505 506static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha) 507{ 508 switch (blend_fact) { 509 case PIPE_BLENDFACTOR_ZERO: 510 return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL; 511 case PIPE_BLENDFACTOR_ONE: 512 return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE; 513 case PIPE_BLENDFACTOR_SRC_COLOR: 514 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0 515 : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0; 516 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 517 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1 518 : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1; 519 case PIPE_BLENDFACTOR_SRC_ALPHA: 520 return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0; 521 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 522 return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1; 523 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 524 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE 525 : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 526 default: 527 return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 528 } 529} 530 531/** 532 * Get rid of DST in the blend factors by commuting the operands: 533 * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 534 */ 535static void si_blend_remove_dst(unsigned *func, unsigned *src_factor, 536 unsigned *dst_factor, unsigned expected_dst, 537 unsigned replacement_src) 538{ 539 if (*src_factor == expected_dst && 540 *dst_factor == PIPE_BLENDFACTOR_ZERO) { 541 *src_factor = PIPE_BLENDFACTOR_ZERO; 542 *dst_factor = replacement_src; 543 544 /* Commuting the operands requires reversing subtractions. 
*/ 545 if (*func == PIPE_BLEND_SUBTRACT) 546 *func = PIPE_BLEND_REVERSE_SUBTRACT; 547 else if (*func == PIPE_BLEND_REVERSE_SUBTRACT) 548 *func = PIPE_BLEND_SUBTRACT; 549 } 550} 551 552static bool si_blend_factor_uses_dst(unsigned factor) 553{ 554 return factor == PIPE_BLENDFACTOR_DST_COLOR || 555 factor == PIPE_BLENDFACTOR_DST_ALPHA || 556 factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 557 factor == PIPE_BLENDFACTOR_INV_DST_ALPHA || 558 factor == PIPE_BLENDFACTOR_INV_DST_COLOR; 559} 560 561static void *si_create_blend_state_mode(struct pipe_context *ctx, 562 const struct pipe_blend_state *state, 563 unsigned mode) 564{ 565 struct si_context *sctx = (struct si_context*)ctx; 566 struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend); 567 struct si_pm4_state *pm4 = &blend->pm4; 568 uint32_t sx_mrt_blend_opt[8] = {0}; 569 uint32_t color_control = 0; 570 571 if (!blend) 572 return NULL; 573 574 blend->alpha_to_coverage = state->alpha_to_coverage; 575 blend->alpha_to_one = state->alpha_to_one; 576 blend->dual_src_blend = util_blend_state_is_dual(state, 0); 577 578 if (state->logicop_enable) { 579 color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4)); 580 } else { 581 color_control |= S_028808_ROP3(0xcc); 582 } 583 584 si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 585 S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 586 S_028B70_ALPHA_TO_MASK_OFFSET0(2) | 587 S_028B70_ALPHA_TO_MASK_OFFSET1(2) | 588 S_028B70_ALPHA_TO_MASK_OFFSET2(2) | 589 S_028B70_ALPHA_TO_MASK_OFFSET3(2)); 590 591 if (state->alpha_to_coverage) 592 blend->need_src_alpha_4bit |= 0xf; 593 594 blend->cb_target_mask = 0; 595 for (int i = 0; i < 8; i++) { 596 /* state->rt entries > 0 only written if independent blending */ 597 const int j = state->independent_blend_enable ? 
i : 0; 598 599 unsigned eqRGB = state->rt[j].rgb_func; 600 unsigned srcRGB = state->rt[j].rgb_src_factor; 601 unsigned dstRGB = state->rt[j].rgb_dst_factor; 602 unsigned eqA = state->rt[j].alpha_func; 603 unsigned srcA = state->rt[j].alpha_src_factor; 604 unsigned dstA = state->rt[j].alpha_dst_factor; 605 606 unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt; 607 unsigned blend_cntl = 0; 608 609 sx_mrt_blend_opt[i] = 610 S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | 611 S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED); 612 613 if (!state->rt[j].colormask) 614 continue; 615 616 /* cb_render_state will disable unused ones */ 617 blend->cb_target_mask |= state->rt[j].colormask << (4 * i); 618 619 if (!state->rt[j].blend_enable) { 620 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 621 continue; 622 } 623 624 /* Blending optimizations for Stoney. 625 * These transformations don't change the behavior. 626 * 627 * First, get rid of DST in the blend factors: 628 * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 629 */ 630 si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, 631 PIPE_BLENDFACTOR_DST_COLOR, 632 PIPE_BLENDFACTOR_SRC_COLOR); 633 si_blend_remove_dst(&eqA, &srcA, &dstA, 634 PIPE_BLENDFACTOR_DST_COLOR, 635 PIPE_BLENDFACTOR_SRC_COLOR); 636 si_blend_remove_dst(&eqA, &srcA, &dstA, 637 PIPE_BLENDFACTOR_DST_ALPHA, 638 PIPE_BLENDFACTOR_SRC_ALPHA); 639 640 /* Look up the ideal settings from tables. */ 641 srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false); 642 dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false); 643 srcA_opt = si_translate_blend_opt_factor(srcA, true); 644 dstA_opt = si_translate_blend_opt_factor(dstA, true); 645 646 /* Handle interdependencies. 
*/ 647 if (si_blend_factor_uses_dst(srcRGB)) 648 dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 649 if (si_blend_factor_uses_dst(srcA)) 650 dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 651 652 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE && 653 (dstRGB == PIPE_BLENDFACTOR_ZERO || 654 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 655 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)) 656 dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 657 658 /* Set the final value. */ 659 sx_mrt_blend_opt[i] = 660 S_028760_COLOR_SRC_OPT(srcRGB_opt) | 661 S_028760_COLOR_DST_OPT(dstRGB_opt) | 662 S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) | 663 S_028760_ALPHA_SRC_OPT(srcA_opt) | 664 S_028760_ALPHA_DST_OPT(dstA_opt) | 665 S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA)); 666 667 /* Set blend state. */ 668 blend_cntl |= S_028780_ENABLE(1); 669 blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB)); 670 blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB)); 671 blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB)); 672 673 if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { 674 blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1); 675 blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA)); 676 blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA)); 677 blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA)); 678 } 679 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 680 681 blend->blend_enable_4bit |= 0xf << (i * 4); 682 683 /* This is only important for formats without alpha. 
*/ 684 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 685 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 686 srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 687 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 688 srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || 689 dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA) 690 blend->need_src_alpha_4bit |= 0xf << (i * 4); 691 } 692 693 if (blend->cb_target_mask) { 694 color_control |= S_028808_MODE(mode); 695 } else { 696 color_control |= S_028808_MODE(V_028808_CB_DISABLE); 697 } 698 699 if (sctx->b.family == CHIP_STONEY) { 700 for (int i = 0; i < 8; i++) 701 si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4, 702 sx_mrt_blend_opt[i]); 703 704 /* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */ 705 if (blend->dual_src_blend || state->logicop_enable || 706 mode == V_028808_CB_RESOLVE) 707 color_control |= S_028808_DISABLE_DUAL_QUAD(1); 708 } 709 710 si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control); 711 return blend; 712} 713 714static void *si_create_blend_state(struct pipe_context *ctx, 715 const struct pipe_blend_state *state) 716{ 717 return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); 718} 719 720static void si_bind_blend_state(struct pipe_context *ctx, void *state) 721{ 722 struct si_context *sctx = (struct si_context *)ctx; 723 si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state); 724 si_mark_atom_dirty(sctx, &sctx->cb_render_state); 725} 726 727static void si_delete_blend_state(struct pipe_context *ctx, void *state) 728{ 729 struct si_context *sctx = (struct si_context *)ctx; 730 si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state); 731} 732 733static void si_set_blend_color(struct pipe_context *ctx, 734 const struct pipe_blend_color *state) 735{ 736 struct si_context *sctx = (struct si_context *)ctx; 737 738 if (memcmp(&sctx->blend_color.state, state, sizeof(*state)) == 0) 739 return; 740 741 sctx->blend_color.state = *state; 742 si_mark_atom_dirty(sctx, 
&sctx->blend_color.atom); 743} 744 745static void si_emit_blend_color(struct si_context *sctx, struct r600_atom *atom) 746{ 747 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 748 749 radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4); 750 radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4); 751} 752 753/* 754 * Clipping, scissors and viewport 755 */ 756 757static void si_set_clip_state(struct pipe_context *ctx, 758 const struct pipe_clip_state *state) 759{ 760 struct si_context *sctx = (struct si_context *)ctx; 761 struct pipe_constant_buffer cb; 762 763 if (memcmp(&sctx->clip_state.state, state, sizeof(*state)) == 0) 764 return; 765 766 sctx->clip_state.state = *state; 767 si_mark_atom_dirty(sctx, &sctx->clip_state.atom); 768 769 cb.buffer = NULL; 770 cb.user_buffer = state->ucp; 771 cb.buffer_offset = 0; 772 cb.buffer_size = 4*4*8; 773 ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, SI_DRIVER_STATE_CONST_BUF, &cb); 774 pipe_resource_reference(&cb.buffer, NULL); 775} 776 777static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom) 778{ 779 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 780 781 radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4); 782 radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4); 783} 784 785#define SIX_BITS 0x3F 786 787static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom) 788{ 789 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 790 struct tgsi_shader_info *info = si_get_vs_info(sctx); 791 unsigned window_space = 792 info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; 793 unsigned clipdist_mask = 794 info->writes_clipvertex ? 
SIX_BITS : info->clipdist_writemask; 795 796 radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL, 797 S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) | 798 S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) | 799 S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) | 800 S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) | 801 S_02881C_VS_OUT_CCDIST0_VEC_ENA((clipdist_mask & 0x0F) != 0) | 802 S_02881C_VS_OUT_CCDIST1_VEC_ENA((clipdist_mask & 0xF0) != 0) | 803 S_02881C_VS_OUT_MISC_VEC_ENA(info->writes_psize || 804 info->writes_edgeflag || 805 info->writes_layer || 806 info->writes_viewport_index) | 807 S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1) | 808 (sctx->queued.named.rasterizer->clip_plane_enable & 809 clipdist_mask)); 810 radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL, 811 sctx->queued.named.rasterizer->pa_cl_clip_cntl | 812 (clipdist_mask ? 0 : 813 sctx->queued.named.rasterizer->clip_plane_enable & SIX_BITS) | 814 S_028810_CLIP_DISABLE(window_space)); 815 816 /* reuse needs to be set off if we write oViewport */ 817 radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF, 818 S_028AB4_REUSE_OFF(info->writes_viewport_index)); 819} 820 821static void si_set_scissor_states(struct pipe_context *ctx, 822 unsigned start_slot, 823 unsigned num_scissors, 824 const struct pipe_scissor_state *state) 825{ 826 struct si_context *sctx = (struct si_context *)ctx; 827 int i; 828 829 for (i = 0; i < num_scissors; i++) 830 sctx->scissors.states[start_slot + i] = state[i]; 831 832 sctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot; 833 si_mark_atom_dirty(sctx, &sctx->scissors.atom); 834} 835 836static void si_emit_scissors(struct si_context *sctx, struct r600_atom *atom) 837{ 838 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 839 struct pipe_scissor_state *states = sctx->scissors.states; 840 unsigned mask = sctx->scissors.dirty_mask; 841 842 /* The simple case: Only 1 viewport is active. 
*/ 843 if (mask & 1 && 844 !si_get_vs_info(sctx)->writes_viewport_index) { 845 radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2); 846 radeon_emit(cs, S_028250_TL_X(states[0].minx) | 847 S_028250_TL_Y(states[0].miny) | 848 S_028250_WINDOW_OFFSET_DISABLE(1)); 849 radeon_emit(cs, S_028254_BR_X(states[0].maxx) | 850 S_028254_BR_Y(states[0].maxy)); 851 sctx->scissors.dirty_mask &= ~1; /* clear one bit */ 852 return; 853 } 854 855 while (mask) { 856 int start, count, i; 857 858 u_bit_scan_consecutive_range(&mask, &start, &count); 859 860 radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + 861 start * 4 * 2, count * 2); 862 for (i = start; i < start+count; i++) { 863 radeon_emit(cs, S_028250_TL_X(states[i].minx) | 864 S_028250_TL_Y(states[i].miny) | 865 S_028250_WINDOW_OFFSET_DISABLE(1)); 866 radeon_emit(cs, S_028254_BR_X(states[i].maxx) | 867 S_028254_BR_Y(states[i].maxy)); 868 } 869 } 870 sctx->scissors.dirty_mask = 0; 871} 872 873static void si_set_viewport_states(struct pipe_context *ctx, 874 unsigned start_slot, 875 unsigned num_viewports, 876 const struct pipe_viewport_state *state) 877{ 878 struct si_context *sctx = (struct si_context *)ctx; 879 int i; 880 881 for (i = 0; i < num_viewports; i++) 882 sctx->viewports.states[start_slot + i] = state[i]; 883 884 sctx->viewports.dirty_mask |= ((1 << num_viewports) - 1) << start_slot; 885 si_mark_atom_dirty(sctx, &sctx->viewports.atom); 886} 887 888static void si_emit_viewports(struct si_context *sctx, struct r600_atom *atom) 889{ 890 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 891 struct pipe_viewport_state *states = sctx->viewports.states; 892 unsigned mask = sctx->viewports.dirty_mask; 893 894 /* The simple case: Only 1 viewport is active. 
*/ 895 if (mask & 1 && 896 !si_get_vs_info(sctx)->writes_viewport_index) { 897 radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6); 898 radeon_emit(cs, fui(states[0].scale[0])); 899 radeon_emit(cs, fui(states[0].translate[0])); 900 radeon_emit(cs, fui(states[0].scale[1])); 901 radeon_emit(cs, fui(states[0].translate[1])); 902 radeon_emit(cs, fui(states[0].scale[2])); 903 radeon_emit(cs, fui(states[0].translate[2])); 904 sctx->viewports.dirty_mask &= ~1; /* clear one bit */ 905 return; 906 } 907 908 while (mask) { 909 int start, count, i; 910 911 u_bit_scan_consecutive_range(&mask, &start, &count); 912 913 radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE + 914 start * 4 * 6, count * 6); 915 for (i = start; i < start+count; i++) { 916 radeon_emit(cs, fui(states[i].scale[0])); 917 radeon_emit(cs, fui(states[i].translate[0])); 918 radeon_emit(cs, fui(states[i].scale[1])); 919 radeon_emit(cs, fui(states[i].translate[1])); 920 radeon_emit(cs, fui(states[i].scale[2])); 921 radeon_emit(cs, fui(states[i].translate[2])); 922 } 923 } 924 sctx->viewports.dirty_mask = 0; 925} 926 927/* 928 * inferred state between framebuffer and rasterizer 929 */ 930static void si_update_poly_offset_state(struct si_context *sctx) 931{ 932 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 933 934 if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) 935 return; 936 937 switch (sctx->framebuffer.state.zsbuf->texture->format) { 938 case PIPE_FORMAT_Z16_UNORM: 939 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]); 940 break; 941 default: /* 24-bit */ 942 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]); 943 break; 944 case PIPE_FORMAT_Z32_FLOAT: 945 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 946 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]); 947 break; 948 } 949} 950 951/* 952 * Rasterizer 953 */ 954 955static uint32_t si_translate_fill(uint32_t func) 956{ 957 switch(func) { 958 case PIPE_POLYGON_MODE_FILL: 959 
return V_028814_X_DRAW_TRIANGLES; 960 case PIPE_POLYGON_MODE_LINE: 961 return V_028814_X_DRAW_LINES; 962 case PIPE_POLYGON_MODE_POINT: 963 return V_028814_X_DRAW_POINTS; 964 default: 965 assert(0); 966 return V_028814_X_DRAW_POINTS; 967 } 968} 969 970static void *si_create_rs_state(struct pipe_context *ctx, 971 const struct pipe_rasterizer_state *state) 972{ 973 struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer); 974 struct si_pm4_state *pm4 = &rs->pm4; 975 unsigned tmp, i; 976 float psize_min, psize_max; 977 978 if (!rs) { 979 return NULL; 980 } 981 982 rs->two_side = state->light_twoside; 983 rs->multisample_enable = state->multisample; 984 rs->force_persample_interp = state->force_persample_interp; 985 rs->clip_plane_enable = state->clip_plane_enable; 986 rs->line_stipple_enable = state->line_stipple_enable; 987 rs->poly_stipple_enable = state->poly_stipple_enable; 988 rs->line_smooth = state->line_smooth; 989 rs->poly_smooth = state->poly_smooth; 990 rs->uses_poly_offset = state->offset_point || state->offset_line || 991 state->offset_tri; 992 rs->clamp_fragment_color = state->clamp_fragment_color; 993 rs->flatshade = state->flatshade; 994 rs->sprite_coord_enable = state->sprite_coord_enable; 995 rs->rasterizer_discard = state->rasterizer_discard; 996 rs->pa_sc_line_stipple = state->line_stipple_enable ? 
997 S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | 998 S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0; 999 rs->pa_cl_clip_cntl = 1000 S_028810_PS_UCP_MODE(3) | 1001 S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) | 1002 S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) | 1003 S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) | 1004 S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) | 1005 S_028810_DX_LINEAR_ATTR_CLIP_ENA(1); 1006 1007 si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0, 1008 S_0286D4_FLAT_SHADE_ENA(1) | 1009 S_0286D4_PNT_SPRITE_ENA(1) | 1010 S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | 1011 S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) | 1012 S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) | 1013 S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) | 1014 S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT)); 1015 1016 /* point size 12.4 fixed point */ 1017 tmp = (unsigned)(state->point_size * 8.0); 1018 si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp)); 1019 1020 if (state->point_size_per_vertex) { 1021 psize_min = util_get_min_point_size(state); 1022 psize_max = 8192; 1023 } else { 1024 /* Force the point size to be as if the vertex output was disabled. */ 1025 psize_min = state->point_size; 1026 psize_max = state->point_size; 1027 } 1028 /* Divide by two, because 0.5 = 1 pixel. 
*/ 1029 si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX, 1030 S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) | 1031 S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2))); 1032 1033 tmp = (unsigned)state->line_width * 8; 1034 si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp)); 1035 si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0, 1036 S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | 1037 S_028A48_MSAA_ENABLE(state->multisample || 1038 state->poly_smooth || 1039 state->line_smooth) | 1040 S_028A48_VPORT_SCISSOR_ENABLE(state->scissor)); 1041 1042 si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL, 1043 S_028BE4_PIX_CENTER(state->half_pixel_center) | 1044 S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH)); 1045 1046 si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp)); 1047 si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL, 1048 S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) | 1049 S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | 1050 S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | 1051 S_028814_FACE(!state->front_ccw) | 1052 S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) | 1053 S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) | 1054 S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) | 1055 S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL || 1056 state->fill_back != PIPE_POLYGON_MODE_FILL) | 1057 S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) | 1058 S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back))); 1059 si_pm4_set_reg(pm4, R_00B130_SPI_SHADER_USER_DATA_VS_0 + 1060 SI_SGPR_VS_STATE_BITS * 4, state->clamp_vertex_color); 1061 1062 /* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. 
*/ 1063 for (i = 0; i < 3; i++) { 1064 struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i]; 1065 float offset_units = state->offset_units; 1066 float offset_scale = state->offset_scale * 16.0f; 1067 1068 switch (i) { 1069 case 0: /* 16-bit zbuffer */ 1070 offset_units *= 4.0f; 1071 break; 1072 case 1: /* 24-bit zbuffer */ 1073 offset_units *= 2.0f; 1074 break; 1075 case 2: /* 32-bit zbuffer */ 1076 offset_units *= 1.0f; 1077 break; 1078 } 1079 1080 si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 1081 fui(offset_scale)); 1082 si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, 1083 fui(offset_units)); 1084 si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, 1085 fui(offset_scale)); 1086 si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, 1087 fui(offset_units)); 1088 } 1089 1090 return rs; 1091} 1092 1093static void si_bind_rs_state(struct pipe_context *ctx, void *state) 1094{ 1095 struct si_context *sctx = (struct si_context *)ctx; 1096 struct si_state_rasterizer *old_rs = 1097 (struct si_state_rasterizer*)sctx->queued.named.rasterizer; 1098 struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; 1099 1100 if (!state) 1101 return; 1102 1103 if (sctx->framebuffer.nr_samples > 1 && 1104 (!old_rs || old_rs->multisample_enable != rs->multisample_enable)) 1105 si_mark_atom_dirty(sctx, &sctx->db_render_state); 1106 1107 si_pm4_bind_state(sctx, rasterizer, rs); 1108 si_update_poly_offset_state(sctx); 1109 1110 si_mark_atom_dirty(sctx, &sctx->clip_regs); 1111} 1112 1113static void si_delete_rs_state(struct pipe_context *ctx, void *state) 1114{ 1115 struct si_context *sctx = (struct si_context *)ctx; 1116 1117 if (sctx->queued.named.rasterizer == state) 1118 si_pm4_bind_state(sctx, poly_offset, NULL); 1119 si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state); 1120} 1121 1122/* 1123 * infeered state between dsa and stencil ref 1124 */ 1125static void si_emit_stencil_ref(struct si_context *sctx, struct 
/*
 * inferred state between dsa and stencil ref
 */
/* Emit DB_STENCILREFMASK(_BF): the reference value comes from the stencil-ref
 * state, the value/write masks from the currently bound DSA state. */
static void si_emit_stencil_ref(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	struct pipe_stencil_ref *ref = &sctx->stencil_ref.state;
	struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part;

	radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
	/* front face */
	radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) |
			S_028430_STENCILMASK(dsa->valuemask[0]) |
			S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
			S_028430_STENCILOPVAL(1));
	/* back face */
	radeon_emit(cs, S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
			S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
			S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
			S_028434_STENCILOPVAL_BF(1));
}

/* pipe_context::set_stencil_ref hook; skips the atom if nothing changed. */
static void si_set_stencil_ref(struct pipe_context *ctx,
			       const struct pipe_stencil_ref *state)
{
	struct si_context *sctx = (struct si_context *)ctx;

	if (memcmp(&sctx->stencil_ref.state, state, sizeof(*state)) == 0)
		return;

	sctx->stencil_ref.state = *state;
	si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
}


/*
 * DSA
 */

/* Translate a PIPE_STENCIL_OP_* value to a DB_STENCIL_CONTROL field value. */
static uint32_t si_translate_stencil_op(int s_op)
{
	switch (s_op) {
	case PIPE_STENCIL_OP_KEEP:
		return V_02842C_STENCIL_KEEP;
	case PIPE_STENCIL_OP_ZERO:
		return V_02842C_STENCIL_ZERO;
	case PIPE_STENCIL_OP_REPLACE:
		return V_02842C_STENCIL_REPLACE_TEST;
	case PIPE_STENCIL_OP_INCR:
		return V_02842C_STENCIL_ADD_CLAMP;
	case PIPE_STENCIL_OP_DECR:
		return V_02842C_STENCIL_SUB_CLAMP;
	case PIPE_STENCIL_OP_INCR_WRAP:
		return V_02842C_STENCIL_ADD_WRAP;
	case PIPE_STENCIL_OP_DECR_WRAP:
		return V_02842C_STENCIL_SUB_WRAP;
	case PIPE_STENCIL_OP_INVERT:
		return V_02842C_STENCIL_INVERT;
	default:
		R600_ERR("Unknown stencil op %d", s_op);
		assert(0);
		break;
	}
	return 0;
}
*state) 1188{ 1189 struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa); 1190 struct si_pm4_state *pm4 = &dsa->pm4; 1191 unsigned db_depth_control; 1192 uint32_t db_stencil_control = 0; 1193 1194 if (!dsa) { 1195 return NULL; 1196 } 1197 1198 dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask; 1199 dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask; 1200 dsa->stencil_ref.writemask[0] = state->stencil[0].writemask; 1201 dsa->stencil_ref.writemask[1] = state->stencil[1].writemask; 1202 1203 db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | 1204 S_028800_Z_WRITE_ENABLE(state->depth.writemask) | 1205 S_028800_ZFUNC(state->depth.func) | 1206 S_028800_DEPTH_BOUNDS_ENABLE(state->depth.bounds_test); 1207 1208 /* stencil */ 1209 if (state->stencil[0].enabled) { 1210 db_depth_control |= S_028800_STENCIL_ENABLE(1); 1211 db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func); 1212 db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op)); 1213 db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op)); 1214 db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op)); 1215 1216 if (state->stencil[1].enabled) { 1217 db_depth_control |= S_028800_BACKFACE_ENABLE(1); 1218 db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func); 1219 db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op)); 1220 db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op)); 1221 db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op)); 1222 } 1223 } 1224 1225 /* alpha */ 1226 if (state->alpha.enabled) { 1227 dsa->alpha_func = state->alpha.func; 1228 1229 si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 + 1230 SI_SGPR_ALPHA_REF * 4, fui(state->alpha.ref_value)); 1231 } else { 1232 dsa->alpha_func = 
PIPE_FUNC_ALWAYS; 1233 } 1234 1235 si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control); 1236 si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); 1237 if (state->depth.bounds_test) { 1238 si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min)); 1239 si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max)); 1240 } 1241 1242 return dsa; 1243} 1244 1245static void si_bind_dsa_state(struct pipe_context *ctx, void *state) 1246{ 1247 struct si_context *sctx = (struct si_context *)ctx; 1248 struct si_state_dsa *dsa = state; 1249 1250 if (!state) 1251 return; 1252 1253 si_pm4_bind_state(sctx, dsa, dsa); 1254 1255 if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part, 1256 sizeof(struct si_dsa_stencil_ref_part)) != 0) { 1257 sctx->stencil_ref.dsa_part = dsa->stencil_ref; 1258 si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom); 1259 } 1260} 1261 1262static void si_delete_dsa_state(struct pipe_context *ctx, void *state) 1263{ 1264 struct si_context *sctx = (struct si_context *)ctx; 1265 si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state); 1266} 1267 1268static void *si_create_db_flush_dsa(struct si_context *sctx) 1269{ 1270 struct pipe_depth_stencil_alpha_state dsa = {}; 1271 1272 return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa); 1273} 1274 1275/* DB RENDER STATE */ 1276 1277static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable) 1278{ 1279 struct si_context *sctx = (struct si_context*)ctx; 1280 1281 si_mark_atom_dirty(sctx, &sctx->db_render_state); 1282} 1283 1284static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state) 1285{ 1286 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 1287 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 1288 unsigned db_shader_control; 1289 1290 radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2); 1291 1292 /* DB_RENDER_CONTROL */ 1293 if 
/* Emit DB_RENDER_CONTROL, DB_COUNT_CONTROL, DB_RENDER_OVERRIDE2 and
 * DB_SHADER_CONTROL based on decompress/clear flags, occlusion query
 * activity, and the bound fragment shader. */
static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
	unsigned db_shader_control;

	radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);

	/* DB_RENDER_CONTROL */
	if (sctx->dbcb_depth_copy_enabled ||
	    sctx->dbcb_stencil_copy_enabled) {
		/* DB->CB copy of one sample (used by resolve/transfer paths). */
		radeon_emit(cs,
			    S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |
			    S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
			    S_028000_COPY_CENTROID(1) |
			    S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample));
	} else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) {
		/* In-place decompression. */
		radeon_emit(cs,
			    S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
			    S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace));
	} else {
		/* Normal rendering, possibly with fast clears pending. */
		radeon_emit(cs,
			    S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) |
			    S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear));
	}

	/* DB_COUNT_CONTROL (occlusion queries) */
	if (sctx->b.num_occlusion_queries > 0) {
		if (sctx->b.chip_class >= CIK) {
			radeon_emit(cs,
				    S_028004_PERFECT_ZPASS_COUNTS(1) |
				    S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) |
				    S_028004_ZPASS_ENABLE(1) |
				    S_028004_SLICE_EVEN_ENABLE(1) |
				    S_028004_SLICE_ODD_ENABLE(1));
		} else {
			radeon_emit(cs,
				    S_028004_PERFECT_ZPASS_COUNTS(1) |
				    S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples));
		}
	} else {
		/* Disable occlusion queries. */
		if (sctx->b.chip_class >= CIK) {
			radeon_emit(cs, 0);
		} else {
			radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1));
		}
	}

	/* DB_RENDER_OVERRIDE2 */
	radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
		S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
		S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear));

	db_shader_control = S_02880C_ALPHA_TO_MASK_DISABLE(sctx->framebuffer.cb0_is_integer) |
		            sctx->ps_db_shader_control;

	/* Bug workaround for smoothing (overrasterization) on SI. */
	if (sctx->b.chip_class == SI && sctx->smoothing_enabled)
		db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z);
	else
		db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);

	/* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */
	if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable))
		db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;

	if (sctx->b.family == CHIP_STONEY &&
	    sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)
		db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);

	radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
			       db_shader_control);
}
/*
 * format translation
 */
/* Translate a gallium color format to a CB_COLOR*_INFO COLOR_* hw format,
 * or V_028C70_COLOR_INVALID if the CB cannot render to it. */
static uint32_t si_translate_colorformat(enum pipe_format format)
{
	const struct util_format_description *desc = util_format_description(format);

/* True iff the four channel bit-sizes match (x,y,z,w) exactly. */
#define HAS_SIZE(x,y,z,w) \
	(desc->channel[0].size == (x) && desc->channel[1].size == (y) && \
         desc->channel[2].size == (z) && desc->channel[3].size == (w))

	if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
		return V_028C70_COLOR_10_11_11;

	if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
		return V_028C70_COLOR_INVALID;

	switch (desc->nr_channels) {
	case 1:
		switch (desc->channel[0].size) {
		case 8:
			return V_028C70_COLOR_8;
		case 16:
			return V_028C70_COLOR_16;
		case 32:
			return V_028C70_COLOR_32;
		}
		break;
	case 2:
		if (desc->channel[0].size == desc->channel[1].size) {
			switch (desc->channel[0].size) {
			case 8:
				return V_028C70_COLOR_8_8;
			case 16:
				return V_028C70_COLOR_16_16;
			case 32:
				return V_028C70_COLOR_32_32;
			}
		} else if (HAS_SIZE(8,24,0,0)) {
			return V_028C70_COLOR_24_8;
		} else if (HAS_SIZE(24,8,0,0)) {
			return V_028C70_COLOR_8_24;
		}
		break;
	case 3:
		if (HAS_SIZE(5,6,5,0)) {
			return V_028C70_COLOR_5_6_5;
		} else if (HAS_SIZE(32,8,24,0)) {
			return V_028C70_COLOR_X24_8_32_FLOAT;
		}
		break;
	case 4:
		if (desc->channel[0].size == desc->channel[1].size &&
		    desc->channel[0].size == desc->channel[2].size &&
		    desc->channel[0].size == desc->channel[3].size) {
			switch (desc->channel[0].size) {
			case 4:
				return V_028C70_COLOR_4_4_4_4;
			case 8:
				return V_028C70_COLOR_8_8_8_8;
			case 16:
				return V_028C70_COLOR_16_16_16_16;
			case 32:
				return V_028C70_COLOR_32_32_32_32;
			}
		} else if (HAS_SIZE(5,5,5,1)) {
			return V_028C70_COLOR_1_5_5_5;
		} else if (HAS_SIZE(10,10,10,2)) {
			return V_028C70_COLOR_2_10_10_10;
		}
		break;
	}
	return V_028C70_COLOR_INVALID;
}

/* Pick the CB endian-swap mode for a hw color format; a no-op on
 * little-endian builds. */
static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
{
	if (SI_BIG_ENDIAN) {
		switch(colorformat) {
		/* 8-bit buffers. */
		case V_028C70_COLOR_8:
			return V_028C70_ENDIAN_NONE;

		/* 16-bit buffers. */
		case V_028C70_COLOR_5_6_5:
		case V_028C70_COLOR_1_5_5_5:
		case V_028C70_COLOR_4_4_4_4:
		case V_028C70_COLOR_16:
		case V_028C70_COLOR_8_8:
			return V_028C70_ENDIAN_8IN16;

		/* 32-bit buffers. */
		case V_028C70_COLOR_8_8_8_8:
		case V_028C70_COLOR_2_10_10_10:
		case V_028C70_COLOR_8_24:
		case V_028C70_COLOR_24_8:
		case V_028C70_COLOR_16_16:
			return V_028C70_ENDIAN_8IN32;

		/* 64-bit buffers. */
		case V_028C70_COLOR_16_16_16_16:
			return V_028C70_ENDIAN_8IN16;

		case V_028C70_COLOR_32_32:
			return V_028C70_ENDIAN_8IN32;

		/* 128-bit buffers. */
		case V_028C70_COLOR_32_32_32_32:
			return V_028C70_ENDIAN_8IN32;
		default:
			return V_028C70_ENDIAN_NONE; /* Unsupported. */
		}
	} else {
		return V_028C70_ENDIAN_NONE;
	}
}

/* Translate a gallium Z/S format to a DB_Z_INFO hw format, or
 * V_028040_Z_INVALID if the DB cannot use it. */
static uint32_t si_translate_dbformat(enum pipe_format format)
{
	switch (format) {
	case PIPE_FORMAT_Z16_UNORM:
		return V_028040_Z_16;
	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
	case PIPE_FORMAT_X8Z24_UNORM:
	case PIPE_FORMAT_Z24X8_UNORM:
	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
		return V_028040_Z_24; /* deprecated on SI */
	case PIPE_FORMAT_Z32_FLOAT:
	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
		return V_028040_Z_32_FLOAT;
	default:
		return V_028040_Z_INVALID;
	}
}
/*
 * Texture translation
 */

/* Translate a gallium format to an IMG_DATA_FORMAT value for sampler
 * resource descriptors. Returns ~0 (all ones) if unsupported. */
static uint32_t si_translate_texformat(struct pipe_screen *screen,
				       enum pipe_format format,
				       const struct util_format_description *desc,
				       int first_non_void)
{
	struct si_screen *sscreen = (struct si_screen*)screen;
	/* Compressed formats need DRM 2.31+ (or any DRM 3.x) for the kernel
	 * side of the tiling/addrlib support. */
	bool enable_compressed_formats = (sscreen->b.info.drm_major == 2 &&
					  sscreen->b.info.drm_minor >= 31) ||
					 sscreen->b.info.drm_major == 3;
	boolean uniform = TRUE;
	int i;

	/* Colorspace (return non-RGB formats directly). */
	switch (desc->colorspace) {
	/* Depth stencil formats */
	case UTIL_FORMAT_COLORSPACE_ZS:
		switch (format) {
		case PIPE_FORMAT_Z16_UNORM:
			return V_008F14_IMG_DATA_FORMAT_16;
		case PIPE_FORMAT_X24S8_UINT:
		case PIPE_FORMAT_Z24X8_UNORM:
		case PIPE_FORMAT_Z24_UNORM_S8_UINT:
			return V_008F14_IMG_DATA_FORMAT_8_24;
		case PIPE_FORMAT_X8Z24_UNORM:
		case PIPE_FORMAT_S8X24_UINT:
		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
			return V_008F14_IMG_DATA_FORMAT_24_8;
		case PIPE_FORMAT_S8_UINT:
			return V_008F14_IMG_DATA_FORMAT_8;
		case PIPE_FORMAT_Z32_FLOAT:
			return V_008F14_IMG_DATA_FORMAT_32;
		case PIPE_FORMAT_X32_S8X24_UINT:
		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
			return V_008F14_IMG_DATA_FORMAT_X24_8_32;
		default:
			goto out_unknown;
		}

	case UTIL_FORMAT_COLORSPACE_YUV:
		goto out_unknown; /* TODO */

	case UTIL_FORMAT_COLORSPACE_SRGB:
		/* sRGB only exists in hw for 1- and 4-channel layouts. */
		if (desc->nr_channels != 4 && desc->nr_channels != 1)
			goto out_unknown;
		break;

	default:
		break;
	}

	if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
		if (!enable_compressed_formats)
			goto out_unknown;

		switch (format) {
		case PIPE_FORMAT_RGTC1_SNORM:
		case PIPE_FORMAT_LATC1_SNORM:
		case PIPE_FORMAT_RGTC1_UNORM:
		case PIPE_FORMAT_LATC1_UNORM:
			return V_008F14_IMG_DATA_FORMAT_BC4;
		case PIPE_FORMAT_RGTC2_SNORM:
		case PIPE_FORMAT_LATC2_SNORM:
		case PIPE_FORMAT_RGTC2_UNORM:
		case PIPE_FORMAT_LATC2_UNORM:
			return V_008F14_IMG_DATA_FORMAT_BC5;
		default:
			goto out_unknown;
		}
	}

	/* ETC2 is only supported in hardware starting with Stoney. */
	if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
	    sscreen->b.family >= CHIP_STONEY) {
		switch (format) {
		case PIPE_FORMAT_ETC1_RGB8:
		case PIPE_FORMAT_ETC2_RGB8:
		case PIPE_FORMAT_ETC2_SRGB8:
			return V_008F14_IMG_DATA_FORMAT_ETC2_RGB;
		case PIPE_FORMAT_ETC2_RGB8A1:
		case PIPE_FORMAT_ETC2_SRGB8A1:
			return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1;
		case PIPE_FORMAT_ETC2_RGBA8:
		case PIPE_FORMAT_ETC2_SRGBA8:
			return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA;
		case PIPE_FORMAT_ETC2_R11_UNORM:
		case PIPE_FORMAT_ETC2_R11_SNORM:
			return V_008F14_IMG_DATA_FORMAT_ETC2_R;
		case PIPE_FORMAT_ETC2_RG11_UNORM:
		case PIPE_FORMAT_ETC2_RG11_SNORM:
			return V_008F14_IMG_DATA_FORMAT_ETC2_RG;
		default:
			goto out_unknown;
		}
	}

	if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
		if (!enable_compressed_formats)
			goto out_unknown;

		switch (format) {
		case PIPE_FORMAT_BPTC_RGBA_UNORM:
		case PIPE_FORMAT_BPTC_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC7;
		case PIPE_FORMAT_BPTC_RGB_FLOAT:
		case PIPE_FORMAT_BPTC_RGB_UFLOAT:
			return V_008F14_IMG_DATA_FORMAT_BC6;
		default:
			goto out_unknown;
		}
	}

	if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
		switch (format) {
		case PIPE_FORMAT_R8G8_B8G8_UNORM:
		case PIPE_FORMAT_G8R8_B8R8_UNORM:
			return V_008F14_IMG_DATA_FORMAT_GB_GR;
		case PIPE_FORMAT_G8R8_G8B8_UNORM:
		case PIPE_FORMAT_R8G8_R8B8_UNORM:
			return V_008F14_IMG_DATA_FORMAT_BG_RG;
		default:
			goto out_unknown;
		}
	}

	if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
		if (!enable_compressed_formats)
			goto out_unknown;

		/* S3TC can also be disabled at runtime (patent opt-out). */
		if (!util_format_s3tc_enabled) {
			goto out_unknown;
		}

		switch (format) {
		case PIPE_FORMAT_DXT1_RGB:
		case PIPE_FORMAT_DXT1_RGBA:
		case PIPE_FORMAT_DXT1_SRGB:
		case PIPE_FORMAT_DXT1_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC1;
		case PIPE_FORMAT_DXT3_RGBA:
		case PIPE_FORMAT_DXT3_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC2;
		case PIPE_FORMAT_DXT5_RGBA:
		case PIPE_FORMAT_DXT5_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC3;
		default:
			goto out_unknown;
		}
	}

	if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
		return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
	} else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
		return V_008F14_IMG_DATA_FORMAT_10_11_11;
	}

	/* R8G8Bx_SNORM - TODO CxV8U8 */

	/* See whether the components are of the same size. */
	for (i = 1; i < desc->nr_channels; i++) {
		uniform = uniform && desc->channel[0].size == desc->channel[i].size;
	}

	/* Non-uniform formats. */
	if (!uniform) {
		switch(desc->nr_channels) {
		case 3:
			if (desc->channel[0].size == 5 &&
			    desc->channel[1].size == 6 &&
			    desc->channel[2].size == 5) {
				return V_008F14_IMG_DATA_FORMAT_5_6_5;
			}
			goto out_unknown;
		case 4:
			if (desc->channel[0].size == 5 &&
			    desc->channel[1].size == 5 &&
			    desc->channel[2].size == 5 &&
			    desc->channel[3].size == 1) {
				return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
			}
			if (desc->channel[0].size == 10 &&
			    desc->channel[1].size == 10 &&
			    desc->channel[2].size == 10 &&
			    desc->channel[3].size == 2) {
				return V_008F14_IMG_DATA_FORMAT_2_10_10_10;
			}
			goto out_unknown;
		}
		goto out_unknown;
	}

	if (first_non_void < 0 || first_non_void > 3)
		goto out_unknown;

	/* uniform formats */
	switch (desc->channel[first_non_void].size) {
	case 4:
		switch (desc->nr_channels) {
#if 0 /* Not supported for render targets */
		case 2:
			return V_008F14_IMG_DATA_FORMAT_4_4;
#endif
		case 4:
			return V_008F14_IMG_DATA_FORMAT_4_4_4_4;
		}
		break;
	case 8:
		switch (desc->nr_channels) {
		case 1:
			return V_008F14_IMG_DATA_FORMAT_8;
		case 2:
			return V_008F14_IMG_DATA_FORMAT_8_8;
		case 4:
			return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
		}
		break;
	case 16:
		switch (desc->nr_channels) {
		case 1:
			return V_008F14_IMG_DATA_FORMAT_16;
		case 2:
			return V_008F14_IMG_DATA_FORMAT_16_16;
		case 4:
			return V_008F14_IMG_DATA_FORMAT_16_16_16_16;
		}
		break;
	case 32:
		switch (desc->nr_channels) {
		case 1:
			return V_008F14_IMG_DATA_FORMAT_32;
		case 2:
			return V_008F14_IMG_DATA_FORMAT_32_32;
#if 0 /* Not supported for render targets */
		case 3:
			return V_008F14_IMG_DATA_FORMAT_32_32_32;
#endif
		case 4:
			return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
		}
	}

out_unknown:
	/* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */
	return ~0;
}
1720 } 1721 break; 1722 case 32: 1723 switch (desc->nr_channels) { 1724 case 1: 1725 return V_008F14_IMG_DATA_FORMAT_32; 1726 case 2: 1727 return V_008F14_IMG_DATA_FORMAT_32_32; 1728#if 0 /* Not supported for render targets */ 1729 case 3: 1730 return V_008F14_IMG_DATA_FORMAT_32_32_32; 1731#endif 1732 case 4: 1733 return V_008F14_IMG_DATA_FORMAT_32_32_32_32; 1734 } 1735 } 1736 1737out_unknown: 1738 /* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */ 1739 return ~0; 1740} 1741 1742static unsigned si_tex_wrap(unsigned wrap) 1743{ 1744 switch (wrap) { 1745 default: 1746 case PIPE_TEX_WRAP_REPEAT: 1747 return V_008F30_SQ_TEX_WRAP; 1748 case PIPE_TEX_WRAP_CLAMP: 1749 return V_008F30_SQ_TEX_CLAMP_HALF_BORDER; 1750 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 1751 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL; 1752 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 1753 return V_008F30_SQ_TEX_CLAMP_BORDER; 1754 case PIPE_TEX_WRAP_MIRROR_REPEAT: 1755 return V_008F30_SQ_TEX_MIRROR; 1756 case PIPE_TEX_WRAP_MIRROR_CLAMP: 1757 return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER; 1758 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 1759 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; 1760 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 1761 return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER; 1762 } 1763} 1764 1765static unsigned si_tex_filter(unsigned filter) 1766{ 1767 switch (filter) { 1768 default: 1769 case PIPE_TEX_FILTER_NEAREST: 1770 return V_008F38_SQ_TEX_XY_FILTER_POINT; 1771 case PIPE_TEX_FILTER_LINEAR: 1772 return V_008F38_SQ_TEX_XY_FILTER_BILINEAR; 1773 } 1774} 1775 1776static unsigned si_tex_mipfilter(unsigned filter) 1777{ 1778 switch (filter) { 1779 case PIPE_TEX_MIPFILTER_NEAREST: 1780 return V_008F38_SQ_TEX_Z_FILTER_POINT; 1781 case PIPE_TEX_MIPFILTER_LINEAR: 1782 return V_008F38_SQ_TEX_Z_FILTER_LINEAR; 1783 default: 1784 case PIPE_TEX_MIPFILTER_NONE: 1785 return V_008F38_SQ_TEX_Z_FILTER_NONE; 1786 } 1787} 1788 1789static unsigned si_tex_compare(unsigned compare) 1790{ 1791 switch 
(compare) { 1792 default: 1793 case PIPE_FUNC_NEVER: 1794 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER; 1795 case PIPE_FUNC_LESS: 1796 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS; 1797 case PIPE_FUNC_EQUAL: 1798 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL; 1799 case PIPE_FUNC_LEQUAL: 1800 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; 1801 case PIPE_FUNC_GREATER: 1802 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER; 1803 case PIPE_FUNC_NOTEQUAL: 1804 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; 1805 case PIPE_FUNC_GEQUAL: 1806 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; 1807 case PIPE_FUNC_ALWAYS: 1808 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS; 1809 } 1810} 1811 1812static unsigned si_tex_dim(unsigned res_target, unsigned view_target, 1813 unsigned nr_samples) 1814{ 1815 if (view_target == PIPE_TEXTURE_CUBE || 1816 view_target == PIPE_TEXTURE_CUBE_ARRAY) 1817 res_target = view_target; 1818 1819 switch (res_target) { 1820 default: 1821 case PIPE_TEXTURE_1D: 1822 return V_008F1C_SQ_RSRC_IMG_1D; 1823 case PIPE_TEXTURE_1D_ARRAY: 1824 return V_008F1C_SQ_RSRC_IMG_1D_ARRAY; 1825 case PIPE_TEXTURE_2D: 1826 case PIPE_TEXTURE_RECT: 1827 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA : 1828 V_008F1C_SQ_RSRC_IMG_2D; 1829 case PIPE_TEXTURE_2D_ARRAY: 1830 return nr_samples > 1 ? 
/*
 * Format support testing
 */

/* A format is samplable iff si_translate_texformat() maps it (~0U = fail). */
static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
{
	return si_translate_texformat(screen, format, util_format_description(format),
				      util_format_get_first_non_void_channel(format)) != ~0U;
}

/* Translate a vertex/texel-buffer format to a BUF_DATA_FORMAT_* value for
 * buffer resource descriptors, or BUF_DATA_FORMAT_INVALID. */
static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,
					       const struct util_format_description *desc,
					       int first_non_void)
{
	unsigned type = desc->channel[first_non_void].type;
	int i;

	if (type == UTIL_FORMAT_TYPE_FIXED)
		return V_008F0C_BUF_DATA_FORMAT_INVALID;

	if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
		return V_008F0C_BUF_DATA_FORMAT_10_11_11;

	if (desc->nr_channels == 4 &&
	    desc->channel[0].size == 10 &&
	    desc->channel[1].size == 10 &&
	    desc->channel[2].size == 10 &&
	    desc->channel[3].size == 2)
		return V_008F0C_BUF_DATA_FORMAT_2_10_10_10;

	/* See whether the components are of the same size. */
	for (i = 0; i < desc->nr_channels; i++) {
		if (desc->channel[first_non_void].size != desc->channel[i].size)
			return V_008F0C_BUF_DATA_FORMAT_INVALID;
	}

	switch (desc->channel[first_non_void].size) {
	case 8:
		switch (desc->nr_channels) {
		case 1:
			return V_008F0C_BUF_DATA_FORMAT_8;
		case 2:
			return V_008F0C_BUF_DATA_FORMAT_8_8;
		case 3:
		/* 3-channel 8/16-bit have no native format; use 4-channel. */
		case 4:
			return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;
		}
		break;
	case 16:
		switch (desc->nr_channels) {
		case 1:
			return V_008F0C_BUF_DATA_FORMAT_16;
		case 2:
			return V_008F0C_BUF_DATA_FORMAT_16_16;
		case 3:
		case 4:
			return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
		}
		break;
	case 32:
		/* From the Southern Islands ISA documentation about MTBUF:
		 * 'Memory reads of data in memory that is 32 or 64 bits do not
		 * undergo any format conversion.'
		 */
		if (type != UTIL_FORMAT_TYPE_FLOAT &&
		    !desc->channel[first_non_void].pure_integer)
			return V_008F0C_BUF_DATA_FORMAT_INVALID;

		switch (desc->nr_channels) {
		case 1:
			return V_008F0C_BUF_DATA_FORMAT_32;
		case 2:
			return V_008F0C_BUF_DATA_FORMAT_32_32;
		case 3:
			return V_008F0C_BUF_DATA_FORMAT_32_32_32;
		case 4:
			return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
		}
		break;
	}

	return V_008F0C_BUF_DATA_FORMAT_INVALID;
}

/* Pick the BUF_NUM_FORMAT_* (numeric interpretation) matching the first
 * non-void channel of a buffer format. */
static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen,
					      const struct util_format_description *desc,
					      int first_non_void)
{
	if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
		return V_008F0C_BUF_NUM_FORMAT_FLOAT;

	switch (desc->channel[first_non_void].type) {
	case UTIL_FORMAT_TYPE_SIGNED:
		if (desc->channel[first_non_void].normalized)
			return V_008F0C_BUF_NUM_FORMAT_SNORM;
		else if (desc->channel[first_non_void].pure_integer)
			return V_008F0C_BUF_NUM_FORMAT_SINT;
		else
			return V_008F0C_BUF_NUM_FORMAT_SSCALED;
		break;
	case UTIL_FORMAT_TYPE_UNSIGNED:
		if (desc->channel[first_non_void].normalized)
			return V_008F0C_BUF_NUM_FORMAT_UNORM;
		else if (desc->channel[first_non_void].pure_integer)
			return V_008F0C_BUF_NUM_FORMAT_UINT;
		else
			return V_008F0C_BUF_NUM_FORMAT_USCALED;
		break;
	case UTIL_FORMAT_TYPE_FLOAT:
	default:
		return V_008F0C_BUF_NUM_FORMAT_FLOAT;
	}
}
V_008F0C_BUF_NUM_FORMAT_SSCALED; 1939 break; 1940 case UTIL_FORMAT_TYPE_UNSIGNED: 1941 if (desc->channel[first_non_void].normalized) 1942 return V_008F0C_BUF_NUM_FORMAT_UNORM; 1943 else if (desc->channel[first_non_void].pure_integer) 1944 return V_008F0C_BUF_NUM_FORMAT_UINT; 1945 else 1946 return V_008F0C_BUF_NUM_FORMAT_USCALED; 1947 break; 1948 case UTIL_FORMAT_TYPE_FLOAT: 1949 default: 1950 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 1951 } 1952} 1953 1954static bool si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format) 1955{ 1956 const struct util_format_description *desc; 1957 int first_non_void; 1958 unsigned data_format; 1959 1960 desc = util_format_description(format); 1961 first_non_void = util_format_get_first_non_void_channel(format); 1962 data_format = si_translate_buffer_dataformat(screen, desc, first_non_void); 1963 return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID; 1964} 1965 1966static bool si_is_colorbuffer_format_supported(enum pipe_format format) 1967{ 1968 return si_translate_colorformat(format) != V_028C70_COLOR_INVALID && 1969 r600_translate_colorswap(format) != ~0U; 1970} 1971 1972static bool si_is_zs_format_supported(enum pipe_format format) 1973{ 1974 return si_translate_dbformat(format) != V_028040_Z_INVALID; 1975} 1976 1977boolean si_is_format_supported(struct pipe_screen *screen, 1978 enum pipe_format format, 1979 enum pipe_texture_target target, 1980 unsigned sample_count, 1981 unsigned usage) 1982{ 1983 unsigned retval = 0; 1984 1985 if (target >= PIPE_MAX_TEXTURE_TYPES) { 1986 R600_ERR("r600: unsupported texture type %d\n", target); 1987 return FALSE; 1988 } 1989 1990 if (!util_format_is_supported(format, usage)) 1991 return FALSE; 1992 1993 if (sample_count > 1) { 1994 if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE)) 1995 return FALSE; 1996 1997 switch (sample_count) { 1998 case 2: 1999 case 4: 2000 case 8: 2001 break; 2002 default: 2003 return FALSE; 2004 } 2005 } 2006 2007 if (usage & 
	    PIPE_BIND_SAMPLER_VIEW) {
		/* Buffer targets are sampled through the vertex-fetch path;
		 * images use the texture-format table. */
		if (target == PIPE_BUFFER) {
			if (si_is_vertex_format_supported(screen, format))
				retval |= PIPE_BIND_SAMPLER_VIEW;
		} else {
			if (si_is_sampler_format_supported(screen, format))
				retval |= PIPE_BIND_SAMPLER_VIEW;
		}
	}

	if ((usage & (PIPE_BIND_RENDER_TARGET |
		      PIPE_BIND_DISPLAY_TARGET |
		      PIPE_BIND_SCANOUT |
		      PIPE_BIND_SHARED |
		      PIPE_BIND_BLENDABLE)) &&
	    si_is_colorbuffer_format_supported(format)) {
		retval |= usage &
			  (PIPE_BIND_RENDER_TARGET |
			   PIPE_BIND_DISPLAY_TARGET |
			   PIPE_BIND_SCANOUT |
			   PIPE_BIND_SHARED);
		/* Blending is only granted for non-integer, non-Z/S formats. */
		if (!util_format_is_pure_integer(format) &&
		    !util_format_is_depth_or_stencil(format))
			retval |= usage & PIPE_BIND_BLENDABLE;
	}

	if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
	    si_is_zs_format_supported(format)) {
		retval |= PIPE_BIND_DEPTH_STENCIL;
	}

	if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
	    si_is_vertex_format_supported(screen, format)) {
		retval |= PIPE_BIND_VERTEX_BUFFER;
	}

	if (usage & PIPE_BIND_TRANSFER_READ)
		retval |= PIPE_BIND_TRANSFER_READ;
	if (usage & PIPE_BIND_TRANSFER_WRITE)
		retval |= PIPE_BIND_TRANSFER_WRITE;

	/* Supported only if every requested usage bit was granted. */
	return retval == usage;
}

/* Return the tiling table index for the given mip level. Stencil has its
 * own tiling index table separate from depth/color. */
unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil)
{
	unsigned tile_mode_index = 0;

	if (stencil) {
		tile_mode_index = rtex->surface.stencil_tiling_index[level];
	} else {
		tile_mode_index = rtex->surface.tiling_index[level];
	}
	return tile_mode_index;
}

/*
 * framebuffer handling
 */

/* Pick the four SPI_SHADER_COL_FORMAT variants (normal / alpha-exporting /
 * blendable / blendable+alpha) for a CB format+swap+number-type triple and
 * store them in *surf. */
static void si_choose_spi_color_formats(struct r600_surface *surf,
					unsigned format, unsigned swap,
					unsigned ntype, bool is_depth)
{
	/* Alpha is needed for alpha-to-coverage.
	 * Blending may be with or without alpha.
	 */
	unsigned normal = 0; /* most optimal, may not support blending or export alpha */
	unsigned alpha = 0; /* exports alpha, but may not support blending */
	unsigned blend = 0; /* supports blending, but may not export alpha */
	unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */

	/* Choose the SPI color formats. These are required values for Stoney/RB+.
	 * Other chips have multiple choices, though they are not necessarily better.
	 */
	switch (format) {
	case V_028C70_COLOR_5_6_5:
	case V_028C70_COLOR_1_5_5_5:
	case V_028C70_COLOR_5_5_5_1:
	case V_028C70_COLOR_4_4_4_4:
	case V_028C70_COLOR_10_11_11:
	case V_028C70_COLOR_11_11_10:
	case V_028C70_COLOR_8:
	case V_028C70_COLOR_8_8:
	case V_028C70_COLOR_8_8_8_8:
	case V_028C70_COLOR_10_10_10_2:
	case V_028C70_COLOR_2_10_10_10:
		if (ntype == V_028C70_NUMBER_UINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
		else if (ntype == V_028C70_NUMBER_SINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
		else
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
		break;

	case V_028C70_COLOR_16:
	case V_028C70_COLOR_16_16:
	case V_028C70_COLOR_16_16_16_16:
		if (ntype == V_028C70_NUMBER_UNORM ||
		    ntype == V_028C70_NUMBER_SNORM) {
			/* UNORM16 and SNORM16 don't support blending */
			if (ntype == V_028C70_NUMBER_UNORM)
				normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR;
			else
				normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR;

			/* Use 32 bits per channel for blending. */
			if (format == V_028C70_COLOR_16) {
				if (swap == V_028C70_SWAP_STD) { /* R */
					blend = V_028714_SPI_SHADER_32_R;
					blend_alpha = V_028714_SPI_SHADER_32_AR;
				} else if (swap == V_028C70_SWAP_ALT_REV) /* A */
					blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
				else
					assert(0);
			} else if (format == V_028C70_COLOR_16_16) {
				if (swap == V_028C70_SWAP_STD) { /* RG */
					blend = V_028714_SPI_SHADER_32_GR;
					blend_alpha = V_028714_SPI_SHADER_32_ABGR;
				} else if (swap == V_028C70_SWAP_ALT) /* RA */
					blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
				else
					assert(0);
			} else /* 16_16_16_16 */
				blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
		} else if (ntype == V_028C70_NUMBER_UINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
		else if (ntype == V_028C70_NUMBER_SINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
		else if (ntype == V_028C70_NUMBER_FLOAT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
		else
			assert(0);
		break;

	case V_028C70_COLOR_32:
		if (swap == V_028C70_SWAP_STD) { /* R */
			blend = normal = V_028714_SPI_SHADER_32_R;
			alpha = blend_alpha = V_028714_SPI_SHADER_32_AR;
		} else if (swap == V_028C70_SWAP_ALT_REV) /* A */
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
		else
			assert(0);
		break;

	case V_028C70_COLOR_32_32:
		if (swap == V_028C70_SWAP_STD) { /* RG */
			blend = normal = V_028714_SPI_SHADER_32_GR;
			alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
		} else if (swap == V_028C70_SWAP_ALT) /* RA */
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
		else
			assert(0);
		break;

	case V_028C70_COLOR_32_32_32_32:
	case V_028C70_COLOR_8_24:
	case V_028C70_COLOR_24_8:
	case V_028C70_COLOR_X24_8_32_FLOAT:
		alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
		break;

	default:
		assert(0);
		return;
	}

	/* The DB->CB copy needs 32_ABGR. */
	if (is_depth)
		alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;

	surf->spi_shader_col_format = normal;
	surf->spi_shader_col_format_alpha = alpha;
	surf->spi_shader_col_format_blend = blend;
	surf->spi_shader_col_format_blend_alpha = blend_alpha;
}

/* Compute and cache the CB register values in *surf for a color surface.
 * Called lazily from si_set_framebuffer_state when color_initialized is
 * false. */
static void si_initialize_color_surface(struct si_context *sctx,
					struct r600_surface *surf)
{
	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
	unsigned level = surf->base.u.tex.level;
	uint64_t offset = rtex->surface.level[level].offset;
	unsigned pitch, slice;
	unsigned color_info, color_attrib, color_pitch, color_view;
	unsigned tile_mode_index;
	unsigned format, swap, ntype, endian;
	const struct util_format_description *desc;
	int i;
	unsigned blend_clamp = 0, blend_bypass = 0;

	/* Layered rendering doesn't work with LINEAR_GENERAL.
	 * (LINEAR_ALIGNED and others work) */
	if (rtex->surface.level[level].mode == RADEON_SURF_MODE_LINEAR) {
		assert(surf->base.u.tex.first_layer == surf->base.u.tex.last_layer);
		/* Fold the layer offset into the base address instead of
		 * using the slice view (only a single layer is allowed). */
		offset += rtex->surface.level[level].slice_size *
			  surf->base.u.tex.first_layer;
		color_view = 0;
	} else {
		color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) |
			     S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer);
	}

	/* TILE_MAX fields are in units of 8x8 blocks, encoded as count-1. */
	pitch = (rtex->surface.level[level].nblk_x) / 8 - 1;
	slice = (rtex->surface.level[level].nblk_x * rtex->surface.level[level].nblk_y) / 64;
	if (slice) {
		slice = slice - 1;
	}

	tile_mode_index = si_tile_mode_index(rtex, level, false);

	/* Derive the CB number type from the first non-void channel. */
	desc = util_format_description(surf->base.format);
	for (i = 0; i < 4; i++) {
		if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
			break;
		}
	}
	if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
		ntype = V_028C70_NUMBER_FLOAT;
	} else {
		ntype = V_028C70_NUMBER_UNORM;
		if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
			ntype = V_028C70_NUMBER_SRGB;
		else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
			if (desc->channel[i].pure_integer) {
				ntype = V_028C70_NUMBER_SINT;
			} else {
				assert(desc->channel[i].normalized);
				ntype = V_028C70_NUMBER_SNORM;
			}
		} else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
			if (desc->channel[i].pure_integer) {
				ntype = V_028C70_NUMBER_UINT;
			} else {
				assert(desc->channel[i].normalized);
				ntype = V_028C70_NUMBER_UNORM;
			}
		}
	}

	format = si_translate_colorformat(surf->base.format);
	if (format == V_028C70_COLOR_INVALID) {
		R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
	}
	assert(format != V_028C70_COLOR_INVALID);
	swap = r600_translate_colorswap(surf->base.format);
	if (rtex->resource.b.b.usage == PIPE_USAGE_STAGING) {
		/* Staging resources keep CPU byte order. */
		endian = V_028C70_ENDIAN_NONE;
	} else {
		endian = si_colorformat_endian_swap(format);
	}

	/* blend clamp should be set for all NORM/SRGB types */
	if (ntype == V_028C70_NUMBER_UNORM ||
	    ntype == V_028C70_NUMBER_SNORM ||
	    ntype == V_028C70_NUMBER_SRGB)
		blend_clamp = 1;

	/* set blend bypass according to docs if SINT/UINT or
	   8/24 COLOR variants */
	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
		blend_clamp = 0;
		blend_bypass = 1;
	}

	/* Track 8-bit integer color buffers (consumed via
	 * framebuffer.color_is_int8 in si_set_framebuffer_state). */
	if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
	    (format == V_028C70_COLOR_8 ||
	     format == V_028C70_COLOR_8_8 ||
	     format == V_028C70_COLOR_8_8_8_8))
		surf->color_is_int8 = true;

	color_info = S_028C70_FORMAT(format) |
		S_028C70_COMP_SWAP(swap) |
		S_028C70_BLEND_CLAMP(blend_clamp) |
		S_028C70_BLEND_BYPASS(blend_bypass) |
		S_028C70_NUMBER_TYPE(ntype) |
		S_028C70_ENDIAN(endian);

	color_pitch = S_028C64_TILE_MAX(pitch);

	/* Intensity is implemented as Red, so treat it that way. */
	color_attrib = S_028C74_TILE_MODE_INDEX(tile_mode_index) |
		S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1 ||
					   util_format_is_intensity(surf->base.format));

	if (rtex->resource.b.b.nr_samples > 1) {
		unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);

		color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
				S_028C74_NUM_FRAGMENTS(log_samples);

		if (rtex->fmask.size) {
			color_info |= S_028C70_COMPRESSION(1);
			unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height);

			color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(rtex->fmask.tile_mode_index);

			if (sctx->b.chip_class == SI) {
				/* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */
				color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
			}
			if (sctx->b.chip_class >= CIK) {
				color_pitch |= S_028C64_FMASK_TILE_MAX(rtex->fmask.pitch_in_pixels / 8 - 1);
			}
		}
	}

	offset += rtex->resource.gpu_address;

	/* CB base addresses are 256-byte aligned, hence the >> 8. */
	surf->cb_color_base = offset >> 8;
	surf->cb_color_pitch = color_pitch;
	surf->cb_color_slice = S_028C68_TILE_MAX(slice);
	surf->cb_color_view = color_view;
	surf->cb_color_info = color_info;
	surf->cb_color_attrib = color_attrib;

	if (sctx->b.chip_class >= VI && rtex->dcc_buffer) {
		unsigned max_uncompressed_block_size = 2;
		uint64_t dcc_offset = rtex->surface.level[level].dcc_offset;

		/* Smaller uncompressed block sizes for small-bpe MSAA. */
		if (rtex->surface.nsamples > 1) {
			if (rtex->surface.bpe == 1)
				max_uncompressed_block_size = 0;
			else if (rtex->surface.bpe == 2)
				max_uncompressed_block_size = 1;
		}

		surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
				       S_028C78_INDEPENDENT_64B_BLOCKS(1);
		surf->cb_dcc_base = (rtex->dcc_buffer->gpu_address + dcc_offset) >> 8;
	}

	if (rtex->fmask.size) {
		surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8;
		surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max);
	} else {
		/* This must be set for fast clear to work without FMASK. */
		surf->cb_color_fmask = surf->cb_color_base;
		surf->cb_color_fmask_slice = surf->cb_color_slice;
		surf->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);

		if (sctx->b.chip_class == SI) {
			unsigned bankh = util_logbase2(rtex->surface.bankh);
			surf->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
		}

		if (sctx->b.chip_class >= CIK) {
			surf->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch);
		}
	}

	/* Determine pixel shader export format */
	si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth);

	surf->color_initialized = true;
}

/* Compute and cache the DB register values in *surf for a depth/stencil
 * surface. Called lazily from si_set_framebuffer_state when
 * depth_initialized is false. */
static void si_init_depth_surface(struct si_context *sctx,
				  struct r600_surface *surf)
{
	struct si_screen *sscreen = sctx->screen;
	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
	unsigned level = surf->base.u.tex.level;
	struct radeon_surf_level *levelinfo = &rtex->surface.level[level];
	unsigned format, tile_mode_index, array_mode;
	unsigned macro_aspect, tile_split, stile_split, bankh, bankw, nbanks, pipe_config;
	uint32_t z_info, s_info, db_depth_info;
	uint64_t z_offs, s_offs;
	uint32_t db_htile_data_base, db_htile_surface, pa_su_poly_offset_db_fmt_cntl = 0;

	/* Polygon-offset scaling depends on the Z buffer's bit depth
	 * (and on float vs. fixed-point for 32-bit Z). */
	switch (sctx->framebuffer.state.zsbuf->texture->format) {
	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
	case PIPE_FORMAT_X8Z24_UNORM:
	case PIPE_FORMAT_Z24X8_UNORM:
	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
		pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
		break;
	case PIPE_FORMAT_Z32_FLOAT:
	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
		pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
						S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
		break;
	case PIPE_FORMAT_Z16_UNORM:
		pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
		break;
	default:
		assert(0);
	}

	format = si_translate_dbformat(rtex->resource.b.b.format);

	if (format == V_028040_Z_INVALID) {
		R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format);
	}
	assert(format != V_028040_Z_INVALID);

	s_offs = z_offs = rtex->resource.gpu_address;
	z_offs += rtex->surface.level[level].offset;
	s_offs += rtex->surface.stencil_level[level].offset;

	db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);

	z_info = S_028040_FORMAT(format);
	if (rtex->resource.b.b.nr_samples > 1) {
		z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples));
	}

	if (rtex->surface.flags & RADEON_SURF_SBUFFER)
		s_info = S_028044_FORMAT(V_028044_STENCIL_8);
	else
		s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);

	if (sctx->b.chip_class >= CIK) {
		/* CIK+ programs the tiling parameters directly instead of
		 * using a tile mode index. */
		switch (rtex->surface.level[level].mode) {
		case RADEON_SURF_MODE_2D:
			array_mode = V_02803C_ARRAY_2D_TILED_THIN1;
			break;
		case RADEON_SURF_MODE_1D:
		case RADEON_SURF_MODE_LINEAR_ALIGNED:
		case RADEON_SURF_MODE_LINEAR:
		default:
			array_mode = V_02803C_ARRAY_1D_TILED_THIN1;
			break;
		}
		tile_split = rtex->surface.tile_split;
		stile_split = rtex->surface.stencil_tile_split;
		macro_aspect = rtex->surface.mtilea;
		bankw = rtex->surface.bankw;
		bankh = rtex->surface.bankh;
		tile_split = cik_tile_split(tile_split);
		stile_split = cik_tile_split(stile_split);
		macro_aspect = cik_macro_tile_aspect(macro_aspect);
		bankw = cik_bank_wh(bankw);
		bankh = cik_bank_wh(bankh);
		nbanks = si_num_banks(sscreen, rtex);
		tile_mode_index = si_tile_mode_index(rtex, level, false);
		pipe_config = cik_db_pipe_config(sscreen, tile_mode_index);

		db_depth_info |= S_02803C_ARRAY_MODE(array_mode) |
			S_02803C_PIPE_CONFIG(pipe_config) |
			S_02803C_BANK_WIDTH(bankw) |
			S_02803C_BANK_HEIGHT(bankh) |
			S_02803C_MACRO_TILE_ASPECT(macro_aspect) |
			S_02803C_NUM_BANKS(nbanks);
		z_info |= S_028040_TILE_SPLIT(tile_split);
		s_info |= S_028044_TILE_SPLIT(stile_split);
	} else {
		/* SI uses tile mode indices for both depth and stencil. */
		tile_mode_index = si_tile_mode_index(rtex, level, false);
		z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
		tile_mode_index = si_tile_mode_index(rtex, level, true);
		s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
	}

	/* HiZ aka depth buffer htile */
	/* use htile only for first level */
	if (rtex->htile_buffer && !level) {
		z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
			  S_028040_ALLOW_EXPCLEAR(1);

		if (rtex->surface.flags & RADEON_SURF_SBUFFER)
			s_info |= S_028044_ALLOW_EXPCLEAR(1);
		else
			/* Use all of the htile_buffer for depth if there's no stencil. */
			s_info |= S_028044_TILE_STENCIL_DISABLE(1);

		uint64_t va = rtex->htile_buffer->gpu_address;
		db_htile_data_base = va >> 8;
		db_htile_surface = S_028ABC_FULL_CACHE(1);
	} else {
		db_htile_data_base = 0;
		db_htile_surface = 0;
	}

	assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);

	surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
			      S_028008_SLICE_MAX(surf->base.u.tex.last_layer);
	surf->db_htile_data_base = db_htile_data_base;
	surf->db_depth_info = db_depth_info;
	surf->db_z_info = z_info;
	surf->db_stencil_info = s_info;
	/* DB base addresses are 256-byte aligned, hence the >> 8. */
	surf->db_depth_base = z_offs >> 8;
	surf->db_stencil_base = s_offs >> 8;
	surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) |
			      S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1);
	surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x *
							levelinfo->nblk_y) / 64 - 1);
	surf->db_htile_surface = db_htile_surface;
	surf->pa_su_poly_offset_db_fmt_cntl = pa_su_poly_offset_db_fmt_cntl;

	surf->depth_initialized = true;
}

/* pipe_context::set_framebuffer_state hook: validate the new surfaces,
 * recompute derived per-framebuffer state (SPI export formats, sample
 * counts, dirty masks) and mark the affected atoms dirty. */
static void si_set_framebuffer_state(struct pipe_context *ctx,
				     const struct pipe_framebuffer_state *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_constant_buffer constbuf = {0};
	struct r600_surface *surf = NULL;
	struct r600_texture *rtex;
	bool old_cb0_is_integer = sctx->framebuffer.cb0_is_integer;
	unsigned old_nr_samples = sctx->framebuffer.nr_samples;
	int i;

	/* Only flush TC when changing the framebuffer state, because
	 * the only client not using TC that can change textures is
	 * the framebuffer.
	 *
	 * Flush all CB and DB caches here because all buffers can be used
	 * for write by both TC (with shader image stores) and CB/DB.
	 */
	sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
			 SI_CONTEXT_INV_GLOBAL_L2 |
			 SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;

	/* Take the maximum of the old and new count. If the new count is lower,
	 * dirtying is needed to disable the unbound colorbuffers.
	 */
	sctx->framebuffer.dirty_cbufs |=
		(1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
	sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;

	util_copy_framebuffer_state(&sctx->framebuffer.state, state);

	/* Rebuild the packed per-cbuf SPI export formats from scratch. */
	sctx->framebuffer.spi_shader_col_format = 0;
	sctx->framebuffer.spi_shader_col_format_alpha = 0;
	sctx->framebuffer.spi_shader_col_format_blend = 0;
	sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
	sctx->framebuffer.color_is_int8 = 0;

	sctx->framebuffer.compressed_cb_mask = 0;
	sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
	sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
	sctx->framebuffer.cb0_is_integer = state->nr_cbufs && state->cbufs[0] &&
				  util_format_is_pure_integer(state->cbufs[0]->format);

	if (sctx->framebuffer.cb0_is_integer != old_cb0_is_integer)
		si_mark_atom_dirty(sctx, &sctx->db_render_state);

	for (i = 0; i < state->nr_cbufs; i++) {
		if (!state->cbufs[i])
			continue;

		surf = (struct r600_surface*)state->cbufs[i];
		rtex = (struct r600_texture*)surf->base.texture;

		/* Lazily compute the cached CB register values. */
		if (!surf->color_initialized) {
			si_initialize_color_surface(sctx, surf);
		}

		/* 4 bits of SPI format per color buffer. */
		sctx->framebuffer.spi_shader_col_format |=
			surf->spi_shader_col_format << (i * 4);
		sctx->framebuffer.spi_shader_col_format_alpha |=
			surf->spi_shader_col_format_alpha << (i * 4);
		sctx->framebuffer.spi_shader_col_format_blend |=
			surf->spi_shader_col_format_blend << (i * 4);
		sctx->framebuffer.spi_shader_col_format_blend_alpha |=
			surf->spi_shader_col_format_blend_alpha << (i * 4);

		if (surf->color_is_int8)
			sctx->framebuffer.color_is_int8 |= 1 << i;

		if (rtex->fmask.size && rtex->cmask.size) {
			sctx->framebuffer.compressed_cb_mask |= 1 << i;
		}
		r600_context_add_resource_size(ctx, surf->base.texture);
	}
	/* Set the second SPI format for possible dual-src blending. */
	if (i == 1 && surf) {
		sctx->framebuffer.spi_shader_col_format |=
			surf->spi_shader_col_format << (i * 4);
		sctx->framebuffer.spi_shader_col_format_alpha |=
			surf->spi_shader_col_format_alpha << (i * 4);
		sctx->framebuffer.spi_shader_col_format_blend |=
			surf->spi_shader_col_format_blend << (i * 4);
		sctx->framebuffer.spi_shader_col_format_blend_alpha |=
			surf->spi_shader_col_format_blend_alpha << (i * 4);
	}

	if (state->zsbuf) {
		surf = (struct r600_surface*)state->zsbuf;

		/* Lazily compute the cached DB register values. */
		if (!surf->depth_initialized) {
			si_init_depth_surface(sctx, surf);
		}
		r600_context_add_resource_size(ctx, surf->base.texture);
	}

	si_update_poly_offset_state(sctx);
	si_mark_atom_dirty(sctx, &sctx->cb_render_state);
	si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);

	if (sctx->framebuffer.nr_samples != old_nr_samples) {
		si_mark_atom_dirty(sctx, &sctx->msaa_config);
		si_mark_atom_dirty(sctx, &sctx->db_render_state);

		/* Set sample locations as fragment shader constants. */
		switch (sctx->framebuffer.nr_samples) {
		case 1:
			constbuf.user_buffer = sctx->b.sample_locations_1x;
			break;
		case 2:
			constbuf.user_buffer = sctx->b.sample_locations_2x;
			break;
		case 4:
			constbuf.user_buffer = sctx->b.sample_locations_4x;
			break;
		case 8:
			constbuf.user_buffer = sctx->b.sample_locations_8x;
			break;
		case 16:
			constbuf.user_buffer = sctx->b.sample_locations_16x;
			break;
		default:
			assert(0);
		}
		/* Two 32-bit words per sample. */
		constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
		ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT,
					 SI_DRIVER_STATE_CONST_BUF, &constbuf);

		/* Smoothing (only possible with nr_samples == 1) uses the same
		 * sample locations as the MSAA it simulates.
		 *
		 * Therefore, don't update the sample locations when
		 * transitioning from no AA to smoothing-equivalent AA, and
		 * vice versa.
		 */
		if ((sctx->framebuffer.nr_samples != 1 ||
		     old_nr_samples != SI_NUM_SMOOTH_AA_SAMPLES) &&
		    (sctx->framebuffer.nr_samples != SI_NUM_SMOOTH_AA_SAMPLES ||
		     old_nr_samples != 1))
			si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs);
	}
}

/* Atom emit callback: write the cached CB/DB register values of the
 * currently-bound framebuffer into the command stream, skipping buffers
 * whose dirty bit is not set. */
static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
	unsigned i, nr_cbufs = state->nr_cbufs;
	struct r600_texture *tex = NULL;
	struct r600_surface *cb = NULL;

	/* Colorbuffers. */
	for (i = 0; i < nr_cbufs; i++) {
		if (!(sctx->framebuffer.dirty_cbufs & (1 << i)))
			continue;

		cb = (struct r600_surface*)state->cbufs[i];
		if (!cb) {
			/* Unbound slot: disable it via an invalid format. */
			radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
					       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
			continue;
		}

		tex = (struct r600_texture *)cb->base.texture;
		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
				      &tex->resource, RADEON_USAGE_READWRITE,
				      tex->surface.nsamples > 1 ?
					      RADEON_PRIO_COLOR_BUFFER_MSAA :
					      RADEON_PRIO_COLOR_BUFFER);

		if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
				tex->cmask_buffer, RADEON_USAGE_READWRITE,
				RADEON_PRIO_CMASK);
		}

		if (tex->dcc_buffer && tex->dcc_buffer != &tex->resource) {
			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
				tex->dcc_buffer, RADEON_USAGE_READWRITE,
				RADEON_PRIO_DCC);
		}

		/* VI+ has one extra register (CB_COLOR0_DCC_BASE), so the
		 * sequence is 14 registers instead of 13. */
		radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
					   sctx->b.chip_class >= VI ? 14 : 13);
		radeon_emit(cs, cb->cb_color_base);	/* R_028C60_CB_COLOR0_BASE */
		radeon_emit(cs, cb->cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
		radeon_emit(cs, cb->cb_color_slice);	/* R_028C68_CB_COLOR0_SLICE */
		radeon_emit(cs, cb->cb_color_view);	/* R_028C6C_CB_COLOR0_VIEW */
		radeon_emit(cs, cb->cb_color_info | tex->cb_color_info); /* R_028C70_CB_COLOR0_INFO */
		radeon_emit(cs, cb->cb_color_attrib);	/* R_028C74_CB_COLOR0_ATTRIB */
		radeon_emit(cs, cb->cb_dcc_control);	/* R_028C78_CB_COLOR0_DCC_CONTROL */
		radeon_emit(cs, tex->cmask.base_address_reg);	/* R_028C7C_CB_COLOR0_CMASK */
		radeon_emit(cs, tex->cmask.slice_tile_max);	/* R_028C80_CB_COLOR0_CMASK_SLICE */
		radeon_emit(cs, cb->cb_color_fmask);		/* R_028C84_CB_COLOR0_FMASK */
		radeon_emit(cs, cb->cb_color_fmask_slice);	/* R_028C88_CB_COLOR0_FMASK_SLICE */
		radeon_emit(cs, tex->color_clear_value[0]);	/* R_028C8C_CB_COLOR0_CLEAR_WORD0 */
		radeon_emit(cs, tex->color_clear_value[1]);	/* R_028C90_CB_COLOR0_CLEAR_WORD1 */

		if (sctx->b.chip_class >= VI)
			radeon_emit(cs, cb->cb_dcc_base);	/* R_028C94_CB_COLOR0_DCC_BASE */
	}
	/* set CB_COLOR1_INFO for possible dual-src blending */
	if (i == 1 && state->cbufs[0] &&
	    sctx->framebuffer.dirty_cbufs & (1 << 0)) {
		radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
				       cb->cb_color_info | tex->cb_color_info);
		i++;
	}
	/* Disable the remaining (unbound) color buffer slots. */
	for (; i < 8 ; i++)
		if (sctx->framebuffer.dirty_cbufs & (1 << i))
			radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);

	/* ZS buffer. */
	if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
		struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
		struct r600_texture *rtex = (struct r600_texture*)zb->base.texture;

		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
				      &rtex->resource, RADEON_USAGE_READWRITE,
				      zb->base.texture->nr_samples > 1 ?
					      RADEON_PRIO_DEPTH_BUFFER_MSAA :
					      RADEON_PRIO_DEPTH_BUFFER);

		if (zb->db_htile_data_base) {
			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
					      rtex->htile_buffer, RADEON_USAGE_READWRITE,
					      RADEON_PRIO_HTILE);
		}

		radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
		radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);

		radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9);
		radeon_emit(cs, zb->db_depth_info);	/* R_02803C_DB_DEPTH_INFO */
		radeon_emit(cs, zb->db_z_info |		/* R_028040_DB_Z_INFO */
			    S_028040_ZRANGE_PRECISION(rtex->depth_clear_value != 0));
		radeon_emit(cs, zb->db_stencil_info);	/* R_028044_DB_STENCIL_INFO */
		radeon_emit(cs, zb->db_depth_base);	/* R_028048_DB_Z_READ_BASE */
		radeon_emit(cs, zb->db_stencil_base);	/* R_02804C_DB_STENCIL_READ_BASE */
		radeon_emit(cs, zb->db_depth_base);	/* R_028050_DB_Z_WRITE_BASE */
		radeon_emit(cs, zb->db_stencil_base);	/* R_028054_DB_STENCIL_WRITE_BASE */
		radeon_emit(cs, zb->db_depth_size);	/* R_028058_DB_DEPTH_SIZE */
		radeon_emit(cs, zb->db_depth_slice);	/* R_02805C_DB_DEPTH_SLICE */

		radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2);
		radeon_emit(cs, rtex->stencil_clear_value); /* R_028028_DB_STENCIL_CLEAR */
		radeon_emit(cs, fui(rtex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */

		radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
		radeon_set_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
				       zb->pa_su_poly_offset_db_fmt_cntl);
	} else if (sctx->framebuffer.dirty_zsbuf) {
		/* No ZS buffer bound: disable both depth and stencil. */
		radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
		radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */
		radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */
	}

	/* Framebuffer dimensions.
	 */
	/* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
	radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
			       S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));

	sctx->framebuffer.dirty_cbufs = 0;
	sctx->framebuffer.dirty_zsbuf = false;
}

/* Atom emit callback: program the MSAA sample locations. With a
 * single-sampled framebuffer the SI_NUM_SMOOTH_AA_SAMPLES pattern is
 * used (for line/polygon smoothing, which simulates MSAA). */
static void si_emit_msaa_sample_locs(struct si_context *sctx,
				     struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	unsigned nr_samples = sctx->framebuffer.nr_samples;

	cayman_emit_msaa_sample_locs(cs, nr_samples > 1 ? nr_samples :
						SI_NUM_SMOOTH_AA_SAMPLES);
}

/* Atom emit callback: program the MSAA configuration registers. */
static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;

	cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
				sctx->ps_iter_samples,
				sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0);
}


/* pipe_context::set_min_samples hook: set the minimum number of samples
 * the fragment shader must run for; only affects MSAA framebuffers. */
static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
{
	struct si_context *sctx = (struct si_context *)ctx;

	if (sctx->ps_iter_samples == min_samples)
		return;

	sctx->ps_iter_samples = min_samples;

	if (sctx->framebuffer.nr_samples > 1)
		si_mark_atom_dirty(sctx, &sctx->msaa_config);
}

/*
 * Samplers
 */

/**
 * Create a sampler view.
 *
 * @param ctx		context
 * @param texture	texture
 * @param state		sampler view template
 * @param width0	width0 override (for compressed textures as int)
 * @param height0	height0 override (for compressed textures as int)
 * @param force_level	set the base address to the level (for compressed textures)
 */
struct pipe_sampler_view *
si_create_sampler_view_custom(struct pipe_context *ctx,
			      struct pipe_resource *texture,
			      const struct pipe_sampler_view *state,
			      unsigned width0, unsigned height0,
			      unsigned force_level)
{
	struct si_context *sctx = (struct si_context*)ctx;
	struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view);
	struct r600_texture *tmp = (struct r600_texture*)texture;
	const struct util_format_description *desc;
	unsigned format, num_format, base_level, first_level, last_level;
	uint32_t pitch = 0;
	unsigned char state_swizzle[4], swizzle[4];
	unsigned height, depth, width;
	enum pipe_format pipe_format = state->format;
	struct radeon_surf_level *surflevel;
	int first_non_void;
	uint64_t va;
	unsigned last_layer = state->u.tex.last_layer;

	if (!view)
		return NULL;

	/* initialize base object */
	view->base = *state;
	view->base.texture = NULL;
	view->base.reference.count = 1;
	view->base.context = ctx;

	/* NULL resource, obey swizzle (only ZERO and ONE make sense). */
	if (!texture) {
		view->state[3] = S_008F1C_DST_SEL_X(si_map_swizzle(state->swizzle_r)) |
				 S_008F1C_DST_SEL_Y(si_map_swizzle(state->swizzle_g)) |
				 S_008F1C_DST_SEL_Z(si_map_swizzle(state->swizzle_b)) |
				 S_008F1C_DST_SEL_W(si_map_swizzle(state->swizzle_a)) |
				 S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D);
		return &view->base;
	}

	pipe_resource_reference(&view->base.texture, texture);
	view->resource = &tmp->resource;

	/* Stencil-only formats sample from the stencil plane of a Z/S
	 * resource; remember that so descriptors can be set up accordingly. */
	if (state->format == PIPE_FORMAT_X24S8_UINT ||
	    state->format == PIPE_FORMAT_S8X24_UINT ||
	    state->format == PIPE_FORMAT_X32_S8X24_UINT ||
	    state->format == PIPE_FORMAT_S8_UINT)
		view->is_stencil_sampler = true;

	/* Buffer resource: build a buffer descriptor (words 4-7) instead of
	 * an image descriptor, then track the view for buffer invalidation. */
	if (texture->target == PIPE_BUFFER) {
		unsigned stride, num_records;

		desc = util_format_description(state->format);
		first_non_void = util_format_get_first_non_void_channel(state->format);
		stride = desc->block.bits / 8;
		va = tmp->resource.gpu_address + state->u.buf.first_element*stride;
		format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
		num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);

		num_records = state->u.buf.last_element + 1 - state->u.buf.first_element;
		num_records = MIN2(num_records, texture->width0 / stride);

		/* On VI, NUM_RECORDS is counted in bytes, not elements. */
		if (sctx->b.chip_class >= VI)
			num_records *= stride;

		view->state[4] = va;
		view->state[5] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
				 S_008F04_STRIDE(stride);
		view->state[6] = num_records;
		view->state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
				 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
				 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
				 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
				 S_008F0C_NUM_FORMAT(num_format) |
				 S_008F0C_DATA_FORMAT(format);

		LIST_ADDTAIL(&view->list, &sctx->b.texture_buffers);
		return &view->base;
	}

	state_swizzle[0] = state->swizzle_r;
	state_swizzle[1] = state->swizzle_g;
	state_swizzle[2] = state->swizzle_b;
	state_swizzle[3] = state->swizzle_a;

	surflevel = tmp->surface.level;

	/* Texturing with separate depth and stencil: remap the view format
	 * to the format the data is actually stored in, and for stencil
	 * switch to the stencil plane's miptree layout. */
	if (tmp->is_depth && !tmp->is_flushing_texture) {
		switch (pipe_format) {
		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
			pipe_format = PIPE_FORMAT_Z32_FLOAT;
			break;
		case PIPE_FORMAT_X8Z24_UNORM:
		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
			/* Z24 is always stored like this. */
			pipe_format = PIPE_FORMAT_Z24X8_UNORM;
			break;
		case PIPE_FORMAT_X24S8_UINT:
		case PIPE_FORMAT_S8X24_UINT:
		case PIPE_FORMAT_X32_S8X24_UINT:
			pipe_format = PIPE_FORMAT_S8_UINT;
			surflevel = tmp->surface.stencil_level;
			break;
		default:;
		}
	}

	desc = util_format_description(pipe_format);

	/* For Z/S formats, the hardware returns the Z or S value in the first
	 * channel; route it (X for depth, Y-as-stencil cases below) through
	 * the user swizzle. */
	if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
		const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};

		switch (pipe_format) {
		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
		case PIPE_FORMAT_X24S8_UINT:
		case PIPE_FORMAT_X32_S8X24_UINT:
		case PIPE_FORMAT_X8Z24_UNORM:
			util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
			break;
		default:
			util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
		}
	} else {
		util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
	}

	first_non_void = util_format_get_first_non_void_channel(pipe_format);

	/* Derive the NUM_FORMAT field (how the hardware interprets the bits:
	 * UNORM/SNORM/SRGB/FLOAT/INT/SCALED) from the pipe format. */
	switch (pipe_format) {
	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
		num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
		break;
	default:
		if (first_non_void < 0) {
			/* No non-void channel: compressed or subsampled. */
			if (util_format_is_compressed(pipe_format)) {
				switch (pipe_format) {
				case PIPE_FORMAT_DXT1_SRGB:
				case PIPE_FORMAT_DXT1_SRGBA:
				case PIPE_FORMAT_DXT3_SRGBA:
				case PIPE_FORMAT_DXT5_SRGBA:
				case PIPE_FORMAT_BPTC_SRGBA:
				case PIPE_FORMAT_ETC2_SRGB8:
				case PIPE_FORMAT_ETC2_SRGB8A1:
				case PIPE_FORMAT_ETC2_SRGBA8:
					num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
					break;
				case PIPE_FORMAT_RGTC1_SNORM:
				case PIPE_FORMAT_LATC1_SNORM:
				case PIPE_FORMAT_RGTC2_SNORM:
				case PIPE_FORMAT_LATC2_SNORM:
				case PIPE_FORMAT_ETC2_R11_SNORM:
				case PIPE_FORMAT_ETC2_RG11_SNORM:
				/* implies float, so use SNORM/UNORM to determine
				   whether data is signed or not */
				case PIPE_FORMAT_BPTC_RGB_FLOAT:
					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
					break;
				default:
					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
					break;
				}
			} else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
				num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
			} else {
				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
			}
		} else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
			num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
		} else {
			num_format = V_008F14_IMG_NUM_FORMAT_UNORM;

			switch (desc->channel[first_non_void].type) {
			case UTIL_FORMAT_TYPE_FLOAT:
				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
				break;
			case UTIL_FORMAT_TYPE_SIGNED:
				if (desc->channel[first_non_void].normalized)
					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
				else if (desc->channel[first_non_void].pure_integer)
					num_format = V_008F14_IMG_NUM_FORMAT_SINT;
				else
					num_format = V_008F14_IMG_NUM_FORMAT_SSCALED;
				break;
			case UTIL_FORMAT_TYPE_UNSIGNED:
				if (desc->channel[first_non_void].normalized)
					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
				else if (desc->channel[first_non_void].pure_integer)
					num_format = V_008F14_IMG_NUM_FORMAT_UINT;
				else
					num_format = V_008F14_IMG_NUM_FORMAT_USCALED;
			}
		}
	}

	/* Untranslatable format: fall back to DATA_FORMAT 0 (invalid). */
	format = si_translate_texformat(ctx->screen, pipe_format, desc, first_non_void);
	if (format == ~0) {
		format = 0;
	}

	base_level = 0;
	first_level = state->u.tex.first_level;
	last_level = state->u.tex.last_level;
	width = width0;
	height = height0;
	depth = texture->depth0;

	/* Point the descriptor base at a single mip level (used for
	 * compressed-texture uploads per the @force_level doc above). */
	if (force_level) {
		assert(force_level == first_level &&
		       force_level == last_level);
		base_level = force_level;
		first_level = 0;
		last_level = 0;
		width = u_minify(width, force_level);
		height = u_minify(height, force_level);
		depth = u_minify(depth, force_level);
	}

	pitch = surflevel[base_level].nblk_x * util_format_get_blockwidth(pipe_format);

	/* For array textures, DEPTH holds the layer count (cube arrays count
	 * in whole cubes of 6 faces). */
	if (texture->target == PIPE_TEXTURE_1D_ARRAY) {
		height = 1;
		depth = texture->array_size;
	} else if (texture->target == PIPE_TEXTURE_2D_ARRAY) {
		depth = texture->array_size;
	} else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY)
		depth = texture->array_size / 6;

	/* This is not needed if state trackers set last_layer correctly. */
	if (state->target == PIPE_TEXTURE_1D ||
	    state->target == PIPE_TEXTURE_2D ||
	    state->target == PIPE_TEXTURE_RECT ||
	    state->target == PIPE_TEXTURE_CUBE)
		last_layer = state->u.tex.first_layer;

	va = tmp->resource.gpu_address + surflevel[base_level].offset;

	/* Fill the 8-dword SQ image resource descriptor.  For MSAA textures
	 * the level fields encode the sample count instead of mip levels. */
	view->state[0] = va >> 8;
	view->state[1] = (S_008F14_BASE_ADDRESS_HI(va >> 40) |
			  S_008F14_DATA_FORMAT(format) |
			  S_008F14_NUM_FORMAT(num_format));
	view->state[2] = (S_008F18_WIDTH(width - 1) |
			  S_008F18_HEIGHT(height - 1));
	view->state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
			  S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
			  S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
			  S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
			  S_008F1C_BASE_LEVEL(texture->nr_samples > 1 ?
						      0 : first_level) |
			  S_008F1C_LAST_LEVEL(texture->nr_samples > 1 ?
						      util_logbase2(texture->nr_samples) :
						      last_level) |
			  S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, base_level, false)) |
			  S_008F1C_POW2_PAD(texture->last_level > 0) |
			  S_008F1C_TYPE(si_tex_dim(texture->target, state->target,
						   texture->nr_samples)));
	view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1));
	view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
			  S_008F24_LAST_ARRAY(last_layer));

	/* Words 6-7 enable DCC compression when the texture has a DCC buffer. */
	if (tmp->dcc_buffer) {
		uint64_t dcc_offset = surflevel[base_level].dcc_offset;
		unsigned swap = r600_translate_colorswap(pipe_format);

		view->state[6] = S_008F28_COMPRESSION_EN(1) | S_008F28_ALPHA_IS_ON_MSB(swap <= 1);
		view->state[7] = (tmp->dcc_buffer->gpu_address + dcc_offset) >> 8;
		view->dcc_buffer = tmp->dcc_buffer;
	} else {
		view->state[6] = 0;
		view->state[7] = 0;
	}

	/* Initialize the sampler view for FMASK. */
	if (tmp->fmask.size) {
		uint64_t va = tmp->resource.gpu_address + tmp->fmask.offset;
		uint32_t fmask_format;

		switch (texture->nr_samples) {
		case 2:
			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
			break;
		case 4:
			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
			break;
		case 8:
			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
			break;
		default:
			assert(0);
			fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
		}

		view->fmask_state[0] = va >> 8;
		view->fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
				       S_008F14_DATA_FORMAT(fmask_format) |
				       S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT);
		view->fmask_state[2] = S_008F18_WIDTH(width - 1) |
				       S_008F18_HEIGHT(height - 1);
		view->fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
				       S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
				       S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
				       S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
				       S_008F1C_TILING_INDEX(tmp->fmask.tile_mode_index) |
				       S_008F1C_TYPE(si_tex_dim(texture->target,
								state->target, 0));
		view->fmask_state[4] = S_008F20_DEPTH(depth - 1) |
				       S_008F20_PITCH(tmp->fmask.pitch_in_pixels - 1);
		view->fmask_state[5] = S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
				       S_008F24_LAST_ARRAY(last_layer);
		view->fmask_state[6] = 0;
		view->fmask_state[7] = 0;
	}

	return &view->base;
}

/* Gallium create_sampler_view hook: thin wrapper that uses the texture's
 * own dimensions (no overrides, no forced level). */
static struct pipe_sampler_view *
si_create_sampler_view(struct pipe_context *ctx,
		       struct pipe_resource *texture,
		       const struct pipe_sampler_view *state)
{
	return si_create_sampler_view_custom(ctx, texture, state,
					     texture ? texture->width0 : 0,
					     texture ? texture->height0 : 0, 0);
}

/* Gallium sampler_view_destroy hook: unlink buffer views from the
 * texture-buffer list, drop the texture reference, and free the view. */
static void si_sampler_view_destroy(struct pipe_context *ctx,
				   struct pipe_sampler_view *state)
{
	struct si_sampler_view *view = (struct si_sampler_view *)state;

	if (view->resource && view->resource->b.b.target == PIPE_BUFFER)
		LIST_DELINIT(&view->list);

	pipe_resource_reference(&state->texture, NULL);
	FREE(view);
}

/* Whether a wrap mode can ever sample the border color.  CLAMP and
 * MIRROR_CLAMP only reach the border when linear filtering blends with it. */
static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter)
{
	return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
	       wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER ||
	       (linear_filter &&
	        (wrap == PIPE_TEX_WRAP_CLAMP ||
		 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP));
}

/* Whether the sampler's border color matters at all: it must be non-zero
 * and at least one wrap axis must be able to sample it. */
static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state)
{
	bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
			     state->mag_img_filter != PIPE_TEX_FILTER_NEAREST;

	return (state->border_color.ui[0] || state->border_color.ui[1] ||
		state->border_color.ui[2] || state->border_color.ui[3]) &&
	       (wrap_mode_uses_border_color(state->wrap_s, linear_filter) ||
		wrap_mode_uses_border_color(state->wrap_t, linear_filter) ||
		wrap_mode_uses_border_color(state->wrap_r, linear_filter));
}

/* Gallium create_sampler_state hook: translate a pipe_sampler_state into
 * the 4-dword SQ sampler resource words, choosing a hardware border-color
 * type and uploading custom border colors to the shared table. */
static void *si_create_sampler_state(struct pipe_context *ctx,
				     const struct pipe_sampler_state *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
	unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0;
	unsigned border_color_type, border_color_index = 0;

	if (!rstate) {
		return NULL;
	}

	/* Map the border color onto one of the three built-in hardware
	 * colors when possible; otherwise fall back to the uploaded table. */
	if (!sampler_state_needs_border_color(state))
		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
	else if (state->border_color.f[0] == 0 &&
		 state->border_color.f[1] == 0 &&
		 state->border_color.f[2] == 0 &&
		 state->border_color.f[3] == 0)
		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
	else if (state->border_color.f[0] == 0 &&
		 state->border_color.f[1] == 0 &&
		 state->border_color.f[2] == 0 &&
		 state->border_color.f[3] == 1)
		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
	else if (state->border_color.f[0] == 1 &&
		 state->border_color.f[1] == 1 &&
		 state->border_color.f[2] == 1 &&
		 state->border_color.f[3] == 1)
		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
	else {
		int i;

		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;

		/* Check if the border has been uploaded already. */
		for (i = 0; i < sctx->border_color_count; i++)
			if (memcmp(&sctx->border_color_table[i], &state->border_color,
				   sizeof(state->border_color)) == 0)
				break;

		if (i >= SI_MAX_BORDER_COLORS) {
			/* Getting 4096 unique border colors is very unlikely. */
			fprintf(stderr, "radeonsi: The border color table is full. "
				"Any new border colors will be just black. "
				"Please file a bug.\n");
			border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
		} else {
			if (i == sctx->border_color_count) {
				/* Upload a new border color. */
				memcpy(&sctx->border_color_table[i], &state->border_color,
				       sizeof(state->border_color));
				util_memcpy_cpu_to_le32(&sctx->border_color_map[i],
							&state->border_color,
							sizeof(state->border_color));
				sctx->border_color_count++;
			}

			border_color_index = i;
		}
	}

	rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
			  S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
			  S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
			  r600_tex_aniso_filter(state->max_anisotropy) << 9 |
			  S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |
			  S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
			  S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map));
	rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
			  S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)));
	rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
			  S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter) | aniso_flag_offset) |
			  S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter) | aniso_flag_offset) |
			  S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)));
	rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) |
			 S_008F3C_BORDER_COLOR_TYPE(border_color_type);
	return rstate;
}

/* Gallium set_sample_mask hook: store the mask and mark the atom dirty,
 * skipping the no-op case. */
static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
{
	struct si_context *sctx = (struct si_context *)ctx;

	if (sctx->sample_mask.sample_mask == (uint16_t)sample_mask)
		return;

	sctx->sample_mask.sample_mask = sample_mask;
	si_mark_atom_dirty(sctx, &sctx->sample_mask.atom);
}

/* Atom emitter: write the AA sample mask to both PA_SC_AA_MASK registers
 * (the 16-bit mask is duplicated into the high half of each). */
static void si_emit_sample_mask(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	unsigned mask = sctx->sample_mask.sample_mask;

	radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
	radeon_emit(cs, mask | (mask << 16));
	radeon_emit(cs, mask | (mask << 16));
}

/* Gallium delete_sampler_state hook.
 * NOTE(review): this uses bare free() while the state was allocated with
 * CALLOC_STRUCT; elsewhere in this file FREE() is paired with it — verify
 * that free/FREE are interchangeable in this build configuration. */
static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
{
	free(state);
}

/*
 * Vertex elements & buffers
 */

/* Gallium create_vertex_elements_state hook: precompute the resource-word-3
 * (dst_sel/num_format/data_format) and element size for each vertex element. */
static void *si_create_vertex_elements(struct pipe_context *ctx,
				       unsigned count,
				       const struct pipe_vertex_element *elements)
{
	struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
	int i;

	assert(count < SI_MAX_ATTRIBS);
	if (!v)
		return NULL;

	v->count = count;
	for (i = 0; i < count; ++i) {
		const struct util_format_description *desc;
		unsigned data_format, num_format;
		int first_non_void;

		desc = util_format_description(elements[i].src_format);
		first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
		data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
		num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);

		v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
				   S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
				   S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
				   S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
				   S_008F0C_NUM_FORMAT(num_format) |
				   S_008F0C_DATA_FORMAT(data_format);
		v->format_size[i] = desc->block.bits / 8;
	}
	memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count);

	return v;
}

/* Gallium bind_vertex_elements_state hook. */
static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_vertex_element *v = (struct si_vertex_element*)state;

	sctx->vertex_elements = v;
	/* Vertex buffer descriptors depend on the bound elements. */
	sctx->vertex_buffers_dirty = true;
}

/* Gallium delete_vertex_elements_state hook: clear the binding if the
 * deleted state is currently bound. */
static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;

	if (sctx->vertex_elements == state)
		sctx->vertex_elements = NULL;
	FREE(state);
}

/* Gallium set_vertex_buffers hook: copy/reference the given buffers into
 * the slot range, or unreference the range when buffers == NULL. */
static void si_set_vertex_buffers(struct pipe_context *ctx,
				  unsigned start_slot, unsigned count,
				  const struct pipe_vertex_buffer *buffers)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot;
	int i;

	assert(start_slot + count <= Elements(sctx->vertex_buffer));

	if (buffers) {
		for (i = 0; i < count; i++) {
			const struct pipe_vertex_buffer *src = buffers + i;
			struct pipe_vertex_buffer *dsti = dst + i;

			pipe_resource_reference(&dsti->buffer, src->buffer);
			dsti->buffer_offset = src->buffer_offset;
			dsti->stride = src->stride;
			r600_context_add_resource_size(ctx, src->buffer);
		}
	} else {
		for (i = 0; i < count; i++) {
			pipe_resource_reference(&dst[i].buffer, NULL);
		}
	}
	sctx->vertex_buffers_dirty = true;
}

/* Gallium set_index_buffer hook: take a reference first, then copy the
 * whole descriptor (the reference keeps the buffer alive across the memcpy). */
static void si_set_index_buffer(struct pipe_context *ctx,
				const struct pipe_index_buffer *ib)
{
	struct si_context *sctx = (struct si_context *)ctx;

	if (ib) {
		pipe_resource_reference(&sctx->index_buffer.buffer, ib->buffer);
		memcpy(&sctx->index_buffer, ib, sizeof(*ib));
		r600_context_add_resource_size(ctx, ib->buffer);
	} else {
		pipe_resource_reference(&sctx->index_buffer.buffer, NULL);
	}
}

/*
 * Misc
 */

/* Gallium set_polygon_stipple hook: bind the stipple pattern as a texture
 * (or as a constant-swizzle NULL view when the pattern is all-0/all-1). */
static void si_set_polygon_stipple(struct pipe_context *ctx,
				   const struct pipe_poly_stipple *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_resource *tex;
	struct pipe_sampler_view *view;
	bool is_zero = true;
	bool is_one = true;
	int i;

	/* The hardware obeys 0 and 1 swizzles in the descriptor even if
	 * the resource is NULL/invalid. Take advantage of this fact and skip
	 * texture allocation if the stipple pattern is constant.
	 *
	 * This is an optimization for the common case when stippling isn't
	 * used but set_polygon_stipple is still called by st/mesa.
	 */
	for (i = 0; i < Elements(state->stipple); i++) {
		is_zero = is_zero && state->stipple[i] == 0;
		is_one = is_one && state->stipple[i] == 0xffffffff;
	}

	if (is_zero || is_one) {
		struct pipe_sampler_view templ = {{0}};

		templ.swizzle_r = PIPE_SWIZZLE_ZERO;
		templ.swizzle_g = PIPE_SWIZZLE_ZERO;
		templ.swizzle_b = PIPE_SWIZZLE_ZERO;
		/* The pattern should be inverted in the texture. */
		templ.swizzle_a = is_zero ? PIPE_SWIZZLE_ONE : PIPE_SWIZZLE_ZERO;

		view = ctx->create_sampler_view(ctx, NULL, &templ);
	} else {
		/* Create a new texture. */
		tex = util_pstipple_create_stipple_texture(ctx, state->stipple);
		if (!tex)
			return;

		view = util_pstipple_create_sampler_view(ctx, tex);
		pipe_resource_reference(&tex, NULL);
	}

	ctx->set_sampler_views(ctx, PIPE_SHADER_FRAGMENT,
			       SI_POLY_STIPPLE_SAMPLER, 1, &view);
	pipe_sampler_view_reference(&view, NULL);

	/* Bind the sampler state if needed. */
	if (!sctx->pstipple_sampler_state) {
		sctx->pstipple_sampler_state = util_pstipple_create_sampler(ctx);
		ctx->bind_sampler_states(ctx, PIPE_SHADER_FRAGMENT,
					 SI_POLY_STIPPLE_SAMPLER, 1,
					 &sctx->pstipple_sampler_state);
	}
}

/* Gallium set_tess_state hook: pack the default outer/inner tess levels
 * into a small constant buffer bound to the TCS driver-state slot. */
static void si_set_tess_state(struct pipe_context *ctx,
			      const float default_outer_level[4],
			      const float default_inner_level[2])
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_constant_buffer cb;
	float array[8];

	memcpy(array, default_outer_level, sizeof(float) * 4);
	memcpy(array+4, default_inner_level, sizeof(float) * 2);

	cb.buffer = NULL;
	cb.user_buffer = NULL;
	cb.buffer_size = sizeof(array);

	si_upload_const_buffer(sctx, (struct r600_resource**)&cb.buffer,
			       (void*)array, sizeof(array),
			       &cb.buffer_offset);

	ctx->set_constant_buffer(ctx, PIPE_SHADER_TESS_CTRL,
				 SI_DRIVER_STATE_CONST_BUF, &cb);
	pipe_resource_reference(&cb.buffer, NULL);
}

/* Gallium texture_barrier hook: request cache flushes so prior CB writes
 * become visible to texture reads. */
static void si_texture_barrier(struct pipe_context *ctx)
{
	struct si_context *sctx = (struct si_context *)ctx;

	sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
			 SI_CONTEXT_INV_GLOBAL_L2 |
			 SI_CONTEXT_FLUSH_AND_INV_CB;
}

/* Build an internal blend state with the given CB override mode
 * (resolve / FMASK decompress / eliminate fast clear). */
static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
{
	struct pipe_blend_state blend;

	memset(&blend, 0, sizeof(blend));
	blend.independent_blend_enable = true;
	blend.rt[0].colormask = 0xf;
	return si_create_blend_state_mode(&sctx->b.b, &blend, mode);
}

/* Common need_gfx_cs_space callback; the num_dw/include_draw_vbo hints are
 * unused here and si_need_cs_space decides on its own. */
static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
				 bool include_draw_vbo)
{
	si_need_cs_space((struct si_context*)ctx);
}

static void si_init_config(struct si_context *sctx);

/* Register all state atoms, pipe_context entry points, and internal custom
 * states for a new context, then emit the one-time init config. */
void si_init_state_functions(struct si_context *sctx)
{
	si_init_external_atom(sctx, &sctx->b.render_cond_atom, &sctx->atoms.s.render_cond);
	si_init_external_atom(sctx, &sctx->b.streamout.begin_atom, &sctx->atoms.s.streamout_begin);
	si_init_external_atom(sctx, &sctx->b.streamout.enable_atom, &sctx->atoms.s.streamout_enable);

	si_init_atom(sctx, &sctx->cache_flush, &sctx->atoms.s.cache_flush, si_emit_cache_flush);
	si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state);
	si_init_atom(sctx, &sctx->msaa_sample_locs, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs);
	si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state);
	si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config);
	si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask);
	si_init_atom(sctx, &sctx->cb_render_state, &sctx->atoms.s.cb_render_state, si_emit_cb_render_state);
	si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color);
	si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs);
	si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state);
	si_init_atom(sctx, &sctx->scissors.atom, &sctx->atoms.s.scissors, si_emit_scissors);
	si_init_atom(sctx, &sctx->viewports.atom, &sctx->atoms.s.viewports, si_emit_viewports);
	si_init_atom(sctx, &sctx->stencil_ref.atom, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref);

	sctx->b.b.create_blend_state = si_create_blend_state;
	sctx->b.b.bind_blend_state = si_bind_blend_state;
	sctx->b.b.delete_blend_state = si_delete_blend_state;
	sctx->b.b.set_blend_color = si_set_blend_color;

	sctx->b.b.create_rasterizer_state = si_create_rs_state;
	sctx->b.b.bind_rasterizer_state = si_bind_rs_state;
	sctx->b.b.delete_rasterizer_state = si_delete_rs_state;

	sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state;
	sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state;
	sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state;

	sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx);
	sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE);
	sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS);
	sctx->custom_blend_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR);

	sctx->b.b.set_clip_state = si_set_clip_state;
	sctx->b.b.set_scissor_states = si_set_scissor_states;
	sctx->b.b.set_viewport_states = si_set_viewport_states;
	sctx->b.b.set_stencil_ref = si_set_stencil_ref;

	sctx->b.b.set_framebuffer_state = si_set_framebuffer_state;
	sctx->b.b.get_sample_position = cayman_get_sample_position;

	sctx->b.b.create_sampler_state = si_create_sampler_state;
	sctx->b.b.delete_sampler_state = si_delete_sampler_state;

	sctx->b.b.create_sampler_view = si_create_sampler_view;
	sctx->b.b.sampler_view_destroy = si_sampler_view_destroy;

	sctx->b.b.set_sample_mask = si_set_sample_mask;

	sctx->b.b.create_vertex_elements_state = si_create_vertex_elements;
	sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements;
	sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element;
	sctx->b.b.set_vertex_buffers = si_set_vertex_buffers;
	sctx->b.b.set_index_buffer = si_set_index_buffer;

	sctx->b.b.texture_barrier = si_texture_barrier;
	sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
	sctx->b.b.set_min_samples = si_set_min_samples;
	sctx->b.b.set_tess_state = si_set_tess_state;

	sctx->b.set_occlusion_query_state = si_set_occlusion_query_state;
	sctx->b.need_gfx_cs_space = si_need_gfx_cs_space;

	sctx->b.b.draw_vbo = si_draw_vbo;

	/* SDMA copies use different packets on SI vs CIK+. */
	if (sctx->b.chip_class >= CIK) {
		sctx->b.dma_copy = cik_sdma_copy;
	} else {
		sctx->b.dma_copy = si_dma_copy;
	}

	si_init_config(sctx);
}

/* Program PA_SC_RASTER_CONFIG(_1) per shader engine for chips with
 * harvested (disabled) render backends, remapping SE/packer/RB maps away
 * from disabled RBs according to enabled_rb_mask. */
static void
si_write_harvested_raster_configs(struct si_context *sctx,
				  struct si_pm4_state *pm4,
				  unsigned raster_config,
				  unsigned raster_config_1)
{
	unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1);
	unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1);
	unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask;
	unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16);
	unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Per-SE slice of the enabled-RB mask. */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	assert(num_se == 1 || num_se == 2 || num_se == 4);
	assert(sh_per_se == 1 || sh_per_se == 2);
	assert(rb_per_pkr == 1 || rb_per_pkr == 2);

	/* XXX: I can't figure out what the *_XSEL and *_YSEL
	 * fields are for, so I'm leaving them as their default
	 * values. */

	/* If an entire SE pair has no enabled RBs, point SE_PAIR_MAP at the
	 * other pair. */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= C_028354_SE_PAIR_MAP;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* Remap SE_MAP if one SE of the pair is fully harvested. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= C_028350_SE_MAP;

			if (!se_mask[idx]) {
				raster_config_se |=
					S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |=
					S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* Remap PKR_MAP if one packer of this SE has no enabled RBs. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= C_028350_PKR_MAP;

			if (!pkr0_mask) {
				raster_config_se |=
					S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |=
					S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* Remap the per-packer RB maps around disabled RBs. */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= C_028350_RB_MAP_PKR0;

				if (!rb0_mask) {
					raster_config_se |=
						S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= C_028350_RB_MAP_PKR1;

					if (!rb0_mask) {
						raster_config_se |=
							S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on SI and CI+ */
		if (sctx->b.chip_class < CIK)
			si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
				       SE_INDEX(se) | SH_BROADCAST_WRITES |
				       INSTANCE_BROADCAST_WRITES);
		else
			si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,
				       S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) |
				       S_030800_INSTANCE_BROADCAST_WRITES(1));
		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se);
		if (sctx->b.chip_class >= CIK)
			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on SI and CI+ */
	if (sctx->b.chip_class < CIK)
		si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
			       SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
			       INSTANCE_BROADCAST_WRITES);
	else
		si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,
			       S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
			       S_030800_INSTANCE_BROADCAST_WRITES(1));
}

/* One-time context init: build the PM4 state with the static register
 * defaults.  (Continues past the end of this chunk.) */
static void si_init_config(struct si_context *sctx)
{
	struct si_screen *sscreen = sctx->screen;
	unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16);
	unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask;
	unsigned raster_config, raster_config_1;
	uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
	int i;

	if (!pm4)
		return;

	si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL);
	si_pm4_cmd_add(pm4, 0x80000000);
	si_pm4_cmd_add(pm4, 0x80000000);
	si_pm4_cmd_end(pm4, false);

	si_pm4_set_reg(pm4,
R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); 3721 si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); 3722 3723 /* FIXME calculate these values somehow ??? */ 3724 si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES); 3725 si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40); 3726 si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2); 3727 3728 si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0); 3729 si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); 3730 3731 si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); 3732 si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0); 3733 if (sctx->b.chip_class < CIK) 3734 si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | 3735 S_008A14_CLIP_VTX_REORDER_ENA(1)); 3736 3737 si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210); 3738 si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98); 3739 3740 si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0); 3741 3742 for (i = 0; i < 16; i++) { 3743 si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, 0); 3744 si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, fui(1.0)); 3745 } 3746 3747 switch (sctx->screen->b.family) { 3748 case CHIP_TAHITI: 3749 case CHIP_PITCAIRN: 3750 raster_config = 0x2a00126a; 3751 raster_config_1 = 0x00000000; 3752 break; 3753 case CHIP_VERDE: 3754 raster_config = 0x0000124a; 3755 raster_config_1 = 0x00000000; 3756 break; 3757 case CHIP_OLAND: 3758 raster_config = 0x00000082; 3759 raster_config_1 = 0x00000000; 3760 break; 3761 case CHIP_HAINAN: 3762 raster_config = 0x00000000; 3763 raster_config_1 = 0x00000000; 3764 break; 3765 case CHIP_BONAIRE: 3766 raster_config = 0x16000012; 3767 raster_config_1 = 0x00000000; 3768 break; 3769 case CHIP_HAWAII: 3770 raster_config = 0x3a00161a; 3771 raster_config_1 = 0x0000002e; 3772 break; 3773 case CHIP_FIJI: 3774 if (sscreen->b.info.cik_macrotile_mode_array[0] == 0x000000e8) { 3775 /* old kernels with old tiling config */ 3776 raster_config 
= 0x16000012; 3777 raster_config_1 = 0x0000002a; 3778 } else { 3779 raster_config = 0x3a00161a; 3780 raster_config_1 = 0x0000002e; 3781 } 3782 break; 3783 case CHIP_TONGA: 3784 raster_config = 0x16000012; 3785 raster_config_1 = 0x0000002a; 3786 break; 3787 case CHIP_ICELAND: 3788 raster_config = 0x00000002; 3789 raster_config_1 = 0x00000000; 3790 break; 3791 case CHIP_CARRIZO: 3792 raster_config = 0x00000002; 3793 raster_config_1 = 0x00000000; 3794 break; 3795 case CHIP_KAVERI: 3796 /* KV should be 0x00000002, but that causes problems with radeon */ 3797 raster_config = 0x00000000; /* 0x00000002 */ 3798 raster_config_1 = 0x00000000; 3799 break; 3800 case CHIP_KABINI: 3801 case CHIP_MULLINS: 3802 case CHIP_STONEY: 3803 raster_config = 0x00000000; 3804 raster_config_1 = 0x00000000; 3805 break; 3806 default: 3807 fprintf(stderr, 3808 "radeonsi: Unknown GPU, using 0 for raster_config\n"); 3809 raster_config = 0x00000000; 3810 raster_config_1 = 0x00000000; 3811 break; 3812 } 3813 3814 /* Always use the default config when all backends are enabled 3815 * (or when we failed to determine the enabled backends). 
3816 */ 3817 if (!rb_mask || util_bitcount(rb_mask) >= num_rb) { 3818 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 3819 raster_config); 3820 if (sctx->b.chip_class >= CIK) 3821 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 3822 raster_config_1); 3823 } else { 3824 si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1); 3825 } 3826 3827 si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); 3828 si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); 3829 si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR, 3830 S_028244_BR_X(16384) | S_028244_BR_Y(16384)); 3831 si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0); 3832 si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR, 3833 S_028034_BR_X(16384) | S_028034_BR_Y(16384)); 3834 3835 si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); 3836 si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); 3837 /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */ 3838 si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); 3839 si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0); 3840 si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, fui(1.0)); 3841 si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0)); 3842 si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, fui(1.0)); 3843 si_pm4_set_reg(pm4, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, fui(1.0)); 3844 si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); 3845 si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); 3846 si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0); 3847 si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 3848 S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | 3849 S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE)); 3850 3851 si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0); 3852 si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0); 3853 si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0); 3854 3855 if 
(sctx->b.chip_class >= CIK) { 3856 si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0); 3857 si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff)); 3858 si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff)); 3859 3860 if (sscreen->b.info.num_good_compute_units / 3861 (sscreen->b.info.max_se * sscreen->b.info.max_sh_per_se) <= 4) { 3862 /* Too few available compute units per SH. Disallowing 3863 * VS to run on CU0 could hurt us more than late VS 3864 * allocation would help. 3865 * 3866 * LATE_ALLOC_VS = 2 is the highest safe number. 3867 */ 3868 si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff)); 3869 si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff)); 3870 si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2)); 3871 } else { 3872 /* Set LATE_ALLOC_VS == 31. It should be less than 3873 * the number of scratch waves. Limitations: 3874 * - VS can't execute on CU0. 3875 * - If HS writes outputs to LDS, LS can't execute on CU0. 
3876 */ 3877 si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xfffe)); 3878 si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe)); 3879 si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31)); 3880 } 3881 3882 si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff)); 3883 } 3884 3885 if (sctx->b.chip_class >= VI) { 3886 si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL, 3887 S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) | 3888 S_028424_OVERWRITE_COMBINER_WATERMARK(4)); 3889 si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30); 3890 si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32); 3891 } 3892 3893 if (sctx->b.family == CHIP_STONEY) 3894 si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0); 3895 3896 si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); 3897 if (sctx->b.chip_class >= CIK) 3898 si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40); 3899 si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ, 3900 RADEON_PRIO_BORDER_COLORS); 3901 3902 si_pm4_upload_indirect_buffer(sctx, pm4); 3903 sctx->init_config = pm4; 3904} 3905