si_state.c revision fa913a2dc6aefabdb9c6e927ad7095e89ffe0211
1/* 2 * Copyright 2012 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Christian König <christian.koenig@amd.com> 25 */ 26 27#include "si_pipe.h" 28#include "si_shader.h" 29#include "sid.h" 30#include "radeon/r600_cs.h" 31 32#include "util/u_format.h" 33#include "util/u_format_s3tc.h" 34#include "util/u_memory.h" 35#include "util/u_pstipple.h" 36 37static void si_init_atom(struct r600_atom *atom, struct r600_atom **list_elem, 38 void (*emit)(struct si_context *ctx, struct r600_atom *state), 39 unsigned num_dw) 40{ 41 atom->emit = (void*)emit; 42 atom->num_dw = num_dw; 43 atom->dirty = false; 44 *list_elem = atom; 45} 46 47uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex) 48{ 49 if (sscreen->b.chip_class == CIK && 50 sscreen->b.info.cik_macrotile_mode_array_valid) { 51 unsigned index, tileb; 52 53 tileb = 8 * 8 * tex->surface.bpe; 54 tileb = MIN2(tex->surface.tile_split, tileb); 55 56 for (index = 0; tileb > 64; index++) { 57 tileb >>= 1; 58 } 59 assert(index < 16); 60 61 return (sscreen->b.info.cik_macrotile_mode_array[index] >> 6) & 0x3; 62 } 63 64 if (sscreen->b.chip_class == SI && 65 sscreen->b.info.si_tile_mode_array_valid) { 66 /* Don't use stencil_tiling_index, because num_banks is always 67 * read from the depth mode. */ 68 unsigned tile_mode_index = tex->surface.tiling_index[0]; 69 assert(tile_mode_index < 32); 70 71 return G_009910_NUM_BANKS(sscreen->b.info.si_tile_mode_array[tile_mode_index]); 72 } 73 74 /* The old way. */ 75 switch (sscreen->b.tiling_info.num_banks) { 76 case 2: 77 return V_02803C_ADDR_SURF_2_BANK; 78 case 4: 79 return V_02803C_ADDR_SURF_4_BANK; 80 case 8: 81 default: 82 return V_02803C_ADDR_SURF_8_BANK; 83 case 16: 84 return V_02803C_ADDR_SURF_16_BANK; 85 } 86} 87 88unsigned cik_tile_split(unsigned tile_split) 89{ 90 switch (tile_split) { 91 case 64: 92 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_64B; 93 break; 94 case 128: 95 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_128B; 96 break; 97 case 256: 98 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_256B; 99 break; 100 case 512: 101 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_512B; 102 break; 103 default: 104 case 1024: 105 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_1KB; 106 break; 107 case 2048: 108 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_2KB; 109 break; 110 case 4096: 111 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_4KB; 112 break; 113 } 114 return tile_split; 115} 116 117unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect) 118{ 119 switch (macro_tile_aspect) { 120 default: 121 case 1: 122 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_1; 123 break; 124 case 2: 125 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_2; 126 break; 127 case 4: 128 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_4; 129 break; 130 case 8: 131 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_8; 132 break; 133 } 134 return macro_tile_aspect; 135} 136 137unsigned cik_bank_wh(unsigned bankwh) 138{ 139 switch (bankwh) { 140 default: 141 case 1: 142 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_1; 143 break; 144 case 2: 145 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_2; 146 break; 147 case 4: 148 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_4; 149 break; 150 case 8: 151 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_8; 152 break; 153 } 154 return bankwh; 155} 156 157unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode) 158{ 159 if (sscreen->b.info.si_tile_mode_array_valid) { 160 uint32_t gb_tile_mode = sscreen->b.info.si_tile_mode_array[tile_mode]; 161 162 return G_009910_PIPE_CONFIG(gb_tile_mode); 163 } 164 165 /* This is probably broken for a lot of chips, but it's only used 166 * if the kernel cannot return the tile mode array for CIK. */ 167 switch (sscreen->b.info.r600_num_tile_pipes) { 168 case 16: 169 return V_02803C_X_ADDR_SURF_P16_32X32_16X16; 170 case 8: 171 return V_02803C_X_ADDR_SURF_P8_32X32_16X16; 172 case 4: 173 default: 174 if (sscreen->b.info.r600_num_backends == 4) 175 return V_02803C_X_ADDR_SURF_P4_16X16; 176 else 177 return V_02803C_X_ADDR_SURF_P4_8X16; 178 case 2: 179 return V_02803C_ADDR_SURF_P2; 180 } 181} 182 183static unsigned si_map_swizzle(unsigned swizzle) 184{ 185 switch (swizzle) { 186 case UTIL_FORMAT_SWIZZLE_Y: 187 return V_008F0C_SQ_SEL_Y; 188 case UTIL_FORMAT_SWIZZLE_Z: 189 return V_008F0C_SQ_SEL_Z; 190 case UTIL_FORMAT_SWIZZLE_W: 191 return V_008F0C_SQ_SEL_W; 192 case UTIL_FORMAT_SWIZZLE_0: 193 return V_008F0C_SQ_SEL_0; 194 case UTIL_FORMAT_SWIZZLE_1: 195 return V_008F0C_SQ_SEL_1; 196 default: /* UTIL_FORMAT_SWIZZLE_X */ 197 return V_008F0C_SQ_SEL_X; 198 } 199} 200 201static uint32_t S_FIXED(float value, uint32_t frac_bits) 202{ 203 return value * (1 << frac_bits); 204} 205 206/* 12.4 fixed-point */ 207static unsigned si_pack_float_12p4(float x) 208{ 209 return x <= 0 ? 0 : 210 x >= 4096 ? 0xffff : x * 16; 211} 212 213/* 214 * Inferred framebuffer and blender state. 215 * 216 * One of the reasons this must be derived from the framebuffer state is that: 217 * - The blend state mask is 0xf most of the time. 218 * - The COLOR1 format isn't INVALID because of possible dual-source blending, 219 * so COLOR1 is enabled pretty much all the time. 220 * So CB_TARGET_MASK is the only register that can disable COLOR1. 221 */ 222static void si_update_fb_blend_state(struct si_context *sctx) 223{ 224 struct si_pm4_state *pm4; 225 struct si_state_blend *blend = sctx->queued.named.blend; 226 uint32_t mask = 0, i; 227 228 if (blend == NULL) 229 return; 230 231 pm4 = CALLOC_STRUCT(si_pm4_state); 232 if (pm4 == NULL) 233 return; 234 235 for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) 236 if (sctx->framebuffer.state.cbufs[i]) 237 mask |= 0xf << (4*i); 238 mask &= blend->cb_target_mask; 239 240 si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask); 241 si_pm4_set_state(sctx, fb_blend, pm4); 242} 243 244/* 245 * Blender functions 246 */ 247 248static uint32_t si_translate_blend_function(int blend_func) 249{ 250 switch (blend_func) { 251 case PIPE_BLEND_ADD: 252 return V_028780_COMB_DST_PLUS_SRC; 253 case PIPE_BLEND_SUBTRACT: 254 return V_028780_COMB_SRC_MINUS_DST; 255 case PIPE_BLEND_REVERSE_SUBTRACT: 256 return V_028780_COMB_DST_MINUS_SRC; 257 case PIPE_BLEND_MIN: 258 return V_028780_COMB_MIN_DST_SRC; 259 case PIPE_BLEND_MAX: 260 return V_028780_COMB_MAX_DST_SRC; 261 default: 262 R600_ERR("Unknown blend function %d\n", blend_func); 263 assert(0); 264 break; 265 } 266 return 0; 267} 268 269static uint32_t si_translate_blend_factor(int blend_fact) 270{ 271 switch (blend_fact) { 272 case PIPE_BLENDFACTOR_ONE: 273 return V_028780_BLEND_ONE; 274 case PIPE_BLENDFACTOR_SRC_COLOR: 275 return V_028780_BLEND_SRC_COLOR; 276 case PIPE_BLENDFACTOR_SRC_ALPHA: 277 return V_028780_BLEND_SRC_ALPHA; 278 case PIPE_BLENDFACTOR_DST_ALPHA: 279 return V_028780_BLEND_DST_ALPHA; 280 case PIPE_BLENDFACTOR_DST_COLOR: 281 return V_028780_BLEND_DST_COLOR; 282 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 283 return V_028780_BLEND_SRC_ALPHA_SATURATE; 284 case PIPE_BLENDFACTOR_CONST_COLOR: 285 return V_028780_BLEND_CONSTANT_COLOR; 286 case PIPE_BLENDFACTOR_CONST_ALPHA: 287 return V_028780_BLEND_CONSTANT_ALPHA; 288 case PIPE_BLENDFACTOR_ZERO: 289 return V_028780_BLEND_ZERO; 290 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 291 return V_028780_BLEND_ONE_MINUS_SRC_COLOR; 292 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 293 return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; 294 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 295 return V_028780_BLEND_ONE_MINUS_DST_ALPHA; 296 case PIPE_BLENDFACTOR_INV_DST_COLOR: 297 return V_028780_BLEND_ONE_MINUS_DST_COLOR; 298 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 299 return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR; 300 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 301 return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA; 302 case PIPE_BLENDFACTOR_SRC1_COLOR: 303 return V_028780_BLEND_SRC1_COLOR; 304 case PIPE_BLENDFACTOR_SRC1_ALPHA: 305 return V_028780_BLEND_SRC1_ALPHA; 306 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 307 return V_028780_BLEND_INV_SRC1_COLOR; 308 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 309 return V_028780_BLEND_INV_SRC1_ALPHA; 310 default: 311 R600_ERR("Bad blend factor %d not supported!\n", blend_fact); 312 assert(0); 313 break; 314 } 315 return 0; 316} 317 318static void *si_create_blend_state_mode(struct pipe_context *ctx, 319 const struct pipe_blend_state *state, 320 unsigned mode) 321{ 322 struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend); 323 struct si_pm4_state *pm4 = &blend->pm4; 324 325 uint32_t color_control = 0; 326 327 if (blend == NULL) 328 return NULL; 329 330 blend->alpha_to_one = state->alpha_to_one; 331 332 if (state->logicop_enable) { 333 color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4)); 334 } else { 335 color_control |= S_028808_ROP3(0xcc); 336 } 337 338 si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 339 S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 340 S_028B70_ALPHA_TO_MASK_OFFSET0(2) | 341 S_028B70_ALPHA_TO_MASK_OFFSET1(2) | 342 S_028B70_ALPHA_TO_MASK_OFFSET2(2) | 343 S_028B70_ALPHA_TO_MASK_OFFSET3(2)); 344 345 blend->cb_target_mask = 0; 346 for (int i = 0; i < 8; i++) { 347 /* state->rt entries > 0 only written if independent blending */ 348 const int j = state->independent_blend_enable ? i : 0; 349 350 unsigned eqRGB = state->rt[j].rgb_func; 351 unsigned srcRGB = state->rt[j].rgb_src_factor; 352 unsigned dstRGB = state->rt[j].rgb_dst_factor; 353 unsigned eqA = state->rt[j].alpha_func; 354 unsigned srcA = state->rt[j].alpha_src_factor; 355 unsigned dstA = state->rt[j].alpha_dst_factor; 356 357 unsigned blend_cntl = 0; 358 359 /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */ 360 blend->cb_target_mask |= state->rt[j].colormask << (4 * i); 361 362 if (!state->rt[j].blend_enable) { 363 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 364 continue; 365 } 366 367 blend_cntl |= S_028780_ENABLE(1); 368 blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB)); 369 blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB)); 370 blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB)); 371 372 if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { 373 blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1); 374 blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA)); 375 blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA)); 376 blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA)); 377 } 378 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 379 } 380 381 if (blend->cb_target_mask) { 382 color_control |= S_028808_MODE(mode); 383 } else { 384 color_control |= S_028808_MODE(V_028808_CB_DISABLE); 385 } 386 si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control); 387 388 return blend; 389} 390 391static void *si_create_blend_state(struct pipe_context *ctx, 392 const struct pipe_blend_state *state) 393{ 394 return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); 395} 396 397static void si_bind_blend_state(struct pipe_context *ctx, void *state) 398{ 399 struct si_context *sctx = (struct si_context *)ctx; 400 si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state); 401 si_update_fb_blend_state(sctx); 402} 403 404static void si_delete_blend_state(struct pipe_context *ctx, void *state) 405{ 406 struct si_context *sctx = (struct si_context *)ctx; 407 si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state); 408} 409 410static void si_set_blend_color(struct pipe_context *ctx, 411 const struct pipe_blend_color *state) 412{ 413 struct si_context *sctx = (struct si_context *)ctx; 414 struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); 415 416 if (pm4 == NULL) 417 return; 418 419 si_pm4_set_reg(pm4, R_028414_CB_BLEND_RED, fui(state->color[0])); 420 si_pm4_set_reg(pm4, R_028418_CB_BLEND_GREEN, fui(state->color[1])); 421 si_pm4_set_reg(pm4, R_02841C_CB_BLEND_BLUE, fui(state->color[2])); 422 si_pm4_set_reg(pm4, R_028420_CB_BLEND_ALPHA, fui(state->color[3])); 423 424 si_pm4_set_state(sctx, blend_color, pm4); 425} 426 427/* 428 * Clipping, scissors and viewport 429 */ 430 431static void si_set_clip_state(struct pipe_context *ctx, 432 const struct pipe_clip_state *state) 433{ 434 struct si_context *sctx = (struct si_context *)ctx; 435 struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); 436 struct pipe_constant_buffer cb; 437 438 if (pm4 == NULL) 439 return; 440 441 for (int i = 0; i < 6; i++) { 442 si_pm4_set_reg(pm4, R_0285BC_PA_CL_UCP_0_X + i * 16, 443 fui(state->ucp[i][0])); 444 si_pm4_set_reg(pm4, R_0285C0_PA_CL_UCP_0_Y + i * 16, 445 fui(state->ucp[i][1])); 446 si_pm4_set_reg(pm4, R_0285C4_PA_CL_UCP_0_Z + i * 16, 447 fui(state->ucp[i][2])); 448 si_pm4_set_reg(pm4, R_0285C8_PA_CL_UCP_0_W + i * 16, 449 fui(state->ucp[i][3])); 450 } 451 452 cb.buffer = NULL; 453 cb.user_buffer = state->ucp; 454 cb.buffer_offset = 0; 455 cb.buffer_size = 4*4*8; 456 ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, SI_DRIVER_STATE_CONST_BUF, &cb); 457 pipe_resource_reference(&cb.buffer, NULL); 458 459 si_pm4_set_state(sctx, clip, pm4); 460} 461 462#define SIX_BITS 0x3F 463 464static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom) 465{ 466 struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; 467 struct tgsi_shader_info *info = si_get_vs_info(sctx); 468 struct si_shader *vs = si_get_vs_state(sctx); 469 unsigned window_space = 470 vs->selector->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; 471 unsigned clipdist_mask = 472 info->writes_clipvertex ? SIX_BITS : info->clipdist_writemask; 473 474 r600_write_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL, 475 S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) | 476 S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) | 477 S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) | 478 S_02881C_VS_OUT_CCDIST0_VEC_ENA((clipdist_mask & 0x0F) != 0) | 479 S_02881C_VS_OUT_CCDIST1_VEC_ENA((clipdist_mask & 0xF0) != 0) | 480 S_02881C_VS_OUT_MISC_VEC_ENA(info->writes_psize || 481 info->writes_edgeflag || 482 info->writes_layer) | 483 (sctx->queued.named.rasterizer->clip_plane_enable & 484 clipdist_mask)); 485 r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL, 486 sctx->queued.named.rasterizer->pa_cl_clip_cntl | 487 (clipdist_mask ? 0 : 488 sctx->queued.named.rasterizer->clip_plane_enable & SIX_BITS) | 489 S_028810_CLIP_DISABLE(window_space)); 490} 491 492static void si_set_scissor_states(struct pipe_context *ctx, 493 unsigned start_slot, 494 unsigned num_scissors, 495 const struct pipe_scissor_state *state) 496{ 497 struct si_context *sctx = (struct si_context *)ctx; 498 struct si_state_scissor *scissor = CALLOC_STRUCT(si_state_scissor); 499 struct si_pm4_state *pm4 = &scissor->pm4; 500 501 if (scissor == NULL) 502 return; 503 504 scissor->scissor = *state; 505 si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 506 S_028250_TL_X(state->minx) | S_028250_TL_Y(state->miny) | 507 S_028250_WINDOW_OFFSET_DISABLE(1)); 508 si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR, 509 S_028254_BR_X(state->maxx) | S_028254_BR_Y(state->maxy)); 510 511 si_pm4_set_state(sctx, scissor, scissor); 512} 513 514static void si_set_viewport_states(struct pipe_context *ctx, 515 unsigned start_slot, 516 unsigned num_viewports, 517 const struct pipe_viewport_state *state) 518{ 519 struct si_context *sctx = (struct si_context *)ctx; 520 struct si_state_viewport *viewport = CALLOC_STRUCT(si_state_viewport); 521 struct si_pm4_state *pm4 = &viewport->pm4; 522 523 if (viewport == NULL) 524 return; 525 526 viewport->viewport = *state; 527 si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0])); 528 si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0])); 529 si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1])); 530 si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1])); 531 si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2])); 532 si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2])); 533 534 si_pm4_set_state(sctx, viewport, viewport); 535} 536 537/* 538 * inferred state between framebuffer and rasterizer 539 */ 540static void si_update_fb_rs_state(struct si_context *sctx) 541{ 542 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 543 struct si_pm4_state *pm4; 544 float offset_units; 545 546 if (!rs || !sctx->framebuffer.state.zsbuf) 547 return; 548 549 offset_units = sctx->queued.named.rasterizer->offset_units; 550 switch (sctx->framebuffer.state.zsbuf->texture->format) { 551 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 552 case PIPE_FORMAT_X8Z24_UNORM: 553 case PIPE_FORMAT_Z24X8_UNORM: 554 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 555 offset_units *= 2.0f; 556 break; 557 case PIPE_FORMAT_Z32_FLOAT: 558 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 559 offset_units *= 1.0f; 560 break; 561 case PIPE_FORMAT_Z16_UNORM: 562 offset_units *= 4.0f; 563 break; 564 default: 565 return; 566 } 567 568 pm4 = CALLOC_STRUCT(si_pm4_state); 569 570 if (pm4 == NULL) 571 return; 572 573 /* FIXME some of those reg can be computed with cso */ 574 si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 575 fui(sctx->queued.named.rasterizer->offset_scale)); 576 si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units)); 577 si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, 578 fui(sctx->queued.named.rasterizer->offset_scale)); 579 si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units)); 580 581 si_pm4_set_state(sctx, fb_rs, pm4); 582} 583 584/* 585 * Rasterizer 586 */ 587 588static uint32_t si_translate_fill(uint32_t func) 589{ 590 switch(func) { 591 case PIPE_POLYGON_MODE_FILL: 592 return V_028814_X_DRAW_TRIANGLES; 593 case PIPE_POLYGON_MODE_LINE: 594 return V_028814_X_DRAW_LINES; 595 case PIPE_POLYGON_MODE_POINT: 596 return V_028814_X_DRAW_POINTS; 597 default: 598 assert(0); 599 return V_028814_X_DRAW_POINTS; 600 } 601} 602 603static void *si_create_rs_state(struct pipe_context *ctx, 604 const struct pipe_rasterizer_state *state) 605{ 606 struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer); 607 struct si_pm4_state *pm4 = &rs->pm4; 608 unsigned tmp; 609 unsigned prov_vtx = 1, polygon_dual_mode; 610 float psize_min, psize_max; 611 612 if (rs == NULL) { 613 return NULL; 614 } 615 616 rs->two_side = state->light_twoside; 617 rs->multisample_enable = state->multisample; 618 rs->clip_plane_enable = state->clip_plane_enable; 619 rs->line_stipple_enable = state->line_stipple_enable; 620 rs->poly_stipple_enable = state->poly_stipple_enable; 621 622 polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL || 623 state->fill_back != PIPE_POLYGON_MODE_FILL); 624 625 if (state->flatshade_first) 626 prov_vtx = 0; 627 628 rs->flatshade = state->flatshade; 629 rs->sprite_coord_enable = state->sprite_coord_enable; 630 rs->pa_sc_line_stipple = state->line_stipple_enable ? 631 S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | 632 S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0; 633 rs->pa_su_sc_mode_cntl = 634 S_028814_PROVOKING_VTX_LAST(prov_vtx) | 635 S_028814_CULL_FRONT(state->rasterizer_discard || (state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | 636 S_028814_CULL_BACK(state->rasterizer_discard || (state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | 637 S_028814_FACE(!state->front_ccw) | 638 S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) | 639 S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) | 640 S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) | 641 S_028814_POLY_MODE(polygon_dual_mode) | 642 S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) | 643 S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)); 644 rs->pa_cl_clip_cntl = 645 S_028810_PS_UCP_MODE(3) | 646 S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) | 647 S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) | 648 S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) | 649 S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) | 650 S_028810_DX_LINEAR_ATTR_CLIP_ENA(1); 651 652 /* offset */ 653 rs->offset_units = state->offset_units; 654 rs->offset_scale = state->offset_scale * 12.0f; 655 656 tmp = S_0286D4_FLAT_SHADE_ENA(1); 657 if (state->sprite_coord_enable) { 658 tmp |= S_0286D4_PNT_SPRITE_ENA(1) | 659 S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | 660 S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) | 661 S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) | 662 S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1); 663 if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { 664 tmp |= S_0286D4_PNT_SPRITE_TOP_1(1); 665 } 666 } 667 si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0, tmp); 668 669 /* point size 12.4 fixed point */ 670 tmp = (unsigned)(state->point_size * 8.0); 671 si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp)); 672 673 if (state->point_size_per_vertex) { 674 psize_min = util_get_min_point_size(state); 675 psize_max = 8192; 676 } else { 677 /* Force the point size to be as if the vertex output was disabled. */ 678 psize_min = state->point_size; 679 psize_max = state->point_size; 680 } 681 /* Divide by two, because 0.5 = 1 pixel. */ 682 si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX, 683 S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) | 684 S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2))); 685 686 tmp = (unsigned)state->line_width * 8; 687 si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp)); 688 si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0, 689 S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | 690 S_028A48_MSAA_ENABLE(state->multisample) | 691 S_028A48_VPORT_SCISSOR_ENABLE(state->scissor)); 692 693 si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL, 694 S_028BE4_PIX_CENTER(state->half_pixel_center) | 695 S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH)); 696 697 si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp)); 698 699 return rs; 700} 701 702static void si_bind_rs_state(struct pipe_context *ctx, void *state) 703{ 704 struct si_context *sctx = (struct si_context *)ctx; 705 struct si_state_rasterizer *old_rs = 706 (struct si_state_rasterizer*)sctx->queued.named.rasterizer; 707 struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; 708 709 if (state == NULL) 710 return; 711 712 // TODO 713 sctx->pa_sc_line_stipple = rs->pa_sc_line_stipple; 714 sctx->pa_su_sc_mode_cntl = rs->pa_su_sc_mode_cntl; 715 716 if (sctx->framebuffer.nr_samples > 1 && 717 (!old_rs || old_rs->multisample_enable != rs->multisample_enable)) 718 sctx->db_render_state.dirty = true; 719 720 si_pm4_bind_state(sctx, rasterizer, rs); 721 si_update_fb_rs_state(sctx); 722 723 sctx->clip_regs.dirty = true; 724 sctx->last_rast_prim = -1; /* reset this so that it gets updated */ 725} 726 727static void si_delete_rs_state(struct pipe_context *ctx, void *state) 728{ 729 struct si_context *sctx = (struct si_context *)ctx; 730 si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state); 731} 732 733/* 734 * infeered state between dsa and stencil ref 735 */ 736static void si_update_dsa_stencil_ref(struct si_context *sctx) 737{ 738 struct si_pm4_state *pm4; 739 struct pipe_stencil_ref *ref = &sctx->stencil_ref; 740 struct si_state_dsa *dsa = sctx->queued.named.dsa; 741 742 if (!dsa) 743 return; 744 745 pm4 = CALLOC_STRUCT(si_pm4_state); 746 if (pm4 == NULL) 747 return; 748 749 si_pm4_set_reg(pm4, R_028430_DB_STENCILREFMASK, 750 S_028430_STENCILTESTVAL(ref->ref_value[0]) | 751 S_028430_STENCILMASK(dsa->valuemask[0]) | 752 S_028430_STENCILWRITEMASK(dsa->writemask[0]) | 753 S_028430_STENCILOPVAL(1)); 754 si_pm4_set_reg(pm4, R_028434_DB_STENCILREFMASK_BF, 755 S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) | 756 S_028434_STENCILMASK_BF(dsa->valuemask[1]) | 757 S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) | 758 S_028434_STENCILOPVAL_BF(1)); 759 760 si_pm4_set_state(sctx, dsa_stencil_ref, pm4); 761} 762 763static void si_set_pipe_stencil_ref(struct pipe_context *ctx, 764 const struct pipe_stencil_ref *state) 765{ 766 struct si_context *sctx = (struct si_context *)ctx; 767 sctx->stencil_ref = *state; 768 si_update_dsa_stencil_ref(sctx); 769} 770 771 772/* 773 * DSA 774 */ 775 776static uint32_t si_translate_stencil_op(int s_op) 777{ 778 switch (s_op) { 779 case PIPE_STENCIL_OP_KEEP: 780 return V_02842C_STENCIL_KEEP; 781 case PIPE_STENCIL_OP_ZERO: 782 return V_02842C_STENCIL_ZERO; 783 case PIPE_STENCIL_OP_REPLACE: 784 return V_02842C_STENCIL_REPLACE_TEST; 785 case PIPE_STENCIL_OP_INCR: 786 return V_02842C_STENCIL_ADD_CLAMP; 787 case PIPE_STENCIL_OP_DECR: 788 return V_02842C_STENCIL_SUB_CLAMP; 789 case PIPE_STENCIL_OP_INCR_WRAP: 790 return V_02842C_STENCIL_ADD_WRAP; 791 case PIPE_STENCIL_OP_DECR_WRAP: 792 return V_02842C_STENCIL_SUB_WRAP; 793 case PIPE_STENCIL_OP_INVERT: 794 return V_02842C_STENCIL_INVERT; 795 default: 796 R600_ERR("Unknown stencil op %d", s_op); 797 assert(0); 798 break; 799 } 800 return 0; 801} 802 803static void *si_create_dsa_state(struct pipe_context *ctx, 804 const struct pipe_depth_stencil_alpha_state *state) 805{ 806 struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa); 807 struct si_pm4_state *pm4 = &dsa->pm4; 808 unsigned db_depth_control; 809 uint32_t db_stencil_control = 0; 810 811 if (dsa == NULL) { 812 return NULL; 813 } 814 815 dsa->valuemask[0] = state->stencil[0].valuemask; 816 dsa->valuemask[1] = state->stencil[1].valuemask; 817 dsa->writemask[0] = state->stencil[0].writemask; 818 dsa->writemask[1] = state->stencil[1].writemask; 819 820 db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | 821 S_028800_Z_WRITE_ENABLE(state->depth.writemask) | 822 S_028800_ZFUNC(state->depth.func); 823 824 /* stencil */ 825 if (state->stencil[0].enabled) { 826 db_depth_control |= S_028800_STENCIL_ENABLE(1); 827 db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func); 828 db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op)); 829 db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op)); 830 db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op)); 831 832 if (state->stencil[1].enabled) { 833 db_depth_control |= S_028800_BACKFACE_ENABLE(1); 834 db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func); 835 db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op)); 836 db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op)); 837 db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op)); 838 } 839 } 840 841 /* alpha */ 842 if (state->alpha.enabled) { 843 dsa->alpha_func = state->alpha.func; 844 845 si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 + 846 SI_SGPR_ALPHA_REF * 4, fui(state->alpha.ref_value)); 847 } else { 848 dsa->alpha_func = PIPE_FUNC_ALWAYS; 849 } 850 851 /* misc */ 852 si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control); 853 si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); 854 855 return dsa; 856} 857 858static void si_bind_dsa_state(struct pipe_context *ctx, void *state) 859{ 860 struct si_context *sctx = (struct si_context *)ctx; 861 struct si_state_dsa *dsa = state; 862 863 if (state == NULL) 864 return; 865 866 si_pm4_bind_state(sctx, dsa, dsa); 867 si_update_dsa_stencil_ref(sctx); 868} 869 870static void si_delete_dsa_state(struct pipe_context *ctx, void *state) 871{ 872 struct si_context *sctx = (struct si_context *)ctx; 873 si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state); 874} 875 876static void *si_create_db_flush_dsa(struct si_context *sctx) 877{ 878 struct pipe_depth_stencil_alpha_state dsa = {}; 879 880 return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa); 881} 882 883/* DB RENDER STATE */ 884 885static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable) 886{ 887 struct si_context *sctx = (struct si_context*)ctx; 888 889 sctx->db_render_state.dirty = true; 890} 891 892static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state) 893{ 894 struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; 895 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 896 unsigned db_shader_control; 897 898 r600_write_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2); 899 900 /* DB_RENDER_CONTROL */ 901 if (sctx->dbcb_depth_copy_enabled || 902 sctx->dbcb_stencil_copy_enabled) { 903 radeon_emit(cs, 904 S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) | 905 S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) | 906 S_028000_COPY_CENTROID(1) | 907 S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample)); 908 } else if (sctx->db_inplace_flush_enabled) { 909 radeon_emit(cs, 910 S_028000_DEPTH_COMPRESS_DISABLE(1) | 911 S_028000_STENCIL_COMPRESS_DISABLE(1)); 912 } else if (sctx->db_depth_clear) { 913 radeon_emit(cs, S_028000_DEPTH_CLEAR_ENABLE(1)); 914 } else { 915 radeon_emit(cs, 0); 916 } 917 918 /* DB_COUNT_CONTROL (occlusion queries) */ 919 if (sctx->b.num_occlusion_queries > 0) { 920 if (sctx->b.chip_class >= CIK) { 921 radeon_emit(cs, 922 S_028004_PERFECT_ZPASS_COUNTS(1) | 923 S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) | 924 S_028004_ZPASS_ENABLE(1) | 925 S_028004_SLICE_EVEN_ENABLE(1) | 926 S_028004_SLICE_ODD_ENABLE(1)); 927 } else { 928 radeon_emit(cs, 929 S_028004_PERFECT_ZPASS_COUNTS(1) | 930 S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples)); 931 } 932 } else { 933 /* Disable occlusion queries. */ 934 if (sctx->b.chip_class >= CIK) { 935 radeon_emit(cs, 0); 936 } else { 937 radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1)); 938 } 939 } 940 941 /* DB_RENDER_OVERRIDE2 */ 942 if (sctx->db_depth_disable_expclear) { 943 r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 944 S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(1)); 945 } else { 946 r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 0); 947 } 948 949 db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z) | 950 S_02880C_ALPHA_TO_MASK_DISABLE(sctx->framebuffer.cb0_is_integer) | 951 sctx->ps_db_shader_control; 952 953 /* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */ 954 if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable)) 955 db_shader_control &= C_02880C_MASK_EXPORT_ENABLE; 956 957 r600_write_context_reg(cs, R_02880C_DB_SHADER_CONTROL, 958 db_shader_control); 959} 960 961/* 962 * format translation 963 */ 964static uint32_t si_translate_colorformat(enum pipe_format format) 965{ 966 const struct util_format_description *desc = util_format_description(format); 967 968#define HAS_SIZE(x,y,z,w) \ 969 (desc->channel[0].size == (x) && desc->channel[1].size == (y) && \ 970 desc->channel[2].size == (z) && desc->channel[3].size == (w)) 971 972 if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */ 973 return V_028C70_COLOR_10_11_11; 974 975 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) 976 return V_028C70_COLOR_INVALID; 977 978 switch (desc->nr_channels) { 979 case 1: 980 switch (desc->channel[0].size) { 981 case 8: 982 return V_028C70_COLOR_8; 983 case 16: 984 return V_028C70_COLOR_16; 985 case 32: 986 return V_028C70_COLOR_32; 987 } 988 break; 989 case 2: 990 if (desc->channel[0].size == desc->channel[1].size) { 991 switch (desc->channel[0].size) { 992 case 8: 993 return V_028C70_COLOR_8_8; 994 case 16: 995 return V_028C70_COLOR_16_16; 996 case 32: 997 return V_028C70_COLOR_32_32; 998 } 999 } else if (HAS_SIZE(8,24,0,0)) { 1000 return V_028C70_COLOR_24_8; 1001 } else if (HAS_SIZE(24,8,0,0)) { 1002 return V_028C70_COLOR_8_24; 1003 } 1004 break; 1005 case 3: 1006 if (HAS_SIZE(5,6,5,0)) { 1007 return V_028C70_COLOR_5_6_5; 1008 } else if (HAS_SIZE(32,8,24,0)) { 1009 return V_028C70_COLOR_X24_8_32_FLOAT; 1010 } 1011 break; 1012 case 4: 1013 if (desc->channel[0].size == desc->channel[1].size && 1014 desc->channel[0].size == desc->channel[2].size && 1015 desc->channel[0].size == desc->channel[3].size) { 1016 switch (desc->channel[0].size) { 1017 case 4: 1018 return V_028C70_COLOR_4_4_4_4; 1019 case 8: 1020 return V_028C70_COLOR_8_8_8_8; 1021 case 16: 1022 return V_028C70_COLOR_16_16_16_16; 1023 case 32: 1024 return V_028C70_COLOR_32_32_32_32; 1025 } 1026 } else if (HAS_SIZE(5,5,5,1)) { 1027 return V_028C70_COLOR_1_5_5_5; 1028 } else if (HAS_SIZE(10,10,10,2)) { 1029 return V_028C70_COLOR_2_10_10_10; 1030 } 1031 break; 1032 } 1033 return V_028C70_COLOR_INVALID; 1034} 1035 1036static uint32_t si_colorformat_endian_swap(uint32_t colorformat) 1037{ 1038 if (SI_BIG_ENDIAN) { 1039 switch(colorformat) { 1040 /* 8-bit buffers. */ 1041 case V_028C70_COLOR_8: 1042 return V_028C70_ENDIAN_NONE; 1043 1044 /* 16-bit buffers. */ 1045 case V_028C70_COLOR_5_6_5: 1046 case V_028C70_COLOR_1_5_5_5: 1047 case V_028C70_COLOR_4_4_4_4: 1048 case V_028C70_COLOR_16: 1049 case V_028C70_COLOR_8_8: 1050 return V_028C70_ENDIAN_8IN16; 1051 1052 /* 32-bit buffers. */ 1053 case V_028C70_COLOR_8_8_8_8: 1054 case V_028C70_COLOR_2_10_10_10: 1055 case V_028C70_COLOR_8_24: 1056 case V_028C70_COLOR_24_8: 1057 case V_028C70_COLOR_16_16: 1058 return V_028C70_ENDIAN_8IN32; 1059 1060 /* 64-bit buffers. */ 1061 case V_028C70_COLOR_16_16_16_16: 1062 return V_028C70_ENDIAN_8IN16; 1063 1064 case V_028C70_COLOR_32_32: 1065 return V_028C70_ENDIAN_8IN32; 1066 1067 /* 128-bit buffers. */ 1068 case V_028C70_COLOR_32_32_32_32: 1069 return V_028C70_ENDIAN_8IN32; 1070 default: 1071 return V_028C70_ENDIAN_NONE; /* Unsupported. */ 1072 } 1073 } else { 1074 return V_028C70_ENDIAN_NONE; 1075 } 1076} 1077 1078/* Returns the size in bits of the widest component of a CB format */ 1079static unsigned si_colorformat_max_comp_size(uint32_t colorformat) 1080{ 1081 switch(colorformat) { 1082 case V_028C70_COLOR_4_4_4_4: 1083 return 4; 1084 1085 case V_028C70_COLOR_1_5_5_5: 1086 case V_028C70_COLOR_5_5_5_1: 1087 return 5; 1088 1089 case V_028C70_COLOR_5_6_5: 1090 return 6; 1091 1092 case V_028C70_COLOR_8: 1093 case V_028C70_COLOR_8_8: 1094 case V_028C70_COLOR_8_8_8_8: 1095 return 8; 1096 1097 case V_028C70_COLOR_10_10_10_2: 1098 case V_028C70_COLOR_2_10_10_10: 1099 return 10; 1100 1101 case V_028C70_COLOR_10_11_11: 1102 case V_028C70_COLOR_11_11_10: 1103 return 11; 1104 1105 case V_028C70_COLOR_16: 1106 case V_028C70_COLOR_16_16: 1107 case V_028C70_COLOR_16_16_16_16: 1108 return 16; 1109 1110 case V_028C70_COLOR_8_24: 1111 case V_028C70_COLOR_24_8: 1112 return 24; 1113 1114 case V_028C70_COLOR_32: 1115 case V_028C70_COLOR_32_32: 1116 case V_028C70_COLOR_32_32_32_32: 1117 case V_028C70_COLOR_X24_8_32_FLOAT: 1118 return 32; 1119 } 1120 1121 assert(!"Unknown maximum component size"); 1122 return 0; 1123} 1124 1125static uint32_t si_translate_dbformat(enum pipe_format format) 1126{ 1127 switch (format) { 1128 case PIPE_FORMAT_Z16_UNORM: 1129 return V_028040_Z_16; 1130 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1131 case PIPE_FORMAT_X8Z24_UNORM: 1132 case PIPE_FORMAT_Z24X8_UNORM: 1133 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1134 return V_028040_Z_24; /* deprecated on SI */ 1135 case PIPE_FORMAT_Z32_FLOAT: 1136 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1137 return V_028040_Z_32_FLOAT; 1138 default: 1139 return V_028040_Z_INVALID; 1140 } 1141} 1142 1143/* 1144 * Texture translation 1145 */ 1146 1147static uint32_t si_translate_texformat(struct pipe_screen *screen, 1148 enum pipe_format format, 1149 const struct util_format_description *desc, 1150 int first_non_void) 1151{ 1152 struct si_screen *sscreen = (struct si_screen*)screen; 1153 bool enable_s3tc = sscreen->b.info.drm_minor >= 31; 1154 boolean uniform = TRUE; 1155 int i; 1156 1157 /* Colorspace (return non-RGB formats directly). */ 1158 switch (desc->colorspace) { 1159 /* Depth stencil formats */ 1160 case UTIL_FORMAT_COLORSPACE_ZS: 1161 switch (format) { 1162 case PIPE_FORMAT_Z16_UNORM: 1163 return V_008F14_IMG_DATA_FORMAT_16; 1164 case PIPE_FORMAT_X24S8_UINT: 1165 case PIPE_FORMAT_Z24X8_UNORM: 1166 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1167 return V_008F14_IMG_DATA_FORMAT_8_24; 1168 case PIPE_FORMAT_X8Z24_UNORM: 1169 case PIPE_FORMAT_S8X24_UINT: 1170 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1171 return V_008F14_IMG_DATA_FORMAT_24_8; 1172 case PIPE_FORMAT_S8_UINT: 1173 return V_008F14_IMG_DATA_FORMAT_8; 1174 case PIPE_FORMAT_Z32_FLOAT: 1175 return V_008F14_IMG_DATA_FORMAT_32; 1176 case PIPE_FORMAT_X32_S8X24_UINT: 1177 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1178 return V_008F14_IMG_DATA_FORMAT_X24_8_32; 1179 default: 1180 goto out_unknown; 1181 } 1182 1183 case UTIL_FORMAT_COLORSPACE_YUV: 1184 goto out_unknown; /* TODO */ 1185 1186 case UTIL_FORMAT_COLORSPACE_SRGB: 1187 if (desc->nr_channels != 4 && desc->nr_channels != 1) 1188 goto out_unknown; 1189 break; 1190 1191 default: 1192 break; 1193 } 1194 1195 if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { 1196 if (!enable_s3tc) 1197 goto out_unknown; 1198 1199 switch (format) { 1200 case PIPE_FORMAT_RGTC1_SNORM: 1201 case PIPE_FORMAT_LATC1_SNORM: 1202 case PIPE_FORMAT_RGTC1_UNORM: 1203 case PIPE_FORMAT_LATC1_UNORM: 1204 return V_008F14_IMG_DATA_FORMAT_BC4; 1205 case PIPE_FORMAT_RGTC2_SNORM: 1206 case PIPE_FORMAT_LATC2_SNORM: 1207 case PIPE_FORMAT_RGTC2_UNORM: 1208 case PIPE_FORMAT_LATC2_UNORM: 1209 return V_008F14_IMG_DATA_FORMAT_BC5; 1210 default: 1211 goto out_unknown; 1212 } 1213 } 1214 1215 if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { 1216 if (!enable_s3tc) 1217 goto out_unknown; 1218 1219 switch (format) { 1220 case PIPE_FORMAT_BPTC_RGBA_UNORM: 1221 case PIPE_FORMAT_BPTC_SRGBA: 1222 return V_008F14_IMG_DATA_FORMAT_BC7; 1223 case PIPE_FORMAT_BPTC_RGB_FLOAT: 1224 case PIPE_FORMAT_BPTC_RGB_UFLOAT: 1225 return V_008F14_IMG_DATA_FORMAT_BC6; 1226 default: 1227 goto out_unknown; 1228 } 1229 } 1230 1231 if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 1232 switch (format) { 1233 case PIPE_FORMAT_R8G8_B8G8_UNORM: 1234 case PIPE_FORMAT_G8R8_B8R8_UNORM: 1235 return V_008F14_IMG_DATA_FORMAT_GB_GR; 1236 case PIPE_FORMAT_G8R8_G8B8_UNORM: 1237 case PIPE_FORMAT_R8G8_R8B8_UNORM: 1238 return V_008F14_IMG_DATA_FORMAT_BG_RG; 1239 default: 1240 goto out_unknown; 1241 } 1242 } 1243 1244 if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { 1245 1246 if (!enable_s3tc) 1247 goto out_unknown; 1248 1249 if (!util_format_s3tc_enabled) { 1250 goto out_unknown; 1251 } 1252 1253 switch (format) { 1254 case PIPE_FORMAT_DXT1_RGB: 1255 case PIPE_FORMAT_DXT1_RGBA: 1256 case PIPE_FORMAT_DXT1_SRGB: 1257 case PIPE_FORMAT_DXT1_SRGBA: 1258 return V_008F14_IMG_DATA_FORMAT_BC1; 1259 case PIPE_FORMAT_DXT3_RGBA: 1260 case PIPE_FORMAT_DXT3_SRGBA: 1261 return V_008F14_IMG_DATA_FORMAT_BC2; 1262 case PIPE_FORMAT_DXT5_RGBA: 1263 case PIPE_FORMAT_DXT5_SRGBA: 1264 return V_008F14_IMG_DATA_FORMAT_BC3; 1265 default: 1266 goto out_unknown; 1267 } 1268 } 1269 1270 if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) { 1271 return V_008F14_IMG_DATA_FORMAT_5_9_9_9; 1272 } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) { 1273 return V_008F14_IMG_DATA_FORMAT_10_11_11; 1274 } 1275 1276 /* R8G8Bx_SNORM - TODO CxV8U8 */ 1277 1278 /* See whether the components are of the same size. */ 1279 for (i = 1; i < desc->nr_channels; i++) { 1280 uniform = uniform && desc->channel[0].size == desc->channel[i].size; 1281 } 1282 1283 /* Non-uniform formats. */ 1284 if (!uniform) { 1285 switch(desc->nr_channels) { 1286 case 3: 1287 if (desc->channel[0].size == 5 && 1288 desc->channel[1].size == 6 && 1289 desc->channel[2].size == 5) { 1290 return V_008F14_IMG_DATA_FORMAT_5_6_5; 1291 } 1292 goto out_unknown; 1293 case 4: 1294 if (desc->channel[0].size == 5 && 1295 desc->channel[1].size == 5 && 1296 desc->channel[2].size == 5 && 1297 desc->channel[3].size == 1) { 1298 return V_008F14_IMG_DATA_FORMAT_1_5_5_5; 1299 } 1300 if (desc->channel[0].size == 10 && 1301 desc->channel[1].size == 10 && 1302 desc->channel[2].size == 10 && 1303 desc->channel[3].size == 2) { 1304 return V_008F14_IMG_DATA_FORMAT_2_10_10_10; 1305 } 1306 goto out_unknown; 1307 } 1308 goto out_unknown; 1309 } 1310 1311 if (first_non_void < 0 || first_non_void > 3) 1312 goto out_unknown; 1313 1314 /* uniform formats */ 1315 switch (desc->channel[first_non_void].size) { 1316 case 4: 1317 switch (desc->nr_channels) { 1318#if 0 /* Not supported for render targets */ 1319 case 2: 1320 return V_008F14_IMG_DATA_FORMAT_4_4; 1321#endif 1322 case 4: 1323 return V_008F14_IMG_DATA_FORMAT_4_4_4_4; 1324 } 1325 break; 1326 case 8: 1327 switch (desc->nr_channels) { 1328 case 1: 1329 return V_008F14_IMG_DATA_FORMAT_8; 1330 case 2: 1331 return V_008F14_IMG_DATA_FORMAT_8_8; 1332 case 4: 1333 return V_008F14_IMG_DATA_FORMAT_8_8_8_8; 1334 } 1335 break; 1336 case 16: 1337 switch (desc->nr_channels) { 1338 case 1: 1339 return V_008F14_IMG_DATA_FORMAT_16; 1340 case 2: 1341 return V_008F14_IMG_DATA_FORMAT_16_16; 1342 case 4: 1343 return V_008F14_IMG_DATA_FORMAT_16_16_16_16; 1344 } 1345 break; 1346 case 32: 1347 switch (desc->nr_channels) { 1348 case 1: 1349 return V_008F14_IMG_DATA_FORMAT_32; 1350 case 2: 1351 return V_008F14_IMG_DATA_FORMAT_32_32; 1352#if 0 /* Not supported for render targets */ 1353 case 3: 1354 return V_008F14_IMG_DATA_FORMAT_32_32_32; 1355#endif 1356 case 4: 1357 return V_008F14_IMG_DATA_FORMAT_32_32_32_32; 1358 } 1359 } 1360 1361out_unknown: 1362 /* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */ 1363 return ~0; 1364} 1365 1366static unsigned si_tex_wrap(unsigned wrap) 1367{ 1368 switch (wrap) { 1369 default: 1370 case PIPE_TEX_WRAP_REPEAT: 1371 return V_008F30_SQ_TEX_WRAP; 1372 case PIPE_TEX_WRAP_CLAMP: 1373 return V_008F30_SQ_TEX_CLAMP_HALF_BORDER; 1374 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 1375 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL; 1376 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 1377 return V_008F30_SQ_TEX_CLAMP_BORDER; 1378 case PIPE_TEX_WRAP_MIRROR_REPEAT: 1379 return V_008F30_SQ_TEX_MIRROR; 1380 case PIPE_TEX_WRAP_MIRROR_CLAMP: 1381 return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER; 1382 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 1383 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; 1384 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 1385 return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER; 1386 } 1387} 1388 1389static unsigned si_tex_filter(unsigned filter) 1390{ 1391 switch (filter) { 1392 default: 1393 case PIPE_TEX_FILTER_NEAREST: 1394 return V_008F38_SQ_TEX_XY_FILTER_POINT; 1395 case PIPE_TEX_FILTER_LINEAR: 1396 return V_008F38_SQ_TEX_XY_FILTER_BILINEAR; 1397 } 1398} 1399 1400static unsigned si_tex_mipfilter(unsigned filter) 1401{ 1402 switch (filter) { 1403 case PIPE_TEX_MIPFILTER_NEAREST: 1404 return V_008F38_SQ_TEX_Z_FILTER_POINT; 1405 case PIPE_TEX_MIPFILTER_LINEAR: 1406 return V_008F38_SQ_TEX_Z_FILTER_LINEAR; 1407 default: 1408 case PIPE_TEX_MIPFILTER_NONE: 1409 return V_008F38_SQ_TEX_Z_FILTER_NONE; 1410 } 1411} 1412 1413static unsigned si_tex_compare(unsigned compare) 1414{ 1415 switch (compare) { 1416 default: 1417 case PIPE_FUNC_NEVER: 1418 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER; 1419 case PIPE_FUNC_LESS: 1420 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS; 1421 case PIPE_FUNC_EQUAL: 1422 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL; 1423 case PIPE_FUNC_LEQUAL: 1424 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; 1425 case PIPE_FUNC_GREATER: 1426 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER; 1427 case PIPE_FUNC_NOTEQUAL: 1428 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; 1429 case PIPE_FUNC_GEQUAL: 1430 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; 1431 case PIPE_FUNC_ALWAYS: 1432 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS; 1433 } 1434} 1435 1436static unsigned si_tex_dim(unsigned dim, unsigned nr_samples) 1437{ 1438 switch (dim) { 1439 default: 1440 case PIPE_TEXTURE_1D: 1441 return V_008F1C_SQ_RSRC_IMG_1D; 1442 case PIPE_TEXTURE_1D_ARRAY: 1443 return V_008F1C_SQ_RSRC_IMG_1D_ARRAY; 1444 case PIPE_TEXTURE_2D: 1445 case PIPE_TEXTURE_RECT: 1446 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA : 1447 V_008F1C_SQ_RSRC_IMG_2D; 1448 case PIPE_TEXTURE_2D_ARRAY: 1449 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : 1450 V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 1451 case PIPE_TEXTURE_3D: 1452 return V_008F1C_SQ_RSRC_IMG_3D; 1453 case PIPE_TEXTURE_CUBE: 1454 case PIPE_TEXTURE_CUBE_ARRAY: 1455 return V_008F1C_SQ_RSRC_IMG_CUBE; 1456 } 1457} 1458 1459/* 1460 * Format support testing 1461 */ 1462 1463static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) 1464{ 1465 return si_translate_texformat(screen, format, util_format_description(format), 1466 util_format_get_first_non_void_channel(format)) != ~0U; 1467} 1468 1469static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen, 1470 const struct util_format_description *desc, 1471 int first_non_void) 1472{ 1473 unsigned type = desc->channel[first_non_void].type; 1474 int i; 1475 1476 if (type == UTIL_FORMAT_TYPE_FIXED) 1477 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1478 1479 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 1480 return V_008F0C_BUF_DATA_FORMAT_10_11_11; 1481 1482 if (desc->nr_channels == 4 && 1483 desc->channel[0].size == 10 && 1484 desc->channel[1].size == 10 && 1485 desc->channel[2].size == 10 && 1486 desc->channel[3].size == 2) 1487 return V_008F0C_BUF_DATA_FORMAT_2_10_10_10; 1488 1489 /* See whether the components are of the same size. */ 1490 for (i = 0; i < desc->nr_channels; i++) { 1491 if (desc->channel[first_non_void].size != desc->channel[i].size) 1492 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1493 } 1494 1495 switch (desc->channel[first_non_void].size) { 1496 case 8: 1497 switch (desc->nr_channels) { 1498 case 1: 1499 return V_008F0C_BUF_DATA_FORMAT_8; 1500 case 2: 1501 return V_008F0C_BUF_DATA_FORMAT_8_8; 1502 case 3: 1503 case 4: 1504 return V_008F0C_BUF_DATA_FORMAT_8_8_8_8; 1505 } 1506 break; 1507 case 16: 1508 switch (desc->nr_channels) { 1509 case 1: 1510 return V_008F0C_BUF_DATA_FORMAT_16; 1511 case 2: 1512 return V_008F0C_BUF_DATA_FORMAT_16_16; 1513 case 3: 1514 case 4: 1515 return V_008F0C_BUF_DATA_FORMAT_16_16_16_16; 1516 } 1517 break; 1518 case 32: 1519 /* From the Southern Islands ISA documentation about MTBUF: 1520 * 'Memory reads of data in memory that is 32 or 64 bits do not 1521 * undergo any format conversion.' 1522 */ 1523 if (type != UTIL_FORMAT_TYPE_FLOAT && 1524 !desc->channel[first_non_void].pure_integer) 1525 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1526 1527 switch (desc->nr_channels) { 1528 case 1: 1529 return V_008F0C_BUF_DATA_FORMAT_32; 1530 case 2: 1531 return V_008F0C_BUF_DATA_FORMAT_32_32; 1532 case 3: 1533 return V_008F0C_BUF_DATA_FORMAT_32_32_32; 1534 case 4: 1535 return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 1536 } 1537 break; 1538 } 1539 1540 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1541} 1542 1543static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen, 1544 const struct util_format_description *desc, 1545 int first_non_void) 1546{ 1547 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 1548 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 1549 1550 switch (desc->channel[first_non_void].type) { 1551 case UTIL_FORMAT_TYPE_SIGNED: 1552 if (desc->channel[first_non_void].normalized) 1553 return V_008F0C_BUF_NUM_FORMAT_SNORM; 1554 else if (desc->channel[first_non_void].pure_integer) 1555 return V_008F0C_BUF_NUM_FORMAT_SINT; 1556 else 1557 return V_008F0C_BUF_NUM_FORMAT_SSCALED; 1558 break; 1559 case UTIL_FORMAT_TYPE_UNSIGNED: 1560 if (desc->channel[first_non_void].normalized) 1561 return V_008F0C_BUF_NUM_FORMAT_UNORM; 1562 else if (desc->channel[first_non_void].pure_integer) 1563 return V_008F0C_BUF_NUM_FORMAT_UINT; 1564 else 1565 return V_008F0C_BUF_NUM_FORMAT_USCALED; 1566 break; 1567 case UTIL_FORMAT_TYPE_FLOAT: 1568 default: 1569 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 1570 } 1571} 1572 1573static bool si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format) 1574{ 1575 const struct util_format_description *desc; 1576 int first_non_void; 1577 unsigned data_format; 1578 1579 desc = util_format_description(format); 1580 first_non_void = util_format_get_first_non_void_channel(format); 1581 data_format = si_translate_buffer_dataformat(screen, desc, first_non_void); 1582 return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID; 1583} 1584 1585static bool si_is_colorbuffer_format_supported(enum pipe_format format) 1586{ 1587 return si_translate_colorformat(format) != V_028C70_COLOR_INVALID && 1588 r600_translate_colorswap(format) != ~0U; 1589} 1590 1591static bool si_is_zs_format_supported(enum pipe_format format) 1592{ 1593 return si_translate_dbformat(format) != V_028040_Z_INVALID; 1594} 1595 1596boolean si_is_format_supported(struct pipe_screen *screen, 1597 enum pipe_format format, 1598 enum pipe_texture_target target, 1599 unsigned sample_count, 1600 unsigned usage) 1601{ 1602 struct si_screen *sscreen = (struct si_screen *)screen; 1603 unsigned retval = 0; 1604 1605 if (target >= PIPE_MAX_TEXTURE_TYPES) { 1606 R600_ERR("r600: unsupported texture type %d\n", target); 1607 return FALSE; 1608 } 1609 1610 if (!util_format_is_supported(format, usage)) 1611 return FALSE; 1612 1613 if (sample_count > 1) { 1614 /* 2D tiling on CIK is supported since DRM 2.35.0 */ 1615 if (sscreen->b.chip_class >= CIK && sscreen->b.info.drm_minor < 35) 1616 return FALSE; 1617 1618 switch (sample_count) { 1619 case 2: 1620 case 4: 1621 case 8: 1622 break; 1623 default: 1624 return FALSE; 1625 } 1626 } 1627 1628 if (usage & PIPE_BIND_SAMPLER_VIEW) { 1629 if (target == PIPE_BUFFER) { 1630 if (si_is_vertex_format_supported(screen, format)) 1631 retval |= PIPE_BIND_SAMPLER_VIEW; 1632 } else { 1633 if (si_is_sampler_format_supported(screen, format)) 1634 retval |= PIPE_BIND_SAMPLER_VIEW; 1635 } 1636 } 1637 1638 if ((usage & (PIPE_BIND_RENDER_TARGET | 1639 PIPE_BIND_DISPLAY_TARGET | 1640 PIPE_BIND_SCANOUT | 1641 PIPE_BIND_SHARED | 1642 PIPE_BIND_BLENDABLE)) && 1643 si_is_colorbuffer_format_supported(format)) { 1644 retval |= usage & 1645 (PIPE_BIND_RENDER_TARGET | 1646 PIPE_BIND_DISPLAY_TARGET | 1647 PIPE_BIND_SCANOUT | 1648 PIPE_BIND_SHARED); 1649 if (!util_format_is_pure_integer(format) && 1650 !util_format_is_depth_or_stencil(format)) 1651 retval |= usage & PIPE_BIND_BLENDABLE; 1652 } 1653 1654 if ((usage & PIPE_BIND_DEPTH_STENCIL) && 1655 si_is_zs_format_supported(format)) { 1656 retval |= PIPE_BIND_DEPTH_STENCIL; 1657 } 1658 1659 if ((usage & PIPE_BIND_VERTEX_BUFFER) && 1660 si_is_vertex_format_supported(screen, format)) { 1661 retval |= PIPE_BIND_VERTEX_BUFFER; 1662 } 1663 1664 if (usage & PIPE_BIND_TRANSFER_READ) 1665 retval |= PIPE_BIND_TRANSFER_READ; 1666 if (usage & PIPE_BIND_TRANSFER_WRITE) 1667 retval |= PIPE_BIND_TRANSFER_WRITE; 1668 1669 return retval == usage; 1670} 1671 1672unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil) 1673{ 1674 unsigned tile_mode_index = 0; 1675 1676 if (stencil) { 1677 tile_mode_index = rtex->surface.stencil_tiling_index[level]; 1678 } else { 1679 tile_mode_index = rtex->surface.tiling_index[level]; 1680 } 1681 return tile_mode_index; 1682} 1683 1684/* 1685 * framebuffer handling 1686 */ 1687 1688static void si_initialize_color_surface(struct si_context *sctx, 1689 struct r600_surface *surf) 1690{ 1691 struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; 1692 unsigned level = surf->base.u.tex.level; 1693 uint64_t offset = rtex->surface.level[level].offset; 1694 unsigned pitch, slice; 1695 unsigned color_info, color_attrib, color_pitch, color_view; 1696 unsigned tile_mode_index; 1697 unsigned format, swap, ntype, endian; 1698 const struct util_format_description *desc; 1699 int i; 1700 unsigned blend_clamp = 0, blend_bypass = 0; 1701 unsigned max_comp_size; 1702 1703 /* Layered rendering doesn't work with LINEAR_GENERAL. 1704 * (LINEAR_ALIGNED and others work) */ 1705 if (rtex->surface.level[level].mode == RADEON_SURF_MODE_LINEAR) { 1706 assert(surf->base.u.tex.first_layer == surf->base.u.tex.last_layer); 1707 offset += rtex->surface.level[level].slice_size * 1708 surf->base.u.tex.first_layer; 1709 color_view = 0; 1710 } else { 1711 color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) | 1712 S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer); 1713 } 1714 1715 pitch = (rtex->surface.level[level].nblk_x) / 8 - 1; 1716 slice = (rtex->surface.level[level].nblk_x * rtex->surface.level[level].nblk_y) / 64; 1717 if (slice) { 1718 slice = slice - 1; 1719 } 1720 1721 tile_mode_index = si_tile_mode_index(rtex, level, false); 1722 1723 desc = util_format_description(surf->base.format); 1724 for (i = 0; i < 4; i++) { 1725 if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { 1726 break; 1727 } 1728 } 1729 if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) { 1730 ntype = V_028C70_NUMBER_FLOAT; 1731 } else { 1732 ntype = V_028C70_NUMBER_UNORM; 1733 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) 1734 ntype = V_028C70_NUMBER_SRGB; 1735 else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 1736 if (desc->channel[i].pure_integer) { 1737 ntype = V_028C70_NUMBER_SINT; 1738 } else { 1739 assert(desc->channel[i].normalized); 1740 ntype = V_028C70_NUMBER_SNORM; 1741 } 1742 } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { 1743 if (desc->channel[i].pure_integer) { 1744 ntype = V_028C70_NUMBER_UINT; 1745 } else { 1746 assert(desc->channel[i].normalized); 1747 ntype = V_028C70_NUMBER_UNORM; 1748 } 1749 } 1750 } 1751 1752 format = si_translate_colorformat(surf->base.format); 1753 if (format == V_028C70_COLOR_INVALID) { 1754 R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format); 1755 } 1756 assert(format != V_028C70_COLOR_INVALID); 1757 swap = r600_translate_colorswap(surf->base.format); 1758 if (rtex->resource.b.b.usage == PIPE_USAGE_STAGING) { 1759 endian = V_028C70_ENDIAN_NONE; 1760 } else { 1761 endian = si_colorformat_endian_swap(format); 1762 } 1763 1764 /* blend clamp should be set for all NORM/SRGB types */ 1765 if (ntype == V_028C70_NUMBER_UNORM || 1766 ntype == V_028C70_NUMBER_SNORM || 1767 ntype == V_028C70_NUMBER_SRGB) 1768 blend_clamp = 1; 1769 1770 /* set blend bypass according to docs if SINT/UINT or 1771 8/24 COLOR variants */ 1772 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT || 1773 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 || 1774 format == V_028C70_COLOR_X24_8_32_FLOAT) { 1775 blend_clamp = 0; 1776 blend_bypass = 1; 1777 } 1778 1779 color_info = S_028C70_FORMAT(format) | 1780 S_028C70_COMP_SWAP(swap) | 1781 S_028C70_BLEND_CLAMP(blend_clamp) | 1782 S_028C70_BLEND_BYPASS(blend_bypass) | 1783 S_028C70_NUMBER_TYPE(ntype) | 1784 S_028C70_ENDIAN(endian); 1785 1786 color_pitch = S_028C64_TILE_MAX(pitch); 1787 1788 color_attrib = S_028C74_TILE_MODE_INDEX(tile_mode_index) | 1789 S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1); 1790 1791 if (rtex->resource.b.b.nr_samples > 1) { 1792 unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples); 1793 1794 color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | 1795 S_028C74_NUM_FRAGMENTS(log_samples); 1796 1797 if (rtex->fmask.size) { 1798 color_info |= S_028C70_COMPRESSION(1); 1799 unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height); 1800 1801 color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(rtex->fmask.tile_mode_index); 1802 1803 if (sctx->b.chip_class == SI) { 1804 /* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */ 1805 color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); 1806 } 1807 if (sctx->b.chip_class >= CIK) { 1808 color_pitch |= S_028C64_FMASK_TILE_MAX(rtex->fmask.pitch / 8 - 1); 1809 } 1810 } 1811 } 1812 1813 offset += rtex->resource.gpu_address; 1814 1815 surf->cb_color_base = offset >> 8; 1816 surf->cb_color_pitch = color_pitch; 1817 surf->cb_color_slice = S_028C68_TILE_MAX(slice); 1818 surf->cb_color_view = color_view; 1819 surf->cb_color_info = color_info; 1820 surf->cb_color_attrib = color_attrib; 1821 1822 if (rtex->fmask.size) { 1823 surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8; 1824 surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max); 1825 } else { 1826 /* This must be set for fast clear to work without FMASK. */ 1827 surf->cb_color_fmask = surf->cb_color_base; 1828 surf->cb_color_fmask_slice = surf->cb_color_slice; 1829 surf->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index); 1830 1831 if (sctx->b.chip_class == SI) { 1832 unsigned bankh = util_logbase2(rtex->surface.bankh); 1833 surf->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh); 1834 } 1835 1836 if (sctx->b.chip_class >= CIK) { 1837 surf->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch); 1838 } 1839 } 1840 1841 /* Determine pixel shader export format */ 1842 max_comp_size = si_colorformat_max_comp_size(format); 1843 if (ntype == V_028C70_NUMBER_SRGB || 1844 ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) && 1845 max_comp_size <= 10) || 1846 (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) { 1847 surf->export_16bpc = true; 1848 } 1849 1850 surf->color_initialized = true; 1851} 1852 1853static void si_init_depth_surface(struct si_context *sctx, 1854 struct r600_surface *surf) 1855{ 1856 struct si_screen *sscreen = sctx->screen; 1857 struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; 1858 unsigned level = surf->base.u.tex.level; 1859 struct radeon_surface_level *levelinfo = &rtex->surface.level[level]; 1860 unsigned format, tile_mode_index, array_mode; 1861 unsigned macro_aspect, tile_split, stile_split, bankh, bankw, nbanks, pipe_config; 1862 uint32_t z_info, s_info, db_depth_info; 1863 uint64_t z_offs, s_offs; 1864 uint32_t db_htile_data_base, db_htile_surface, pa_su_poly_offset_db_fmt_cntl = 0; 1865 1866 switch (sctx->framebuffer.state.zsbuf->texture->format) { 1867 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1868 case PIPE_FORMAT_X8Z24_UNORM: 1869 case PIPE_FORMAT_Z24X8_UNORM: 1870 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1871 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24); 1872 break; 1873 case PIPE_FORMAT_Z32_FLOAT: 1874 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1875 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | 1876 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); 1877 break; 1878 case PIPE_FORMAT_Z16_UNORM: 1879 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16); 1880 break; 1881 default: 1882 assert(0); 1883 } 1884 1885 format = si_translate_dbformat(rtex->resource.b.b.format); 1886 1887 if (format == V_028040_Z_INVALID) { 1888 R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format); 1889 } 1890 assert(format != V_028040_Z_INVALID); 1891 1892 s_offs = z_offs = rtex->resource.gpu_address; 1893 z_offs += rtex->surface.level[level].offset; 1894 s_offs += rtex->surface.stencil_level[level].offset; 1895 1896 db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1); 1897 1898 z_info = S_028040_FORMAT(format); 1899 if (rtex->resource.b.b.nr_samples > 1) { 1900 z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples)); 1901 } 1902 1903 if (rtex->surface.flags & RADEON_SURF_SBUFFER) 1904 s_info = S_028044_FORMAT(V_028044_STENCIL_8); 1905 else 1906 s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID); 1907 1908 if (sctx->b.chip_class >= CIK) { 1909 switch (rtex->surface.level[level].mode) { 1910 case RADEON_SURF_MODE_2D: 1911 array_mode = V_02803C_ARRAY_2D_TILED_THIN1; 1912 break; 1913 case RADEON_SURF_MODE_1D: 1914 case RADEON_SURF_MODE_LINEAR_ALIGNED: 1915 case RADEON_SURF_MODE_LINEAR: 1916 default: 1917 array_mode = V_02803C_ARRAY_1D_TILED_THIN1; 1918 break; 1919 } 1920 tile_split = rtex->surface.tile_split; 1921 stile_split = rtex->surface.stencil_tile_split; 1922 macro_aspect = rtex->surface.mtilea; 1923 bankw = rtex->surface.bankw; 1924 bankh = rtex->surface.bankh; 1925 tile_split = cik_tile_split(tile_split); 1926 stile_split = cik_tile_split(stile_split); 1927 macro_aspect = cik_macro_tile_aspect(macro_aspect); 1928 bankw = cik_bank_wh(bankw); 1929 bankh = cik_bank_wh(bankh); 1930 nbanks = si_num_banks(sscreen, rtex); 1931 tile_mode_index = si_tile_mode_index(rtex, level, false); 1932 pipe_config = cik_db_pipe_config(sscreen, tile_mode_index); 1933 1934 db_depth_info |= S_02803C_ARRAY_MODE(array_mode) | 1935 S_02803C_PIPE_CONFIG(pipe_config) | 1936 S_02803C_BANK_WIDTH(bankw) | 1937 S_02803C_BANK_HEIGHT(bankh) | 1938 S_02803C_MACRO_TILE_ASPECT(macro_aspect) | 1939 S_02803C_NUM_BANKS(nbanks); 1940 z_info |= S_028040_TILE_SPLIT(tile_split); 1941 s_info |= S_028044_TILE_SPLIT(stile_split); 1942 } else { 1943 tile_mode_index = si_tile_mode_index(rtex, level, false); 1944 z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index); 1945 tile_mode_index = si_tile_mode_index(rtex, level, true); 1946 s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index); 1947 } 1948 1949 /* HiZ aka depth buffer htile */ 1950 /* use htile only for first level */ 1951 if (rtex->htile_buffer && !level) { 1952 z_info |= S_028040_TILE_SURFACE_ENABLE(1) | 1953 S_028040_ALLOW_EXPCLEAR(1); 1954 1955 /* This is optimal for the clear value of 1.0 and using 1956 * the LESS and LEQUAL test functions. Set this to 0 1957 * for the opposite case. This can only be changed when 1958 * clearing. */ 1959 z_info |= S_028040_ZRANGE_PRECISION(1); 1960 1961 /* Use all of the htile_buffer for depth, because we don't 1962 * use HTILE for stencil because of FAST_STENCIL_DISABLE. */ 1963 s_info |= S_028044_TILE_STENCIL_DISABLE(1); 1964 1965 uint64_t va = rtex->htile_buffer->gpu_address; 1966 db_htile_data_base = va >> 8; 1967 db_htile_surface = S_028ABC_FULL_CACHE(1); 1968 } else { 1969 db_htile_data_base = 0; 1970 db_htile_surface = 0; 1971 } 1972 1973 assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0); 1974 1975 surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) | 1976 S_028008_SLICE_MAX(surf->base.u.tex.last_layer); 1977 surf->db_htile_data_base = db_htile_data_base; 1978 surf->db_depth_info = db_depth_info; 1979 surf->db_z_info = z_info; 1980 surf->db_stencil_info = s_info; 1981 surf->db_depth_base = z_offs >> 8; 1982 surf->db_stencil_base = s_offs >> 8; 1983 surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) | 1984 S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1); 1985 surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x * 1986 levelinfo->nblk_y) / 64 - 1); 1987 surf->db_htile_surface = db_htile_surface; 1988 surf->pa_su_poly_offset_db_fmt_cntl = pa_su_poly_offset_db_fmt_cntl; 1989 1990 surf->depth_initialized = true; 1991} 1992 1993static void si_set_framebuffer_state(struct pipe_context *ctx, 1994 const struct pipe_framebuffer_state *state) 1995{ 1996 struct si_context *sctx = (struct si_context *)ctx; 1997 struct pipe_constant_buffer constbuf = {0}; 1998 struct r600_surface *surf = NULL; 1999 struct r600_texture *rtex; 2000 bool old_cb0_is_integer = sctx->framebuffer.cb0_is_integer; 2001 unsigned old_nr_samples = sctx->framebuffer.nr_samples; 2002 int i; 2003 2004 /* Only flush TC when changing the framebuffer state, because 2005 * the only client not using TC that can change textures is 2006 * the framebuffer. 2007 * 2008 * Flush all CB and DB caches here because all buffers can be used 2009 * for write by both TC (with shader image stores) and CB/DB. 2010 */ 2011 sctx->b.flags |= SI_CONTEXT_INV_TC_L1 | 2012 SI_CONTEXT_INV_TC_L2 | 2013 SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; 2014 2015 util_copy_framebuffer_state(&sctx->framebuffer.state, state); 2016 2017 sctx->framebuffer.export_16bpc = 0; 2018 sctx->framebuffer.compressed_cb_mask = 0; 2019 sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); 2020 sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples); 2021 sctx->framebuffer.cb0_is_integer = state->nr_cbufs && state->cbufs[0] && 2022 util_format_is_pure_integer(state->cbufs[0]->format); 2023 2024 if (sctx->framebuffer.cb0_is_integer != old_cb0_is_integer) 2025 sctx->db_render_state.dirty = true; 2026 2027 for (i = 0; i < state->nr_cbufs; i++) { 2028 if (!state->cbufs[i]) 2029 continue; 2030 2031 surf = (struct r600_surface*)state->cbufs[i]; 2032 rtex = (struct r600_texture*)surf->base.texture; 2033 2034 if (!surf->color_initialized) { 2035 si_initialize_color_surface(sctx, surf); 2036 } 2037 2038 if (surf->export_16bpc) { 2039 sctx->framebuffer.export_16bpc |= 1 << i; 2040 } 2041 2042 if (rtex->fmask.size && rtex->cmask.size) { 2043 sctx->framebuffer.compressed_cb_mask |= 1 << i; 2044 } 2045 } 2046 /* Set the 16BPC export for possible dual-src blending. */ 2047 if (i == 1 && surf && surf->export_16bpc) { 2048 sctx->framebuffer.export_16bpc |= 1 << 1; 2049 } 2050 2051 assert(!(sctx->framebuffer.export_16bpc & ~0xff)); 2052 2053 if (state->zsbuf) { 2054 surf = (struct r600_surface*)state->zsbuf; 2055 2056 if (!surf->depth_initialized) { 2057 si_init_depth_surface(sctx, surf); 2058 } 2059 } 2060 2061 si_update_fb_rs_state(sctx); 2062 si_update_fb_blend_state(sctx); 2063 2064 sctx->framebuffer.atom.num_dw = state->nr_cbufs*15 + (8 - state->nr_cbufs)*3; 2065 sctx->framebuffer.atom.num_dw += state->zsbuf ? 26 : 4; 2066 sctx->framebuffer.atom.num_dw += 3; /* WINDOW_SCISSOR_BR */ 2067 sctx->framebuffer.atom.num_dw += 18; /* MSAA sample locations */ 2068 sctx->framebuffer.atom.dirty = true; 2069 2070 if (sctx->framebuffer.nr_samples != old_nr_samples) { 2071 sctx->msaa_config.dirty = true; 2072 sctx->db_render_state.dirty = true; 2073 2074 /* Set sample locations as fragment shader constants. */ 2075 switch (sctx->framebuffer.nr_samples) { 2076 case 1: 2077 constbuf.user_buffer = sctx->b.sample_locations_1x; 2078 break; 2079 case 2: 2080 constbuf.user_buffer = sctx->b.sample_locations_2x; 2081 break; 2082 case 4: 2083 constbuf.user_buffer = sctx->b.sample_locations_4x; 2084 break; 2085 case 8: 2086 constbuf.user_buffer = sctx->b.sample_locations_8x; 2087 break; 2088 case 16: 2089 constbuf.user_buffer = sctx->b.sample_locations_16x; 2090 break; 2091 default: 2092 assert(0); 2093 } 2094 constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; 2095 ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT, 2096 SI_DRIVER_STATE_CONST_BUF, &constbuf); 2097 } 2098} 2099 2100static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom) 2101{ 2102 struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; 2103 struct pipe_framebuffer_state *state = &sctx->framebuffer.state; 2104 unsigned i, nr_cbufs = state->nr_cbufs; 2105 struct r600_texture *tex = NULL; 2106 struct r600_surface *cb = NULL; 2107 2108 /* Colorbuffers. */ 2109 for (i = 0; i < nr_cbufs; i++) { 2110 cb = (struct r600_surface*)state->cbufs[i]; 2111 if (!cb) { 2112 r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 2113 S_028C70_FORMAT(V_028C70_COLOR_INVALID)); 2114 continue; 2115 } 2116 2117 tex = (struct r600_texture *)cb->base.texture; 2118 r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, 2119 &tex->resource, RADEON_USAGE_READWRITE, 2120 tex->surface.nsamples > 1 ? 2121 RADEON_PRIO_COLOR_BUFFER_MSAA : 2122 RADEON_PRIO_COLOR_BUFFER); 2123 2124 if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) { 2125 r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, 2126 tex->cmask_buffer, RADEON_USAGE_READWRITE, 2127 RADEON_PRIO_COLOR_META); 2128 } 2129 2130 r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 13); 2131 radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */ 2132 radeon_emit(cs, cb->cb_color_pitch); /* R_028C64_CB_COLOR0_PITCH */ 2133 radeon_emit(cs, cb->cb_color_slice); /* R_028C68_CB_COLOR0_SLICE */ 2134 radeon_emit(cs, cb->cb_color_view); /* R_028C6C_CB_COLOR0_VIEW */ 2135 radeon_emit(cs, cb->cb_color_info | tex->cb_color_info); /* R_028C70_CB_COLOR0_INFO */ 2136 radeon_emit(cs, cb->cb_color_attrib); /* R_028C74_CB_COLOR0_ATTRIB */ 2137 radeon_emit(cs, 0); /* R_028C78 unused */ 2138 radeon_emit(cs, tex->cmask.base_address_reg); /* R_028C7C_CB_COLOR0_CMASK */ 2139 radeon_emit(cs, tex->cmask.slice_tile_max); /* R_028C80_CB_COLOR0_CMASK_SLICE */ 2140 radeon_emit(cs, cb->cb_color_fmask); /* R_028C84_CB_COLOR0_FMASK */ 2141 radeon_emit(cs, cb->cb_color_fmask_slice); /* R_028C88_CB_COLOR0_FMASK_SLICE */ 2142 radeon_emit(cs, tex->color_clear_value[0]); /* R_028C8C_CB_COLOR0_CLEAR_WORD0 */ 2143 radeon_emit(cs, tex->color_clear_value[1]); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */ 2144 } 2145 /* set CB_COLOR1_INFO for possible dual-src blending */ 2146 if (i == 1 && state->cbufs[0]) { 2147 r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C, 2148 cb->cb_color_info | tex->cb_color_info); 2149 i++; 2150 } 2151 for (; i < 8 ; i++) { 2152 r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); 2153 } 2154 2155 /* ZS buffer. */ 2156 if (state->zsbuf) { 2157 struct r600_surface *zb = (struct r600_surface*)state->zsbuf; 2158 struct r600_texture *rtex = (struct r600_texture*)zb->base.texture; 2159 2160 r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, 2161 &rtex->resource, RADEON_USAGE_READWRITE, 2162 zb->base.texture->nr_samples > 1 ? 2163 RADEON_PRIO_DEPTH_BUFFER_MSAA : 2164 RADEON_PRIO_DEPTH_BUFFER); 2165 2166 if (zb->db_htile_data_base) { 2167 r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, 2168 rtex->htile_buffer, RADEON_USAGE_READWRITE, 2169 RADEON_PRIO_DEPTH_META); 2170 } 2171 2172 r600_write_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view); 2173 r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base); 2174 2175 r600_write_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9); 2176 radeon_emit(cs, zb->db_depth_info); /* R_02803C_DB_DEPTH_INFO */ 2177 radeon_emit(cs, zb->db_z_info); /* R_028040_DB_Z_INFO */ 2178 radeon_emit(cs, zb->db_stencil_info); /* R_028044_DB_STENCIL_INFO */ 2179 radeon_emit(cs, zb->db_depth_base); /* R_028048_DB_Z_READ_BASE */ 2180 radeon_emit(cs, zb->db_stencil_base); /* R_02804C_DB_STENCIL_READ_BASE */ 2181 radeon_emit(cs, zb->db_depth_base); /* R_028050_DB_Z_WRITE_BASE */ 2182 radeon_emit(cs, zb->db_stencil_base); /* R_028054_DB_STENCIL_WRITE_BASE */ 2183 radeon_emit(cs, zb->db_depth_size); /* R_028058_DB_DEPTH_SIZE */ 2184 radeon_emit(cs, zb->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */ 2185 2186 r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface); 2187 r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value)); 2188 r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 2189 zb->pa_su_poly_offset_db_fmt_cntl); 2190 } else { 2191 r600_write_context_reg_seq(cs, R_028040_DB_Z_INFO, 2); 2192 radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */ 2193 radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */ 2194 } 2195 2196 /* Framebuffer dimensions. */ 2197 /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */ 2198 r600_write_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR, 2199 S_028208_BR_X(state->width) | S_028208_BR_Y(state->height)); 2200 2201 cayman_emit_msaa_sample_locs(cs, sctx->framebuffer.nr_samples); 2202} 2203 2204static void si_emit_msaa_config(struct r600_common_context *rctx, struct r600_atom *atom) 2205{ 2206 struct si_context *sctx = (struct si_context *)rctx; 2207 struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; 2208 2209 cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples, 2210 sctx->ps_iter_samples); 2211} 2212 2213const struct r600_atom si_atom_msaa_config = { si_emit_msaa_config, 10 }; /* number of CS dwords */ 2214 2215static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples) 2216{ 2217 struct si_context *sctx = (struct si_context *)ctx; 2218 2219 if (sctx->ps_iter_samples == min_samples) 2220 return; 2221 2222 sctx->ps_iter_samples = min_samples; 2223 2224 if (sctx->framebuffer.nr_samples > 1) 2225 sctx->msaa_config.dirty = true; 2226} 2227 2228/* 2229 * Samplers 2230 */ 2231 2232static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx, 2233 struct pipe_resource *texture, 2234 const struct pipe_sampler_view *state) 2235{ 2236 struct si_context *sctx = (struct si_context*)ctx; 2237 struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view); 2238 struct r600_texture *tmp = (struct r600_texture*)texture; 2239 const struct util_format_description *desc; 2240 unsigned format, num_format; 2241 uint32_t pitch = 0; 2242 unsigned char state_swizzle[4], swizzle[4]; 2243 unsigned height, depth, width; 2244 enum pipe_format pipe_format = state->format; 2245 struct radeon_surface_level *surflevel; 2246 int first_non_void; 2247 uint64_t va; 2248 2249 if (view == NULL) 2250 return NULL; 2251 2252 /* initialize base object */ 2253 view->base = *state; 2254 view->base.texture = NULL; 2255 view->base.reference.count = 1; 2256 view->base.context = ctx; 2257 2258 /* NULL resource, obey swizzle (only ZERO and ONE make sense). */ 2259 if (!texture) { 2260 view->state[3] = S_008F1C_DST_SEL_X(si_map_swizzle(state->swizzle_r)) | 2261 S_008F1C_DST_SEL_Y(si_map_swizzle(state->swizzle_g)) | 2262 S_008F1C_DST_SEL_Z(si_map_swizzle(state->swizzle_b)) | 2263 S_008F1C_DST_SEL_W(si_map_swizzle(state->swizzle_a)) | 2264 S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D); 2265 return &view->base; 2266 } 2267 2268 pipe_resource_reference(&view->base.texture, texture); 2269 view->resource = &tmp->resource; 2270 2271 /* Buffer resource. */ 2272 if (texture->target == PIPE_BUFFER) { 2273 unsigned stride; 2274 2275 desc = util_format_description(state->format); 2276 first_non_void = util_format_get_first_non_void_channel(state->format); 2277 stride = desc->block.bits / 8; 2278 va = tmp->resource.gpu_address + state->u.buf.first_element*stride; 2279 format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); 2280 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); 2281 2282 view->state[4] = va; 2283 view->state[5] = S_008F04_BASE_ADDRESS_HI(va >> 32) | 2284 S_008F04_STRIDE(stride); 2285 view->state[6] = state->u.buf.last_element + 1 - state->u.buf.first_element; 2286 view->state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 2287 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 2288 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 2289 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | 2290 S_008F0C_NUM_FORMAT(num_format) | 2291 S_008F0C_DATA_FORMAT(format); 2292 2293 LIST_ADDTAIL(&view->list, &sctx->b.texture_buffers); 2294 return &view->base; 2295 } 2296 2297 state_swizzle[0] = state->swizzle_r; 2298 state_swizzle[1] = state->swizzle_g; 2299 state_swizzle[2] = state->swizzle_b; 2300 state_swizzle[3] = state->swizzle_a; 2301 2302 surflevel = tmp->surface.level; 2303 2304 /* Texturing with separate depth and stencil. */ 2305 if (tmp->is_depth && !tmp->is_flushing_texture) { 2306 switch (pipe_format) { 2307 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 2308 pipe_format = PIPE_FORMAT_Z32_FLOAT; 2309 break; 2310 case PIPE_FORMAT_X8Z24_UNORM: 2311 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2312 /* Z24 is always stored like this. */ 2313 pipe_format = PIPE_FORMAT_Z24X8_UNORM; 2314 break; 2315 case PIPE_FORMAT_X24S8_UINT: 2316 case PIPE_FORMAT_S8X24_UINT: 2317 case PIPE_FORMAT_X32_S8X24_UINT: 2318 pipe_format = PIPE_FORMAT_S8_UINT; 2319 surflevel = tmp->surface.stencil_level; 2320 break; 2321 default:; 2322 } 2323 } 2324 2325 desc = util_format_description(pipe_format); 2326 2327 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 2328 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; 2329 const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1}; 2330 2331 switch (pipe_format) { 2332 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2333 case PIPE_FORMAT_X24S8_UINT: 2334 case PIPE_FORMAT_X32_S8X24_UINT: 2335 case PIPE_FORMAT_X8Z24_UNORM: 2336 util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); 2337 break; 2338 default: 2339 util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle); 2340 } 2341 } else { 2342 util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle); 2343 } 2344 2345 first_non_void = util_format_get_first_non_void_channel(pipe_format); 2346 2347 switch (pipe_format) { 2348 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2349 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2350 break; 2351 default: 2352 if (first_non_void < 0) { 2353 if (util_format_is_compressed(pipe_format)) { 2354 switch (pipe_format) { 2355 case PIPE_FORMAT_DXT1_SRGB: 2356 case PIPE_FORMAT_DXT1_SRGBA: 2357 case PIPE_FORMAT_DXT3_SRGBA: 2358 case PIPE_FORMAT_DXT5_SRGBA: 2359 case PIPE_FORMAT_BPTC_SRGBA: 2360 num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 2361 break; 2362 case PIPE_FORMAT_RGTC1_SNORM: 2363 case PIPE_FORMAT_LATC1_SNORM: 2364 case PIPE_FORMAT_RGTC2_SNORM: 2365 case PIPE_FORMAT_LATC2_SNORM: 2366 /* implies float, so use SNORM/UNORM to determine 2367 whether data is signed or not */ 2368 case PIPE_FORMAT_BPTC_RGB_FLOAT: 2369 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 2370 break; 2371 default: 2372 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2373 break; 2374 } 2375 } else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 2376 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2377 } else { 2378 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 2379 } 2380 } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { 2381 num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 2382 } else { 2383 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2384 2385 switch (desc->channel[first_non_void].type) { 2386 case UTIL_FORMAT_TYPE_FLOAT: 2387 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 2388 break; 2389 case UTIL_FORMAT_TYPE_SIGNED: 2390 if (desc->channel[first_non_void].normalized) 2391 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 2392 else if (desc->channel[first_non_void].pure_integer) 2393 num_format = V_008F14_IMG_NUM_FORMAT_SINT; 2394 else 2395 num_format = V_008F14_IMG_NUM_FORMAT_SSCALED; 2396 break; 2397 case UTIL_FORMAT_TYPE_UNSIGNED: 2398 if (desc->channel[first_non_void].normalized) 2399 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2400 else if (desc->channel[first_non_void].pure_integer) 2401 num_format = V_008F14_IMG_NUM_FORMAT_UINT; 2402 else 2403 num_format = V_008F14_IMG_NUM_FORMAT_USCALED; 2404 } 2405 } 2406 } 2407 2408 format = si_translate_texformat(ctx->screen, pipe_format, desc, first_non_void); 2409 if (format == ~0) { 2410 format = 0; 2411 } 2412 2413 /* not supported any more */ 2414 //endian = si_colorformat_endian_swap(format); 2415 2416 width = surflevel[0].npix_x; 2417 height = surflevel[0].npix_y; 2418 depth = surflevel[0].npix_z; 2419 pitch = surflevel[0].nblk_x * util_format_get_blockwidth(pipe_format); 2420 2421 if (texture->target == PIPE_TEXTURE_1D_ARRAY) { 2422 height = 1; 2423 depth = texture->array_size; 2424 } else if (texture->target == PIPE_TEXTURE_2D_ARRAY) { 2425 depth = texture->array_size; 2426 } else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY) 2427 depth = texture->array_size / 6; 2428 2429 va = tmp->resource.gpu_address + surflevel[0].offset; 2430 va += tmp->mipmap_shift * surflevel[texture->last_level].slice_size * tmp->surface.array_size; 2431 2432 view->state[0] = va >> 8; 2433 view->state[1] = (S_008F14_BASE_ADDRESS_HI(va >> 40) | 2434 S_008F14_DATA_FORMAT(format) | 2435 S_008F14_NUM_FORMAT(num_format)); 2436 view->state[2] = (S_008F18_WIDTH(width - 1) | 2437 S_008F18_HEIGHT(height - 1)); 2438 view->state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) | 2439 S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | 2440 S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | 2441 S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) | 2442 S_008F1C_BASE_LEVEL(texture->nr_samples > 1 ? 2443 0 : state->u.tex.first_level - tmp->mipmap_shift) | 2444 S_008F1C_LAST_LEVEL(texture->nr_samples > 1 ? 2445 util_logbase2(texture->nr_samples) : 2446 state->u.tex.last_level - tmp->mipmap_shift) | 2447 S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, 0, false)) | 2448 S_008F1C_POW2_PAD(texture->last_level > 0) | 2449 S_008F1C_TYPE(si_tex_dim(texture->target, texture->nr_samples))); 2450 view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1)); 2451 view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) | 2452 S_008F24_LAST_ARRAY(state->u.tex.last_layer)); 2453 view->state[6] = 0; 2454 view->state[7] = 0; 2455 2456 /* Initialize the sampler view for FMASK. */ 2457 if (tmp->fmask.size) { 2458 uint64_t va = tmp->resource.gpu_address + tmp->fmask.offset; 2459 uint32_t fmask_format; 2460 2461 switch (texture->nr_samples) { 2462 case 2: 2463 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2; 2464 break; 2465 case 4: 2466 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4; 2467 break; 2468 case 8: 2469 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8; 2470 break; 2471 default: 2472 assert(0); 2473 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID; 2474 } 2475 2476 view->fmask_state[0] = va >> 8; 2477 view->fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | 2478 S_008F14_DATA_FORMAT(fmask_format) | 2479 S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT); 2480 view->fmask_state[2] = S_008F18_WIDTH(width - 1) | 2481 S_008F18_HEIGHT(height - 1); 2482 view->fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | 2483 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | 2484 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | 2485 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | 2486 S_008F1C_TILING_INDEX(tmp->fmask.tile_mode_index) | 2487 S_008F1C_TYPE(si_tex_dim(texture->target, 0)); 2488 view->fmask_state[4] = S_008F20_DEPTH(depth - 1) | 2489 S_008F20_PITCH(tmp->fmask.pitch - 1); 2490 view->fmask_state[5] = S_008F24_BASE_ARRAY(state->u.tex.first_layer) | 2491 S_008F24_LAST_ARRAY(state->u.tex.last_layer); 2492 view->fmask_state[6] = 0; 2493 view->fmask_state[7] = 0; 2494 } 2495 2496 return &view->base; 2497} 2498 2499static void si_sampler_view_destroy(struct pipe_context *ctx, 2500 struct pipe_sampler_view *state) 2501{ 2502 struct si_sampler_view *view = (struct si_sampler_view *)state; 2503 2504 if (view->resource && view->resource->b.b.target == PIPE_BUFFER) 2505 LIST_DELINIT(&view->list); 2506 2507 pipe_resource_reference(&state->texture, NULL); 2508 FREE(view); 2509} 2510 2511static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter) 2512{ 2513 return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER || 2514 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER || 2515 (linear_filter && 2516 (wrap == PIPE_TEX_WRAP_CLAMP || 2517 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP)); 2518} 2519 2520static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state) 2521{ 2522 bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST || 2523 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST; 2524 2525 return (state->border_color.ui[0] || state->border_color.ui[1] || 2526 state->border_color.ui[2] || state->border_color.ui[3]) && 2527 (wrap_mode_uses_border_color(state->wrap_s, linear_filter) || 2528 wrap_mode_uses_border_color(state->wrap_t, linear_filter) || 2529 wrap_mode_uses_border_color(state->wrap_r, linear_filter)); 2530} 2531 2532static void *si_create_sampler_state(struct pipe_context *ctx, 2533 const struct pipe_sampler_state *state) 2534{ 2535 struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state); 2536 unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0; 2537 unsigned border_color_type; 2538 2539 if (rstate == NULL) { 2540 return NULL; 2541 } 2542 2543 if (sampler_state_needs_border_color(state)) 2544 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER; 2545 else 2546 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; 2547 2548 rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) | 2549 S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) | 2550 S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) | 2551 r600_tex_aniso_filter(state->max_anisotropy) << 9 | 2552 S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) | 2553 S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) | 2554 S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map)); 2555 rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | 2556 S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8))); 2557 rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | 2558 S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter) | aniso_flag_offset) | 2559 S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter) | aniso_flag_offset) | 2560 S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter))); 2561 rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type); 2562 2563 if (border_color_type == V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) { 2564 memcpy(rstate->border_color, state->border_color.ui, 2565 sizeof(rstate->border_color)); 2566 } 2567 2568 return rstate; 2569} 2570 2571/* Upload border colors and update the pointers in resource descriptors. 2572 * There can only be 4096 border colors per context. 2573 * 2574 * XXX: This is broken if the buffer gets reallocated. 2575 */ 2576static void si_set_border_colors(struct si_context *sctx, unsigned count, 2577 void **states) 2578{ 2579 struct si_sampler_state **rstates = (struct si_sampler_state **)states; 2580 uint32_t *border_color_table = NULL; 2581 int i, j; 2582 2583 for (i = 0; i < count; i++) { 2584 if (rstates[i] && 2585 G_008F3C_BORDER_COLOR_TYPE(rstates[i]->val[3]) == 2586 V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) { 2587 if (!sctx->border_color_table || 2588 ((sctx->border_color_offset + count - i) & 2589 C_008F3C_BORDER_COLOR_PTR)) { 2590 r600_resource_reference(&sctx->border_color_table, NULL); 2591 sctx->border_color_offset = 0; 2592 2593 sctx->border_color_table = 2594 si_resource_create_custom(&sctx->screen->b.b, 2595 PIPE_USAGE_DYNAMIC, 2596 4096 * 4 * 4); 2597 } 2598 2599 if (!border_color_table) { 2600 border_color_table = 2601 sctx->b.ws->buffer_map(sctx->border_color_table->cs_buf, 2602 sctx->b.rings.gfx.cs, 2603 PIPE_TRANSFER_WRITE | 2604 PIPE_TRANSFER_UNSYNCHRONIZED); 2605 } 2606 2607 for (j = 0; j < 4; j++) { 2608 border_color_table[4 * sctx->border_color_offset + j] = 2609 util_le32_to_cpu(rstates[i]->border_color[j]); 2610 } 2611 2612 rstates[i]->val[3] &= C_008F3C_BORDER_COLOR_PTR; 2613 rstates[i]->val[3] |= S_008F3C_BORDER_COLOR_PTR(sctx->border_color_offset++); 2614 } 2615 } 2616 2617 if (border_color_table) { 2618 struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); 2619 2620 uint64_t va_offset = sctx->border_color_table->gpu_address; 2621 2622 si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va_offset >> 8); 2623 if (sctx->b.chip_class >= CIK) 2624 si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va_offset >> 40); 2625 si_pm4_add_bo(pm4, sctx->border_color_table, RADEON_USAGE_READ, 2626 RADEON_PRIO_SHADER_DATA); 2627 si_pm4_set_state(sctx, ta_bordercolor_base, pm4); 2628 } 2629} 2630 2631static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader, 2632 unsigned start, unsigned count, 2633 void **states) 2634{ 2635 struct si_context *sctx = (struct si_context *)ctx; 2636 2637 if (!count || shader >= SI_NUM_SHADERS) 2638 return; 2639 2640 si_set_border_colors(sctx, count, states); 2641 si_set_sampler_descriptors(sctx, shader, start, count, states); 2642} 2643 2644static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) 2645{ 2646 struct si_context *sctx = (struct si_context *)ctx; 2647 struct si_state_sample_mask *state = CALLOC_STRUCT(si_state_sample_mask); 2648 struct si_pm4_state *pm4 = &state->pm4; 2649 uint16_t mask = sample_mask; 2650 2651 if (state == NULL) 2652 return; 2653 2654 state->sample_mask = mask; 2655 si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, mask | (mask << 16)); 2656 si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, mask | (mask << 16)); 2657 2658 si_pm4_set_state(sctx, sample_mask, state); 2659} 2660 2661static void si_delete_sampler_state(struct pipe_context *ctx, void *state) 2662{ 2663 free(state); 2664} 2665 2666/* 2667 * Vertex elements & buffers 2668 */ 2669 2670static void *si_create_vertex_elements(struct pipe_context *ctx, 2671 unsigned count, 2672 const struct pipe_vertex_element *elements) 2673{ 2674 struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element); 2675 int i; 2676 2677 assert(count < PIPE_MAX_ATTRIBS); 2678 if (!v) 2679 return NULL; 2680 2681 v->count = count; 2682 for (i = 0; i < count; ++i) { 2683 const struct util_format_description *desc; 2684 unsigned data_format, num_format; 2685 int first_non_void; 2686 2687 desc = util_format_description(elements[i].src_format); 2688 first_non_void = util_format_get_first_non_void_channel(elements[i].src_format); 2689 data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); 2690 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); 2691 2692 v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 2693 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 2694 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 2695 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | 2696 S_008F0C_NUM_FORMAT(num_format) | 2697 S_008F0C_DATA_FORMAT(data_format); 2698 v->format_size[i] = desc->block.bits / 8; 2699 } 2700 memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count); 2701 2702 return v; 2703} 2704 2705static void si_bind_vertex_elements(struct pipe_context *ctx, void *state) 2706{ 2707 struct si_context *sctx = (struct si_context *)ctx; 2708 struct si_vertex_element *v = (struct si_vertex_element*)state; 2709 2710 sctx->vertex_elements = v; 2711 sctx->vertex_buffers_dirty = true; 2712} 2713 2714static void si_delete_vertex_element(struct pipe_context *ctx, void *state) 2715{ 2716 struct si_context *sctx = (struct si_context *)ctx; 2717 2718 if (sctx->vertex_elements == state) 2719 sctx->vertex_elements = NULL; 2720 FREE(state); 2721} 2722 2723static void si_set_vertex_buffers(struct pipe_context *ctx, 2724 unsigned start_slot, unsigned count, 2725 const struct pipe_vertex_buffer *buffers) 2726{ 2727 struct si_context *sctx = (struct si_context *)ctx; 2728 struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot; 2729 int i; 2730 2731 assert(start_slot + count <= Elements(sctx->vertex_buffer)); 2732 2733 if (buffers) { 2734 for (i = 0; i < count; i++) { 2735 const struct pipe_vertex_buffer *src = buffers + i; 2736 struct pipe_vertex_buffer *dsti = dst + i; 2737 2738 pipe_resource_reference(&dsti->buffer, src->buffer); 2739 dsti->buffer_offset = src->buffer_offset; 2740 dsti->stride = src->stride; 2741 } 2742 } else { 2743 for (i = 0; i < count; i++) { 2744 pipe_resource_reference(&dst[i].buffer, NULL); 2745 } 2746 } 2747 sctx->vertex_buffers_dirty = true; 2748} 2749 2750static void si_set_index_buffer(struct pipe_context *ctx, 2751 const struct pipe_index_buffer *ib) 2752{ 2753 struct si_context *sctx = (struct si_context *)ctx; 2754 2755 if (ib) { 2756 pipe_resource_reference(&sctx->index_buffer.buffer, ib->buffer); 2757 memcpy(&sctx->index_buffer, ib, sizeof(*ib)); 2758 } else { 2759 pipe_resource_reference(&sctx->index_buffer.buffer, NULL); 2760 } 2761} 2762 2763/* 2764 * Misc 2765 */ 2766static void si_set_polygon_stipple(struct pipe_context *ctx, 2767 const struct pipe_poly_stipple *state) 2768{ 2769 struct si_context *sctx = (struct si_context *)ctx; 2770 struct pipe_resource *tex; 2771 struct pipe_sampler_view *view; 2772 bool is_zero = true; 2773 bool is_one = true; 2774 int i; 2775 2776 /* The hardware obeys 0 and 1 swizzles in the descriptor even if 2777 * the resource is NULL/invalid. Take advantage of this fact and skip 2778 * texture allocation if the stipple pattern is constant. 2779 * 2780 * This is an optimization for the common case when stippling isn't 2781 * used but set_polygon_stipple is still called by st/mesa. 2782 */ 2783 for (i = 0; i < Elements(state->stipple); i++) { 2784 is_zero = is_zero && state->stipple[i] == 0; 2785 is_one = is_one && state->stipple[i] == 0xffffffff; 2786 } 2787 2788 if (is_zero || is_one) { 2789 struct pipe_sampler_view templ = {{0}}; 2790 2791 templ.swizzle_r = PIPE_SWIZZLE_ZERO; 2792 templ.swizzle_g = PIPE_SWIZZLE_ZERO; 2793 templ.swizzle_b = PIPE_SWIZZLE_ZERO; 2794 /* The pattern should be inverted in the texture. */ 2795 templ.swizzle_a = is_zero ? PIPE_SWIZZLE_ONE : PIPE_SWIZZLE_ZERO; 2796 2797 view = ctx->create_sampler_view(ctx, NULL, &templ); 2798 } else { 2799 /* Create a new texture. */ 2800 tex = util_pstipple_create_stipple_texture(ctx, state->stipple); 2801 if (!tex) 2802 return; 2803 2804 view = util_pstipple_create_sampler_view(ctx, tex); 2805 pipe_resource_reference(&tex, NULL); 2806 } 2807 2808 ctx->set_sampler_views(ctx, PIPE_SHADER_FRAGMENT, 2809 SI_POLY_STIPPLE_SAMPLER, 1, &view); 2810 pipe_sampler_view_reference(&view, NULL); 2811 2812 /* Bind the sampler state if needed. */ 2813 if (!sctx->pstipple_sampler_state) { 2814 sctx->pstipple_sampler_state = util_pstipple_create_sampler(ctx); 2815 ctx->bind_sampler_states(ctx, PIPE_SHADER_FRAGMENT, 2816 SI_POLY_STIPPLE_SAMPLER, 1, 2817 &sctx->pstipple_sampler_state); 2818 } 2819} 2820 2821static void si_texture_barrier(struct pipe_context *ctx) 2822{ 2823 struct si_context *sctx = (struct si_context *)ctx; 2824 2825 sctx->b.flags |= SI_CONTEXT_INV_TC_L1 | 2826 SI_CONTEXT_INV_TC_L2 | 2827 SI_CONTEXT_FLUSH_AND_INV_CB; 2828} 2829 2830static void *si_create_blend_custom(struct si_context *sctx, unsigned mode) 2831{ 2832 struct pipe_blend_state blend; 2833 2834 memset(&blend, 0, sizeof(blend)); 2835 blend.independent_blend_enable = true; 2836 blend.rt[0].colormask = 0xf; 2837 return si_create_blend_state_mode(&sctx->b.b, &blend, mode); 2838} 2839 2840static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw, 2841 bool include_draw_vbo) 2842{ 2843 si_need_cs_space((struct si_context*)ctx, num_dw, include_draw_vbo); 2844} 2845 2846void si_init_state_functions(struct si_context *sctx) 2847{ 2848 si_init_atom(&sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state, 0); 2849 si_init_atom(&sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10); 2850 si_init_atom(&sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs, 6); 2851 2852 sctx->b.b.create_blend_state = si_create_blend_state; 2853 sctx->b.b.bind_blend_state = si_bind_blend_state; 2854 sctx->b.b.delete_blend_state = si_delete_blend_state; 2855 sctx->b.b.set_blend_color = si_set_blend_color; 2856 2857 sctx->b.b.create_rasterizer_state = si_create_rs_state; 2858 sctx->b.b.bind_rasterizer_state = si_bind_rs_state; 2859 sctx->b.b.delete_rasterizer_state = si_delete_rs_state; 2860 2861 sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state; 2862 sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state; 2863 sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state; 2864 2865 sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx); 2866 sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE); 2867 sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS); 2868 sctx->custom_blend_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR); 2869 2870 sctx->b.b.set_clip_state = si_set_clip_state; 2871 sctx->b.b.set_scissor_states = si_set_scissor_states; 2872 sctx->b.b.set_viewport_states = si_set_viewport_states; 2873 sctx->b.b.set_stencil_ref = si_set_pipe_stencil_ref; 2874 2875 sctx->b.b.set_framebuffer_state = si_set_framebuffer_state; 2876 sctx->b.b.get_sample_position = cayman_get_sample_position; 2877 2878 sctx->b.b.create_sampler_state = si_create_sampler_state; 2879 sctx->b.b.bind_sampler_states = si_bind_sampler_states; 2880 sctx->b.b.delete_sampler_state = si_delete_sampler_state; 2881 2882 sctx->b.b.create_sampler_view = si_create_sampler_view; 2883 sctx->b.b.sampler_view_destroy = si_sampler_view_destroy; 2884 2885 sctx->b.b.set_sample_mask = si_set_sample_mask; 2886 2887 sctx->b.b.create_vertex_elements_state = si_create_vertex_elements; 2888 sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements; 2889 sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element; 2890 sctx->b.b.set_vertex_buffers = si_set_vertex_buffers; 2891 sctx->b.b.set_index_buffer = si_set_index_buffer; 2892 2893 sctx->b.b.texture_barrier = si_texture_barrier; 2894 sctx->b.b.set_polygon_stipple = si_set_polygon_stipple; 2895 sctx->b.b.set_min_samples = si_set_min_samples; 2896 2897 sctx->b.dma_copy = si_dma_copy; 2898 sctx->b.set_occlusion_query_state = si_set_occlusion_query_state; 2899 sctx->b.need_gfx_cs_space = si_need_gfx_cs_space; 2900 2901 sctx->b.b.draw_vbo = si_draw_vbo; 2902} 2903 2904static void 2905si_write_harvested_raster_configs(struct si_context *sctx, 2906 struct si_pm4_state *pm4, 2907 unsigned raster_config) 2908{ 2909 unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1); 2910 unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1); 2911 unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask; 2912 unsigned num_rb = sctx->screen->b.info.r600_num_backends; 2913 unsigned rb_per_pkr = num_rb / num_se / sh_per_se; 2914 unsigned rb_per_se = num_rb / num_se; 2915 unsigned se0_mask = (1 << rb_per_se) - 1; 2916 unsigned se1_mask = se0_mask << rb_per_se; 2917 unsigned se; 2918 2919 assert(num_se == 1 || num_se == 2); 2920 assert(sh_per_se == 1 || sh_per_se == 2); 2921 assert(rb_per_pkr == 1 || rb_per_pkr == 2); 2922 2923 /* XXX: I can't figure out what the *_XSEL and *_YSEL 2924 * fields are for, so I'm leaving them as their default 2925 * values. */ 2926 2927 se0_mask &= rb_mask; 2928 se1_mask &= rb_mask; 2929 if (num_se == 2 && (!se0_mask || !se1_mask)) { 2930 raster_config &= C_028350_SE_MAP; 2931 2932 if (!se0_mask) { 2933 raster_config |= 2934 S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3); 2935 } else { 2936 raster_config |= 2937 S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0); 2938 } 2939 } 2940 2941 for (se = 0; se < num_se; se++) { 2942 unsigned raster_config_se = raster_config; 2943 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); 2944 unsigned pkr1_mask = pkr0_mask << rb_per_pkr; 2945 2946 pkr0_mask &= rb_mask; 2947 pkr1_mask &= rb_mask; 2948 if (sh_per_se == 2 && (!pkr0_mask || !pkr1_mask)) { 2949 raster_config_se &= C_028350_PKR_MAP; 2950 2951 if (!pkr0_mask) { 2952 raster_config_se |= 2953 S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3); 2954 } else { 2955 raster_config_se |= 2956 S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0); 2957 } 2958 } 2959 2960 if (rb_per_pkr == 2) { 2961 unsigned rb0_mask = 1 << (se * rb_per_se); 2962 unsigned rb1_mask = rb0_mask << 1; 2963 2964 rb0_mask &= rb_mask; 2965 rb1_mask &= rb_mask; 2966 if (!rb0_mask || !rb1_mask) { 2967 raster_config_se &= C_028350_RB_MAP_PKR0; 2968 2969 if (!rb0_mask) { 2970 raster_config_se |= 2971 S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3); 2972 } else { 2973 raster_config_se |= 2974 S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0); 2975 } 2976 } 2977 2978 if (sh_per_se == 2) { 2979 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); 2980 rb1_mask = rb0_mask << 1; 2981 rb0_mask &= rb_mask; 2982 rb1_mask &= rb_mask; 2983 if (!rb0_mask || !rb1_mask) { 2984 raster_config_se &= C_028350_RB_MAP_PKR1; 2985 2986 if (!rb0_mask) { 2987 raster_config_se |= 2988 S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3); 2989 } else { 2990 raster_config_se |= 2991 S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0); 2992 } 2993 } 2994 } 2995 } 2996 2997 si_pm4_set_reg(pm4, GRBM_GFX_INDEX, 2998 SE_INDEX(se) | SH_BROADCAST_WRITES | 2999 INSTANCE_BROADCAST_WRITES); 3000 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se); 3001 } 3002 3003 si_pm4_set_reg(pm4, GRBM_GFX_INDEX, 3004 SE_BROADCAST_WRITES | SH_BROADCAST_WRITES | 3005 INSTANCE_BROADCAST_WRITES); 3006} 3007 3008void si_init_config(struct si_context *sctx) 3009{ 3010 struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); 3011 3012 if (pm4 == NULL) 3013 return; 3014 3015 si_cmd_context_control(pm4); 3016 3017 si_pm4_set_reg(pm4, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0); 3018 si_pm4_set_reg(pm4, R_028A14_VGT_HOS_CNTL, 0x0); 3019 si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0); 3020 si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0); 3021 si_pm4_set_reg(pm4, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0); 3022 si_pm4_set_reg(pm4, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0); 3023 si_pm4_set_reg(pm4, R_028A28_VGT_GROUP_FIRST_DECR, 0x0); 3024 si_pm4_set_reg(pm4, R_028A2C_VGT_GROUP_DECR, 0x0); 3025 si_pm4_set_reg(pm4, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0); 3026 si_pm4_set_reg(pm4, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0); 3027 si_pm4_set_reg(pm4, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0); 3028 si_pm4_set_reg(pm4, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0); 3029 3030 /* FIXME calculate these values somehow ??? */ 3031 si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, 0x80); 3032 si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40); 3033 si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2); 3034 3035 si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0x0); 3036 si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0); 3037 si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0); 3038 si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); 3039 3040 si_pm4_set_reg(pm4, R_028B60_VGT_GS_VERT_ITEMSIZE_1, 0); 3041 si_pm4_set_reg(pm4, R_028B64_VGT_GS_VERT_ITEMSIZE_2, 0); 3042 si_pm4_set_reg(pm4, R_028B68_VGT_GS_VERT_ITEMSIZE_3, 0); 3043 si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT, 0); 3044 3045 si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); 3046 si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0x00000000); 3047 si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0); 3048 if (sctx->b.chip_class < CIK) 3049 si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | 3050 S_008A14_CLIP_VTX_REORDER_ENA(1)); 3051 3052 si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210); 3053 si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98); 3054 3055 si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0); 3056 3057 if (sctx->b.chip_class >= CIK) { 3058 switch (sctx->screen->b.family) { 3059 case CHIP_BONAIRE: 3060 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x16000012); 3061 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000); 3062 break; 3063 case CHIP_HAWAII: 3064 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x3a00161a); 3065 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x0000002e); 3066 break; 3067 case CHIP_KAVERI: 3068 /* XXX todo */ 3069 case CHIP_KABINI: 3070 /* XXX todo */ 3071 case CHIP_MULLINS: 3072 /* XXX todo */ 3073 default: 3074 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000); 3075 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000); 3076 break; 3077 } 3078 } else { 3079 unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask; 3080 unsigned num_rb = sctx->screen->b.info.r600_num_backends; 3081 unsigned raster_config; 3082 3083 switch (sctx->screen->b.family) { 3084 case CHIP_TAHITI: 3085 case CHIP_PITCAIRN: 3086 raster_config = 0x2a00126a; 3087 break; 3088 case CHIP_VERDE: 3089 raster_config = 0x0000124a; 3090 break; 3091 case CHIP_OLAND: 3092 raster_config = 0x00000082; 3093 break; 3094 case CHIP_HAINAN: 3095 raster_config = 0x00000000; 3096 break; 3097 default: 3098 fprintf(stderr, 3099 "radeonsi: Unknown GPU, using 0 for raster_config\n"); 3100 raster_config = 0x00000000; 3101 break; 3102 } 3103 3104 /* Always use the default config when all backends are enabled 3105 * (or when we failed to determine the enabled backends). 3106 */ 3107 if (!rb_mask || util_bitcount(rb_mask) >= num_rb) { 3108 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 3109 raster_config); 3110 } else { 3111 si_write_harvested_raster_configs(sctx, pm4, raster_config); 3112 } 3113 } 3114 3115 si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); 3116 si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); 3117 si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR, 3118 S_028244_BR_X(16384) | S_028244_BR_Y(16384)); 3119 si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0); 3120 si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR, 3121 S_028034_BR_X(16384) | S_028034_BR_Y(16384)); 3122 3123 si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); 3124 si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); 3125 /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */ 3126 si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); 3127 si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000); 3128 si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000); 3129 si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0x00000000); 3130 si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000); 3131 si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000); 3132 si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000); 3133 si_pm4_set_reg(pm4, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000); 3134 si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, 0x00000000); 3135 si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, 0x00000000); 3136 si_pm4_set_reg(pm4, R_028028_DB_STENCIL_CLEAR, 0x00000000); 3137 si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); 3138 si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); 3139 si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0); 3140 3141 /* There is a hang if stencil is used and fast stencil is enabled 3142 * regardless of whether HTILE is depth-only or not. 3143 */ 3144 si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 3145 S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | 3146 S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE) | 3147 S_02800C_FAST_STENCIL_DISABLE(1)); 3148 3149 si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0); 3150 si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0); 3151 si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0); 3152 3153 if (sctx->b.chip_class >= CIK) { 3154 si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff)); 3155 si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(0)); 3156 si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff)); 3157 } 3158 3159 sctx->init_config = pm4; 3160} 3161