si_state.c revision b3057f8097f88d9072df6d9c09bcc8c039b88a7c
1/* 2 * Copyright 2012 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 
22 * 23 * Authors: 24 * Christian König <christian.koenig@amd.com> 25 */ 26 27#include "si_pipe.h" 28#include "si_shader.h" 29#include "sid.h" 30#include "radeon/r600_cs.h" 31 32#include "util/u_format.h" 33#include "util/u_format_s3tc.h" 34#include "util/u_memory.h" 35 36static void si_init_atom(struct r600_atom *atom, struct r600_atom **list_elem, 37 void (*emit)(struct si_context *ctx, struct r600_atom *state), 38 unsigned num_dw) 39{ 40 atom->emit = (void*)emit; 41 atom->num_dw = num_dw; 42 atom->dirty = false; 43 *list_elem = atom; 44} 45 46uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex) 47{ 48 if (sscreen->b.chip_class == CIK && 49 sscreen->b.info.cik_macrotile_mode_array_valid) { 50 unsigned index, tileb; 51 52 tileb = 8 * 8 * tex->surface.bpe; 53 tileb = MIN2(tex->surface.tile_split, tileb); 54 55 for (index = 0; tileb > 64; index++) { 56 tileb >>= 1; 57 } 58 assert(index < 16); 59 60 return (sscreen->b.info.cik_macrotile_mode_array[index] >> 6) & 0x3; 61 } 62 63 if (sscreen->b.chip_class == SI && 64 sscreen->b.info.si_tile_mode_array_valid) { 65 /* Don't use stencil_tiling_index, because num_banks is always 66 * read from the depth mode. */ 67 unsigned tile_mode_index = tex->surface.tiling_index[0]; 68 assert(tile_mode_index < 32); 69 70 return G_009910_NUM_BANKS(sscreen->b.info.si_tile_mode_array[tile_mode_index]); 71 } 72 73 /* The old way. 
*/ 74 switch (sscreen->b.tiling_info.num_banks) { 75 case 2: 76 return V_02803C_ADDR_SURF_2_BANK; 77 case 4: 78 return V_02803C_ADDR_SURF_4_BANK; 79 case 8: 80 default: 81 return V_02803C_ADDR_SURF_8_BANK; 82 case 16: 83 return V_02803C_ADDR_SURF_16_BANK; 84 } 85} 86 87unsigned cik_tile_split(unsigned tile_split) 88{ 89 switch (tile_split) { 90 case 64: 91 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_64B; 92 break; 93 case 128: 94 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_128B; 95 break; 96 case 256: 97 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_256B; 98 break; 99 case 512: 100 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_512B; 101 break; 102 default: 103 case 1024: 104 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_1KB; 105 break; 106 case 2048: 107 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_2KB; 108 break; 109 case 4096: 110 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_4KB; 111 break; 112 } 113 return tile_split; 114} 115 116unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect) 117{ 118 switch (macro_tile_aspect) { 119 default: 120 case 1: 121 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_1; 122 break; 123 case 2: 124 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_2; 125 break; 126 case 4: 127 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_4; 128 break; 129 case 8: 130 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_8; 131 break; 132 } 133 return macro_tile_aspect; 134} 135 136unsigned cik_bank_wh(unsigned bankwh) 137{ 138 switch (bankwh) { 139 default: 140 case 1: 141 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_1; 142 break; 143 case 2: 144 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_2; 145 break; 146 case 4: 147 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_4; 148 break; 149 case 8: 150 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_8; 151 break; 152 } 153 return bankwh; 154} 155 156unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode) 157{ 158 if (sscreen->b.info.si_tile_mode_array_valid) { 159 uint32_t gb_tile_mode = 
sscreen->b.info.si_tile_mode_array[tile_mode]; 160 161 return G_009910_PIPE_CONFIG(gb_tile_mode); 162 } 163 164 /* This is probably broken for a lot of chips, but it's only used 165 * if the kernel cannot return the tile mode array for CIK. */ 166 switch (sscreen->b.info.r600_num_tile_pipes) { 167 case 16: 168 return V_02803C_X_ADDR_SURF_P16_32X32_16X16; 169 case 8: 170 return V_02803C_X_ADDR_SURF_P8_32X32_16X16; 171 case 4: 172 default: 173 if (sscreen->b.info.r600_num_backends == 4) 174 return V_02803C_X_ADDR_SURF_P4_16X16; 175 else 176 return V_02803C_X_ADDR_SURF_P4_8X16; 177 case 2: 178 return V_02803C_ADDR_SURF_P2; 179 } 180} 181 182static unsigned si_map_swizzle(unsigned swizzle) 183{ 184 switch (swizzle) { 185 case UTIL_FORMAT_SWIZZLE_Y: 186 return V_008F0C_SQ_SEL_Y; 187 case UTIL_FORMAT_SWIZZLE_Z: 188 return V_008F0C_SQ_SEL_Z; 189 case UTIL_FORMAT_SWIZZLE_W: 190 return V_008F0C_SQ_SEL_W; 191 case UTIL_FORMAT_SWIZZLE_0: 192 return V_008F0C_SQ_SEL_0; 193 case UTIL_FORMAT_SWIZZLE_1: 194 return V_008F0C_SQ_SEL_1; 195 default: /* UTIL_FORMAT_SWIZZLE_X */ 196 return V_008F0C_SQ_SEL_X; 197 } 198} 199 200static uint32_t S_FIXED(float value, uint32_t frac_bits) 201{ 202 return value * (1 << frac_bits); 203} 204 205/* 12.4 fixed-point */ 206static unsigned si_pack_float_12p4(float x) 207{ 208 return x <= 0 ? 0 : 209 x >= 4096 ? 0xffff : x * 16; 210} 211 212/* 213 * Inferred framebuffer and blender state. 214 * 215 * One of the reasons this must be derived from the framebuffer state is that: 216 * - The blend state mask is 0xf most of the time. 217 * - The COLOR1 format isn't INVALID because of possible dual-source blending, 218 * so COLOR1 is enabled pretty much all the time. 219 * So CB_TARGET_MASK is the only register that can disable COLOR1. 
220 */ 221static void si_update_fb_blend_state(struct si_context *sctx) 222{ 223 struct si_pm4_state *pm4; 224 struct si_state_blend *blend = sctx->queued.named.blend; 225 uint32_t mask = 0, i; 226 227 if (blend == NULL) 228 return; 229 230 pm4 = CALLOC_STRUCT(si_pm4_state); 231 if (pm4 == NULL) 232 return; 233 234 for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) 235 if (sctx->framebuffer.state.cbufs[i]) 236 mask |= 0xf << (4*i); 237 mask &= blend->cb_target_mask; 238 239 si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask); 240 si_pm4_set_state(sctx, fb_blend, pm4); 241} 242 243/* 244 * Blender functions 245 */ 246 247static uint32_t si_translate_blend_function(int blend_func) 248{ 249 switch (blend_func) { 250 case PIPE_BLEND_ADD: 251 return V_028780_COMB_DST_PLUS_SRC; 252 case PIPE_BLEND_SUBTRACT: 253 return V_028780_COMB_SRC_MINUS_DST; 254 case PIPE_BLEND_REVERSE_SUBTRACT: 255 return V_028780_COMB_DST_MINUS_SRC; 256 case PIPE_BLEND_MIN: 257 return V_028780_COMB_MIN_DST_SRC; 258 case PIPE_BLEND_MAX: 259 return V_028780_COMB_MAX_DST_SRC; 260 default: 261 R600_ERR("Unknown blend function %d\n", blend_func); 262 assert(0); 263 break; 264 } 265 return 0; 266} 267 268static uint32_t si_translate_blend_factor(int blend_fact) 269{ 270 switch (blend_fact) { 271 case PIPE_BLENDFACTOR_ONE: 272 return V_028780_BLEND_ONE; 273 case PIPE_BLENDFACTOR_SRC_COLOR: 274 return V_028780_BLEND_SRC_COLOR; 275 case PIPE_BLENDFACTOR_SRC_ALPHA: 276 return V_028780_BLEND_SRC_ALPHA; 277 case PIPE_BLENDFACTOR_DST_ALPHA: 278 return V_028780_BLEND_DST_ALPHA; 279 case PIPE_BLENDFACTOR_DST_COLOR: 280 return V_028780_BLEND_DST_COLOR; 281 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 282 return V_028780_BLEND_SRC_ALPHA_SATURATE; 283 case PIPE_BLENDFACTOR_CONST_COLOR: 284 return V_028780_BLEND_CONSTANT_COLOR; 285 case PIPE_BLENDFACTOR_CONST_ALPHA: 286 return V_028780_BLEND_CONSTANT_ALPHA; 287 case PIPE_BLENDFACTOR_ZERO: 288 return V_028780_BLEND_ZERO; 289 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 290 
return V_028780_BLEND_ONE_MINUS_SRC_COLOR; 291 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 292 return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; 293 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 294 return V_028780_BLEND_ONE_MINUS_DST_ALPHA; 295 case PIPE_BLENDFACTOR_INV_DST_COLOR: 296 return V_028780_BLEND_ONE_MINUS_DST_COLOR; 297 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 298 return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR; 299 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 300 return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA; 301 case PIPE_BLENDFACTOR_SRC1_COLOR: 302 return V_028780_BLEND_SRC1_COLOR; 303 case PIPE_BLENDFACTOR_SRC1_ALPHA: 304 return V_028780_BLEND_SRC1_ALPHA; 305 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 306 return V_028780_BLEND_INV_SRC1_COLOR; 307 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 308 return V_028780_BLEND_INV_SRC1_ALPHA; 309 default: 310 R600_ERR("Bad blend factor %d not supported!\n", blend_fact); 311 assert(0); 312 break; 313 } 314 return 0; 315} 316 317static void *si_create_blend_state_mode(struct pipe_context *ctx, 318 const struct pipe_blend_state *state, 319 unsigned mode) 320{ 321 struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend); 322 struct si_pm4_state *pm4 = &blend->pm4; 323 324 uint32_t color_control = 0; 325 326 if (blend == NULL) 327 return NULL; 328 329 blend->alpha_to_one = state->alpha_to_one; 330 331 if (state->logicop_enable) { 332 color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4)); 333 } else { 334 color_control |= S_028808_ROP3(0xcc); 335 } 336 337 si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 338 S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 339 S_028B70_ALPHA_TO_MASK_OFFSET0(2) | 340 S_028B70_ALPHA_TO_MASK_OFFSET1(2) | 341 S_028B70_ALPHA_TO_MASK_OFFSET2(2) | 342 S_028B70_ALPHA_TO_MASK_OFFSET3(2)); 343 344 blend->cb_target_mask = 0; 345 for (int i = 0; i < 8; i++) { 346 /* state->rt entries > 0 only written if independent blending */ 347 const int j = state->independent_blend_enable ? 
i : 0; 348 349 unsigned eqRGB = state->rt[j].rgb_func; 350 unsigned srcRGB = state->rt[j].rgb_src_factor; 351 unsigned dstRGB = state->rt[j].rgb_dst_factor; 352 unsigned eqA = state->rt[j].alpha_func; 353 unsigned srcA = state->rt[j].alpha_src_factor; 354 unsigned dstA = state->rt[j].alpha_dst_factor; 355 356 unsigned blend_cntl = 0; 357 358 /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */ 359 blend->cb_target_mask |= state->rt[j].colormask << (4 * i); 360 361 if (!state->rt[j].blend_enable) { 362 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 363 continue; 364 } 365 366 blend_cntl |= S_028780_ENABLE(1); 367 blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB)); 368 blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB)); 369 blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB)); 370 371 if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { 372 blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1); 373 blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA)); 374 blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA)); 375 blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA)); 376 } 377 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 378 } 379 380 if (blend->cb_target_mask) { 381 color_control |= S_028808_MODE(mode); 382 } else { 383 color_control |= S_028808_MODE(V_028808_CB_DISABLE); 384 } 385 si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control); 386 387 return blend; 388} 389 390static void *si_create_blend_state(struct pipe_context *ctx, 391 const struct pipe_blend_state *state) 392{ 393 return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); 394} 395 396static void si_bind_blend_state(struct pipe_context *ctx, void *state) 397{ 398 struct si_context *sctx = (struct si_context *)ctx; 399 si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state); 400 
si_update_fb_blend_state(sctx); 401} 402 403static void si_delete_blend_state(struct pipe_context *ctx, void *state) 404{ 405 struct si_context *sctx = (struct si_context *)ctx; 406 si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state); 407} 408 409static void si_set_blend_color(struct pipe_context *ctx, 410 const struct pipe_blend_color *state) 411{ 412 struct si_context *sctx = (struct si_context *)ctx; 413 struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); 414 415 if (pm4 == NULL) 416 return; 417 418 si_pm4_set_reg(pm4, R_028414_CB_BLEND_RED, fui(state->color[0])); 419 si_pm4_set_reg(pm4, R_028418_CB_BLEND_GREEN, fui(state->color[1])); 420 si_pm4_set_reg(pm4, R_02841C_CB_BLEND_BLUE, fui(state->color[2])); 421 si_pm4_set_reg(pm4, R_028420_CB_BLEND_ALPHA, fui(state->color[3])); 422 423 si_pm4_set_state(sctx, blend_color, pm4); 424} 425 426/* 427 * Clipping, scissors and viewport 428 */ 429 430static void si_set_clip_state(struct pipe_context *ctx, 431 const struct pipe_clip_state *state) 432{ 433 struct si_context *sctx = (struct si_context *)ctx; 434 struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); 435 struct pipe_constant_buffer cb; 436 437 if (pm4 == NULL) 438 return; 439 440 for (int i = 0; i < 6; i++) { 441 si_pm4_set_reg(pm4, R_0285BC_PA_CL_UCP_0_X + i * 16, 442 fui(state->ucp[i][0])); 443 si_pm4_set_reg(pm4, R_0285C0_PA_CL_UCP_0_Y + i * 16, 444 fui(state->ucp[i][1])); 445 si_pm4_set_reg(pm4, R_0285C4_PA_CL_UCP_0_Z + i * 16, 446 fui(state->ucp[i][2])); 447 si_pm4_set_reg(pm4, R_0285C8_PA_CL_UCP_0_W + i * 16, 448 fui(state->ucp[i][3])); 449 } 450 451 cb.buffer = NULL; 452 cb.user_buffer = state->ucp; 453 cb.buffer_offset = 0; 454 cb.buffer_size = 4*4*8; 455 ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, SI_DRIVER_STATE_CONST_BUF, &cb); 456 pipe_resource_reference(&cb.buffer, NULL); 457 458 si_pm4_set_state(sctx, clip, pm4); 459} 460 461#define SIX_BITS 0x3F 462 463static void si_emit_clip_regs(struct si_context *sctx, struct 
r600_atom *atom) 464{ 465 struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; 466 struct tgsi_shader_info *info = si_get_vs_info(sctx); 467 struct si_shader *vs = si_get_vs_state(sctx); 468 unsigned window_space = 469 vs->selector->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; 470 unsigned clipdist_mask = 471 info->writes_clipvertex ? SIX_BITS : info->clipdist_writemask; 472 473 r600_write_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL, 474 S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) | 475 S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) | 476 S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) | 477 S_02881C_VS_OUT_CCDIST0_VEC_ENA((clipdist_mask & 0x0F) != 0) | 478 S_02881C_VS_OUT_CCDIST1_VEC_ENA((clipdist_mask & 0xF0) != 0) | 479 S_02881C_VS_OUT_MISC_VEC_ENA(info->writes_psize || 480 info->writes_edgeflag || 481 info->writes_layer) | 482 (sctx->queued.named.rasterizer->clip_plane_enable & 483 clipdist_mask)); 484 r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL, 485 sctx->queued.named.rasterizer->pa_cl_clip_cntl | 486 (clipdist_mask ? 
0 : 487 sctx->queued.named.rasterizer->clip_plane_enable & SIX_BITS) | 488 S_028810_CLIP_DISABLE(window_space)); 489} 490 491static void si_set_scissor_states(struct pipe_context *ctx, 492 unsigned start_slot, 493 unsigned num_scissors, 494 const struct pipe_scissor_state *state) 495{ 496 struct si_context *sctx = (struct si_context *)ctx; 497 struct si_state_scissor *scissor = CALLOC_STRUCT(si_state_scissor); 498 struct si_pm4_state *pm4 = &scissor->pm4; 499 500 if (scissor == NULL) 501 return; 502 503 scissor->scissor = *state; 504 si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 505 S_028250_TL_X(state->minx) | S_028250_TL_Y(state->miny) | 506 S_028250_WINDOW_OFFSET_DISABLE(1)); 507 si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR, 508 S_028254_BR_X(state->maxx) | S_028254_BR_Y(state->maxy)); 509 510 si_pm4_set_state(sctx, scissor, scissor); 511} 512 513static void si_set_viewport_states(struct pipe_context *ctx, 514 unsigned start_slot, 515 unsigned num_viewports, 516 const struct pipe_viewport_state *state) 517{ 518 struct si_context *sctx = (struct si_context *)ctx; 519 struct si_state_viewport *viewport = CALLOC_STRUCT(si_state_viewport); 520 struct si_pm4_state *pm4 = &viewport->pm4; 521 522 if (viewport == NULL) 523 return; 524 525 viewport->viewport = *state; 526 si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0])); 527 si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0])); 528 si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1])); 529 si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1])); 530 si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2])); 531 si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2])); 532 533 si_pm4_set_state(sctx, viewport, viewport); 534} 535 536/* 537 * inferred state between framebuffer and rasterizer 538 */ 539static void si_update_fb_rs_state(struct si_context *sctx) 540{ 541 struct 
si_state_rasterizer *rs = sctx->queued.named.rasterizer; 542 struct si_pm4_state *pm4; 543 float offset_units; 544 545 if (!rs || !sctx->framebuffer.state.zsbuf) 546 return; 547 548 offset_units = sctx->queued.named.rasterizer->offset_units; 549 switch (sctx->framebuffer.state.zsbuf->texture->format) { 550 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 551 case PIPE_FORMAT_X8Z24_UNORM: 552 case PIPE_FORMAT_Z24X8_UNORM: 553 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 554 offset_units *= 2.0f; 555 break; 556 case PIPE_FORMAT_Z32_FLOAT: 557 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 558 offset_units *= 1.0f; 559 break; 560 case PIPE_FORMAT_Z16_UNORM: 561 offset_units *= 4.0f; 562 break; 563 default: 564 return; 565 } 566 567 pm4 = CALLOC_STRUCT(si_pm4_state); 568 569 if (pm4 == NULL) 570 return; 571 572 /* FIXME some of those reg can be computed with cso */ 573 si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 574 fui(sctx->queued.named.rasterizer->offset_scale)); 575 si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units)); 576 si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, 577 fui(sctx->queued.named.rasterizer->offset_scale)); 578 si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units)); 579 580 si_pm4_set_state(sctx, fb_rs, pm4); 581} 582 583/* 584 * Rasterizer 585 */ 586 587static uint32_t si_translate_fill(uint32_t func) 588{ 589 switch(func) { 590 case PIPE_POLYGON_MODE_FILL: 591 return V_028814_X_DRAW_TRIANGLES; 592 case PIPE_POLYGON_MODE_LINE: 593 return V_028814_X_DRAW_LINES; 594 case PIPE_POLYGON_MODE_POINT: 595 return V_028814_X_DRAW_POINTS; 596 default: 597 assert(0); 598 return V_028814_X_DRAW_POINTS; 599 } 600} 601 602static void *si_create_rs_state(struct pipe_context *ctx, 603 const struct pipe_rasterizer_state *state) 604{ 605 struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer); 606 struct si_pm4_state *pm4 = &rs->pm4; 607 unsigned tmp; 608 unsigned prov_vtx = 1, polygon_dual_mode; 609 float 
psize_min, psize_max; 610 611 if (rs == NULL) { 612 return NULL; 613 } 614 615 rs->two_side = state->light_twoside; 616 rs->multisample_enable = state->multisample; 617 rs->clip_plane_enable = state->clip_plane_enable; 618 rs->line_stipple_enable = state->line_stipple_enable; 619 620 polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL || 621 state->fill_back != PIPE_POLYGON_MODE_FILL); 622 623 if (state->flatshade_first) 624 prov_vtx = 0; 625 626 rs->flatshade = state->flatshade; 627 rs->sprite_coord_enable = state->sprite_coord_enable; 628 rs->pa_sc_line_stipple = state->line_stipple_enable ? 629 S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | 630 S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0; 631 rs->pa_su_sc_mode_cntl = 632 S_028814_PROVOKING_VTX_LAST(prov_vtx) | 633 S_028814_CULL_FRONT(state->rasterizer_discard || (state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | 634 S_028814_CULL_BACK(state->rasterizer_discard || (state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | 635 S_028814_FACE(!state->front_ccw) | 636 S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) | 637 S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) | 638 S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) | 639 S_028814_POLY_MODE(polygon_dual_mode) | 640 S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) | 641 S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)); 642 rs->pa_cl_clip_cntl = 643 S_028810_PS_UCP_MODE(3) | 644 S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) | 645 S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) | 646 S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) | 647 S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) | 648 S_028810_DX_LINEAR_ATTR_CLIP_ENA(1); 649 650 /* offset */ 651 rs->offset_units = state->offset_units; 652 rs->offset_scale = state->offset_scale * 12.0f; 653 654 tmp = S_0286D4_FLAT_SHADE_ENA(1); 655 if (state->sprite_coord_enable) { 656 
tmp |= S_0286D4_PNT_SPRITE_ENA(1) | 657 S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | 658 S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) | 659 S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) | 660 S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1); 661 if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { 662 tmp |= S_0286D4_PNT_SPRITE_TOP_1(1); 663 } 664 } 665 si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0, tmp); 666 667 /* point size 12.4 fixed point */ 668 tmp = (unsigned)(state->point_size * 8.0); 669 si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp)); 670 671 if (state->point_size_per_vertex) { 672 psize_min = util_get_min_point_size(state); 673 psize_max = 8192; 674 } else { 675 /* Force the point size to be as if the vertex output was disabled. */ 676 psize_min = state->point_size; 677 psize_max = state->point_size; 678 } 679 /* Divide by two, because 0.5 = 1 pixel. */ 680 si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX, 681 S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) | 682 S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2))); 683 684 tmp = (unsigned)state->line_width * 8; 685 si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp)); 686 si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0, 687 S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | 688 S_028A48_MSAA_ENABLE(state->multisample) | 689 S_028A48_VPORT_SCISSOR_ENABLE(state->scissor)); 690 691 si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL, 692 S_028BE4_PIX_CENTER(state->half_pixel_center) | 693 S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH)); 694 695 si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp)); 696 697 return rs; 698} 699 700static void si_bind_rs_state(struct pipe_context *ctx, void *state) 701{ 702 struct si_context *sctx = (struct si_context *)ctx; 703 struct si_state_rasterizer *old_rs = 704 (struct 
si_state_rasterizer*)sctx->queued.named.rasterizer; 705 struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; 706 707 if (state == NULL) 708 return; 709 710 // TODO 711 sctx->pa_sc_line_stipple = rs->pa_sc_line_stipple; 712 sctx->pa_su_sc_mode_cntl = rs->pa_su_sc_mode_cntl; 713 714 if (sctx->framebuffer.nr_samples > 1 && 715 (!old_rs || old_rs->multisample_enable != rs->multisample_enable)) 716 sctx->db_render_state.dirty = true; 717 718 si_pm4_bind_state(sctx, rasterizer, rs); 719 si_update_fb_rs_state(sctx); 720 721 sctx->clip_regs.dirty = true; 722 sctx->last_rast_prim = -1; /* reset this so that it gets updated */ 723} 724 725static void si_delete_rs_state(struct pipe_context *ctx, void *state) 726{ 727 struct si_context *sctx = (struct si_context *)ctx; 728 si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state); 729} 730 731/* 732 * infeered state between dsa and stencil ref 733 */ 734static void si_update_dsa_stencil_ref(struct si_context *sctx) 735{ 736 struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); 737 struct pipe_stencil_ref *ref = &sctx->stencil_ref; 738 struct si_state_dsa *dsa = sctx->queued.named.dsa; 739 740 if (pm4 == NULL) 741 return; 742 743 si_pm4_set_reg(pm4, R_028430_DB_STENCILREFMASK, 744 S_028430_STENCILTESTVAL(ref->ref_value[0]) | 745 S_028430_STENCILMASK(dsa->valuemask[0]) | 746 S_028430_STENCILWRITEMASK(dsa->writemask[0]) | 747 S_028430_STENCILOPVAL(1)); 748 si_pm4_set_reg(pm4, R_028434_DB_STENCILREFMASK_BF, 749 S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) | 750 S_028434_STENCILMASK_BF(dsa->valuemask[1]) | 751 S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) | 752 S_028434_STENCILOPVAL_BF(1)); 753 754 si_pm4_set_state(sctx, dsa_stencil_ref, pm4); 755} 756 757static void si_set_pipe_stencil_ref(struct pipe_context *ctx, 758 const struct pipe_stencil_ref *state) 759{ 760 struct si_context *sctx = (struct si_context *)ctx; 761 sctx->stencil_ref = *state; 762 si_update_dsa_stencil_ref(sctx); 763} 764 
765 766/* 767 * DSA 768 */ 769 770static uint32_t si_translate_stencil_op(int s_op) 771{ 772 switch (s_op) { 773 case PIPE_STENCIL_OP_KEEP: 774 return V_02842C_STENCIL_KEEP; 775 case PIPE_STENCIL_OP_ZERO: 776 return V_02842C_STENCIL_ZERO; 777 case PIPE_STENCIL_OP_REPLACE: 778 return V_02842C_STENCIL_REPLACE_TEST; 779 case PIPE_STENCIL_OP_INCR: 780 return V_02842C_STENCIL_ADD_CLAMP; 781 case PIPE_STENCIL_OP_DECR: 782 return V_02842C_STENCIL_SUB_CLAMP; 783 case PIPE_STENCIL_OP_INCR_WRAP: 784 return V_02842C_STENCIL_ADD_WRAP; 785 case PIPE_STENCIL_OP_DECR_WRAP: 786 return V_02842C_STENCIL_SUB_WRAP; 787 case PIPE_STENCIL_OP_INVERT: 788 return V_02842C_STENCIL_INVERT; 789 default: 790 R600_ERR("Unknown stencil op %d", s_op); 791 assert(0); 792 break; 793 } 794 return 0; 795} 796 797static void *si_create_dsa_state(struct pipe_context *ctx, 798 const struct pipe_depth_stencil_alpha_state *state) 799{ 800 struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa); 801 struct si_pm4_state *pm4 = &dsa->pm4; 802 unsigned db_depth_control; 803 uint32_t db_stencil_control = 0; 804 805 if (dsa == NULL) { 806 return NULL; 807 } 808 809 dsa->valuemask[0] = state->stencil[0].valuemask; 810 dsa->valuemask[1] = state->stencil[1].valuemask; 811 dsa->writemask[0] = state->stencil[0].writemask; 812 dsa->writemask[1] = state->stencil[1].writemask; 813 814 db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | 815 S_028800_Z_WRITE_ENABLE(state->depth.writemask) | 816 S_028800_ZFUNC(state->depth.func); 817 818 /* stencil */ 819 if (state->stencil[0].enabled) { 820 db_depth_control |= S_028800_STENCIL_ENABLE(1); 821 db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func); 822 db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op)); 823 db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op)); 824 db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op)); 825 826 
if (state->stencil[1].enabled) { 827 db_depth_control |= S_028800_BACKFACE_ENABLE(1); 828 db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func); 829 db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op)); 830 db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op)); 831 db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op)); 832 } 833 } 834 835 /* alpha */ 836 if (state->alpha.enabled) { 837 dsa->alpha_func = state->alpha.func; 838 dsa->alpha_ref = state->alpha.ref_value; 839 840 si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 + 841 SI_SGPR_ALPHA_REF * 4, fui(dsa->alpha_ref)); 842 } else { 843 dsa->alpha_func = PIPE_FUNC_ALWAYS; 844 } 845 846 /* misc */ 847 si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control); 848 si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); 849 850 return dsa; 851} 852 853static void si_bind_dsa_state(struct pipe_context *ctx, void *state) 854{ 855 struct si_context *sctx = (struct si_context *)ctx; 856 struct si_state_dsa *dsa = state; 857 858 if (state == NULL) 859 return; 860 861 si_pm4_bind_state(sctx, dsa, dsa); 862 si_update_dsa_stencil_ref(sctx); 863} 864 865static void si_delete_dsa_state(struct pipe_context *ctx, void *state) 866{ 867 struct si_context *sctx = (struct si_context *)ctx; 868 si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state); 869} 870 871static void *si_create_db_flush_dsa(struct si_context *sctx) 872{ 873 struct pipe_depth_stencil_alpha_state dsa = {}; 874 875 return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa); 876} 877 878/* DB RENDER STATE */ 879 880static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable) 881{ 882 struct si_context *sctx = (struct si_context*)ctx; 883 884 sctx->db_render_state.dirty = true; 885} 886 887static void si_emit_db_render_state(struct si_context *sctx, struct 
r600_atom *state) 888{ 889 struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; 890 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 891 unsigned db_shader_control; 892 893 r600_write_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2); 894 895 /* DB_RENDER_CONTROL */ 896 if (sctx->dbcb_depth_copy_enabled || 897 sctx->dbcb_stencil_copy_enabled) { 898 radeon_emit(cs, 899 S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) | 900 S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) | 901 S_028000_COPY_CENTROID(1) | 902 S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample)); 903 } else if (sctx->db_inplace_flush_enabled) { 904 radeon_emit(cs, 905 S_028000_DEPTH_COMPRESS_DISABLE(1) | 906 S_028000_STENCIL_COMPRESS_DISABLE(1)); 907 } else if (sctx->db_depth_clear) { 908 radeon_emit(cs, S_028000_DEPTH_CLEAR_ENABLE(1)); 909 } else { 910 radeon_emit(cs, 0); 911 } 912 913 /* DB_COUNT_CONTROL (occlusion queries) */ 914 if (sctx->b.num_occlusion_queries > 0) { 915 if (sctx->b.chip_class >= CIK) { 916 radeon_emit(cs, 917 S_028004_PERFECT_ZPASS_COUNTS(1) | 918 S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) | 919 S_028004_ZPASS_ENABLE(1) | 920 S_028004_SLICE_EVEN_ENABLE(1) | 921 S_028004_SLICE_ODD_ENABLE(1)); 922 } else { 923 radeon_emit(cs, 924 S_028004_PERFECT_ZPASS_COUNTS(1) | 925 S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples)); 926 } 927 } else { 928 /* Disable occlusion queries. 
*/ 929 if (sctx->b.chip_class >= CIK) { 930 radeon_emit(cs, 0); 931 } else { 932 radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1)); 933 } 934 } 935 936 /* DB_RENDER_OVERRIDE2 */ 937 if (sctx->db_depth_disable_expclear) { 938 r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 939 S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(1)); 940 } else { 941 r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 0); 942 } 943 944 db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z) | 945 S_02880C_ALPHA_TO_MASK_DISABLE(sctx->framebuffer.cb0_is_integer) | 946 sctx->ps_db_shader_control; 947 948 /* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */ 949 if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable)) 950 db_shader_control &= C_02880C_MASK_EXPORT_ENABLE; 951 952 r600_write_context_reg(cs, R_02880C_DB_SHADER_CONTROL, 953 db_shader_control); 954} 955 956/* 957 * format translation 958 */ 959static uint32_t si_translate_colorformat(enum pipe_format format) 960{ 961 const struct util_format_description *desc = util_format_description(format); 962 963#define HAS_SIZE(x,y,z,w) \ 964 (desc->channel[0].size == (x) && desc->channel[1].size == (y) && \ 965 desc->channel[2].size == (z) && desc->channel[3].size == (w)) 966 967 if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */ 968 return V_028C70_COLOR_10_11_11; 969 970 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) 971 return V_028C70_COLOR_INVALID; 972 973 switch (desc->nr_channels) { 974 case 1: 975 switch (desc->channel[0].size) { 976 case 8: 977 return V_028C70_COLOR_8; 978 case 16: 979 return V_028C70_COLOR_16; 980 case 32: 981 return V_028C70_COLOR_32; 982 } 983 break; 984 case 2: 985 if (desc->channel[0].size == desc->channel[1].size) { 986 switch (desc->channel[0].size) { 987 case 8: 988 return V_028C70_COLOR_8_8; 989 case 16: 990 return V_028C70_COLOR_16_16; 991 case 32: 992 return V_028C70_COLOR_32_32; 993 } 994 } else if (HAS_SIZE(8,24,0,0)) { 995 
return V_028C70_COLOR_24_8; 996 } else if (HAS_SIZE(24,8,0,0)) { 997 return V_028C70_COLOR_8_24; 998 } 999 break; 1000 case 3: 1001 if (HAS_SIZE(5,6,5,0)) { 1002 return V_028C70_COLOR_5_6_5; 1003 } else if (HAS_SIZE(32,8,24,0)) { 1004 return V_028C70_COLOR_X24_8_32_FLOAT; 1005 } 1006 break; 1007 case 4: 1008 if (desc->channel[0].size == desc->channel[1].size && 1009 desc->channel[0].size == desc->channel[2].size && 1010 desc->channel[0].size == desc->channel[3].size) { 1011 switch (desc->channel[0].size) { 1012 case 4: 1013 return V_028C70_COLOR_4_4_4_4; 1014 case 8: 1015 return V_028C70_COLOR_8_8_8_8; 1016 case 16: 1017 return V_028C70_COLOR_16_16_16_16; 1018 case 32: 1019 return V_028C70_COLOR_32_32_32_32; 1020 } 1021 } else if (HAS_SIZE(5,5,5,1)) { 1022 return V_028C70_COLOR_1_5_5_5; 1023 } else if (HAS_SIZE(10,10,10,2)) { 1024 return V_028C70_COLOR_2_10_10_10; 1025 } 1026 break; 1027 } 1028 return V_028C70_COLOR_INVALID; 1029} 1030 1031static uint32_t si_colorformat_endian_swap(uint32_t colorformat) 1032{ 1033 if (SI_BIG_ENDIAN) { 1034 switch(colorformat) { 1035 /* 8-bit buffers. */ 1036 case V_028C70_COLOR_8: 1037 return V_028C70_ENDIAN_NONE; 1038 1039 /* 16-bit buffers. */ 1040 case V_028C70_COLOR_5_6_5: 1041 case V_028C70_COLOR_1_5_5_5: 1042 case V_028C70_COLOR_4_4_4_4: 1043 case V_028C70_COLOR_16: 1044 case V_028C70_COLOR_8_8: 1045 return V_028C70_ENDIAN_8IN16; 1046 1047 /* 32-bit buffers. */ 1048 case V_028C70_COLOR_8_8_8_8: 1049 case V_028C70_COLOR_2_10_10_10: 1050 case V_028C70_COLOR_8_24: 1051 case V_028C70_COLOR_24_8: 1052 case V_028C70_COLOR_16_16: 1053 return V_028C70_ENDIAN_8IN32; 1054 1055 /* 64-bit buffers. */ 1056 case V_028C70_COLOR_16_16_16_16: 1057 return V_028C70_ENDIAN_8IN16; 1058 1059 case V_028C70_COLOR_32_32: 1060 return V_028C70_ENDIAN_8IN32; 1061 1062 /* 128-bit buffers. */ 1063 case V_028C70_COLOR_32_32_32_32: 1064 return V_028C70_ENDIAN_8IN32; 1065 default: 1066 return V_028C70_ENDIAN_NONE; /* Unsupported. 
*/ 1067 } 1068 } else { 1069 return V_028C70_ENDIAN_NONE; 1070 } 1071} 1072 1073/* Returns the size in bits of the widest component of a CB format */ 1074static unsigned si_colorformat_max_comp_size(uint32_t colorformat) 1075{ 1076 switch(colorformat) { 1077 case V_028C70_COLOR_4_4_4_4: 1078 return 4; 1079 1080 case V_028C70_COLOR_1_5_5_5: 1081 case V_028C70_COLOR_5_5_5_1: 1082 return 5; 1083 1084 case V_028C70_COLOR_5_6_5: 1085 return 6; 1086 1087 case V_028C70_COLOR_8: 1088 case V_028C70_COLOR_8_8: 1089 case V_028C70_COLOR_8_8_8_8: 1090 return 8; 1091 1092 case V_028C70_COLOR_10_10_10_2: 1093 case V_028C70_COLOR_2_10_10_10: 1094 return 10; 1095 1096 case V_028C70_COLOR_10_11_11: 1097 case V_028C70_COLOR_11_11_10: 1098 return 11; 1099 1100 case V_028C70_COLOR_16: 1101 case V_028C70_COLOR_16_16: 1102 case V_028C70_COLOR_16_16_16_16: 1103 return 16; 1104 1105 case V_028C70_COLOR_8_24: 1106 case V_028C70_COLOR_24_8: 1107 return 24; 1108 1109 case V_028C70_COLOR_32: 1110 case V_028C70_COLOR_32_32: 1111 case V_028C70_COLOR_32_32_32_32: 1112 case V_028C70_COLOR_X24_8_32_FLOAT: 1113 return 32; 1114 } 1115 1116 assert(!"Unknown maximum component size"); 1117 return 0; 1118} 1119 1120static uint32_t si_translate_dbformat(enum pipe_format format) 1121{ 1122 switch (format) { 1123 case PIPE_FORMAT_Z16_UNORM: 1124 return V_028040_Z_16; 1125 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1126 case PIPE_FORMAT_X8Z24_UNORM: 1127 case PIPE_FORMAT_Z24X8_UNORM: 1128 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1129 return V_028040_Z_24; /* deprecated on SI */ 1130 case PIPE_FORMAT_Z32_FLOAT: 1131 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1132 return V_028040_Z_32_FLOAT; 1133 default: 1134 return V_028040_Z_INVALID; 1135 } 1136} 1137 1138/* 1139 * Texture translation 1140 */ 1141 1142static uint32_t si_translate_texformat(struct pipe_screen *screen, 1143 enum pipe_format format, 1144 const struct util_format_description *desc, 1145 int first_non_void) 1146{ 1147 struct si_screen *sscreen = (struct 
si_screen*)screen; 1148 bool enable_s3tc = sscreen->b.info.drm_minor >= 31; 1149 boolean uniform = TRUE; 1150 int i; 1151 1152 /* Colorspace (return non-RGB formats directly). */ 1153 switch (desc->colorspace) { 1154 /* Depth stencil formats */ 1155 case UTIL_FORMAT_COLORSPACE_ZS: 1156 switch (format) { 1157 case PIPE_FORMAT_Z16_UNORM: 1158 return V_008F14_IMG_DATA_FORMAT_16; 1159 case PIPE_FORMAT_X24S8_UINT: 1160 case PIPE_FORMAT_Z24X8_UNORM: 1161 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1162 return V_008F14_IMG_DATA_FORMAT_8_24; 1163 case PIPE_FORMAT_X8Z24_UNORM: 1164 case PIPE_FORMAT_S8X24_UINT: 1165 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1166 return V_008F14_IMG_DATA_FORMAT_24_8; 1167 case PIPE_FORMAT_S8_UINT: 1168 return V_008F14_IMG_DATA_FORMAT_8; 1169 case PIPE_FORMAT_Z32_FLOAT: 1170 return V_008F14_IMG_DATA_FORMAT_32; 1171 case PIPE_FORMAT_X32_S8X24_UINT: 1172 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1173 return V_008F14_IMG_DATA_FORMAT_X24_8_32; 1174 default: 1175 goto out_unknown; 1176 } 1177 1178 case UTIL_FORMAT_COLORSPACE_YUV: 1179 goto out_unknown; /* TODO */ 1180 1181 case UTIL_FORMAT_COLORSPACE_SRGB: 1182 if (desc->nr_channels != 4 && desc->nr_channels != 1) 1183 goto out_unknown; 1184 break; 1185 1186 default: 1187 break; 1188 } 1189 1190 if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { 1191 if (!enable_s3tc) 1192 goto out_unknown; 1193 1194 switch (format) { 1195 case PIPE_FORMAT_RGTC1_SNORM: 1196 case PIPE_FORMAT_LATC1_SNORM: 1197 case PIPE_FORMAT_RGTC1_UNORM: 1198 case PIPE_FORMAT_LATC1_UNORM: 1199 return V_008F14_IMG_DATA_FORMAT_BC4; 1200 case PIPE_FORMAT_RGTC2_SNORM: 1201 case PIPE_FORMAT_LATC2_SNORM: 1202 case PIPE_FORMAT_RGTC2_UNORM: 1203 case PIPE_FORMAT_LATC2_UNORM: 1204 return V_008F14_IMG_DATA_FORMAT_BC5; 1205 default: 1206 goto out_unknown; 1207 } 1208 } 1209 1210 if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { 1211 if (!enable_s3tc) 1212 goto out_unknown; 1213 1214 switch (format) { 1215 case PIPE_FORMAT_BPTC_RGBA_UNORM: 1216 case 
PIPE_FORMAT_BPTC_SRGBA: 1217 return V_008F14_IMG_DATA_FORMAT_BC7; 1218 case PIPE_FORMAT_BPTC_RGB_FLOAT: 1219 case PIPE_FORMAT_BPTC_RGB_UFLOAT: 1220 return V_008F14_IMG_DATA_FORMAT_BC6; 1221 default: 1222 goto out_unknown; 1223 } 1224 } 1225 1226 if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 1227 switch (format) { 1228 case PIPE_FORMAT_R8G8_B8G8_UNORM: 1229 case PIPE_FORMAT_G8R8_B8R8_UNORM: 1230 return V_008F14_IMG_DATA_FORMAT_GB_GR; 1231 case PIPE_FORMAT_G8R8_G8B8_UNORM: 1232 case PIPE_FORMAT_R8G8_R8B8_UNORM: 1233 return V_008F14_IMG_DATA_FORMAT_BG_RG; 1234 default: 1235 goto out_unknown; 1236 } 1237 } 1238 1239 if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { 1240 1241 if (!enable_s3tc) 1242 goto out_unknown; 1243 1244 if (!util_format_s3tc_enabled) { 1245 goto out_unknown; 1246 } 1247 1248 switch (format) { 1249 case PIPE_FORMAT_DXT1_RGB: 1250 case PIPE_FORMAT_DXT1_RGBA: 1251 case PIPE_FORMAT_DXT1_SRGB: 1252 case PIPE_FORMAT_DXT1_SRGBA: 1253 return V_008F14_IMG_DATA_FORMAT_BC1; 1254 case PIPE_FORMAT_DXT3_RGBA: 1255 case PIPE_FORMAT_DXT3_SRGBA: 1256 return V_008F14_IMG_DATA_FORMAT_BC2; 1257 case PIPE_FORMAT_DXT5_RGBA: 1258 case PIPE_FORMAT_DXT5_SRGBA: 1259 return V_008F14_IMG_DATA_FORMAT_BC3; 1260 default: 1261 goto out_unknown; 1262 } 1263 } 1264 1265 if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) { 1266 return V_008F14_IMG_DATA_FORMAT_5_9_9_9; 1267 } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) { 1268 return V_008F14_IMG_DATA_FORMAT_10_11_11; 1269 } 1270 1271 /* R8G8Bx_SNORM - TODO CxV8U8 */ 1272 1273 /* See whether the components are of the same size. */ 1274 for (i = 1; i < desc->nr_channels; i++) { 1275 uniform = uniform && desc->channel[0].size == desc->channel[i].size; 1276 } 1277 1278 /* Non-uniform formats. 
*/ 1279 if (!uniform) { 1280 switch(desc->nr_channels) { 1281 case 3: 1282 if (desc->channel[0].size == 5 && 1283 desc->channel[1].size == 6 && 1284 desc->channel[2].size == 5) { 1285 return V_008F14_IMG_DATA_FORMAT_5_6_5; 1286 } 1287 goto out_unknown; 1288 case 4: 1289 if (desc->channel[0].size == 5 && 1290 desc->channel[1].size == 5 && 1291 desc->channel[2].size == 5 && 1292 desc->channel[3].size == 1) { 1293 return V_008F14_IMG_DATA_FORMAT_1_5_5_5; 1294 } 1295 if (desc->channel[0].size == 10 && 1296 desc->channel[1].size == 10 && 1297 desc->channel[2].size == 10 && 1298 desc->channel[3].size == 2) { 1299 return V_008F14_IMG_DATA_FORMAT_2_10_10_10; 1300 } 1301 goto out_unknown; 1302 } 1303 goto out_unknown; 1304 } 1305 1306 if (first_non_void < 0 || first_non_void > 3) 1307 goto out_unknown; 1308 1309 /* uniform formats */ 1310 switch (desc->channel[first_non_void].size) { 1311 case 4: 1312 switch (desc->nr_channels) { 1313#if 0 /* Not supported for render targets */ 1314 case 2: 1315 return V_008F14_IMG_DATA_FORMAT_4_4; 1316#endif 1317 case 4: 1318 return V_008F14_IMG_DATA_FORMAT_4_4_4_4; 1319 } 1320 break; 1321 case 8: 1322 switch (desc->nr_channels) { 1323 case 1: 1324 return V_008F14_IMG_DATA_FORMAT_8; 1325 case 2: 1326 return V_008F14_IMG_DATA_FORMAT_8_8; 1327 case 4: 1328 return V_008F14_IMG_DATA_FORMAT_8_8_8_8; 1329 } 1330 break; 1331 case 16: 1332 switch (desc->nr_channels) { 1333 case 1: 1334 return V_008F14_IMG_DATA_FORMAT_16; 1335 case 2: 1336 return V_008F14_IMG_DATA_FORMAT_16_16; 1337 case 4: 1338 return V_008F14_IMG_DATA_FORMAT_16_16_16_16; 1339 } 1340 break; 1341 case 32: 1342 switch (desc->nr_channels) { 1343 case 1: 1344 return V_008F14_IMG_DATA_FORMAT_32; 1345 case 2: 1346 return V_008F14_IMG_DATA_FORMAT_32_32; 1347#if 0 /* Not supported for render targets */ 1348 case 3: 1349 return V_008F14_IMG_DATA_FORMAT_32_32_32; 1350#endif 1351 case 4: 1352 return V_008F14_IMG_DATA_FORMAT_32_32_32_32; 1353 } 1354 } 1355 1356out_unknown: 1357 /* 
R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */ 1358 return ~0; 1359} 1360 1361static unsigned si_tex_wrap(unsigned wrap) 1362{ 1363 switch (wrap) { 1364 default: 1365 case PIPE_TEX_WRAP_REPEAT: 1366 return V_008F30_SQ_TEX_WRAP; 1367 case PIPE_TEX_WRAP_CLAMP: 1368 return V_008F30_SQ_TEX_CLAMP_HALF_BORDER; 1369 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 1370 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL; 1371 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 1372 return V_008F30_SQ_TEX_CLAMP_BORDER; 1373 case PIPE_TEX_WRAP_MIRROR_REPEAT: 1374 return V_008F30_SQ_TEX_MIRROR; 1375 case PIPE_TEX_WRAP_MIRROR_CLAMP: 1376 return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER; 1377 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 1378 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; 1379 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 1380 return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER; 1381 } 1382} 1383 1384static unsigned si_tex_filter(unsigned filter) 1385{ 1386 switch (filter) { 1387 default: 1388 case PIPE_TEX_FILTER_NEAREST: 1389 return V_008F38_SQ_TEX_XY_FILTER_POINT; 1390 case PIPE_TEX_FILTER_LINEAR: 1391 return V_008F38_SQ_TEX_XY_FILTER_BILINEAR; 1392 } 1393} 1394 1395static unsigned si_tex_mipfilter(unsigned filter) 1396{ 1397 switch (filter) { 1398 case PIPE_TEX_MIPFILTER_NEAREST: 1399 return V_008F38_SQ_TEX_Z_FILTER_POINT; 1400 case PIPE_TEX_MIPFILTER_LINEAR: 1401 return V_008F38_SQ_TEX_Z_FILTER_LINEAR; 1402 default: 1403 case PIPE_TEX_MIPFILTER_NONE: 1404 return V_008F38_SQ_TEX_Z_FILTER_NONE; 1405 } 1406} 1407 1408static unsigned si_tex_compare(unsigned compare) 1409{ 1410 switch (compare) { 1411 default: 1412 case PIPE_FUNC_NEVER: 1413 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER; 1414 case PIPE_FUNC_LESS: 1415 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS; 1416 case PIPE_FUNC_EQUAL: 1417 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL; 1418 case PIPE_FUNC_LEQUAL: 1419 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; 1420 case PIPE_FUNC_GREATER: 1421 return 
V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER; 1422 case PIPE_FUNC_NOTEQUAL: 1423 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; 1424 case PIPE_FUNC_GEQUAL: 1425 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; 1426 case PIPE_FUNC_ALWAYS: 1427 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS; 1428 } 1429} 1430 1431static unsigned si_tex_dim(unsigned dim, unsigned nr_samples) 1432{ 1433 switch (dim) { 1434 default: 1435 case PIPE_TEXTURE_1D: 1436 return V_008F1C_SQ_RSRC_IMG_1D; 1437 case PIPE_TEXTURE_1D_ARRAY: 1438 return V_008F1C_SQ_RSRC_IMG_1D_ARRAY; 1439 case PIPE_TEXTURE_2D: 1440 case PIPE_TEXTURE_RECT: 1441 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA : 1442 V_008F1C_SQ_RSRC_IMG_2D; 1443 case PIPE_TEXTURE_2D_ARRAY: 1444 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : 1445 V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 1446 case PIPE_TEXTURE_3D: 1447 return V_008F1C_SQ_RSRC_IMG_3D; 1448 case PIPE_TEXTURE_CUBE: 1449 case PIPE_TEXTURE_CUBE_ARRAY: 1450 return V_008F1C_SQ_RSRC_IMG_CUBE; 1451 } 1452} 1453 1454/* 1455 * Format support testing 1456 */ 1457 1458static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) 1459{ 1460 return si_translate_texformat(screen, format, util_format_description(format), 1461 util_format_get_first_non_void_channel(format)) != ~0U; 1462} 1463 1464static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen, 1465 const struct util_format_description *desc, 1466 int first_non_void) 1467{ 1468 unsigned type = desc->channel[first_non_void].type; 1469 int i; 1470 1471 if (type == UTIL_FORMAT_TYPE_FIXED) 1472 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1473 1474 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 1475 return V_008F0C_BUF_DATA_FORMAT_10_11_11; 1476 1477 if (desc->nr_channels == 4 && 1478 desc->channel[0].size == 10 && 1479 desc->channel[1].size == 10 && 1480 desc->channel[2].size == 10 && 1481 desc->channel[3].size == 2) 1482 return V_008F0C_BUF_DATA_FORMAT_2_10_10_10; 1483 1484 /* See 
whether the components are of the same size. */ 1485 for (i = 0; i < desc->nr_channels; i++) { 1486 if (desc->channel[first_non_void].size != desc->channel[i].size) 1487 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1488 } 1489 1490 switch (desc->channel[first_non_void].size) { 1491 case 8: 1492 switch (desc->nr_channels) { 1493 case 1: 1494 return V_008F0C_BUF_DATA_FORMAT_8; 1495 case 2: 1496 return V_008F0C_BUF_DATA_FORMAT_8_8; 1497 case 3: 1498 case 4: 1499 return V_008F0C_BUF_DATA_FORMAT_8_8_8_8; 1500 } 1501 break; 1502 case 16: 1503 switch (desc->nr_channels) { 1504 case 1: 1505 return V_008F0C_BUF_DATA_FORMAT_16; 1506 case 2: 1507 return V_008F0C_BUF_DATA_FORMAT_16_16; 1508 case 3: 1509 case 4: 1510 return V_008F0C_BUF_DATA_FORMAT_16_16_16_16; 1511 } 1512 break; 1513 case 32: 1514 /* From the Southern Islands ISA documentation about MTBUF: 1515 * 'Memory reads of data in memory that is 32 or 64 bits do not 1516 * undergo any format conversion.' 1517 */ 1518 if (type != UTIL_FORMAT_TYPE_FLOAT && 1519 !desc->channel[first_non_void].pure_integer) 1520 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1521 1522 switch (desc->nr_channels) { 1523 case 1: 1524 return V_008F0C_BUF_DATA_FORMAT_32; 1525 case 2: 1526 return V_008F0C_BUF_DATA_FORMAT_32_32; 1527 case 3: 1528 return V_008F0C_BUF_DATA_FORMAT_32_32_32; 1529 case 4: 1530 return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 1531 } 1532 break; 1533 } 1534 1535 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1536} 1537 1538static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen, 1539 const struct util_format_description *desc, 1540 int first_non_void) 1541{ 1542 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 1543 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 1544 1545 switch (desc->channel[first_non_void].type) { 1546 case UTIL_FORMAT_TYPE_SIGNED: 1547 if (desc->channel[first_non_void].normalized) 1548 return V_008F0C_BUF_NUM_FORMAT_SNORM; 1549 else if (desc->channel[first_non_void].pure_integer) 1550 return 
V_008F0C_BUF_NUM_FORMAT_SINT; 1551 else 1552 return V_008F0C_BUF_NUM_FORMAT_SSCALED; 1553 break; 1554 case UTIL_FORMAT_TYPE_UNSIGNED: 1555 if (desc->channel[first_non_void].normalized) 1556 return V_008F0C_BUF_NUM_FORMAT_UNORM; 1557 else if (desc->channel[first_non_void].pure_integer) 1558 return V_008F0C_BUF_NUM_FORMAT_UINT; 1559 else 1560 return V_008F0C_BUF_NUM_FORMAT_USCALED; 1561 break; 1562 case UTIL_FORMAT_TYPE_FLOAT: 1563 default: 1564 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 1565 } 1566} 1567 1568static bool si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format) 1569{ 1570 const struct util_format_description *desc; 1571 int first_non_void; 1572 unsigned data_format; 1573 1574 desc = util_format_description(format); 1575 first_non_void = util_format_get_first_non_void_channel(format); 1576 data_format = si_translate_buffer_dataformat(screen, desc, first_non_void); 1577 return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID; 1578} 1579 1580static bool si_is_colorbuffer_format_supported(enum pipe_format format) 1581{ 1582 return si_translate_colorformat(format) != V_028C70_COLOR_INVALID && 1583 r600_translate_colorswap(format) != ~0U; 1584} 1585 1586static bool si_is_zs_format_supported(enum pipe_format format) 1587{ 1588 return si_translate_dbformat(format) != V_028040_Z_INVALID; 1589} 1590 1591boolean si_is_format_supported(struct pipe_screen *screen, 1592 enum pipe_format format, 1593 enum pipe_texture_target target, 1594 unsigned sample_count, 1595 unsigned usage) 1596{ 1597 struct si_screen *sscreen = (struct si_screen *)screen; 1598 unsigned retval = 0; 1599 1600 if (target >= PIPE_MAX_TEXTURE_TYPES) { 1601 R600_ERR("r600: unsupported texture type %d\n", target); 1602 return FALSE; 1603 } 1604 1605 if (!util_format_is_supported(format, usage)) 1606 return FALSE; 1607 1608 if (sample_count > 1) { 1609 /* 2D tiling on CIK is supported since DRM 2.35.0 */ 1610 if (sscreen->b.chip_class >= CIK && sscreen->b.info.drm_minor < 35) 
1611 return FALSE; 1612 1613 switch (sample_count) { 1614 case 2: 1615 case 4: 1616 case 8: 1617 break; 1618 default: 1619 return FALSE; 1620 } 1621 } 1622 1623 if (usage & PIPE_BIND_SAMPLER_VIEW) { 1624 if (target == PIPE_BUFFER) { 1625 if (si_is_vertex_format_supported(screen, format)) 1626 retval |= PIPE_BIND_SAMPLER_VIEW; 1627 } else { 1628 if (si_is_sampler_format_supported(screen, format)) 1629 retval |= PIPE_BIND_SAMPLER_VIEW; 1630 } 1631 } 1632 1633 if ((usage & (PIPE_BIND_RENDER_TARGET | 1634 PIPE_BIND_DISPLAY_TARGET | 1635 PIPE_BIND_SCANOUT | 1636 PIPE_BIND_SHARED | 1637 PIPE_BIND_BLENDABLE)) && 1638 si_is_colorbuffer_format_supported(format)) { 1639 retval |= usage & 1640 (PIPE_BIND_RENDER_TARGET | 1641 PIPE_BIND_DISPLAY_TARGET | 1642 PIPE_BIND_SCANOUT | 1643 PIPE_BIND_SHARED); 1644 if (!util_format_is_pure_integer(format) && 1645 !util_format_is_depth_or_stencil(format)) 1646 retval |= usage & PIPE_BIND_BLENDABLE; 1647 } 1648 1649 if ((usage & PIPE_BIND_DEPTH_STENCIL) && 1650 si_is_zs_format_supported(format)) { 1651 retval |= PIPE_BIND_DEPTH_STENCIL; 1652 } 1653 1654 if ((usage & PIPE_BIND_VERTEX_BUFFER) && 1655 si_is_vertex_format_supported(screen, format)) { 1656 retval |= PIPE_BIND_VERTEX_BUFFER; 1657 } 1658 1659 if (usage & PIPE_BIND_TRANSFER_READ) 1660 retval |= PIPE_BIND_TRANSFER_READ; 1661 if (usage & PIPE_BIND_TRANSFER_WRITE) 1662 retval |= PIPE_BIND_TRANSFER_WRITE; 1663 1664 return retval == usage; 1665} 1666 1667unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil) 1668{ 1669 unsigned tile_mode_index = 0; 1670 1671 if (stencil) { 1672 tile_mode_index = rtex->surface.stencil_tiling_index[level]; 1673 } else { 1674 tile_mode_index = rtex->surface.tiling_index[level]; 1675 } 1676 return tile_mode_index; 1677} 1678 1679/* 1680 * framebuffer handling 1681 */ 1682 1683static void si_initialize_color_surface(struct si_context *sctx, 1684 struct r600_surface *surf) 1685{ 1686 struct r600_texture *rtex = (struct 
r600_texture*)surf->base.texture; 1687 unsigned level = surf->base.u.tex.level; 1688 uint64_t offset = rtex->surface.level[level].offset; 1689 unsigned pitch, slice; 1690 unsigned color_info, color_attrib, color_pitch, color_view; 1691 unsigned tile_mode_index; 1692 unsigned format, swap, ntype, endian; 1693 const struct util_format_description *desc; 1694 int i; 1695 unsigned blend_clamp = 0, blend_bypass = 0; 1696 unsigned max_comp_size; 1697 1698 /* Layered rendering doesn't work with LINEAR_GENERAL. 1699 * (LINEAR_ALIGNED and others work) */ 1700 if (rtex->surface.level[level].mode == RADEON_SURF_MODE_LINEAR) { 1701 assert(surf->base.u.tex.first_layer == surf->base.u.tex.last_layer); 1702 offset += rtex->surface.level[level].slice_size * 1703 surf->base.u.tex.first_layer; 1704 color_view = 0; 1705 } else { 1706 color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) | 1707 S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer); 1708 } 1709 1710 pitch = (rtex->surface.level[level].nblk_x) / 8 - 1; 1711 slice = (rtex->surface.level[level].nblk_x * rtex->surface.level[level].nblk_y) / 64; 1712 if (slice) { 1713 slice = slice - 1; 1714 } 1715 1716 tile_mode_index = si_tile_mode_index(rtex, level, false); 1717 1718 desc = util_format_description(surf->base.format); 1719 for (i = 0; i < 4; i++) { 1720 if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { 1721 break; 1722 } 1723 } 1724 if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) { 1725 ntype = V_028C70_NUMBER_FLOAT; 1726 } else { 1727 ntype = V_028C70_NUMBER_UNORM; 1728 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) 1729 ntype = V_028C70_NUMBER_SRGB; 1730 else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 1731 if (desc->channel[i].pure_integer) { 1732 ntype = V_028C70_NUMBER_SINT; 1733 } else { 1734 assert(desc->channel[i].normalized); 1735 ntype = V_028C70_NUMBER_SNORM; 1736 } 1737 } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { 1738 if 
(desc->channel[i].pure_integer) { 1739 ntype = V_028C70_NUMBER_UINT; 1740 } else { 1741 assert(desc->channel[i].normalized); 1742 ntype = V_028C70_NUMBER_UNORM; 1743 } 1744 } 1745 } 1746 1747 format = si_translate_colorformat(surf->base.format); 1748 if (format == V_028C70_COLOR_INVALID) { 1749 R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format); 1750 } 1751 assert(format != V_028C70_COLOR_INVALID); 1752 swap = r600_translate_colorswap(surf->base.format); 1753 if (rtex->resource.b.b.usage == PIPE_USAGE_STAGING) { 1754 endian = V_028C70_ENDIAN_NONE; 1755 } else { 1756 endian = si_colorformat_endian_swap(format); 1757 } 1758 1759 /* blend clamp should be set for all NORM/SRGB types */ 1760 if (ntype == V_028C70_NUMBER_UNORM || 1761 ntype == V_028C70_NUMBER_SNORM || 1762 ntype == V_028C70_NUMBER_SRGB) 1763 blend_clamp = 1; 1764 1765 /* set blend bypass according to docs if SINT/UINT or 1766 8/24 COLOR variants */ 1767 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT || 1768 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 || 1769 format == V_028C70_COLOR_X24_8_32_FLOAT) { 1770 blend_clamp = 0; 1771 blend_bypass = 1; 1772 } 1773 1774 color_info = S_028C70_FORMAT(format) | 1775 S_028C70_COMP_SWAP(swap) | 1776 S_028C70_BLEND_CLAMP(blend_clamp) | 1777 S_028C70_BLEND_BYPASS(blend_bypass) | 1778 S_028C70_NUMBER_TYPE(ntype) | 1779 S_028C70_ENDIAN(endian); 1780 1781 color_pitch = S_028C64_TILE_MAX(pitch); 1782 1783 color_attrib = S_028C74_TILE_MODE_INDEX(tile_mode_index) | 1784 S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1); 1785 1786 if (rtex->resource.b.b.nr_samples > 1) { 1787 unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples); 1788 1789 color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | 1790 S_028C74_NUM_FRAGMENTS(log_samples); 1791 1792 if (rtex->fmask.size) { 1793 color_info |= S_028C70_COMPRESSION(1); 1794 unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height); 1795 1796 
color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(rtex->fmask.tile_mode_index); 1797 1798 if (sctx->b.chip_class == SI) { 1799 /* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */ 1800 color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); 1801 } 1802 if (sctx->b.chip_class >= CIK) { 1803 color_pitch |= S_028C64_FMASK_TILE_MAX(rtex->fmask.pitch / 8 - 1); 1804 } 1805 } 1806 } 1807 1808 offset += rtex->resource.gpu_address; 1809 1810 surf->cb_color_base = offset >> 8; 1811 surf->cb_color_pitch = color_pitch; 1812 surf->cb_color_slice = S_028C68_TILE_MAX(slice); 1813 surf->cb_color_view = color_view; 1814 surf->cb_color_info = color_info; 1815 surf->cb_color_attrib = color_attrib; 1816 1817 if (rtex->fmask.size) { 1818 surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8; 1819 surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max); 1820 } else { 1821 /* This must be set for fast clear to work without FMASK. */ 1822 surf->cb_color_fmask = surf->cb_color_base; 1823 surf->cb_color_fmask_slice = surf->cb_color_slice; 1824 surf->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index); 1825 1826 if (sctx->b.chip_class == SI) { 1827 unsigned bankh = util_logbase2(rtex->surface.bankh); 1828 surf->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh); 1829 } 1830 1831 if (sctx->b.chip_class >= CIK) { 1832 surf->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch); 1833 } 1834 } 1835 1836 /* Determine pixel shader export format */ 1837 max_comp_size = si_colorformat_max_comp_size(format); 1838 if (ntype == V_028C70_NUMBER_SRGB || 1839 ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) && 1840 max_comp_size <= 10) || 1841 (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) { 1842 surf->export_16bpc = true; 1843 } 1844 1845 surf->color_initialized = true; 1846} 1847 1848static void si_init_depth_surface(struct si_context *sctx, 1849 struct r600_surface *surf) 1850{ 1851 struct si_screen *sscreen = sctx->screen; 1852 
struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; 1853 unsigned level = surf->base.u.tex.level; 1854 struct radeon_surface_level *levelinfo = &rtex->surface.level[level]; 1855 unsigned format, tile_mode_index, array_mode; 1856 unsigned macro_aspect, tile_split, stile_split, bankh, bankw, nbanks, pipe_config; 1857 uint32_t z_info, s_info, db_depth_info; 1858 uint64_t z_offs, s_offs; 1859 uint32_t db_htile_data_base, db_htile_surface, pa_su_poly_offset_db_fmt_cntl = 0; 1860 1861 switch (sctx->framebuffer.state.zsbuf->texture->format) { 1862 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1863 case PIPE_FORMAT_X8Z24_UNORM: 1864 case PIPE_FORMAT_Z24X8_UNORM: 1865 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1866 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24); 1867 break; 1868 case PIPE_FORMAT_Z32_FLOAT: 1869 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1870 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | 1871 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); 1872 break; 1873 case PIPE_FORMAT_Z16_UNORM: 1874 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16); 1875 break; 1876 default: 1877 assert(0); 1878 } 1879 1880 format = si_translate_dbformat(rtex->resource.b.b.format); 1881 1882 if (format == V_028040_Z_INVALID) { 1883 R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format); 1884 } 1885 assert(format != V_028040_Z_INVALID); 1886 1887 s_offs = z_offs = rtex->resource.gpu_address; 1888 z_offs += rtex->surface.level[level].offset; 1889 s_offs += rtex->surface.stencil_level[level].offset; 1890 1891 db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1); 1892 1893 z_info = S_028040_FORMAT(format); 1894 if (rtex->resource.b.b.nr_samples > 1) { 1895 z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples)); 1896 } 1897 1898 if (rtex->surface.flags & RADEON_SURF_SBUFFER) 1899 s_info = S_028044_FORMAT(V_028044_STENCIL_8); 1900 else 1901 s_info = 
S_028044_FORMAT(V_028044_STENCIL_INVALID); 1902 1903 if (sctx->b.chip_class >= CIK) { 1904 switch (rtex->surface.level[level].mode) { 1905 case RADEON_SURF_MODE_2D: 1906 array_mode = V_02803C_ARRAY_2D_TILED_THIN1; 1907 break; 1908 case RADEON_SURF_MODE_1D: 1909 case RADEON_SURF_MODE_LINEAR_ALIGNED: 1910 case RADEON_SURF_MODE_LINEAR: 1911 default: 1912 array_mode = V_02803C_ARRAY_1D_TILED_THIN1; 1913 break; 1914 } 1915 tile_split = rtex->surface.tile_split; 1916 stile_split = rtex->surface.stencil_tile_split; 1917 macro_aspect = rtex->surface.mtilea; 1918 bankw = rtex->surface.bankw; 1919 bankh = rtex->surface.bankh; 1920 tile_split = cik_tile_split(tile_split); 1921 stile_split = cik_tile_split(stile_split); 1922 macro_aspect = cik_macro_tile_aspect(macro_aspect); 1923 bankw = cik_bank_wh(bankw); 1924 bankh = cik_bank_wh(bankh); 1925 nbanks = si_num_banks(sscreen, rtex); 1926 tile_mode_index = si_tile_mode_index(rtex, level, false); 1927 pipe_config = cik_db_pipe_config(sscreen, tile_mode_index); 1928 1929 db_depth_info |= S_02803C_ARRAY_MODE(array_mode) | 1930 S_02803C_PIPE_CONFIG(pipe_config) | 1931 S_02803C_BANK_WIDTH(bankw) | 1932 S_02803C_BANK_HEIGHT(bankh) | 1933 S_02803C_MACRO_TILE_ASPECT(macro_aspect) | 1934 S_02803C_NUM_BANKS(nbanks); 1935 z_info |= S_028040_TILE_SPLIT(tile_split); 1936 s_info |= S_028044_TILE_SPLIT(stile_split); 1937 } else { 1938 tile_mode_index = si_tile_mode_index(rtex, level, false); 1939 z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index); 1940 tile_mode_index = si_tile_mode_index(rtex, level, true); 1941 s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index); 1942 } 1943 1944 /* HiZ aka depth buffer htile */ 1945 /* use htile only for first level */ 1946 if (rtex->htile_buffer && !level) { 1947 z_info |= S_028040_TILE_SURFACE_ENABLE(1) | 1948 S_028040_ALLOW_EXPCLEAR(1); 1949 1950 /* This is optimal for the clear value of 1.0 and using 1951 * the LESS and LEQUAL test functions. Set this to 0 1952 * for the opposite case. 
This can only be changed when 1953 * clearing. */ 1954 z_info |= S_028040_ZRANGE_PRECISION(1); 1955 1956 /* Use all of the htile_buffer for depth, because we don't 1957 * use HTILE for stencil because of FAST_STENCIL_DISABLE. */ 1958 s_info |= S_028044_TILE_STENCIL_DISABLE(1); 1959 1960 uint64_t va = rtex->htile_buffer->gpu_address; 1961 db_htile_data_base = va >> 8; 1962 db_htile_surface = S_028ABC_FULL_CACHE(1); 1963 } else { 1964 db_htile_data_base = 0; 1965 db_htile_surface = 0; 1966 } 1967 1968 assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0); 1969 1970 surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) | 1971 S_028008_SLICE_MAX(surf->base.u.tex.last_layer); 1972 surf->db_htile_data_base = db_htile_data_base; 1973 surf->db_depth_info = db_depth_info; 1974 surf->db_z_info = z_info; 1975 surf->db_stencil_info = s_info; 1976 surf->db_depth_base = z_offs >> 8; 1977 surf->db_stencil_base = s_offs >> 8; 1978 surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) | 1979 S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1); 1980 surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x * 1981 levelinfo->nblk_y) / 64 - 1); 1982 surf->db_htile_surface = db_htile_surface; 1983 surf->pa_su_poly_offset_db_fmt_cntl = pa_su_poly_offset_db_fmt_cntl; 1984 1985 surf->depth_initialized = true; 1986} 1987 1988static void si_set_framebuffer_state(struct pipe_context *ctx, 1989 const struct pipe_framebuffer_state *state) 1990{ 1991 struct si_context *sctx = (struct si_context *)ctx; 1992 struct pipe_constant_buffer constbuf = {0}; 1993 struct r600_surface *surf = NULL; 1994 struct r600_texture *rtex; 1995 bool old_cb0_is_integer = sctx->framebuffer.cb0_is_integer; 1996 unsigned old_nr_samples = sctx->framebuffer.nr_samples; 1997 int i; 1998 1999 if (sctx->framebuffer.state.nr_cbufs) { 2000 sctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_CB | 2001 R600_CONTEXT_FLUSH_AND_INV_CB_META; 2002 } 2003 if 
(sctx->framebuffer.state.zsbuf) { 2004 sctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_DB | 2005 R600_CONTEXT_FLUSH_AND_INV_DB_META; 2006 } 2007 2008 util_copy_framebuffer_state(&sctx->framebuffer.state, state); 2009 2010 sctx->framebuffer.export_16bpc = 0; 2011 sctx->framebuffer.compressed_cb_mask = 0; 2012 sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); 2013 sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples); 2014 sctx->framebuffer.cb0_is_integer = state->nr_cbufs && state->cbufs[0] && 2015 util_format_is_pure_integer(state->cbufs[0]->format); 2016 2017 if (sctx->framebuffer.cb0_is_integer != old_cb0_is_integer) 2018 sctx->db_render_state.dirty = true; 2019 2020 for (i = 0; i < state->nr_cbufs; i++) { 2021 if (!state->cbufs[i]) 2022 continue; 2023 2024 surf = (struct r600_surface*)state->cbufs[i]; 2025 rtex = (struct r600_texture*)surf->base.texture; 2026 2027 if (!surf->color_initialized) { 2028 si_initialize_color_surface(sctx, surf); 2029 } 2030 2031 if (surf->export_16bpc) { 2032 sctx->framebuffer.export_16bpc |= 1 << i; 2033 } 2034 2035 if (rtex->fmask.size && rtex->cmask.size) { 2036 sctx->framebuffer.compressed_cb_mask |= 1 << i; 2037 } 2038 } 2039 /* Set the 16BPC export for possible dual-src blending. */ 2040 if (i == 1 && surf && surf->export_16bpc) { 2041 sctx->framebuffer.export_16bpc |= 1 << 1; 2042 } 2043 2044 assert(!(sctx->framebuffer.export_16bpc & ~0xff)); 2045 2046 if (state->zsbuf) { 2047 surf = (struct r600_surface*)state->zsbuf; 2048 2049 if (!surf->depth_initialized) { 2050 si_init_depth_surface(sctx, surf); 2051 } 2052 } 2053 2054 si_update_fb_rs_state(sctx); 2055 si_update_fb_blend_state(sctx); 2056 2057 sctx->framebuffer.atom.num_dw = state->nr_cbufs*15 + (8 - state->nr_cbufs)*3; 2058 sctx->framebuffer.atom.num_dw += state->zsbuf ? 
26 : 4; 2059 sctx->framebuffer.atom.num_dw += 3; /* WINDOW_SCISSOR_BR */ 2060 sctx->framebuffer.atom.num_dw += 18; /* MSAA sample locations */ 2061 sctx->framebuffer.atom.dirty = true; 2062 2063 if (sctx->framebuffer.nr_samples != old_nr_samples) { 2064 sctx->msaa_config.dirty = true; 2065 sctx->db_render_state.dirty = true; 2066 2067 /* Set sample locations as fragment shader constants. */ 2068 switch (sctx->framebuffer.nr_samples) { 2069 case 1: 2070 constbuf.user_buffer = sctx->b.sample_locations_1x; 2071 break; 2072 case 2: 2073 constbuf.user_buffer = sctx->b.sample_locations_2x; 2074 break; 2075 case 4: 2076 constbuf.user_buffer = sctx->b.sample_locations_4x; 2077 break; 2078 case 8: 2079 constbuf.user_buffer = sctx->b.sample_locations_8x; 2080 break; 2081 case 16: 2082 constbuf.user_buffer = sctx->b.sample_locations_16x; 2083 break; 2084 default: 2085 assert(0); 2086 } 2087 constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; 2088 ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT, 2089 SI_DRIVER_STATE_CONST_BUF, &constbuf); 2090 } 2091} 2092 2093static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom) 2094{ 2095 struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; 2096 struct pipe_framebuffer_state *state = &sctx->framebuffer.state; 2097 unsigned i, nr_cbufs = state->nr_cbufs; 2098 struct r600_texture *tex = NULL; 2099 struct r600_surface *cb = NULL; 2100 2101 /* Colorbuffers. */ 2102 for (i = 0; i < nr_cbufs; i++) { 2103 cb = (struct r600_surface*)state->cbufs[i]; 2104 if (!cb) { 2105 r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 2106 S_028C70_FORMAT(V_028C70_COLOR_INVALID)); 2107 continue; 2108 } 2109 2110 tex = (struct r600_texture *)cb->base.texture; 2111 r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, 2112 &tex->resource, RADEON_USAGE_READWRITE, 2113 tex->surface.nsamples > 1 ? 
2114 RADEON_PRIO_COLOR_BUFFER_MSAA : 2115 RADEON_PRIO_COLOR_BUFFER); 2116 2117 if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) { 2118 r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, 2119 tex->cmask_buffer, RADEON_USAGE_READWRITE, 2120 RADEON_PRIO_COLOR_META); 2121 } 2122 2123 r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 13); 2124 radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */ 2125 radeon_emit(cs, cb->cb_color_pitch); /* R_028C64_CB_COLOR0_PITCH */ 2126 radeon_emit(cs, cb->cb_color_slice); /* R_028C68_CB_COLOR0_SLICE */ 2127 radeon_emit(cs, cb->cb_color_view); /* R_028C6C_CB_COLOR0_VIEW */ 2128 radeon_emit(cs, cb->cb_color_info | tex->cb_color_info); /* R_028C70_CB_COLOR0_INFO */ 2129 radeon_emit(cs, cb->cb_color_attrib); /* R_028C74_CB_COLOR0_ATTRIB */ 2130 radeon_emit(cs, 0); /* R_028C78 unused */ 2131 radeon_emit(cs, tex->cmask.base_address_reg); /* R_028C7C_CB_COLOR0_CMASK */ 2132 radeon_emit(cs, tex->cmask.slice_tile_max); /* R_028C80_CB_COLOR0_CMASK_SLICE */ 2133 radeon_emit(cs, cb->cb_color_fmask); /* R_028C84_CB_COLOR0_FMASK */ 2134 radeon_emit(cs, cb->cb_color_fmask_slice); /* R_028C88_CB_COLOR0_FMASK_SLICE */ 2135 radeon_emit(cs, tex->color_clear_value[0]); /* R_028C8C_CB_COLOR0_CLEAR_WORD0 */ 2136 radeon_emit(cs, tex->color_clear_value[1]); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */ 2137 } 2138 /* set CB_COLOR1_INFO for possible dual-src blending */ 2139 if (i == 1 && state->cbufs[0]) { 2140 r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C, 2141 cb->cb_color_info | tex->cb_color_info); 2142 i++; 2143 } 2144 for (; i < 8 ; i++) { 2145 r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); 2146 } 2147 2148 /* ZS buffer. 
*/ 2149 if (state->zsbuf) { 2150 struct r600_surface *zb = (struct r600_surface*)state->zsbuf; 2151 struct r600_texture *rtex = (struct r600_texture*)zb->base.texture; 2152 2153 r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, 2154 &rtex->resource, RADEON_USAGE_READWRITE, 2155 zb->base.texture->nr_samples > 1 ? 2156 RADEON_PRIO_DEPTH_BUFFER_MSAA : 2157 RADEON_PRIO_DEPTH_BUFFER); 2158 2159 if (zb->db_htile_data_base) { 2160 r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, 2161 rtex->htile_buffer, RADEON_USAGE_READWRITE, 2162 RADEON_PRIO_DEPTH_META); 2163 } 2164 2165 r600_write_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view); 2166 r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base); 2167 2168 r600_write_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9); 2169 radeon_emit(cs, zb->db_depth_info); /* R_02803C_DB_DEPTH_INFO */ 2170 radeon_emit(cs, zb->db_z_info); /* R_028040_DB_Z_INFO */ 2171 radeon_emit(cs, zb->db_stencil_info); /* R_028044_DB_STENCIL_INFO */ 2172 radeon_emit(cs, zb->db_depth_base); /* R_028048_DB_Z_READ_BASE */ 2173 radeon_emit(cs, zb->db_stencil_base); /* R_02804C_DB_STENCIL_READ_BASE */ 2174 radeon_emit(cs, zb->db_depth_base); /* R_028050_DB_Z_WRITE_BASE */ 2175 radeon_emit(cs, zb->db_stencil_base); /* R_028054_DB_STENCIL_WRITE_BASE */ 2176 radeon_emit(cs, zb->db_depth_size); /* R_028058_DB_DEPTH_SIZE */ 2177 radeon_emit(cs, zb->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */ 2178 2179 r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface); 2180 r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value)); 2181 r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 2182 zb->pa_su_poly_offset_db_fmt_cntl); 2183 } else { 2184 r600_write_context_reg_seq(cs, R_028040_DB_Z_INFO, 2); 2185 radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */ 2186 radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */ 
2187 } 2188 2189 /* Framebuffer dimensions. */ 2190 /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */ 2191 r600_write_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR, 2192 S_028208_BR_X(state->width) | S_028208_BR_Y(state->height)); 2193 2194 cayman_emit_msaa_sample_locs(cs, sctx->framebuffer.nr_samples); 2195} 2196 2197static void si_emit_msaa_config(struct r600_common_context *rctx, struct r600_atom *atom) 2198{ 2199 struct si_context *sctx = (struct si_context *)rctx; 2200 struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; 2201 2202 cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples, 2203 sctx->ps_iter_samples); 2204} 2205 2206const struct r600_atom si_atom_msaa_config = { si_emit_msaa_config, 10 }; /* number of CS dwords */ 2207 2208static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples) 2209{ 2210 struct si_context *sctx = (struct si_context *)ctx; 2211 2212 if (sctx->ps_iter_samples == min_samples) 2213 return; 2214 2215 sctx->ps_iter_samples = min_samples; 2216 2217 if (sctx->framebuffer.nr_samples > 1) 2218 sctx->msaa_config.dirty = true; 2219} 2220 2221/* 2222 * Samplers 2223 */ 2224 2225static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx, 2226 struct pipe_resource *texture, 2227 const struct pipe_sampler_view *state) 2228{ 2229 struct si_context *sctx = (struct si_context*)ctx; 2230 struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view); 2231 struct r600_texture *tmp = (struct r600_texture*)texture; 2232 const struct util_format_description *desc; 2233 unsigned format, num_format; 2234 uint32_t pitch = 0; 2235 unsigned char state_swizzle[4], swizzle[4]; 2236 unsigned height, depth, width; 2237 enum pipe_format pipe_format = state->format; 2238 struct radeon_surface_level *surflevel; 2239 int first_non_void; 2240 uint64_t va; 2241 2242 if (view == NULL) 2243 return NULL; 2244 2245 /* initialize base object */ 2246 view->base = *state; 2247 view->base.texture = NULL; 2248 
pipe_resource_reference(&view->base.texture, texture); 2249 view->base.reference.count = 1; 2250 view->base.context = ctx; 2251 view->resource = &tmp->resource; 2252 2253 /* Buffer resource. */ 2254 if (texture->target == PIPE_BUFFER) { 2255 unsigned stride; 2256 2257 desc = util_format_description(state->format); 2258 first_non_void = util_format_get_first_non_void_channel(state->format); 2259 stride = desc->block.bits / 8; 2260 va = tmp->resource.gpu_address + state->u.buf.first_element*stride; 2261 format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); 2262 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); 2263 2264 view->state[0] = va; 2265 view->state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | 2266 S_008F04_STRIDE(stride); 2267 view->state[2] = state->u.buf.last_element + 1 - state->u.buf.first_element; 2268 view->state[3] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 2269 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 2270 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 2271 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | 2272 S_008F0C_NUM_FORMAT(num_format) | 2273 S_008F0C_DATA_FORMAT(format); 2274 2275 LIST_ADDTAIL(&view->list, &sctx->b.texture_buffers); 2276 return &view->base; 2277 } 2278 2279 state_swizzle[0] = state->swizzle_r; 2280 state_swizzle[1] = state->swizzle_g; 2281 state_swizzle[2] = state->swizzle_b; 2282 state_swizzle[3] = state->swizzle_a; 2283 2284 surflevel = tmp->surface.level; 2285 2286 /* Texturing with separate depth and stencil. */ 2287 if (tmp->is_depth && !tmp->is_flushing_texture) { 2288 switch (pipe_format) { 2289 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 2290 pipe_format = PIPE_FORMAT_Z32_FLOAT; 2291 break; 2292 case PIPE_FORMAT_X8Z24_UNORM: 2293 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2294 /* Z24 is always stored like this. 
*/ 2295 pipe_format = PIPE_FORMAT_Z24X8_UNORM; 2296 break; 2297 case PIPE_FORMAT_X24S8_UINT: 2298 case PIPE_FORMAT_S8X24_UINT: 2299 case PIPE_FORMAT_X32_S8X24_UINT: 2300 pipe_format = PIPE_FORMAT_S8_UINT; 2301 surflevel = tmp->surface.stencil_level; 2302 break; 2303 default:; 2304 } 2305 } 2306 2307 desc = util_format_description(pipe_format); 2308 2309 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 2310 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; 2311 const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1}; 2312 2313 switch (pipe_format) { 2314 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2315 case PIPE_FORMAT_X24S8_UINT: 2316 case PIPE_FORMAT_X32_S8X24_UINT: 2317 case PIPE_FORMAT_X8Z24_UNORM: 2318 util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); 2319 break; 2320 default: 2321 util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle); 2322 } 2323 } else { 2324 util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle); 2325 } 2326 2327 first_non_void = util_format_get_first_non_void_channel(pipe_format); 2328 2329 switch (pipe_format) { 2330 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2331 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2332 break; 2333 default: 2334 if (first_non_void < 0) { 2335 if (util_format_is_compressed(pipe_format)) { 2336 switch (pipe_format) { 2337 case PIPE_FORMAT_DXT1_SRGB: 2338 case PIPE_FORMAT_DXT1_SRGBA: 2339 case PIPE_FORMAT_DXT3_SRGBA: 2340 case PIPE_FORMAT_DXT5_SRGBA: 2341 case PIPE_FORMAT_BPTC_SRGBA: 2342 num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 2343 break; 2344 case PIPE_FORMAT_RGTC1_SNORM: 2345 case PIPE_FORMAT_LATC1_SNORM: 2346 case PIPE_FORMAT_RGTC2_SNORM: 2347 case PIPE_FORMAT_LATC2_SNORM: 2348 /* implies float, so use SNORM/UNORM to determine 2349 whether data is signed or not */ 2350 case PIPE_FORMAT_BPTC_RGB_FLOAT: 2351 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 2352 break; 2353 default: 2354 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2355 break; 2356 } 2357 } else if (desc->layout == 
UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 2358 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2359 } else { 2360 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 2361 } 2362 } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { 2363 num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 2364 } else { 2365 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2366 2367 switch (desc->channel[first_non_void].type) { 2368 case UTIL_FORMAT_TYPE_FLOAT: 2369 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 2370 break; 2371 case UTIL_FORMAT_TYPE_SIGNED: 2372 if (desc->channel[first_non_void].normalized) 2373 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 2374 else if (desc->channel[first_non_void].pure_integer) 2375 num_format = V_008F14_IMG_NUM_FORMAT_SINT; 2376 else 2377 num_format = V_008F14_IMG_NUM_FORMAT_SSCALED; 2378 break; 2379 case UTIL_FORMAT_TYPE_UNSIGNED: 2380 if (desc->channel[first_non_void].normalized) 2381 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2382 else if (desc->channel[first_non_void].pure_integer) 2383 num_format = V_008F14_IMG_NUM_FORMAT_UINT; 2384 else 2385 num_format = V_008F14_IMG_NUM_FORMAT_USCALED; 2386 } 2387 } 2388 } 2389 2390 format = si_translate_texformat(ctx->screen, pipe_format, desc, first_non_void); 2391 if (format == ~0) { 2392 format = 0; 2393 } 2394 2395 /* not supported any more */ 2396 //endian = si_colorformat_endian_swap(format); 2397 2398 width = surflevel[0].npix_x; 2399 height = surflevel[0].npix_y; 2400 depth = surflevel[0].npix_z; 2401 pitch = surflevel[0].nblk_x * util_format_get_blockwidth(pipe_format); 2402 2403 if (texture->target == PIPE_TEXTURE_1D_ARRAY) { 2404 height = 1; 2405 depth = texture->array_size; 2406 } else if (texture->target == PIPE_TEXTURE_2D_ARRAY) { 2407 depth = texture->array_size; 2408 } else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY) 2409 depth = texture->array_size / 6; 2410 2411 va = tmp->resource.gpu_address + surflevel[0].offset; 2412 va += tmp->mipmap_shift * surflevel[texture->last_level].slice_size * tmp->surface.array_size; 
2413 2414 view->state[0] = va >> 8; 2415 view->state[1] = (S_008F14_BASE_ADDRESS_HI(va >> 40) | 2416 S_008F14_DATA_FORMAT(format) | 2417 S_008F14_NUM_FORMAT(num_format)); 2418 view->state[2] = (S_008F18_WIDTH(width - 1) | 2419 S_008F18_HEIGHT(height - 1)); 2420 view->state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) | 2421 S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | 2422 S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | 2423 S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) | 2424 S_008F1C_BASE_LEVEL(texture->nr_samples > 1 ? 2425 0 : state->u.tex.first_level - tmp->mipmap_shift) | 2426 S_008F1C_LAST_LEVEL(texture->nr_samples > 1 ? 2427 util_logbase2(texture->nr_samples) : 2428 state->u.tex.last_level - tmp->mipmap_shift) | 2429 S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, 0, false)) | 2430 S_008F1C_POW2_PAD(texture->last_level > 0) | 2431 S_008F1C_TYPE(si_tex_dim(texture->target, texture->nr_samples))); 2432 view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1)); 2433 view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) | 2434 S_008F24_LAST_ARRAY(state->u.tex.last_layer)); 2435 view->state[6] = 0; 2436 view->state[7] = 0; 2437 2438 /* Initialize the sampler view for FMASK. 
*/ 2439 if (tmp->fmask.size) { 2440 uint64_t va = tmp->resource.gpu_address + tmp->fmask.offset; 2441 uint32_t fmask_format; 2442 2443 switch (texture->nr_samples) { 2444 case 2: 2445 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2; 2446 break; 2447 case 4: 2448 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4; 2449 break; 2450 case 8: 2451 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8; 2452 break; 2453 default: 2454 assert(0); 2455 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID; 2456 } 2457 2458 view->fmask_state[0] = va >> 8; 2459 view->fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | 2460 S_008F14_DATA_FORMAT(fmask_format) | 2461 S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT); 2462 view->fmask_state[2] = S_008F18_WIDTH(width - 1) | 2463 S_008F18_HEIGHT(height - 1); 2464 view->fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | 2465 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | 2466 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | 2467 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | 2468 S_008F1C_TILING_INDEX(tmp->fmask.tile_mode_index) | 2469 S_008F1C_TYPE(si_tex_dim(texture->target, 0)); 2470 view->fmask_state[4] = S_008F20_DEPTH(depth - 1) | 2471 S_008F20_PITCH(tmp->fmask.pitch - 1); 2472 view->fmask_state[5] = S_008F24_BASE_ARRAY(state->u.tex.first_layer) | 2473 S_008F24_LAST_ARRAY(state->u.tex.last_layer); 2474 view->fmask_state[6] = 0; 2475 view->fmask_state[7] = 0; 2476 } 2477 2478 return &view->base; 2479} 2480 2481static void si_sampler_view_destroy(struct pipe_context *ctx, 2482 struct pipe_sampler_view *state) 2483{ 2484 struct si_sampler_view *view = (struct si_sampler_view *)state; 2485 2486 if (view->resource->b.b.target == PIPE_BUFFER) 2487 LIST_DELINIT(&view->list); 2488 2489 pipe_resource_reference(&state->texture, NULL); 2490 FREE(view); 2491} 2492 2493static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter) 2494{ 2495 return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER || 2496 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER || 
2497 (linear_filter && 2498 (wrap == PIPE_TEX_WRAP_CLAMP || 2499 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP)); 2500} 2501 2502static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state) 2503{ 2504 bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST || 2505 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST; 2506 2507 return (state->border_color.ui[0] || state->border_color.ui[1] || 2508 state->border_color.ui[2] || state->border_color.ui[3]) && 2509 (wrap_mode_uses_border_color(state->wrap_s, linear_filter) || 2510 wrap_mode_uses_border_color(state->wrap_t, linear_filter) || 2511 wrap_mode_uses_border_color(state->wrap_r, linear_filter)); 2512} 2513 2514static void *si_create_sampler_state(struct pipe_context *ctx, 2515 const struct pipe_sampler_state *state) 2516{ 2517 struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state); 2518 unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0; 2519 unsigned border_color_type; 2520 2521 if (rstate == NULL) { 2522 return NULL; 2523 } 2524 2525 if (sampler_state_needs_border_color(state)) 2526 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER; 2527 else 2528 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; 2529 2530 rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) | 2531 S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) | 2532 S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) | 2533 r600_tex_aniso_filter(state->max_anisotropy) << 9 | 2534 S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) | 2535 S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) | 2536 S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map)); 2537 rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | 2538 S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8))); 2539 rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | 2540 S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter) | aniso_flag_offset) | 2541 
S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter) | aniso_flag_offset) | 2542 S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter))); 2543 rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type); 2544 2545 if (border_color_type == V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) { 2546 memcpy(rstate->border_color, state->border_color.ui, 2547 sizeof(rstate->border_color)); 2548 } 2549 2550 return rstate; 2551} 2552 2553/* Upload border colors and update the pointers in resource descriptors. 2554 * There can only be 4096 border colors per context. 2555 * 2556 * XXX: This is broken if the buffer gets reallocated. 2557 */ 2558static void si_set_border_colors(struct si_context *sctx, unsigned count, 2559 void **states) 2560{ 2561 struct si_sampler_state **rstates = (struct si_sampler_state **)states; 2562 uint32_t *border_color_table = NULL; 2563 int i, j; 2564 2565 for (i = 0; i < count; i++) { 2566 if (rstates[i] && 2567 G_008F3C_BORDER_COLOR_TYPE(rstates[i]->val[3]) == 2568 V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) { 2569 if (!sctx->border_color_table || 2570 ((sctx->border_color_offset + count - i) & 2571 C_008F3C_BORDER_COLOR_PTR)) { 2572 r600_resource_reference(&sctx->border_color_table, NULL); 2573 sctx->border_color_offset = 0; 2574 2575 sctx->border_color_table = 2576 si_resource_create_custom(&sctx->screen->b.b, 2577 PIPE_USAGE_DYNAMIC, 2578 4096 * 4 * 4); 2579 } 2580 2581 if (!border_color_table) { 2582 border_color_table = 2583 sctx->b.ws->buffer_map(sctx->border_color_table->cs_buf, 2584 sctx->b.rings.gfx.cs, 2585 PIPE_TRANSFER_WRITE | 2586 PIPE_TRANSFER_UNSYNCHRONIZED); 2587 } 2588 2589 for (j = 0; j < 4; j++) { 2590 border_color_table[4 * sctx->border_color_offset + j] = 2591 util_le32_to_cpu(rstates[i]->border_color[j]); 2592 } 2593 2594 rstates[i]->val[3] &= C_008F3C_BORDER_COLOR_PTR; 2595 rstates[i]->val[3] |= S_008F3C_BORDER_COLOR_PTR(sctx->border_color_offset++); 2596 } 2597 } 2598 2599 if (border_color_table) { 2600 struct si_pm4_state 
*pm4 = CALLOC_STRUCT(si_pm4_state); 2601 2602 uint64_t va_offset = sctx->border_color_table->gpu_address; 2603 2604 si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va_offset >> 8); 2605 if (sctx->b.chip_class >= CIK) 2606 si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va_offset >> 40); 2607 si_pm4_add_bo(pm4, sctx->border_color_table, RADEON_USAGE_READ, 2608 RADEON_PRIO_SHADER_DATA); 2609 si_pm4_set_state(sctx, ta_bordercolor_base, pm4); 2610 } 2611} 2612 2613static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader, 2614 unsigned start, unsigned count, 2615 void **states) 2616{ 2617 struct si_context *sctx = (struct si_context *)ctx; 2618 2619 if (!count || shader >= SI_NUM_SHADERS) 2620 return; 2621 2622 si_set_border_colors(sctx, count, states); 2623 si_set_sampler_descriptors(sctx, shader, start, count, states); 2624} 2625 2626static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) 2627{ 2628 struct si_context *sctx = (struct si_context *)ctx; 2629 struct si_state_sample_mask *state = CALLOC_STRUCT(si_state_sample_mask); 2630 struct si_pm4_state *pm4 = &state->pm4; 2631 uint16_t mask = sample_mask; 2632 2633 if (state == NULL) 2634 return; 2635 2636 state->sample_mask = mask; 2637 si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, mask | (mask << 16)); 2638 si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, mask | (mask << 16)); 2639 2640 si_pm4_set_state(sctx, sample_mask, state); 2641} 2642 2643static void si_delete_sampler_state(struct pipe_context *ctx, void *state) 2644{ 2645 free(state); 2646} 2647 2648/* 2649 * Vertex elements & buffers 2650 */ 2651 2652static void *si_create_vertex_elements(struct pipe_context *ctx, 2653 unsigned count, 2654 const struct pipe_vertex_element *elements) 2655{ 2656 struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element); 2657 int i; 2658 2659 assert(count < PIPE_MAX_ATTRIBS); 2660 if (!v) 2661 return NULL; 2662 2663 v->count = count; 2664 for (i = 0; i < count; ++i) { 
2665 const struct util_format_description *desc; 2666 unsigned data_format, num_format; 2667 int first_non_void; 2668 2669 desc = util_format_description(elements[i].src_format); 2670 first_non_void = util_format_get_first_non_void_channel(elements[i].src_format); 2671 data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); 2672 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); 2673 2674 v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 2675 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 2676 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 2677 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | 2678 S_008F0C_NUM_FORMAT(num_format) | 2679 S_008F0C_DATA_FORMAT(data_format); 2680 v->format_size[i] = desc->block.bits / 8; 2681 } 2682 memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count); 2683 2684 return v; 2685} 2686 2687static void si_bind_vertex_elements(struct pipe_context *ctx, void *state) 2688{ 2689 struct si_context *sctx = (struct si_context *)ctx; 2690 struct si_vertex_element *v = (struct si_vertex_element*)state; 2691 2692 sctx->vertex_elements = v; 2693 sctx->vertex_buffers_dirty = true; 2694} 2695 2696static void si_delete_vertex_element(struct pipe_context *ctx, void *state) 2697{ 2698 struct si_context *sctx = (struct si_context *)ctx; 2699 2700 if (sctx->vertex_elements == state) 2701 sctx->vertex_elements = NULL; 2702 FREE(state); 2703} 2704 2705static void si_set_vertex_buffers(struct pipe_context *ctx, 2706 unsigned start_slot, unsigned count, 2707 const struct pipe_vertex_buffer *buffers) 2708{ 2709 struct si_context *sctx = (struct si_context *)ctx; 2710 struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot; 2711 int i; 2712 2713 assert(start_slot + count <= Elements(sctx->vertex_buffer)); 2714 2715 if (buffers) { 2716 for (i = 0; i < count; i++) { 2717 const struct pipe_vertex_buffer *src = buffers + i; 2718 struct 
pipe_vertex_buffer *dsti = dst + i; 2719 2720 pipe_resource_reference(&dsti->buffer, src->buffer); 2721 dsti->buffer_offset = src->buffer_offset; 2722 dsti->stride = src->stride; 2723 } 2724 } else { 2725 for (i = 0; i < count; i++) { 2726 pipe_resource_reference(&dst[i].buffer, NULL); 2727 } 2728 } 2729 sctx->vertex_buffers_dirty = true; 2730} 2731 2732static void si_set_index_buffer(struct pipe_context *ctx, 2733 const struct pipe_index_buffer *ib) 2734{ 2735 struct si_context *sctx = (struct si_context *)ctx; 2736 2737 if (ib) { 2738 pipe_resource_reference(&sctx->index_buffer.buffer, ib->buffer); 2739 memcpy(&sctx->index_buffer, ib, sizeof(*ib)); 2740 } else { 2741 pipe_resource_reference(&sctx->index_buffer.buffer, NULL); 2742 } 2743} 2744 2745/* 2746 * Misc 2747 */ 2748static void si_set_polygon_stipple(struct pipe_context *ctx, 2749 const struct pipe_poly_stipple *state) 2750{ 2751} 2752 2753static void si_texture_barrier(struct pipe_context *ctx) 2754{ 2755 struct si_context *sctx = (struct si_context *)ctx; 2756 2757 sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE | 2758 R600_CONTEXT_FLUSH_AND_INV_CB; 2759} 2760 2761static void *si_create_blend_custom(struct si_context *sctx, unsigned mode) 2762{ 2763 struct pipe_blend_state blend; 2764 2765 memset(&blend, 0, sizeof(blend)); 2766 blend.independent_blend_enable = true; 2767 blend.rt[0].colormask = 0xf; 2768 return si_create_blend_state_mode(&sctx->b.b, &blend, mode); 2769} 2770 2771static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw, 2772 bool include_draw_vbo) 2773{ 2774 si_need_cs_space((struct si_context*)ctx, num_dw, include_draw_vbo); 2775} 2776 2777void si_init_state_functions(struct si_context *sctx) 2778{ 2779 si_init_atom(&sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state, 0); 2780 si_init_atom(&sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10); 2781 si_init_atom(&sctx->clip_regs, &sctx->atoms.s.clip_regs, 
si_emit_clip_regs, 6); 2782 2783 sctx->b.b.create_blend_state = si_create_blend_state; 2784 sctx->b.b.bind_blend_state = si_bind_blend_state; 2785 sctx->b.b.delete_blend_state = si_delete_blend_state; 2786 sctx->b.b.set_blend_color = si_set_blend_color; 2787 2788 sctx->b.b.create_rasterizer_state = si_create_rs_state; 2789 sctx->b.b.bind_rasterizer_state = si_bind_rs_state; 2790 sctx->b.b.delete_rasterizer_state = si_delete_rs_state; 2791 2792 sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state; 2793 sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state; 2794 sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state; 2795 2796 sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx); 2797 sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE); 2798 sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS); 2799 sctx->custom_blend_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR); 2800 2801 sctx->b.b.set_clip_state = si_set_clip_state; 2802 sctx->b.b.set_scissor_states = si_set_scissor_states; 2803 sctx->b.b.set_viewport_states = si_set_viewport_states; 2804 sctx->b.b.set_stencil_ref = si_set_pipe_stencil_ref; 2805 2806 sctx->b.b.set_framebuffer_state = si_set_framebuffer_state; 2807 sctx->b.b.get_sample_position = cayman_get_sample_position; 2808 2809 sctx->b.b.create_sampler_state = si_create_sampler_state; 2810 sctx->b.b.bind_sampler_states = si_bind_sampler_states; 2811 sctx->b.b.delete_sampler_state = si_delete_sampler_state; 2812 2813 sctx->b.b.create_sampler_view = si_create_sampler_view; 2814 sctx->b.b.sampler_view_destroy = si_sampler_view_destroy; 2815 2816 sctx->b.b.set_sample_mask = si_set_sample_mask; 2817 2818 sctx->b.b.create_vertex_elements_state = si_create_vertex_elements; 2819 sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements; 2820 sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element; 2821 sctx->b.b.set_vertex_buffers 
= si_set_vertex_buffers; 2822 sctx->b.b.set_index_buffer = si_set_index_buffer; 2823 2824 sctx->b.b.texture_barrier = si_texture_barrier; 2825 sctx->b.b.set_polygon_stipple = si_set_polygon_stipple; 2826 sctx->b.b.set_min_samples = si_set_min_samples; 2827 2828 sctx->b.dma_copy = si_dma_copy; 2829 sctx->b.set_occlusion_query_state = si_set_occlusion_query_state; 2830 sctx->b.need_gfx_cs_space = si_need_gfx_cs_space; 2831 2832 sctx->b.b.draw_vbo = si_draw_vbo; 2833}

/**
 * Write one PA_SC_RASTER_CONFIG value per shader engine (SE), derived from
 * the default \p raster_config and the screen's enabled-backend mask.
 *
 * si_init_config() calls this when si_backend_enabled_mask is non-zero and
 * has fewer bits set than r600_num_backends.
 *
 * For each pair of units (SE0/SE1, packer 0/1 inside an SE, RB 0/1 inside a
 * packer): if either member has no bit set in the mask, the matching *_MAP
 * field is cleared and forced to its "_3" value when the first member is the
 * one without enabled bits, or to its "_0" value otherwise.
 * NOTE(review): presumably these values steer all work to the remaining
 * unit -- confirm against the register documentation.
 *
 * The SE_MAP override is applied once and is shared by every SE; the PKR_MAP
 * and RB_MAP_PKR0/1 overrides are computed separately for each SE.
 *
 * \param sctx           context; only sctx->screen->b.info is read
 * \param pm4            PM4 state that receives the register writes
 * \param raster_config  default PA_SC_RASTER_CONFIG value for this chip
 */
static void
si_write_harvested_raster_configs(struct si_context *sctx,
				  struct si_pm4_state *pm4,
				  unsigned raster_config)
{
	/* Clamp to at least 1 so the divisions below cannot divide by zero. */
	unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1);
	unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1);
	unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
	unsigned num_rb = sctx->screen->b.info.r600_num_backends;
	unsigned rb_per_pkr = num_rb / num_se / sh_per_se;
	unsigned rb_per_se = num_rb / num_se;
	/* Unsigned literals keep the shifts in the type of the masks. */
	unsigned se0_mask = (1u << rb_per_se) - 1;
	unsigned se1_mask = se0_mask << rb_per_se;
	unsigned se;

	assert(num_se == 1 || num_se == 2);
	assert(sh_per_se == 1 || sh_per_se == 2);
	assert(rb_per_pkr == 1 || rb_per_pkr == 2);

	/* XXX: I can't figure out what the *_XSEL and *_YSEL
	 * fields are for, so I'm leaving them as their default
	 * values. */

	/* Shader-engine level: override SE_MAP if one SE has no enabled RB. */
	se0_mask &= rb_mask;
	se1_mask &= rb_mask;
	if (num_se == 2 && (!se0_mask || !se1_mask)) {
		raster_config &= C_028350_SE_MAP;

		if (!se0_mask) {
			raster_config |=
				S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3);
		} else {
			raster_config |=
				S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		/* RB bits of this SE's first and second packer. */
		unsigned pkr0_mask = ((1u << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;

		/* Packer level: override PKR_MAP if one packer has no enabled RB. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (sh_per_se == 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= C_028350_PKR_MAP;

			if (!pkr0_mask) {
				raster_config_se |=
					S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |=
					S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* RB level: only relevant when a packer holds two RBs. */
		if (rb_per_pkr == 2) {
			/* The two RBs of packer 0 in this SE. */
			unsigned rb0_mask = 1u << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= C_028350_RB_MAP_PKR0;

				if (!rb0_mask) {
					raster_config_se |=
						S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (sh_per_se == 2) {
				/* The two RBs of packer 1 in this SE. */
				rb0_mask = 1u << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= C_028350_RB_MAP_PKR1;

					if (!rb0_mask) {
						raster_config_se |=
							S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* Select this SE via GRBM_GFX_INDEX, then write its config.
		 * The order of these two writes matters. */
		si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
			       SE_INDEX(se) | SH_BROADCAST_WRITES |
			       INSTANCE_BROADCAST_WRITES);
		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se);
	}

	/* Put GRBM_GFX_INDEX back into full broadcast mode. */
	si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
		       SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
		       INSTANCE_BROADCAST_WRITES);
}

/**
 * Build the "init" PM4 state, a list of register writes with fixed or
 * chip-dependent values, and install it on the context with
 * si_pm4_set_state().
 *
 * If the PM4 state cannot be allocated the function returns without
 * installing anything; no error is reported to the caller.
 *
 * \param sctx  context whose init state is created
 */
void si_init_config(struct si_context *sctx)
{
	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);

	if (pm4 == NULL)
		return;

	si_cmd_context_control(pm4);

	si_pm4_set_reg(pm4, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0);
	si_pm4_set_reg(pm4, R_028A14_VGT_HOS_CNTL, 0x0);
	si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0);
	si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0);
	si_pm4_set_reg(pm4, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0);
	si_pm4_set_reg(pm4, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0);
	si_pm4_set_reg(pm4, R_028A28_VGT_GROUP_FIRST_DECR, 0x0);
	si_pm4_set_reg(pm4, R_028A2C_VGT_GROUP_DECR, 0x0);
	si_pm4_set_reg(pm4, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0);
	si_pm4_set_reg(pm4, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0);
	si_pm4_set_reg(pm4, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0);
	si_pm4_set_reg(pm4, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0);

	/* FIXME calculate these values somehow ??? */
	si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, 0x80);
	si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
	si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);

	si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0x0);
	si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
	/* VGT_VTX_CNT_EN is written once, below next to VGT_REUSE_OFF. */
	si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);

	si_pm4_set_reg(pm4, R_028B60_VGT_GS_VERT_ITEMSIZE_1, 0);
	si_pm4_set_reg(pm4, R_028B64_VGT_GS_VERT_ITEMSIZE_2, 0);
	si_pm4_set_reg(pm4, R_028B68_VGT_GS_VERT_ITEMSIZE_3, 0);
	si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT, 0);

	si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
	si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0x00000000);
	si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
	/* PA_CL_ENHANCE is only programmed on chips older than CIK. */
	if (sctx->b.chip_class < CIK)
		si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
			       S_008A14_CLIP_VTX_REORDER_ENA(1));

	si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
	si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);

	si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);

	if (sctx->b.chip_class >= CIK) {
		/* CIK: fixed per-family raster configs; the enabled-backend
		 * mask is not consulted on this path. */
		switch (sctx->screen->b.family) {
		case CHIP_BONAIRE:
			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x16000012);
			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000);
			break;
		case CHIP_HAWAII:
			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x3a00161a);
			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x0000002e);
			break;
		case CHIP_KAVERI:
			/* XXX todo */
			/* fallthrough */
		case CHIP_KABINI:
			/* XXX todo */
			/* fallthrough */
		case CHIP_MULLINS:
			/* XXX todo */
			/* fallthrough */
		default:
			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000);
			break;
		}
	} else {
		unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
		unsigned num_rb = sctx->screen->b.info.r600_num_backends;
		unsigned raster_config;

		/* Default raster config for each family on this path. */
		switch (sctx->screen->b.family) {
		case CHIP_TAHITI:
		case CHIP_PITCAIRN:
			raster_config = 0x2a00126a;
			break;
		case CHIP_VERDE:
			raster_config = 0x0000124a;
			break;
		case CHIP_OLAND:
			raster_config = 0x00000082;
			break;
		case CHIP_HAINAN:
			raster_config = 0x00000000;
			break;
		default:
			fprintf(stderr,
				"radeonsi: Unknown GPU, using 0 for raster_config\n");
			raster_config = 0x00000000;
			break;
		}

		/* Always use the default config when all backends are enabled
		 * (or when we failed to determine the enabled backends).
		 */
		if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
				       raster_config);
		} else {
			si_write_harvested_raster_configs(sctx, pm4, raster_config);
		}
	}

	si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
	si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
	si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
		       S_028244_BR_X(16384) | S_028244_BR_Y(16384));
	si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
	si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,
		       S_028034_BR_X(16384) | S_028034_BR_Y(16384));

	si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
	si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
	/* 0x3F800000 is the IEEE-754 single-precision bit pattern of 1.0f. */
	si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000);
	si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000);
	si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0x00000000);
	si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000);
	si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000);
	si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000);
	si_pm4_set_reg(pm4, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000);
	si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, 0x00000000);
	si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, 0x00000000);
	si_pm4_set_reg(pm4, R_028028_DB_STENCIL_CLEAR, 0x00000000);
	si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
	si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
	si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);

	/* There is a hang if stencil is used and fast stencil is enabled
	 * regardless of whether HTILE is depth-only or not.
	 */
	si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE,
		       S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
		       S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE) |
		       S_02800C_FAST_STENCIL_DISABLE(1));

	si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
	si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
	si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);

	if (sctx->b.chip_class >= CIK) {
		si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
		si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(0));
		si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
	}

	/* Hand the finished state to the context as its "init" state. */
	si_pm4_set_state(sctx, init, pm4);
}