si_state.c revision 08775a219628611989ab87c621255ac3c841dcda
1/* 2 * Copyright 2012 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Christian König <christian.koenig@amd.com> 25 */ 26 27#include "si_pipe.h" 28#include "si_shader.h" 29#include "sid.h" 30#include "radeon/r600_cs.h" 31 32#include "util/u_dual_blend.h" 33#include "util/u_format.h" 34#include "util/u_format_s3tc.h" 35#include "util/u_memory.h" 36#include "util/u_pstipple.h" 37 38static void si_init_atom(struct r600_atom *atom, struct r600_atom **list_elem, 39 void (*emit_func)(struct si_context *ctx, struct r600_atom *state), 40 unsigned num_dw) 41{ 42 atom->emit = (void*)emit_func; 43 atom->num_dw = num_dw; 44 atom->dirty = false; 45 *list_elem = atom; 46} 47 48unsigned si_array_mode(unsigned mode) 49{ 50 switch (mode) { 51 case RADEON_SURF_MODE_LINEAR_ALIGNED: 52 return V_009910_ARRAY_LINEAR_ALIGNED; 53 case RADEON_SURF_MODE_1D: 54 return V_009910_ARRAY_1D_TILED_THIN1; 55 case RADEON_SURF_MODE_2D: 56 return V_009910_ARRAY_2D_TILED_THIN1; 57 default: 58 case RADEON_SURF_MODE_LINEAR: 59 return V_009910_ARRAY_LINEAR_GENERAL; 60 } 61} 62 63uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex) 64{ 65 if (sscreen->b.chip_class >= CIK && 66 sscreen->b.info.cik_macrotile_mode_array_valid) { 67 unsigned index, tileb; 68 69 tileb = 8 * 8 * tex->surface.bpe; 70 tileb = MIN2(tex->surface.tile_split, tileb); 71 72 for (index = 0; tileb > 64; index++) { 73 tileb >>= 1; 74 } 75 assert(index < 16); 76 77 return (sscreen->b.info.cik_macrotile_mode_array[index] >> 6) & 0x3; 78 } 79 80 if (sscreen->b.chip_class == SI && 81 sscreen->b.info.si_tile_mode_array_valid) { 82 /* Don't use stencil_tiling_index, because num_banks is always 83 * read from the depth mode. */ 84 unsigned tile_mode_index = tex->surface.tiling_index[0]; 85 assert(tile_mode_index < 32); 86 87 return G_009910_NUM_BANKS(sscreen->b.info.si_tile_mode_array[tile_mode_index]); 88 } 89 90 /* The old way. */ 91 switch (sscreen->b.tiling_info.num_banks) { 92 case 2: 93 return V_02803C_ADDR_SURF_2_BANK; 94 case 4: 95 return V_02803C_ADDR_SURF_4_BANK; 96 case 8: 97 default: 98 return V_02803C_ADDR_SURF_8_BANK; 99 case 16: 100 return V_02803C_ADDR_SURF_16_BANK; 101 } 102} 103 104unsigned cik_tile_split(unsigned tile_split) 105{ 106 switch (tile_split) { 107 case 64: 108 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_64B; 109 break; 110 case 128: 111 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_128B; 112 break; 113 case 256: 114 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_256B; 115 break; 116 case 512: 117 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_512B; 118 break; 119 default: 120 case 1024: 121 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_1KB; 122 break; 123 case 2048: 124 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_2KB; 125 break; 126 case 4096: 127 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_4KB; 128 break; 129 } 130 return tile_split; 131} 132 133unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect) 134{ 135 switch (macro_tile_aspect) { 136 default: 137 case 1: 138 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_1; 139 break; 140 case 2: 141 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_2; 142 break; 143 case 4: 144 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_4; 145 break; 146 case 8: 147 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_8; 148 break; 149 } 150 return macro_tile_aspect; 151} 152 153unsigned cik_bank_wh(unsigned bankwh) 154{ 155 switch (bankwh) { 156 default: 157 case 1: 158 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_1; 159 break; 160 case 2: 161 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_2; 162 break; 163 case 4: 164 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_4; 165 break; 166 case 8: 167 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_8; 168 break; 169 } 170 return bankwh; 171} 172 173unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode) 174{ 175 if (sscreen->b.info.si_tile_mode_array_valid) { 176 uint32_t gb_tile_mode = sscreen->b.info.si_tile_mode_array[tile_mode]; 177 178 return G_009910_PIPE_CONFIG(gb_tile_mode); 179 } 180 181 /* This is probably broken for a lot of chips, but it's only used 182 * if the kernel cannot return the tile mode array for CIK. */ 183 switch (sscreen->b.info.r600_num_tile_pipes) { 184 case 16: 185 return V_02803C_X_ADDR_SURF_P16_32X32_16X16; 186 case 8: 187 return V_02803C_X_ADDR_SURF_P8_32X32_16X16; 188 case 4: 189 default: 190 if (sscreen->b.info.r600_num_backends == 4) 191 return V_02803C_X_ADDR_SURF_P4_16X16; 192 else 193 return V_02803C_X_ADDR_SURF_P4_8X16; 194 case 2: 195 return V_02803C_ADDR_SURF_P2; 196 } 197} 198 199static unsigned si_map_swizzle(unsigned swizzle) 200{ 201 switch (swizzle) { 202 case UTIL_FORMAT_SWIZZLE_Y: 203 return V_008F0C_SQ_SEL_Y; 204 case UTIL_FORMAT_SWIZZLE_Z: 205 return V_008F0C_SQ_SEL_Z; 206 case UTIL_FORMAT_SWIZZLE_W: 207 return V_008F0C_SQ_SEL_W; 208 case UTIL_FORMAT_SWIZZLE_0: 209 return V_008F0C_SQ_SEL_0; 210 case UTIL_FORMAT_SWIZZLE_1: 211 return V_008F0C_SQ_SEL_1; 212 default: /* UTIL_FORMAT_SWIZZLE_X */ 213 return V_008F0C_SQ_SEL_X; 214 } 215} 216 217static uint32_t S_FIXED(float value, uint32_t frac_bits) 218{ 219 return value * (1 << frac_bits); 220} 221 222/* 12.4 fixed-point */ 223static unsigned si_pack_float_12p4(float x) 224{ 225 return x <= 0 ? 0 : 226 x >= 4096 ? 0xffff : x * 16; 227} 228 229/* 230 * Inferred framebuffer and blender state. 231 * 232 * One of the reasons this must be derived from the framebuffer state is that: 233 * - The blend state mask is 0xf most of the time. 234 * - The COLOR1 format isn't INVALID because of possible dual-source blending, 235 * so COLOR1 is enabled pretty much all the time. 236 * So CB_TARGET_MASK is the only register that can disable COLOR1. 237 * 238 * Another reason is to avoid a hang with dual source blending. 239 */ 240void si_update_fb_blend_state(struct si_context *sctx) 241{ 242 struct si_pm4_state *pm4; 243 struct si_state_blend *blend = sctx->queued.named.blend; 244 uint32_t mask = 0, i; 245 246 if (blend == NULL) 247 return; 248 249 pm4 = CALLOC_STRUCT(si_pm4_state); 250 if (pm4 == NULL) 251 return; 252 253 for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) 254 if (sctx->framebuffer.state.cbufs[i]) 255 mask |= 0xf << (4*i); 256 mask &= blend->cb_target_mask; 257 258 /* Avoid a hang that happens when dual source blending is enabled 259 * but there is not enough color outputs. This is undefined behavior, 260 * so disable color writes completely. 261 * 262 * Reproducible with Unigine Heaven 4.0 and drirc missing. 263 */ 264 if (blend->dual_src_blend && 265 (sctx->ps_shader->ps_colors_written & 0x3) != 0x3) 266 mask = 0; 267 268 si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask); 269 si_pm4_set_state(sctx, fb_blend, pm4); 270} 271 272/* 273 * Blender functions 274 */ 275 276static uint32_t si_translate_blend_function(int blend_func) 277{ 278 switch (blend_func) { 279 case PIPE_BLEND_ADD: 280 return V_028780_COMB_DST_PLUS_SRC; 281 case PIPE_BLEND_SUBTRACT: 282 return V_028780_COMB_SRC_MINUS_DST; 283 case PIPE_BLEND_REVERSE_SUBTRACT: 284 return V_028780_COMB_DST_MINUS_SRC; 285 case PIPE_BLEND_MIN: 286 return V_028780_COMB_MIN_DST_SRC; 287 case PIPE_BLEND_MAX: 288 return V_028780_COMB_MAX_DST_SRC; 289 default: 290 R600_ERR("Unknown blend function %d\n", blend_func); 291 assert(0); 292 break; 293 } 294 return 0; 295} 296 297static uint32_t si_translate_blend_factor(int blend_fact) 298{ 299 switch (blend_fact) { 300 case PIPE_BLENDFACTOR_ONE: 301 return V_028780_BLEND_ONE; 302 case PIPE_BLENDFACTOR_SRC_COLOR: 303 return V_028780_BLEND_SRC_COLOR; 304 case PIPE_BLENDFACTOR_SRC_ALPHA: 305 return V_028780_BLEND_SRC_ALPHA; 306 case PIPE_BLENDFACTOR_DST_ALPHA: 307 return V_028780_BLEND_DST_ALPHA; 308 case PIPE_BLENDFACTOR_DST_COLOR: 309 return V_028780_BLEND_DST_COLOR; 310 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 311 return V_028780_BLEND_SRC_ALPHA_SATURATE; 312 case PIPE_BLENDFACTOR_CONST_COLOR: 313 return V_028780_BLEND_CONSTANT_COLOR; 314 case PIPE_BLENDFACTOR_CONST_ALPHA: 315 return V_028780_BLEND_CONSTANT_ALPHA; 316 case PIPE_BLENDFACTOR_ZERO: 317 return V_028780_BLEND_ZERO; 318 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 319 return V_028780_BLEND_ONE_MINUS_SRC_COLOR; 320 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 321 return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; 322 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 323 return V_028780_BLEND_ONE_MINUS_DST_ALPHA; 324 case PIPE_BLENDFACTOR_INV_DST_COLOR: 325 return V_028780_BLEND_ONE_MINUS_DST_COLOR; 326 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 327 return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR; 328 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 329 return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA; 330 case PIPE_BLENDFACTOR_SRC1_COLOR: 331 return V_028780_BLEND_SRC1_COLOR; 332 case PIPE_BLENDFACTOR_SRC1_ALPHA: 333 return V_028780_BLEND_SRC1_ALPHA; 334 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 335 return V_028780_BLEND_INV_SRC1_COLOR; 336 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 337 return V_028780_BLEND_INV_SRC1_ALPHA; 338 default: 339 R600_ERR("Bad blend factor %d not supported!\n", blend_fact); 340 assert(0); 341 break; 342 } 343 return 0; 344} 345 346static void *si_create_blend_state_mode(struct pipe_context *ctx, 347 const struct pipe_blend_state *state, 348 unsigned mode) 349{ 350 struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend); 351 struct si_pm4_state *pm4 = &blend->pm4; 352 353 uint32_t color_control = 0; 354 355 if (blend == NULL) 356 return NULL; 357 358 blend->alpha_to_one = state->alpha_to_one; 359 blend->dual_src_blend = util_blend_state_is_dual(state, 0); 360 361 if (state->logicop_enable) { 362 color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4)); 363 } else { 364 color_control |= S_028808_ROP3(0xcc); 365 } 366 367 si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 368 S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 369 S_028B70_ALPHA_TO_MASK_OFFSET0(2) | 370 S_028B70_ALPHA_TO_MASK_OFFSET1(2) | 371 S_028B70_ALPHA_TO_MASK_OFFSET2(2) | 372 S_028B70_ALPHA_TO_MASK_OFFSET3(2)); 373 374 blend->cb_target_mask = 0; 375 for (int i = 0; i < 8; i++) { 376 /* state->rt entries > 0 only written if independent blending */ 377 const int j = state->independent_blend_enable ? i : 0; 378 379 unsigned eqRGB = state->rt[j].rgb_func; 380 unsigned srcRGB = state->rt[j].rgb_src_factor; 381 unsigned dstRGB = state->rt[j].rgb_dst_factor; 382 unsigned eqA = state->rt[j].alpha_func; 383 unsigned srcA = state->rt[j].alpha_src_factor; 384 unsigned dstA = state->rt[j].alpha_dst_factor; 385 386 unsigned blend_cntl = 0; 387 388 /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */ 389 blend->cb_target_mask |= state->rt[j].colormask << (4 * i); 390 391 if (!state->rt[j].blend_enable) { 392 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 393 continue; 394 } 395 396 blend_cntl |= S_028780_ENABLE(1); 397 blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB)); 398 blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB)); 399 blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB)); 400 401 if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { 402 blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1); 403 blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA)); 404 blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA)); 405 blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA)); 406 } 407 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 408 } 409 410 if (blend->cb_target_mask) { 411 color_control |= S_028808_MODE(mode); 412 } else { 413 color_control |= S_028808_MODE(V_028808_CB_DISABLE); 414 } 415 si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control); 416 417 return blend; 418} 419 420static void *si_create_blend_state(struct pipe_context *ctx, 421 const struct pipe_blend_state *state) 422{ 423 return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); 424} 425 426static void si_bind_blend_state(struct pipe_context *ctx, void *state) 427{ 428 struct si_context *sctx = (struct si_context *)ctx; 429 si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state); 430 si_update_fb_blend_state(sctx); 431} 432 433static void si_delete_blend_state(struct pipe_context *ctx, void *state) 434{ 435 struct si_context *sctx = (struct si_context *)ctx; 436 si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state); 437} 438 439static void si_set_blend_color(struct pipe_context *ctx, 440 const struct pipe_blend_color *state) 441{ 442 struct si_context *sctx = (struct si_context *)ctx; 443 struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); 444 445 if (pm4 == NULL) 446 return; 447 448 si_pm4_set_reg(pm4, R_028414_CB_BLEND_RED, fui(state->color[0])); 449 si_pm4_set_reg(pm4, R_028418_CB_BLEND_GREEN, fui(state->color[1])); 450 si_pm4_set_reg(pm4, R_02841C_CB_BLEND_BLUE, fui(state->color[2])); 451 si_pm4_set_reg(pm4, R_028420_CB_BLEND_ALPHA, fui(state->color[3])); 452 453 si_pm4_set_state(sctx, blend_color, pm4); 454} 455 456/* 457 * Clipping, scissors and viewport 458 */ 459 460static void si_set_clip_state(struct pipe_context *ctx, 461 const struct pipe_clip_state *state) 462{ 463 struct si_context *sctx = (struct si_context *)ctx; 464 struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); 465 struct pipe_constant_buffer cb; 466 467 if (pm4 == NULL) 468 return; 469 470 for (int i = 0; i < 6; i++) { 471 si_pm4_set_reg(pm4, R_0285BC_PA_CL_UCP_0_X + i * 16, 472 fui(state->ucp[i][0])); 473 si_pm4_set_reg(pm4, R_0285C0_PA_CL_UCP_0_Y + i * 16, 474 fui(state->ucp[i][1])); 475 si_pm4_set_reg(pm4, R_0285C4_PA_CL_UCP_0_Z + i * 16, 476 fui(state->ucp[i][2])); 477 si_pm4_set_reg(pm4, R_0285C8_PA_CL_UCP_0_W + i * 16, 478 fui(state->ucp[i][3])); 479 } 480 481 cb.buffer = NULL; 482 cb.user_buffer = state->ucp; 483 cb.buffer_offset = 0; 484 cb.buffer_size = 4*4*8; 485 ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, SI_DRIVER_STATE_CONST_BUF, &cb); 486 pipe_resource_reference(&cb.buffer, NULL); 487 488 si_pm4_set_state(sctx, clip, pm4); 489} 490 491#define SIX_BITS 0x3F 492 493static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom) 494{ 495 struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; 496 struct tgsi_shader_info *info = si_get_vs_info(sctx); 497 unsigned window_space = 498 info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; 499 unsigned clipdist_mask = 500 info->writes_clipvertex ? SIX_BITS : info->clipdist_writemask; 501 502 r600_write_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL, 503 S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) | 504 S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) | 505 S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) | 506 S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) | 507 S_02881C_VS_OUT_CCDIST0_VEC_ENA((clipdist_mask & 0x0F) != 0) | 508 S_02881C_VS_OUT_CCDIST1_VEC_ENA((clipdist_mask & 0xF0) != 0) | 509 S_02881C_VS_OUT_MISC_VEC_ENA(info->writes_psize || 510 info->writes_edgeflag || 511 info->writes_layer || 512 info->writes_viewport_index) | 513 S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1) | 514 (sctx->queued.named.rasterizer->clip_plane_enable & 515 clipdist_mask)); 516 r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL, 517 sctx->queued.named.rasterizer->pa_cl_clip_cntl | 518 (clipdist_mask ? 0 : 519 sctx->queued.named.rasterizer->clip_plane_enable & SIX_BITS) | 520 S_028810_CLIP_DISABLE(window_space)); 521} 522 523static void si_set_scissor_states(struct pipe_context *ctx, 524 unsigned start_slot, 525 unsigned num_scissors, 526 const struct pipe_scissor_state *state) 527{ 528 struct si_context *sctx = (struct si_context *)ctx; 529 struct si_state_scissor *scissor; 530 struct si_pm4_state *pm4; 531 int i; 532 533 for (i = start_slot; i < start_slot + num_scissors; i++) { 534 int idx = i - start_slot; 535 int offset = i * 4 * 2; 536 537 scissor = CALLOC_STRUCT(si_state_scissor); 538 if (scissor == NULL) 539 return; 540 pm4 = &scissor->pm4; 541 scissor->scissor = state[idx]; 542 si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset, 543 S_028250_TL_X(state[idx].minx) | S_028250_TL_Y(state[idx].miny) | 544 S_028250_WINDOW_OFFSET_DISABLE(1)); 545 si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR + offset, 546 S_028254_BR_X(state[idx].maxx) | S_028254_BR_Y(state[idx].maxy)); 547 si_pm4_set_state(sctx, scissor[i], scissor); 548 } 549} 550 551static void si_set_viewport_states(struct pipe_context *ctx, 552 unsigned start_slot, 553 unsigned num_viewports, 554 const struct pipe_viewport_state *state) 555{ 556 struct si_context *sctx = (struct si_context *)ctx; 557 struct si_state_viewport *viewport; 558 struct si_pm4_state *pm4; 559 int i; 560 561 for (i = start_slot; i < start_slot + num_viewports; i++) { 562 int idx = i - start_slot; 563 int offset = i * 4 * 6; 564 565 viewport = CALLOC_STRUCT(si_state_viewport); 566 if (!viewport) 567 return; 568 pm4 = &viewport->pm4; 569 570 viewport->viewport = state[idx]; 571 si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE + offset, fui(state[idx].scale[0])); 572 si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET + offset, fui(state[idx].translate[0])); 573 si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE + offset, fui(state[idx].scale[1])); 574 si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET + offset, fui(state[idx].translate[1])); 575 si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE + offset, fui(state[idx].scale[2])); 576 si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET + offset, fui(state[idx].translate[2])); 577 578 si_pm4_set_state(sctx, viewport[i], viewport); 579 } 580} 581 582/* 583 * inferred state between framebuffer and rasterizer 584 */ 585static void si_update_fb_rs_state(struct si_context *sctx) 586{ 587 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 588 struct si_pm4_state *pm4; 589 float offset_units; 590 591 if (!rs || !sctx->framebuffer.state.zsbuf) 592 return; 593 594 offset_units = sctx->queued.named.rasterizer->offset_units; 595 switch (sctx->framebuffer.state.zsbuf->texture->format) { 596 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 597 case PIPE_FORMAT_X8Z24_UNORM: 598 case PIPE_FORMAT_Z24X8_UNORM: 599 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 600 offset_units *= 2.0f; 601 break; 602 case PIPE_FORMAT_Z32_FLOAT: 603 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 604 offset_units *= 1.0f; 605 break; 606 case PIPE_FORMAT_Z16_UNORM: 607 offset_units *= 4.0f; 608 break; 609 default: 610 return; 611 } 612 613 pm4 = CALLOC_STRUCT(si_pm4_state); 614 615 if (pm4 == NULL) 616 return; 617 618 /* FIXME some of those reg can be computed with cso */ 619 si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 620 fui(sctx->queued.named.rasterizer->offset_scale)); 621 si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units)); 622 si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, 623 fui(sctx->queued.named.rasterizer->offset_scale)); 624 si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units)); 625 626 si_pm4_set_state(sctx, fb_rs, pm4); 627} 628 629/* 630 * Rasterizer 631 */ 632 633static uint32_t si_translate_fill(uint32_t func) 634{ 635 switch(func) { 636 case PIPE_POLYGON_MODE_FILL: 637 return V_028814_X_DRAW_TRIANGLES; 638 case PIPE_POLYGON_MODE_LINE: 639 return V_028814_X_DRAW_LINES; 640 case PIPE_POLYGON_MODE_POINT: 641 return V_028814_X_DRAW_POINTS; 642 default: 643 assert(0); 644 return V_028814_X_DRAW_POINTS; 645 } 646} 647 648static void *si_create_rs_state(struct pipe_context *ctx, 649 const struct pipe_rasterizer_state *state) 650{ 651 struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer); 652 struct si_pm4_state *pm4 = &rs->pm4; 653 unsigned tmp; 654 float psize_min, psize_max; 655 656 if (rs == NULL) { 657 return NULL; 658 } 659 660 rs->two_side = state->light_twoside; 661 rs->multisample_enable = state->multisample; 662 rs->clip_plane_enable = state->clip_plane_enable; 663 rs->line_stipple_enable = state->line_stipple_enable; 664 rs->poly_stipple_enable = state->poly_stipple_enable; 665 rs->line_smooth = state->line_smooth; 666 rs->poly_smooth = state->poly_smooth; 667 668 rs->flatshade = state->flatshade; 669 rs->sprite_coord_enable = state->sprite_coord_enable; 670 rs->pa_sc_line_stipple = state->line_stipple_enable ? 671 S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | 672 S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0; 673 rs->pa_cl_clip_cntl = 674 S_028810_PS_UCP_MODE(3) | 675 S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) | 676 S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) | 677 S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) | 678 S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) | 679 S_028810_DX_LINEAR_ATTR_CLIP_ENA(1); 680 681 /* offset */ 682 rs->offset_units = state->offset_units; 683 rs->offset_scale = state->offset_scale * 16.0f; 684 685 si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0, 686 S_0286D4_FLAT_SHADE_ENA(1) | 687 S_0286D4_PNT_SPRITE_ENA(1) | 688 S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | 689 S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) | 690 S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) | 691 S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) | 692 S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT)); 693 694 /* point size 12.4 fixed point */ 695 tmp = (unsigned)(state->point_size * 8.0); 696 si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp)); 697 698 if (state->point_size_per_vertex) { 699 psize_min = util_get_min_point_size(state); 700 psize_max = 8192; 701 } else { 702 /* Force the point size to be as if the vertex output was disabled. */ 703 psize_min = state->point_size; 704 psize_max = state->point_size; 705 } 706 /* Divide by two, because 0.5 = 1 pixel. */ 707 si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX, 708 S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) | 709 S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2))); 710 711 tmp = (unsigned)state->line_width * 8; 712 si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp)); 713 si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0, 714 S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | 715 S_028A48_MSAA_ENABLE(state->multisample || 716 state->poly_smooth || 717 state->line_smooth) | 718 S_028A48_VPORT_SCISSOR_ENABLE(state->scissor)); 719 720 si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL, 721 S_028BE4_PIX_CENTER(state->half_pixel_center) | 722 S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH)); 723 724 si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp)); 725 si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL, 726 S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) | 727 S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | 728 S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | 729 S_028814_FACE(!state->front_ccw) | 730 S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) | 731 S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) | 732 S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) | 733 S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL || 734 state->fill_back != PIPE_POLYGON_MODE_FILL) | 735 S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) | 736 S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back))); 737 return rs; 738} 739 740static void si_bind_rs_state(struct pipe_context *ctx, void *state) 741{ 742 struct si_context *sctx = (struct si_context *)ctx; 743 struct si_state_rasterizer *old_rs = 744 (struct si_state_rasterizer*)sctx->queued.named.rasterizer; 745 struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; 746 747 if (state == NULL) 748 return; 749 750 if (sctx->framebuffer.nr_samples > 1 && 751 (!old_rs || old_rs->multisample_enable != rs->multisample_enable)) 752 si_mark_atom_dirty(sctx, &sctx->db_render_state); 753 754 si_pm4_bind_state(sctx, rasterizer, rs); 755 si_update_fb_rs_state(sctx); 756 757 si_mark_atom_dirty(sctx, &sctx->clip_regs); 758} 759 760static void si_delete_rs_state(struct pipe_context *ctx, void *state) 761{ 762 struct si_context *sctx = (struct si_context *)ctx; 763 si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state); 764} 765 766/* 767 * infeered state between dsa and stencil ref 768 */ 769static void si_update_dsa_stencil_ref(struct si_context *sctx) 770{ 771 struct si_pm4_state *pm4; 772 struct pipe_stencil_ref *ref = &sctx->stencil_ref; 773 struct si_state_dsa *dsa = sctx->queued.named.dsa; 774 775 if (!dsa) 776 return; 777 778 pm4 = CALLOC_STRUCT(si_pm4_state); 779 if (pm4 == NULL) 780 return; 781 782 si_pm4_set_reg(pm4, R_028430_DB_STENCILREFMASK, 783 S_028430_STENCILTESTVAL(ref->ref_value[0]) | 784 S_028430_STENCILMASK(dsa->valuemask[0]) | 785 S_028430_STENCILWRITEMASK(dsa->writemask[0]) | 786 S_028430_STENCILOPVAL(1)); 787 si_pm4_set_reg(pm4, R_028434_DB_STENCILREFMASK_BF, 788 S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) | 789 S_028434_STENCILMASK_BF(dsa->valuemask[1]) | 790 S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) | 791 S_028434_STENCILOPVAL_BF(1)); 792 793 si_pm4_set_state(sctx, dsa_stencil_ref, pm4); 794} 795 796static void si_set_pipe_stencil_ref(struct pipe_context *ctx, 797 const struct pipe_stencil_ref *state) 798{ 799 struct si_context *sctx = (struct si_context *)ctx; 800 sctx->stencil_ref = *state; 801 si_update_dsa_stencil_ref(sctx); 802} 803 804 805/* 806 * DSA 807 */ 808 809static uint32_t si_translate_stencil_op(int s_op) 810{ 811 switch (s_op) { 812 case PIPE_STENCIL_OP_KEEP: 813 return V_02842C_STENCIL_KEEP; 814 case PIPE_STENCIL_OP_ZERO: 815 return V_02842C_STENCIL_ZERO; 816 case PIPE_STENCIL_OP_REPLACE: 817 return V_02842C_STENCIL_REPLACE_TEST; 818 case PIPE_STENCIL_OP_INCR: 819 return V_02842C_STENCIL_ADD_CLAMP; 820 case PIPE_STENCIL_OP_DECR: 821 return V_02842C_STENCIL_SUB_CLAMP; 822 case PIPE_STENCIL_OP_INCR_WRAP: 823 return V_02842C_STENCIL_ADD_WRAP; 824 case PIPE_STENCIL_OP_DECR_WRAP: 825 return V_02842C_STENCIL_SUB_WRAP; 826 case PIPE_STENCIL_OP_INVERT: 827 return V_02842C_STENCIL_INVERT; 828 default: 829 R600_ERR("Unknown stencil op %d", s_op); 830 assert(0); 831 break; 832 } 833 return 0; 834} 835 836static void *si_create_dsa_state(struct pipe_context *ctx, 837 const struct pipe_depth_stencil_alpha_state *state) 838{ 839 struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa); 840 struct si_pm4_state *pm4 = &dsa->pm4; 841 unsigned db_depth_control; 842 uint32_t db_stencil_control = 0; 843 844 if (dsa == NULL) { 845 return NULL; 846 } 847 848 dsa->valuemask[0] = state->stencil[0].valuemask; 849 dsa->valuemask[1] = state->stencil[1].valuemask; 850 dsa->writemask[0] = state->stencil[0].writemask; 851 dsa->writemask[1] = state->stencil[1].writemask; 852 853 db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | 854 S_028800_Z_WRITE_ENABLE(state->depth.writemask) | 855 S_028800_ZFUNC(state->depth.func) | 856 S_028800_DEPTH_BOUNDS_ENABLE(state->depth.bounds_test); 857 858 /* stencil */ 859 if (state->stencil[0].enabled) { 860 db_depth_control |= S_028800_STENCIL_ENABLE(1); 861 db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func); 862 db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op)); 863 db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op)); 864 db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op)); 865 866 if (state->stencil[1].enabled) { 867 db_depth_control |= S_028800_BACKFACE_ENABLE(1); 868 db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func); 869 db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op)); 870 db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op)); 871 db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op)); 872 } 873 } 874 875 /* alpha */ 876 if (state->alpha.enabled) { 877 dsa->alpha_func = state->alpha.func; 878 879 si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 + 880 SI_SGPR_ALPHA_REF * 4, fui(state->alpha.ref_value)); 881 } else { 882 dsa->alpha_func = PIPE_FUNC_ALWAYS; 883 } 884 885 si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control); 886 si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); 887 if (state->depth.bounds_test) { 888 si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min)); 889 si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max)); 890 } 891 892 return dsa; 893} 894 895static void si_bind_dsa_state(struct pipe_context *ctx, void *state) 896{ 897 struct si_context *sctx = (struct si_context *)ctx; 898 struct si_state_dsa *dsa = state; 899 900 if (state == NULL) 901 return; 902 903 si_pm4_bind_state(sctx, dsa, dsa); 904 si_update_dsa_stencil_ref(sctx); 905} 906 907static void si_delete_dsa_state(struct pipe_context *ctx, void *state) 908{ 909 struct si_context *sctx = (struct si_context *)ctx; 910 si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state); 911} 912 913static void *si_create_db_flush_dsa(struct si_context *sctx) 914{ 915 struct pipe_depth_stencil_alpha_state dsa = {}; 916 917 return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa); 918} 919 920/* DB RENDER STATE */ 921 922static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable) 923{ 924 struct si_context *sctx = (struct si_context*)ctx; 925 926 si_mark_atom_dirty(sctx, &sctx->db_render_state); 927} 928 929static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state) 930{ 931 struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; 932 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 933 unsigned db_shader_control; 934 935 r600_write_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2); 936 937 /* DB_RENDER_CONTROL */ 938 if (sctx->dbcb_depth_copy_enabled || 939 sctx->dbcb_stencil_copy_enabled) { 940 radeon_emit(cs, 941 S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) | 942 S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) | 943 S_028000_COPY_CENTROID(1) | 944 S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample)); 945 } else if (sctx->db_inplace_flush_enabled) { 946 radeon_emit(cs, 947 S_028000_DEPTH_COMPRESS_DISABLE(1) | 948 S_028000_STENCIL_COMPRESS_DISABLE(1)); 949 } else if (sctx->db_depth_clear) { 950 radeon_emit(cs, S_028000_DEPTH_CLEAR_ENABLE(1)); 951 } else { 952 radeon_emit(cs, 0); 953 } 954 955 /* DB_COUNT_CONTROL (occlusion queries) */ 956 if (sctx->b.num_occlusion_queries > 0) { 957 if (sctx->b.chip_class >= CIK) { 958 radeon_emit(cs, 959 S_028004_PERFECT_ZPASS_COUNTS(1) | 960 S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) | 961 S_028004_ZPASS_ENABLE(1) | 962 S_028004_SLICE_EVEN_ENABLE(1) | 963 S_028004_SLICE_ODD_ENABLE(1)); 964 } else { 965 radeon_emit(cs, 966 S_028004_PERFECT_ZPASS_COUNTS(1) | 967 S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples)); 968 } 969 } else { 970 /* Disable occlusion queries. */ 971 if (sctx->b.chip_class >= CIK) { 972 radeon_emit(cs, 0); 973 } else { 974 radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1)); 975 } 976 } 977 978 /* DB_RENDER_OVERRIDE2 */ 979 if (sctx->db_depth_disable_expclear) { 980 r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 981 S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(1)); 982 } else { 983 r600_write_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 0); 984 } 985 986 db_shader_control = S_02880C_ALPHA_TO_MASK_DISABLE(sctx->framebuffer.cb0_is_integer) | 987 sctx->ps_db_shader_control; 988 989 /* Bug workaround for smoothing (overrasterization) on SI. */ 990 if (sctx->b.chip_class == SI && sctx->smoothing_enabled) 991 db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z); 992 else 993 db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); 994 995 /* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */ 996 if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable)) 997 db_shader_control &= C_02880C_MASK_EXPORT_ENABLE; 998 999 r600_write_context_reg(cs, R_02880C_DB_SHADER_CONTROL, 1000 db_shader_control); 1001} 1002 1003/* 1004 * format translation 1005 */ 1006static uint32_t si_translate_colorformat(enum pipe_format format) 1007{ 1008 const struct util_format_description *desc = util_format_description(format); 1009 1010#define HAS_SIZE(x,y,z,w) \ 1011 (desc->channel[0].size == (x) && desc->channel[1].size == (y) && \ 1012 desc->channel[2].size == (z) && desc->channel[3].size == (w)) 1013 1014 if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */ 1015 return V_028C70_COLOR_10_11_11; 1016 1017 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) 1018 return V_028C70_COLOR_INVALID; 1019 1020 switch (desc->nr_channels) { 1021 case 1: 1022 switch (desc->channel[0].size) { 1023 case 8: 1024 return V_028C70_COLOR_8; 1025 case 16: 1026 return V_028C70_COLOR_16; 1027 case 32: 1028 return V_028C70_COLOR_32; 1029 } 1030 break; 1031 case 2: 1032 if (desc->channel[0].size == desc->channel[1].size) { 1033 switch (desc->channel[0].size) { 1034 case 8: 1035 return V_028C70_COLOR_8_8; 1036 case 16: 1037 return V_028C70_COLOR_16_16; 1038 case 32: 1039 return V_028C70_COLOR_32_32; 1040 } 1041 } else if (HAS_SIZE(8,24,0,0)) { 1042 return V_028C70_COLOR_24_8; 1043 } else if (HAS_SIZE(24,8,0,0)) { 1044 return V_028C70_COLOR_8_24; 1045 } 1046 break; 1047 case 3: 1048 if (HAS_SIZE(5,6,5,0)) { 1049 return V_028C70_COLOR_5_6_5; 1050 } else if (HAS_SIZE(32,8,24,0)) { 1051 return V_028C70_COLOR_X24_8_32_FLOAT; 1052 } 1053 break; 1054 case 4: 1055 if (desc->channel[0].size == desc->channel[1].size && 1056 desc->channel[0].size == desc->channel[2].size && 1057 desc->channel[0].size == desc->channel[3].size) { 1058 switch (desc->channel[0].size) { 1059 case 4: 1060 return V_028C70_COLOR_4_4_4_4; 1061 case 8: 1062 return V_028C70_COLOR_8_8_8_8; 1063 case 16: 1064 return V_028C70_COLOR_16_16_16_16; 1065 case 32: 1066 return V_028C70_COLOR_32_32_32_32; 1067 } 1068 } else if (HAS_SIZE(5,5,5,1)) { 1069 return V_028C70_COLOR_1_5_5_5; 1070 } else if (HAS_SIZE(10,10,10,2)) { 1071 return V_028C70_COLOR_2_10_10_10; 1072 } 1073 break; 1074 } 1075 return V_028C70_COLOR_INVALID; 1076} 1077 1078static uint32_t si_colorformat_endian_swap(uint32_t colorformat) 1079{ 1080 if (SI_BIG_ENDIAN) { 1081 switch(colorformat) { 1082 /* 8-bit buffers. */ 1083 case V_028C70_COLOR_8: 1084 return V_028C70_ENDIAN_NONE; 1085 1086 /* 16-bit buffers. */ 1087 case V_028C70_COLOR_5_6_5: 1088 case V_028C70_COLOR_1_5_5_5: 1089 case V_028C70_COLOR_4_4_4_4: 1090 case V_028C70_COLOR_16: 1091 case V_028C70_COLOR_8_8: 1092 return V_028C70_ENDIAN_8IN16; 1093 1094 /* 32-bit buffers. */ 1095 case V_028C70_COLOR_8_8_8_8: 1096 case V_028C70_COLOR_2_10_10_10: 1097 case V_028C70_COLOR_8_24: 1098 case V_028C70_COLOR_24_8: 1099 case V_028C70_COLOR_16_16: 1100 return V_028C70_ENDIAN_8IN32; 1101 1102 /* 64-bit buffers. */ 1103 case V_028C70_COLOR_16_16_16_16: 1104 return V_028C70_ENDIAN_8IN16; 1105 1106 case V_028C70_COLOR_32_32: 1107 return V_028C70_ENDIAN_8IN32; 1108 1109 /* 128-bit buffers. */ 1110 case V_028C70_COLOR_32_32_32_32: 1111 return V_028C70_ENDIAN_8IN32; 1112 default: 1113 return V_028C70_ENDIAN_NONE; /* Unsupported. */ 1114 } 1115 } else { 1116 return V_028C70_ENDIAN_NONE; 1117 } 1118} 1119 1120/* Returns the size in bits of the widest component of a CB format */ 1121static unsigned si_colorformat_max_comp_size(uint32_t colorformat) 1122{ 1123 switch(colorformat) { 1124 case V_028C70_COLOR_4_4_4_4: 1125 return 4; 1126 1127 case V_028C70_COLOR_1_5_5_5: 1128 case V_028C70_COLOR_5_5_5_1: 1129 return 5; 1130 1131 case V_028C70_COLOR_5_6_5: 1132 return 6; 1133 1134 case V_028C70_COLOR_8: 1135 case V_028C70_COLOR_8_8: 1136 case V_028C70_COLOR_8_8_8_8: 1137 return 8; 1138 1139 case V_028C70_COLOR_10_10_10_2: 1140 case V_028C70_COLOR_2_10_10_10: 1141 return 10; 1142 1143 case V_028C70_COLOR_10_11_11: 1144 case V_028C70_COLOR_11_11_10: 1145 return 11; 1146 1147 case V_028C70_COLOR_16: 1148 case V_028C70_COLOR_16_16: 1149 case V_028C70_COLOR_16_16_16_16: 1150 return 16; 1151 1152 case V_028C70_COLOR_8_24: 1153 case V_028C70_COLOR_24_8: 1154 return 24; 1155 1156 case V_028C70_COLOR_32: 1157 case V_028C70_COLOR_32_32: 1158 case V_028C70_COLOR_32_32_32_32: 1159 case V_028C70_COLOR_X24_8_32_FLOAT: 1160 return 32; 1161 } 1162 1163 assert(!"Unknown maximum component size"); 1164 return 0; 1165} 1166 1167static uint32_t si_translate_dbformat(enum pipe_format format) 1168{ 1169 switch (format) { 1170 case PIPE_FORMAT_Z16_UNORM: 1171 return V_028040_Z_16; 1172 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1173 case PIPE_FORMAT_X8Z24_UNORM: 1174 case PIPE_FORMAT_Z24X8_UNORM: 1175 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1176 return V_028040_Z_24; /* deprecated on SI */ 1177 case PIPE_FORMAT_Z32_FLOAT: 1178 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1179 return V_028040_Z_32_FLOAT; 1180 default: 1181 return V_028040_Z_INVALID; 1182 } 1183} 1184 1185/* 1186 * Texture translation 1187 */ 1188 1189static uint32_t si_translate_texformat(struct pipe_screen *screen, 1190 enum pipe_format format, 1191 const struct util_format_description *desc, 1192 int first_non_void) 1193{ 1194 struct si_screen *sscreen = (struct si_screen*)screen; 1195 bool enable_compressed_formats = (sscreen->b.info.drm_major == 2 && 1196 sscreen->b.info.drm_minor >= 31) || 1197 sscreen->b.info.drm_major == 3; 1198 boolean uniform = TRUE; 1199 int i; 1200 1201 /* Colorspace (return non-RGB formats directly). */ 1202 switch (desc->colorspace) { 1203 /* Depth stencil formats */ 1204 case UTIL_FORMAT_COLORSPACE_ZS: 1205 switch (format) { 1206 case PIPE_FORMAT_Z16_UNORM: 1207 return V_008F14_IMG_DATA_FORMAT_16; 1208 case PIPE_FORMAT_X24S8_UINT: 1209 case PIPE_FORMAT_Z24X8_UNORM: 1210 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1211 return V_008F14_IMG_DATA_FORMAT_8_24; 1212 case PIPE_FORMAT_X8Z24_UNORM: 1213 case PIPE_FORMAT_S8X24_UINT: 1214 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1215 return V_008F14_IMG_DATA_FORMAT_24_8; 1216 case PIPE_FORMAT_S8_UINT: 1217 return V_008F14_IMG_DATA_FORMAT_8; 1218 case PIPE_FORMAT_Z32_FLOAT: 1219 return V_008F14_IMG_DATA_FORMAT_32; 1220 case PIPE_FORMAT_X32_S8X24_UINT: 1221 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1222 return V_008F14_IMG_DATA_FORMAT_X24_8_32; 1223 default: 1224 goto out_unknown; 1225 } 1226 1227 case UTIL_FORMAT_COLORSPACE_YUV: 1228 goto out_unknown; /* TODO */ 1229 1230 case UTIL_FORMAT_COLORSPACE_SRGB: 1231 if (desc->nr_channels != 4 && desc->nr_channels != 1) 1232 goto out_unknown; 1233 break; 1234 1235 default: 1236 break; 1237 } 1238 1239 if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { 1240 if (!enable_compressed_formats) 1241 goto out_unknown; 1242 1243 switch (format) { 1244 case PIPE_FORMAT_RGTC1_SNORM: 1245 case PIPE_FORMAT_LATC1_SNORM: 1246 case PIPE_FORMAT_RGTC1_UNORM: 1247 case PIPE_FORMAT_LATC1_UNORM: 1248 return V_008F14_IMG_DATA_FORMAT_BC4; 1249 case PIPE_FORMAT_RGTC2_SNORM: 1250 case PIPE_FORMAT_LATC2_SNORM: 1251 case PIPE_FORMAT_RGTC2_UNORM: 1252 case PIPE_FORMAT_LATC2_UNORM: 1253 return V_008F14_IMG_DATA_FORMAT_BC5; 1254 default: 1255 goto out_unknown; 1256 } 1257 } 1258 1259 if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { 1260 if (!enable_compressed_formats) 1261 goto out_unknown; 1262 1263 switch (format) { 1264 case PIPE_FORMAT_BPTC_RGBA_UNORM: 1265 case PIPE_FORMAT_BPTC_SRGBA: 1266 return V_008F14_IMG_DATA_FORMAT_BC7; 1267 case PIPE_FORMAT_BPTC_RGB_FLOAT: 1268 case PIPE_FORMAT_BPTC_RGB_UFLOAT: 1269 return V_008F14_IMG_DATA_FORMAT_BC6; 1270 default: 1271 goto out_unknown; 1272 } 1273 } 1274 1275 if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 1276 switch (format) { 1277 case PIPE_FORMAT_R8G8_B8G8_UNORM: 1278 case PIPE_FORMAT_G8R8_B8R8_UNORM: 1279 return V_008F14_IMG_DATA_FORMAT_GB_GR; 1280 case PIPE_FORMAT_G8R8_G8B8_UNORM: 1281 case PIPE_FORMAT_R8G8_R8B8_UNORM: 1282 return V_008F14_IMG_DATA_FORMAT_BG_RG; 1283 default: 1284 goto out_unknown; 1285 } 1286 } 1287 1288 if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { 1289 if (!enable_compressed_formats) 1290 goto out_unknown; 1291 1292 if (!util_format_s3tc_enabled) { 1293 goto out_unknown; 1294 } 1295 1296 switch (format) { 1297 case PIPE_FORMAT_DXT1_RGB: 1298 case PIPE_FORMAT_DXT1_RGBA: 1299 case PIPE_FORMAT_DXT1_SRGB: 1300 case PIPE_FORMAT_DXT1_SRGBA: 1301 return V_008F14_IMG_DATA_FORMAT_BC1; 1302 case PIPE_FORMAT_DXT3_RGBA: 1303 case PIPE_FORMAT_DXT3_SRGBA: 1304 return V_008F14_IMG_DATA_FORMAT_BC2; 1305 case PIPE_FORMAT_DXT5_RGBA: 1306 case PIPE_FORMAT_DXT5_SRGBA: 1307 return V_008F14_IMG_DATA_FORMAT_BC3; 1308 default: 1309 goto out_unknown; 1310 } 1311 } 1312 1313 if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) { 1314 return V_008F14_IMG_DATA_FORMAT_5_9_9_9; 1315 } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) { 1316 return V_008F14_IMG_DATA_FORMAT_10_11_11; 1317 } 1318 1319 /* R8G8Bx_SNORM - TODO CxV8U8 */ 1320 1321 /* See whether the components are of the same size. */ 1322 for (i = 1; i < desc->nr_channels; i++) { 1323 uniform = uniform && desc->channel[0].size == desc->channel[i].size; 1324 } 1325 1326 /* Non-uniform formats. */ 1327 if (!uniform) { 1328 switch(desc->nr_channels) { 1329 case 3: 1330 if (desc->channel[0].size == 5 && 1331 desc->channel[1].size == 6 && 1332 desc->channel[2].size == 5) { 1333 return V_008F14_IMG_DATA_FORMAT_5_6_5; 1334 } 1335 goto out_unknown; 1336 case 4: 1337 if (desc->channel[0].size == 5 && 1338 desc->channel[1].size == 5 && 1339 desc->channel[2].size == 5 && 1340 desc->channel[3].size == 1) { 1341 return V_008F14_IMG_DATA_FORMAT_1_5_5_5; 1342 } 1343 if (desc->channel[0].size == 10 && 1344 desc->channel[1].size == 10 && 1345 desc->channel[2].size == 10 && 1346 desc->channel[3].size == 2) { 1347 return V_008F14_IMG_DATA_FORMAT_2_10_10_10; 1348 } 1349 goto out_unknown; 1350 } 1351 goto out_unknown; 1352 } 1353 1354 if (first_non_void < 0 || first_non_void > 3) 1355 goto out_unknown; 1356 1357 /* uniform formats */ 1358 switch (desc->channel[first_non_void].size) { 1359 case 4: 1360 switch (desc->nr_channels) { 1361#if 0 /* Not supported for render targets */ 1362 case 2: 1363 return V_008F14_IMG_DATA_FORMAT_4_4; 1364#endif 1365 case 4: 1366 return V_008F14_IMG_DATA_FORMAT_4_4_4_4; 1367 } 1368 break; 1369 case 8: 1370 switch (desc->nr_channels) { 1371 case 1: 1372 return V_008F14_IMG_DATA_FORMAT_8; 1373 case 2: 1374 return V_008F14_IMG_DATA_FORMAT_8_8; 1375 case 4: 1376 return V_008F14_IMG_DATA_FORMAT_8_8_8_8; 1377 } 1378 break; 1379 case 16: 1380 switch (desc->nr_channels) { 1381 case 1: 1382 return V_008F14_IMG_DATA_FORMAT_16; 1383 case 2: 1384 return V_008F14_IMG_DATA_FORMAT_16_16; 1385 case 4: 1386 return V_008F14_IMG_DATA_FORMAT_16_16_16_16; 1387 } 1388 break; 1389 case 32: 1390 switch (desc->nr_channels) { 1391 case 1: 1392 return V_008F14_IMG_DATA_FORMAT_32; 1393 case 2: 1394 return V_008F14_IMG_DATA_FORMAT_32_32; 1395#if 0 /* Not supported for render targets */ 1396 case 3: 1397 return V_008F14_IMG_DATA_FORMAT_32_32_32; 1398#endif 1399 case 4: 1400 return V_008F14_IMG_DATA_FORMAT_32_32_32_32; 1401 } 1402 } 1403 1404out_unknown: 1405 /* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */ 1406 return ~0; 1407} 1408 1409static unsigned si_tex_wrap(unsigned wrap) 1410{ 1411 switch (wrap) { 1412 default: 1413 case PIPE_TEX_WRAP_REPEAT: 1414 return V_008F30_SQ_TEX_WRAP; 1415 case PIPE_TEX_WRAP_CLAMP: 1416 return V_008F30_SQ_TEX_CLAMP_HALF_BORDER; 1417 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 1418 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL; 1419 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 1420 return V_008F30_SQ_TEX_CLAMP_BORDER; 1421 case PIPE_TEX_WRAP_MIRROR_REPEAT: 1422 return V_008F30_SQ_TEX_MIRROR; 1423 case PIPE_TEX_WRAP_MIRROR_CLAMP: 1424 return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER; 1425 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 1426 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; 1427 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 1428 return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER; 1429 } 1430} 1431 1432static unsigned si_tex_filter(unsigned filter) 1433{ 1434 switch (filter) { 1435 default: 1436 case PIPE_TEX_FILTER_NEAREST: 1437 return V_008F38_SQ_TEX_XY_FILTER_POINT; 1438 case PIPE_TEX_FILTER_LINEAR: 1439 return V_008F38_SQ_TEX_XY_FILTER_BILINEAR; 1440 } 1441} 1442 1443static unsigned si_tex_mipfilter(unsigned filter) 1444{ 1445 switch (filter) { 1446 case PIPE_TEX_MIPFILTER_NEAREST: 1447 return V_008F38_SQ_TEX_Z_FILTER_POINT; 1448 case PIPE_TEX_MIPFILTER_LINEAR: 1449 return V_008F38_SQ_TEX_Z_FILTER_LINEAR; 1450 default: 1451 case PIPE_TEX_MIPFILTER_NONE: 1452 return V_008F38_SQ_TEX_Z_FILTER_NONE; 1453 } 1454} 1455 1456static unsigned si_tex_compare(unsigned compare) 1457{ 1458 switch (compare) { 1459 default: 1460 case PIPE_FUNC_NEVER: 1461 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER; 1462 case PIPE_FUNC_LESS: 1463 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS; 1464 case PIPE_FUNC_EQUAL: 1465 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL; 1466 case PIPE_FUNC_LEQUAL: 1467 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; 1468 case PIPE_FUNC_GREATER: 1469 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER; 1470 case PIPE_FUNC_NOTEQUAL: 1471 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; 1472 case PIPE_FUNC_GEQUAL: 1473 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; 1474 case PIPE_FUNC_ALWAYS: 1475 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS; 1476 } 1477} 1478 1479static unsigned si_tex_dim(unsigned dim, unsigned nr_samples) 1480{ 1481 switch (dim) { 1482 default: 1483 case PIPE_TEXTURE_1D: 1484 return V_008F1C_SQ_RSRC_IMG_1D; 1485 case PIPE_TEXTURE_1D_ARRAY: 1486 return V_008F1C_SQ_RSRC_IMG_1D_ARRAY; 1487 case PIPE_TEXTURE_2D: 1488 case PIPE_TEXTURE_RECT: 1489 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA : 1490 V_008F1C_SQ_RSRC_IMG_2D; 1491 case PIPE_TEXTURE_2D_ARRAY: 1492 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : 1493 V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 1494 case PIPE_TEXTURE_3D: 1495 return V_008F1C_SQ_RSRC_IMG_3D; 1496 case PIPE_TEXTURE_CUBE: 1497 case PIPE_TEXTURE_CUBE_ARRAY: 1498 return V_008F1C_SQ_RSRC_IMG_CUBE; 1499 } 1500} 1501 1502/* 1503 * Format support testing 1504 */ 1505 1506static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) 1507{ 1508 return si_translate_texformat(screen, format, util_format_description(format), 1509 util_format_get_first_non_void_channel(format)) != ~0U; 1510} 1511 1512static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen, 1513 const struct util_format_description *desc, 1514 int first_non_void) 1515{ 1516 unsigned type = desc->channel[first_non_void].type; 1517 int i; 1518 1519 if (type == UTIL_FORMAT_TYPE_FIXED) 1520 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1521 1522 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 1523 return V_008F0C_BUF_DATA_FORMAT_10_11_11; 1524 1525 if (desc->nr_channels == 4 && 1526 desc->channel[0].size == 10 && 1527 desc->channel[1].size == 10 && 1528 desc->channel[2].size == 10 && 1529 desc->channel[3].size == 2) 1530 return V_008F0C_BUF_DATA_FORMAT_2_10_10_10; 1531 1532 /* See whether the components are of the same size. */ 1533 for (i = 0; i < desc->nr_channels; i++) { 1534 if (desc->channel[first_non_void].size != desc->channel[i].size) 1535 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1536 } 1537 1538 switch (desc->channel[first_non_void].size) { 1539 case 8: 1540 switch (desc->nr_channels) { 1541 case 1: 1542 return V_008F0C_BUF_DATA_FORMAT_8; 1543 case 2: 1544 return V_008F0C_BUF_DATA_FORMAT_8_8; 1545 case 3: 1546 case 4: 1547 return V_008F0C_BUF_DATA_FORMAT_8_8_8_8; 1548 } 1549 break; 1550 case 16: 1551 switch (desc->nr_channels) { 1552 case 1: 1553 return V_008F0C_BUF_DATA_FORMAT_16; 1554 case 2: 1555 return V_008F0C_BUF_DATA_FORMAT_16_16; 1556 case 3: 1557 case 4: 1558 return V_008F0C_BUF_DATA_FORMAT_16_16_16_16; 1559 } 1560 break; 1561 case 32: 1562 /* From the Southern Islands ISA documentation about MTBUF: 1563 * 'Memory reads of data in memory that is 32 or 64 bits do not 1564 * undergo any format conversion.' 1565 */ 1566 if (type != UTIL_FORMAT_TYPE_FLOAT && 1567 !desc->channel[first_non_void].pure_integer) 1568 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1569 1570 switch (desc->nr_channels) { 1571 case 1: 1572 return V_008F0C_BUF_DATA_FORMAT_32; 1573 case 2: 1574 return V_008F0C_BUF_DATA_FORMAT_32_32; 1575 case 3: 1576 return V_008F0C_BUF_DATA_FORMAT_32_32_32; 1577 case 4: 1578 return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 1579 } 1580 break; 1581 } 1582 1583 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1584} 1585 1586static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen, 1587 const struct util_format_description *desc, 1588 int first_non_void) 1589{ 1590 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 1591 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 1592 1593 switch (desc->channel[first_non_void].type) { 1594 case UTIL_FORMAT_TYPE_SIGNED: 1595 if (desc->channel[first_non_void].normalized) 1596 return V_008F0C_BUF_NUM_FORMAT_SNORM; 1597 else if (desc->channel[first_non_void].pure_integer) 1598 return V_008F0C_BUF_NUM_FORMAT_SINT; 1599 else 1600 return V_008F0C_BUF_NUM_FORMAT_SSCALED; 1601 break; 1602 case UTIL_FORMAT_TYPE_UNSIGNED: 1603 if (desc->channel[first_non_void].normalized) 1604 return V_008F0C_BUF_NUM_FORMAT_UNORM; 1605 else if (desc->channel[first_non_void].pure_integer) 1606 return V_008F0C_BUF_NUM_FORMAT_UINT; 1607 else 1608 return V_008F0C_BUF_NUM_FORMAT_USCALED; 1609 break; 1610 case UTIL_FORMAT_TYPE_FLOAT: 1611 default: 1612 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 1613 } 1614} 1615 1616static bool si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format) 1617{ 1618 const struct util_format_description *desc; 1619 int first_non_void; 1620 unsigned data_format; 1621 1622 desc = util_format_description(format); 1623 first_non_void = util_format_get_first_non_void_channel(format); 1624 data_format = si_translate_buffer_dataformat(screen, desc, first_non_void); 1625 return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID; 1626} 1627 1628static bool si_is_colorbuffer_format_supported(enum pipe_format format) 1629{ 1630 return si_translate_colorformat(format) != V_028C70_COLOR_INVALID && 1631 r600_translate_colorswap(format) != ~0U; 1632} 1633 1634static bool si_is_zs_format_supported(enum pipe_format format) 1635{ 1636 return si_translate_dbformat(format) != V_028040_Z_INVALID; 1637} 1638 1639boolean si_is_format_supported(struct pipe_screen *screen, 1640 enum pipe_format format, 1641 enum pipe_texture_target target, 1642 unsigned sample_count, 1643 unsigned usage) 1644{ 1645 unsigned retval = 0; 1646 1647 if (target >= PIPE_MAX_TEXTURE_TYPES) { 1648 R600_ERR("r600: unsupported texture type %d\n", target); 1649 return FALSE; 1650 } 1651 1652 if (!util_format_is_supported(format, usage)) 1653 return FALSE; 1654 1655 if (sample_count > 1) { 1656 if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE)) 1657 return FALSE; 1658 1659 switch (sample_count) { 1660 case 2: 1661 case 4: 1662 case 8: 1663 break; 1664 default: 1665 return FALSE; 1666 } 1667 } 1668 1669 if (usage & PIPE_BIND_SAMPLER_VIEW) { 1670 if (target == PIPE_BUFFER) { 1671 if (si_is_vertex_format_supported(screen, format)) 1672 retval |= PIPE_BIND_SAMPLER_VIEW; 1673 } else { 1674 if (si_is_sampler_format_supported(screen, format)) 1675 retval |= PIPE_BIND_SAMPLER_VIEW; 1676 } 1677 } 1678 1679 if ((usage & (PIPE_BIND_RENDER_TARGET | 1680 PIPE_BIND_DISPLAY_TARGET | 1681 PIPE_BIND_SCANOUT | 1682 PIPE_BIND_SHARED | 1683 PIPE_BIND_BLENDABLE)) && 1684 si_is_colorbuffer_format_supported(format)) { 1685 retval |= usage & 1686 (PIPE_BIND_RENDER_TARGET | 1687 PIPE_BIND_DISPLAY_TARGET | 1688 PIPE_BIND_SCANOUT | 1689 PIPE_BIND_SHARED); 1690 if (!util_format_is_pure_integer(format) && 1691 !util_format_is_depth_or_stencil(format)) 1692 retval |= usage & PIPE_BIND_BLENDABLE; 1693 } 1694 1695 if ((usage & PIPE_BIND_DEPTH_STENCIL) && 1696 si_is_zs_format_supported(format)) { 1697 retval |= PIPE_BIND_DEPTH_STENCIL; 1698 } 1699 1700 if ((usage & PIPE_BIND_VERTEX_BUFFER) && 1701 si_is_vertex_format_supported(screen, format)) { 1702 retval |= PIPE_BIND_VERTEX_BUFFER; 1703 } 1704 1705 if (usage & PIPE_BIND_TRANSFER_READ) 1706 retval |= PIPE_BIND_TRANSFER_READ; 1707 if (usage & PIPE_BIND_TRANSFER_WRITE) 1708 retval |= PIPE_BIND_TRANSFER_WRITE; 1709 1710 return retval == usage; 1711} 1712 1713unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil) 1714{ 1715 unsigned tile_mode_index = 0; 1716 1717 if (stencil) { 1718 tile_mode_index = rtex->surface.stencil_tiling_index[level]; 1719 } else { 1720 tile_mode_index = rtex->surface.tiling_index[level]; 1721 } 1722 return tile_mode_index; 1723} 1724 1725/* 1726 * framebuffer handling 1727 */ 1728 1729static void si_initialize_color_surface(struct si_context *sctx, 1730 struct r600_surface *surf) 1731{ 1732 struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; 1733 unsigned level = surf->base.u.tex.level; 1734 uint64_t offset = rtex->surface.level[level].offset; 1735 unsigned pitch, slice; 1736 unsigned color_info, color_attrib, color_pitch, color_view; 1737 unsigned tile_mode_index; 1738 unsigned format, swap, ntype, endian; 1739 const struct util_format_description *desc; 1740 int i; 1741 unsigned blend_clamp = 0, blend_bypass = 0; 1742 unsigned max_comp_size; 1743 1744 /* Layered rendering doesn't work with LINEAR_GENERAL. 1745 * (LINEAR_ALIGNED and others work) */ 1746 if (rtex->surface.level[level].mode == RADEON_SURF_MODE_LINEAR) { 1747 assert(surf->base.u.tex.first_layer == surf->base.u.tex.last_layer); 1748 offset += rtex->surface.level[level].slice_size * 1749 surf->base.u.tex.first_layer; 1750 color_view = 0; 1751 } else { 1752 color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) | 1753 S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer); 1754 } 1755 1756 pitch = (rtex->surface.level[level].nblk_x) / 8 - 1; 1757 slice = (rtex->surface.level[level].nblk_x * rtex->surface.level[level].nblk_y) / 64; 1758 if (slice) { 1759 slice = slice - 1; 1760 } 1761 1762 tile_mode_index = si_tile_mode_index(rtex, level, false); 1763 1764 desc = util_format_description(surf->base.format); 1765 for (i = 0; i < 4; i++) { 1766 if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { 1767 break; 1768 } 1769 } 1770 if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) { 1771 ntype = V_028C70_NUMBER_FLOAT; 1772 } else { 1773 ntype = V_028C70_NUMBER_UNORM; 1774 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) 1775 ntype = V_028C70_NUMBER_SRGB; 1776 else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 1777 if (desc->channel[i].pure_integer) { 1778 ntype = V_028C70_NUMBER_SINT; 1779 } else { 1780 assert(desc->channel[i].normalized); 1781 ntype = V_028C70_NUMBER_SNORM; 1782 } 1783 } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { 1784 if (desc->channel[i].pure_integer) { 1785 ntype = V_028C70_NUMBER_UINT; 1786 } else { 1787 assert(desc->channel[i].normalized); 1788 ntype = V_028C70_NUMBER_UNORM; 1789 } 1790 } 1791 } 1792 1793 format = si_translate_colorformat(surf->base.format); 1794 if (format == V_028C70_COLOR_INVALID) { 1795 R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format); 1796 } 1797 assert(format != V_028C70_COLOR_INVALID); 1798 swap = r600_translate_colorswap(surf->base.format); 1799 if (rtex->resource.b.b.usage == PIPE_USAGE_STAGING) { 1800 endian = V_028C70_ENDIAN_NONE; 1801 } else { 1802 endian = si_colorformat_endian_swap(format); 1803 } 1804 1805 /* blend clamp should be set for all NORM/SRGB types */ 1806 if (ntype == V_028C70_NUMBER_UNORM || 1807 ntype == V_028C70_NUMBER_SNORM || 1808 ntype == V_028C70_NUMBER_SRGB) 1809 blend_clamp = 1; 1810 1811 /* set blend bypass according to docs if SINT/UINT or 1812 8/24 COLOR variants */ 1813 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT || 1814 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 || 1815 format == V_028C70_COLOR_X24_8_32_FLOAT) { 1816 blend_clamp = 0; 1817 blend_bypass = 1; 1818 } 1819 1820 color_info = S_028C70_FORMAT(format) | 1821 S_028C70_COMP_SWAP(swap) | 1822 S_028C70_BLEND_CLAMP(blend_clamp) | 1823 S_028C70_BLEND_BYPASS(blend_bypass) | 1824 S_028C70_NUMBER_TYPE(ntype) | 1825 S_028C70_ENDIAN(endian); 1826 1827 color_pitch = S_028C64_TILE_MAX(pitch); 1828 1829 color_attrib = S_028C74_TILE_MODE_INDEX(tile_mode_index) | 1830 S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1); 1831 1832 if (rtex->resource.b.b.nr_samples > 1) { 1833 unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples); 1834 1835 color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | 1836 S_028C74_NUM_FRAGMENTS(log_samples); 1837 1838 if (rtex->fmask.size) { 1839 color_info |= S_028C70_COMPRESSION(1); 1840 unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height); 1841 1842 color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(rtex->fmask.tile_mode_index); 1843 1844 if (sctx->b.chip_class == SI) { 1845 /* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */ 1846 color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); 1847 } 1848 if (sctx->b.chip_class >= CIK) { 1849 color_pitch |= S_028C64_FMASK_TILE_MAX(rtex->fmask.pitch / 8 - 1); 1850 } 1851 } 1852 } 1853 1854 offset += rtex->resource.gpu_address; 1855 1856 surf->cb_color_base = offset >> 8; 1857 surf->cb_color_pitch = color_pitch; 1858 surf->cb_color_slice = S_028C68_TILE_MAX(slice); 1859 surf->cb_color_view = color_view; 1860 surf->cb_color_info = color_info; 1861 surf->cb_color_attrib = color_attrib; 1862 1863 if (sctx->b.chip_class >= VI) 1864 surf->cb_dcc_control = S_028C78_OVERWRITE_COMBINER_DISABLE(1); 1865 1866 if (rtex->fmask.size) { 1867 surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8; 1868 surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max); 1869 } else { 1870 /* This must be set for fast clear to work without FMASK. */ 1871 surf->cb_color_fmask = surf->cb_color_base; 1872 surf->cb_color_fmask_slice = surf->cb_color_slice; 1873 surf->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index); 1874 1875 if (sctx->b.chip_class == SI) { 1876 unsigned bankh = util_logbase2(rtex->surface.bankh); 1877 surf->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh); 1878 } 1879 1880 if (sctx->b.chip_class >= CIK) { 1881 surf->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch); 1882 } 1883 } 1884 1885 /* Determine pixel shader export format */ 1886 max_comp_size = si_colorformat_max_comp_size(format); 1887 if (ntype == V_028C70_NUMBER_SRGB || 1888 ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) && 1889 max_comp_size <= 10) || 1890 (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) { 1891 surf->export_16bpc = true; 1892 } 1893 1894 surf->color_initialized = true; 1895} 1896 1897static void si_init_depth_surface(struct si_context *sctx, 1898 struct r600_surface *surf) 1899{ 1900 struct si_screen *sscreen = sctx->screen; 1901 struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; 1902 unsigned level = surf->base.u.tex.level; 1903 struct radeon_surf_level *levelinfo = &rtex->surface.level[level]; 1904 unsigned format, tile_mode_index, array_mode; 1905 unsigned macro_aspect, tile_split, stile_split, bankh, bankw, nbanks, pipe_config; 1906 uint32_t z_info, s_info, db_depth_info; 1907 uint64_t z_offs, s_offs; 1908 uint32_t db_htile_data_base, db_htile_surface, pa_su_poly_offset_db_fmt_cntl = 0; 1909 1910 switch (sctx->framebuffer.state.zsbuf->texture->format) { 1911 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1912 case PIPE_FORMAT_X8Z24_UNORM: 1913 case PIPE_FORMAT_Z24X8_UNORM: 1914 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1915 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24); 1916 break; 1917 case PIPE_FORMAT_Z32_FLOAT: 1918 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1919 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | 1920 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); 1921 break; 1922 case PIPE_FORMAT_Z16_UNORM: 1923 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16); 1924 break; 1925 default: 1926 assert(0); 1927 } 1928 1929 format = si_translate_dbformat(rtex->resource.b.b.format); 1930 1931 if (format == V_028040_Z_INVALID) { 1932 R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format); 1933 } 1934 assert(format != V_028040_Z_INVALID); 1935 1936 s_offs = z_offs = rtex->resource.gpu_address; 1937 z_offs += rtex->surface.level[level].offset; 1938 s_offs += rtex->surface.stencil_level[level].offset; 1939 1940 db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1); 1941 1942 z_info = S_028040_FORMAT(format); 1943 if (rtex->resource.b.b.nr_samples > 1) { 1944 z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples)); 1945 } 1946 1947 if (rtex->surface.flags & RADEON_SURF_SBUFFER) 1948 s_info = S_028044_FORMAT(V_028044_STENCIL_8); 1949 else 1950 s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID); 1951 1952 if (sctx->b.chip_class >= CIK) { 1953 switch (rtex->surface.level[level].mode) { 1954 case RADEON_SURF_MODE_2D: 1955 array_mode = V_02803C_ARRAY_2D_TILED_THIN1; 1956 break; 1957 case RADEON_SURF_MODE_1D: 1958 case RADEON_SURF_MODE_LINEAR_ALIGNED: 1959 case RADEON_SURF_MODE_LINEAR: 1960 default: 1961 array_mode = V_02803C_ARRAY_1D_TILED_THIN1; 1962 break; 1963 } 1964 tile_split = rtex->surface.tile_split; 1965 stile_split = rtex->surface.stencil_tile_split; 1966 macro_aspect = rtex->surface.mtilea; 1967 bankw = rtex->surface.bankw; 1968 bankh = rtex->surface.bankh; 1969 tile_split = cik_tile_split(tile_split); 1970 stile_split = cik_tile_split(stile_split); 1971 macro_aspect = cik_macro_tile_aspect(macro_aspect); 1972 bankw = cik_bank_wh(bankw); 1973 bankh = cik_bank_wh(bankh); 1974 nbanks = si_num_banks(sscreen, rtex); 1975 tile_mode_index = si_tile_mode_index(rtex, level, false); 1976 pipe_config = cik_db_pipe_config(sscreen, tile_mode_index); 1977 1978 db_depth_info |= S_02803C_ARRAY_MODE(array_mode) | 1979 S_02803C_PIPE_CONFIG(pipe_config) | 1980 S_02803C_BANK_WIDTH(bankw) | 1981 S_02803C_BANK_HEIGHT(bankh) | 1982 S_02803C_MACRO_TILE_ASPECT(macro_aspect) | 1983 S_02803C_NUM_BANKS(nbanks); 1984 z_info |= S_028040_TILE_SPLIT(tile_split); 1985 s_info |= S_028044_TILE_SPLIT(stile_split); 1986 } else { 1987 tile_mode_index = si_tile_mode_index(rtex, level, false); 1988 z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index); 1989 tile_mode_index = si_tile_mode_index(rtex, level, true); 1990 s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index); 1991 } 1992 1993 /* HiZ aka depth buffer htile */ 1994 /* use htile only for first level */ 1995 if (rtex->htile_buffer && !level) { 1996 z_info |= S_028040_TILE_SURFACE_ENABLE(1) | 1997 S_028040_ALLOW_EXPCLEAR(1); 1998 1999 /* Use all of the htile_buffer for depth, because we don't 2000 * use HTILE for stencil because of FAST_STENCIL_DISABLE. */ 2001 s_info |= S_028044_TILE_STENCIL_DISABLE(1); 2002 2003 uint64_t va = rtex->htile_buffer->gpu_address; 2004 db_htile_data_base = va >> 8; 2005 db_htile_surface = S_028ABC_FULL_CACHE(1); 2006 } else { 2007 db_htile_data_base = 0; 2008 db_htile_surface = 0; 2009 } 2010 2011 assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0); 2012 2013 surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) | 2014 S_028008_SLICE_MAX(surf->base.u.tex.last_layer); 2015 surf->db_htile_data_base = db_htile_data_base; 2016 surf->db_depth_info = db_depth_info; 2017 surf->db_z_info = z_info; 2018 surf->db_stencil_info = s_info; 2019 surf->db_depth_base = z_offs >> 8; 2020 surf->db_stencil_base = s_offs >> 8; 2021 surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) | 2022 S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1); 2023 surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x * 2024 levelinfo->nblk_y) / 64 - 1); 2025 surf->db_htile_surface = db_htile_surface; 2026 surf->pa_su_poly_offset_db_fmt_cntl = pa_su_poly_offset_db_fmt_cntl; 2027 2028 surf->depth_initialized = true; 2029} 2030 2031static void si_set_framebuffer_state(struct pipe_context *ctx, 2032 const struct pipe_framebuffer_state *state) 2033{ 2034 struct si_context *sctx = (struct si_context *)ctx; 2035 struct pipe_constant_buffer constbuf = {0}; 2036 struct r600_surface *surf = NULL; 2037 struct r600_texture *rtex; 2038 bool old_cb0_is_integer = sctx->framebuffer.cb0_is_integer; 2039 unsigned old_nr_samples = sctx->framebuffer.nr_samples; 2040 int i; 2041 2042 /* Only flush TC when changing the framebuffer state, because 2043 * the only client not using TC that can change textures is 2044 * the framebuffer. 2045 * 2046 * Flush all CB and DB caches here because all buffers can be used 2047 * for write by both TC (with shader image stores) and CB/DB. 2048 */ 2049 sctx->b.flags |= SI_CONTEXT_INV_TC_L1 | 2050 SI_CONTEXT_INV_TC_L2 | 2051 SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; 2052 2053 util_copy_framebuffer_state(&sctx->framebuffer.state, state); 2054 2055 sctx->framebuffer.export_16bpc = 0; 2056 sctx->framebuffer.compressed_cb_mask = 0; 2057 sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); 2058 sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples); 2059 sctx->framebuffer.cb0_is_integer = state->nr_cbufs && state->cbufs[0] && 2060 util_format_is_pure_integer(state->cbufs[0]->format); 2061 2062 if (sctx->framebuffer.cb0_is_integer != old_cb0_is_integer) 2063 si_mark_atom_dirty(sctx, &sctx->db_render_state); 2064 2065 for (i = 0; i < state->nr_cbufs; i++) { 2066 if (!state->cbufs[i]) 2067 continue; 2068 2069 surf = (struct r600_surface*)state->cbufs[i]; 2070 rtex = (struct r600_texture*)surf->base.texture; 2071 2072 if (!surf->color_initialized) { 2073 si_initialize_color_surface(sctx, surf); 2074 } 2075 2076 if (surf->export_16bpc) { 2077 sctx->framebuffer.export_16bpc |= 1 << i; 2078 } 2079 2080 if (rtex->fmask.size && rtex->cmask.size) { 2081 sctx->framebuffer.compressed_cb_mask |= 1 << i; 2082 } 2083 r600_context_add_resource_size(ctx, surf->base.texture); 2084 } 2085 /* Set the 16BPC export for possible dual-src blending. */ 2086 if (i == 1 && surf && surf->export_16bpc) { 2087 sctx->framebuffer.export_16bpc |= 1 << 1; 2088 } 2089 2090 assert(!(sctx->framebuffer.export_16bpc & ~0xff)); 2091 2092 if (state->zsbuf) { 2093 surf = (struct r600_surface*)state->zsbuf; 2094 2095 if (!surf->depth_initialized) { 2096 si_init_depth_surface(sctx, surf); 2097 } 2098 r600_context_add_resource_size(ctx, surf->base.texture); 2099 } 2100 2101 si_update_fb_rs_state(sctx); 2102 si_update_fb_blend_state(sctx); 2103 2104 sctx->framebuffer.atom.num_dw = state->nr_cbufs*16 + (8 - state->nr_cbufs)*3; 2105 sctx->framebuffer.atom.num_dw += state->zsbuf ? 26 : 4; 2106 sctx->framebuffer.atom.num_dw += 3; /* WINDOW_SCISSOR_BR */ 2107 sctx->framebuffer.atom.num_dw += 18; /* MSAA sample locations */ 2108 si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); 2109 2110 if (sctx->framebuffer.nr_samples != old_nr_samples) { 2111 si_mark_atom_dirty(sctx, &sctx->msaa_config); 2112 si_mark_atom_dirty(sctx, &sctx->db_render_state); 2113 2114 /* Set sample locations as fragment shader constants. */ 2115 switch (sctx->framebuffer.nr_samples) { 2116 case 1: 2117 constbuf.user_buffer = sctx->b.sample_locations_1x; 2118 break; 2119 case 2: 2120 constbuf.user_buffer = sctx->b.sample_locations_2x; 2121 break; 2122 case 4: 2123 constbuf.user_buffer = sctx->b.sample_locations_4x; 2124 break; 2125 case 8: 2126 constbuf.user_buffer = sctx->b.sample_locations_8x; 2127 break; 2128 case 16: 2129 constbuf.user_buffer = sctx->b.sample_locations_16x; 2130 break; 2131 default: 2132 assert(0); 2133 } 2134 constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; 2135 ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT, 2136 SI_DRIVER_STATE_CONST_BUF, &constbuf); 2137 2138 /* Smoothing (only possible with nr_samples == 1) uses the same 2139 * sample locations as the MSAA it simulates. 2140 * 2141 * Therefore, don't update the sample locations when 2142 * transitioning from no AA to smoothing-equivalent AA, and 2143 * vice versa. 2144 */ 2145 if ((sctx->framebuffer.nr_samples != 1 || 2146 old_nr_samples != SI_NUM_SMOOTH_AA_SAMPLES) && 2147 (sctx->framebuffer.nr_samples != SI_NUM_SMOOTH_AA_SAMPLES || 2148 old_nr_samples != 1)) 2149 si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs); 2150 } 2151} 2152 2153static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom) 2154{ 2155 struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; 2156 struct pipe_framebuffer_state *state = &sctx->framebuffer.state; 2157 unsigned i, nr_cbufs = state->nr_cbufs; 2158 struct r600_texture *tex = NULL; 2159 struct r600_surface *cb = NULL; 2160 2161 /* Colorbuffers. */ 2162 for (i = 0; i < nr_cbufs; i++) { 2163 cb = (struct r600_surface*)state->cbufs[i]; 2164 if (!cb) { 2165 r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 2166 S_028C70_FORMAT(V_028C70_COLOR_INVALID)); 2167 continue; 2168 } 2169 2170 tex = (struct r600_texture *)cb->base.texture; 2171 r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, 2172 &tex->resource, RADEON_USAGE_READWRITE, 2173 tex->surface.nsamples > 1 ? 2174 RADEON_PRIO_COLOR_BUFFER_MSAA : 2175 RADEON_PRIO_COLOR_BUFFER); 2176 2177 if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) { 2178 r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, 2179 tex->cmask_buffer, RADEON_USAGE_READWRITE, 2180 RADEON_PRIO_COLOR_META); 2181 } 2182 2183 r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 2184 sctx->b.chip_class >= VI ? 14 : 13); 2185 radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */ 2186 radeon_emit(cs, cb->cb_color_pitch); /* R_028C64_CB_COLOR0_PITCH */ 2187 radeon_emit(cs, cb->cb_color_slice); /* R_028C68_CB_COLOR0_SLICE */ 2188 radeon_emit(cs, cb->cb_color_view); /* R_028C6C_CB_COLOR0_VIEW */ 2189 radeon_emit(cs, cb->cb_color_info | tex->cb_color_info); /* R_028C70_CB_COLOR0_INFO */ 2190 radeon_emit(cs, cb->cb_color_attrib); /* R_028C74_CB_COLOR0_ATTRIB */ 2191 radeon_emit(cs, cb->cb_dcc_control); /* R_028C78_CB_COLOR0_DCC_CONTROL */ 2192 radeon_emit(cs, tex->cmask.base_address_reg); /* R_028C7C_CB_COLOR0_CMASK */ 2193 radeon_emit(cs, tex->cmask.slice_tile_max); /* R_028C80_CB_COLOR0_CMASK_SLICE */ 2194 radeon_emit(cs, cb->cb_color_fmask); /* R_028C84_CB_COLOR0_FMASK */ 2195 radeon_emit(cs, cb->cb_color_fmask_slice); /* R_028C88_CB_COLOR0_FMASK_SLICE */ 2196 radeon_emit(cs, tex->color_clear_value[0]); /* R_028C8C_CB_COLOR0_CLEAR_WORD0 */ 2197 radeon_emit(cs, tex->color_clear_value[1]); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */ 2198 2199 if (sctx->b.chip_class >= VI) 2200 radeon_emit(cs, 0); /* R_028C94_CB_COLOR0_DCC_BASE */ 2201 } 2202 /* set CB_COLOR1_INFO for possible dual-src blending */ 2203 if (i == 1 && state->cbufs[0]) { 2204 r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C, 2205 cb->cb_color_info | tex->cb_color_info); 2206 i++; 2207 } 2208 for (; i < 8 ; i++) { 2209 r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); 2210 } 2211 2212 /* ZS buffer. */ 2213 if (state->zsbuf) { 2214 struct r600_surface *zb = (struct r600_surface*)state->zsbuf; 2215 struct r600_texture *rtex = (struct r600_texture*)zb->base.texture; 2216 2217 r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, 2218 &rtex->resource, RADEON_USAGE_READWRITE, 2219 zb->base.texture->nr_samples > 1 ? 2220 RADEON_PRIO_DEPTH_BUFFER_MSAA : 2221 RADEON_PRIO_DEPTH_BUFFER); 2222 2223 if (zb->db_htile_data_base) { 2224 r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, 2225 rtex->htile_buffer, RADEON_USAGE_READWRITE, 2226 RADEON_PRIO_DEPTH_META); 2227 } 2228 2229 r600_write_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view); 2230 r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base); 2231 2232 r600_write_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9); 2233 radeon_emit(cs, zb->db_depth_info); /* R_02803C_DB_DEPTH_INFO */ 2234 radeon_emit(cs, zb->db_z_info | /* R_028040_DB_Z_INFO */ 2235 S_028040_ZRANGE_PRECISION(rtex->depth_clear_value != 0)); 2236 radeon_emit(cs, zb->db_stencil_info); /* R_028044_DB_STENCIL_INFO */ 2237 radeon_emit(cs, zb->db_depth_base); /* R_028048_DB_Z_READ_BASE */ 2238 radeon_emit(cs, zb->db_stencil_base); /* R_02804C_DB_STENCIL_READ_BASE */ 2239 radeon_emit(cs, zb->db_depth_base); /* R_028050_DB_Z_WRITE_BASE */ 2240 radeon_emit(cs, zb->db_stencil_base); /* R_028054_DB_STENCIL_WRITE_BASE */ 2241 radeon_emit(cs, zb->db_depth_size); /* R_028058_DB_DEPTH_SIZE */ 2242 radeon_emit(cs, zb->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */ 2243 2244 r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface); 2245 r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value)); 2246 r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 2247 zb->pa_su_poly_offset_db_fmt_cntl); 2248 } else { 2249 r600_write_context_reg_seq(cs, R_028040_DB_Z_INFO, 2); 2250 radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */ 2251 radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */ 2252 } 2253 2254 /* Framebuffer dimensions. */ 2255 /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */ 2256 r600_write_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR, 2257 S_028208_BR_X(state->width) | S_028208_BR_Y(state->height)); 2258} 2259 2260static void si_emit_msaa_sample_locs(struct r600_common_context *rctx, 2261 struct r600_atom *atom) 2262{ 2263 struct si_context *sctx = (struct si_context *)rctx; 2264 struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; 2265 unsigned nr_samples = sctx->framebuffer.nr_samples; 2266 2267 cayman_emit_msaa_sample_locs(cs, nr_samples > 1 ? nr_samples : 2268 SI_NUM_SMOOTH_AA_SAMPLES); 2269} 2270 2271const struct r600_atom si_atom_msaa_sample_locs = { si_emit_msaa_sample_locs, 18 }; /* number of CS dwords */ 2272 2273static void si_emit_msaa_config(struct r600_common_context *rctx, struct r600_atom *atom) 2274{ 2275 struct si_context *sctx = (struct si_context *)rctx; 2276 struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; 2277 2278 cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples, 2279 sctx->ps_iter_samples, 2280 sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0); 2281} 2282 2283const struct r600_atom si_atom_msaa_config = { si_emit_msaa_config, 10 }; /* number of CS dwords */ 2284 2285static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples) 2286{ 2287 struct si_context *sctx = (struct si_context *)ctx; 2288 2289 if (sctx->ps_iter_samples == min_samples) 2290 return; 2291 2292 sctx->ps_iter_samples = min_samples; 2293 2294 if (sctx->framebuffer.nr_samples > 1) 2295 si_mark_atom_dirty(sctx, &sctx->msaa_config); 2296} 2297 2298/* 2299 * Samplers 2300 */ 2301 2302/** 2303 * Create a sampler view. 2304 * 2305 * @param ctx context 2306 * @param texture texture 2307 * @param state sampler view template 2308 * @param width0 width0 override (for compressed textures as int) 2309 * @param height0 height0 override (for compressed textures as int) 2310 * @param force_level set the base address to the level (for compressed textures) 2311 */ 2312struct pipe_sampler_view * 2313si_create_sampler_view_custom(struct pipe_context *ctx, 2314 struct pipe_resource *texture, 2315 const struct pipe_sampler_view *state, 2316 unsigned width0, unsigned height0, 2317 unsigned force_level) 2318{ 2319 struct si_context *sctx = (struct si_context*)ctx; 2320 struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view); 2321 struct r600_texture *tmp = (struct r600_texture*)texture; 2322 const struct util_format_description *desc; 2323 unsigned format, num_format, base_level, first_level, last_level; 2324 uint32_t pitch = 0; 2325 unsigned char state_swizzle[4], swizzle[4]; 2326 unsigned height, depth, width; 2327 enum pipe_format pipe_format = state->format; 2328 struct radeon_surf_level *surflevel; 2329 int first_non_void; 2330 uint64_t va; 2331 2332 if (view == NULL) 2333 return NULL; 2334 2335 /* initialize base object */ 2336 view->base = *state; 2337 view->base.texture = NULL; 2338 view->base.reference.count = 1; 2339 view->base.context = ctx; 2340 2341 /* NULL resource, obey swizzle (only ZERO and ONE make sense). */ 2342 if (!texture) { 2343 view->state[3] = S_008F1C_DST_SEL_X(si_map_swizzle(state->swizzle_r)) | 2344 S_008F1C_DST_SEL_Y(si_map_swizzle(state->swizzle_g)) | 2345 S_008F1C_DST_SEL_Z(si_map_swizzle(state->swizzle_b)) | 2346 S_008F1C_DST_SEL_W(si_map_swizzle(state->swizzle_a)) | 2347 S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D); 2348 return &view->base; 2349 } 2350 2351 pipe_resource_reference(&view->base.texture, texture); 2352 view->resource = &tmp->resource; 2353 2354 /* Buffer resource. */ 2355 if (texture->target == PIPE_BUFFER) { 2356 unsigned stride, num_records; 2357 2358 desc = util_format_description(state->format); 2359 first_non_void = util_format_get_first_non_void_channel(state->format); 2360 stride = desc->block.bits / 8; 2361 va = tmp->resource.gpu_address + state->u.buf.first_element*stride; 2362 format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); 2363 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); 2364 2365 num_records = state->u.buf.last_element + 1 - state->u.buf.first_element; 2366 num_records = MIN2(num_records, texture->width0 / stride); 2367 2368 if (sctx->b.chip_class >= VI) 2369 num_records *= stride; 2370 2371 view->state[4] = va; 2372 view->state[5] = S_008F04_BASE_ADDRESS_HI(va >> 32) | 2373 S_008F04_STRIDE(stride); 2374 view->state[6] = num_records; 2375 view->state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 2376 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 2377 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 2378 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | 2379 S_008F0C_NUM_FORMAT(num_format) | 2380 S_008F0C_DATA_FORMAT(format); 2381 2382 LIST_ADDTAIL(&view->list, &sctx->b.texture_buffers); 2383 return &view->base; 2384 } 2385 2386 state_swizzle[0] = state->swizzle_r; 2387 state_swizzle[1] = state->swizzle_g; 2388 state_swizzle[2] = state->swizzle_b; 2389 state_swizzle[3] = state->swizzle_a; 2390 2391 surflevel = tmp->surface.level; 2392 2393 /* Texturing with separate depth and stencil. */ 2394 if (tmp->is_depth && !tmp->is_flushing_texture) { 2395 switch (pipe_format) { 2396 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 2397 pipe_format = PIPE_FORMAT_Z32_FLOAT; 2398 break; 2399 case PIPE_FORMAT_X8Z24_UNORM: 2400 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2401 /* Z24 is always stored like this. */ 2402 pipe_format = PIPE_FORMAT_Z24X8_UNORM; 2403 break; 2404 case PIPE_FORMAT_X24S8_UINT: 2405 case PIPE_FORMAT_S8X24_UINT: 2406 case PIPE_FORMAT_X32_S8X24_UINT: 2407 pipe_format = PIPE_FORMAT_S8_UINT; 2408 surflevel = tmp->surface.stencil_level; 2409 break; 2410 default:; 2411 } 2412 } 2413 2414 desc = util_format_description(pipe_format); 2415 2416 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 2417 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; 2418 const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1}; 2419 2420 switch (pipe_format) { 2421 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2422 case PIPE_FORMAT_X24S8_UINT: 2423 case PIPE_FORMAT_X32_S8X24_UINT: 2424 case PIPE_FORMAT_X8Z24_UNORM: 2425 util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); 2426 break; 2427 default: 2428 util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle); 2429 } 2430 } else { 2431 util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle); 2432 } 2433 2434 first_non_void = util_format_get_first_non_void_channel(pipe_format); 2435 2436 switch (pipe_format) { 2437 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2438 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2439 break; 2440 default: 2441 if (first_non_void < 0) { 2442 if (util_format_is_compressed(pipe_format)) { 2443 switch (pipe_format) { 2444 case PIPE_FORMAT_DXT1_SRGB: 2445 case PIPE_FORMAT_DXT1_SRGBA: 2446 case PIPE_FORMAT_DXT3_SRGBA: 2447 case PIPE_FORMAT_DXT5_SRGBA: 2448 case PIPE_FORMAT_BPTC_SRGBA: 2449 num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 2450 break; 2451 case PIPE_FORMAT_RGTC1_SNORM: 2452 case PIPE_FORMAT_LATC1_SNORM: 2453 case PIPE_FORMAT_RGTC2_SNORM: 2454 case PIPE_FORMAT_LATC2_SNORM: 2455 /* implies float, so use SNORM/UNORM to determine 2456 whether data is signed or not */ 2457 case PIPE_FORMAT_BPTC_RGB_FLOAT: 2458 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 2459 break; 2460 default: 2461 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2462 break; 2463 } 2464 } else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 2465 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2466 } else { 2467 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 2468 } 2469 } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { 2470 num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 2471 } else { 2472 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2473 2474 switch (desc->channel[first_non_void].type) { 2475 case UTIL_FORMAT_TYPE_FLOAT: 2476 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 2477 break; 2478 case UTIL_FORMAT_TYPE_SIGNED: 2479 if (desc->channel[first_non_void].normalized) 2480 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 2481 else if (desc->channel[first_non_void].pure_integer) 2482 num_format = V_008F14_IMG_NUM_FORMAT_SINT; 2483 else 2484 num_format = V_008F14_IMG_NUM_FORMAT_SSCALED; 2485 break; 2486 case UTIL_FORMAT_TYPE_UNSIGNED: 2487 if (desc->channel[first_non_void].normalized) 2488 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2489 else if (desc->channel[first_non_void].pure_integer) 2490 num_format = V_008F14_IMG_NUM_FORMAT_UINT; 2491 else 2492 num_format = V_008F14_IMG_NUM_FORMAT_USCALED; 2493 } 2494 } 2495 } 2496 2497 format = si_translate_texformat(ctx->screen, pipe_format, desc, first_non_void); 2498 if (format == ~0) { 2499 format = 0; 2500 } 2501 2502 base_level = 0; 2503 first_level = state->u.tex.first_level; 2504 last_level = state->u.tex.last_level; 2505 width = width0; 2506 height = height0; 2507 depth = texture->depth0; 2508 2509 if (force_level) { 2510 assert(force_level == first_level && 2511 force_level == last_level); 2512 base_level = force_level; 2513 first_level = 0; 2514 last_level = 0; 2515 width = u_minify(width, force_level); 2516 height = u_minify(height, force_level); 2517 depth = u_minify(depth, force_level); 2518 } 2519 2520 pitch = surflevel[base_level].nblk_x * util_format_get_blockwidth(pipe_format); 2521 2522 if (texture->target == PIPE_TEXTURE_1D_ARRAY) { 2523 height = 1; 2524 depth = texture->array_size; 2525 } else if (texture->target == PIPE_TEXTURE_2D_ARRAY) { 2526 depth = texture->array_size; 2527 } else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY) 2528 depth = texture->array_size / 6; 2529 2530 va = tmp->resource.gpu_address + surflevel[base_level].offset; 2531 2532 view->state[0] = va >> 8; 2533 view->state[1] = (S_008F14_BASE_ADDRESS_HI(va >> 40) | 2534 S_008F14_DATA_FORMAT(format) | 2535 S_008F14_NUM_FORMAT(num_format)); 2536 view->state[2] = (S_008F18_WIDTH(width - 1) | 2537 S_008F18_HEIGHT(height - 1)); 2538 view->state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) | 2539 S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | 2540 S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | 2541 S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) | 2542 S_008F1C_BASE_LEVEL(texture->nr_samples > 1 ? 2543 0 : first_level) | 2544 S_008F1C_LAST_LEVEL(texture->nr_samples > 1 ? 2545 util_logbase2(texture->nr_samples) : 2546 last_level) | 2547 S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, base_level, false)) | 2548 S_008F1C_POW2_PAD(texture->last_level > 0) | 2549 S_008F1C_TYPE(si_tex_dim(texture->target, texture->nr_samples))); 2550 view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1)); 2551 view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) | 2552 S_008F24_LAST_ARRAY(state->u.tex.last_layer)); 2553 view->state[6] = 0; 2554 view->state[7] = 0; 2555 2556 /* Initialize the sampler view for FMASK. */ 2557 if (tmp->fmask.size) { 2558 uint64_t va = tmp->resource.gpu_address + tmp->fmask.offset; 2559 uint32_t fmask_format; 2560 2561 switch (texture->nr_samples) { 2562 case 2: 2563 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2; 2564 break; 2565 case 4: 2566 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4; 2567 break; 2568 case 8: 2569 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8; 2570 break; 2571 default: 2572 assert(0); 2573 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID; 2574 } 2575 2576 view->fmask_state[0] = va >> 8; 2577 view->fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | 2578 S_008F14_DATA_FORMAT(fmask_format) | 2579 S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT); 2580 view->fmask_state[2] = S_008F18_WIDTH(width - 1) | 2581 S_008F18_HEIGHT(height - 1); 2582 view->fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | 2583 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | 2584 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | 2585 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | 2586 S_008F1C_TILING_INDEX(tmp->fmask.tile_mode_index) | 2587 S_008F1C_TYPE(si_tex_dim(texture->target, 0)); 2588 view->fmask_state[4] = S_008F20_DEPTH(depth - 1) | 2589 S_008F20_PITCH(tmp->fmask.pitch - 1); 2590 view->fmask_state[5] = S_008F24_BASE_ARRAY(state->u.tex.first_layer) | 2591 S_008F24_LAST_ARRAY(state->u.tex.last_layer); 2592 view->fmask_state[6] = 0; 2593 view->fmask_state[7] = 0; 2594 } 2595 2596 return &view->base; 2597} 2598 2599static struct pipe_sampler_view * 2600si_create_sampler_view(struct pipe_context *ctx, 2601 struct pipe_resource *texture, 2602 const struct pipe_sampler_view *state) 2603{ 2604 return si_create_sampler_view_custom(ctx, texture, state, 2605 texture ? texture->width0 : 0, 2606 texture ? texture->height0 : 0, 0); 2607} 2608 2609static void si_sampler_view_destroy(struct pipe_context *ctx, 2610 struct pipe_sampler_view *state) 2611{ 2612 struct si_sampler_view *view = (struct si_sampler_view *)state; 2613 2614 if (view->resource && view->resource->b.b.target == PIPE_BUFFER) 2615 LIST_DELINIT(&view->list); 2616 2617 pipe_resource_reference(&state->texture, NULL); 2618 FREE(view); 2619} 2620 2621static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter) 2622{ 2623 return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER || 2624 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER || 2625 (linear_filter && 2626 (wrap == PIPE_TEX_WRAP_CLAMP || 2627 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP)); 2628} 2629 2630static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state) 2631{ 2632 bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST || 2633 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST; 2634 2635 return (state->border_color.ui[0] || state->border_color.ui[1] || 2636 state->border_color.ui[2] || state->border_color.ui[3]) && 2637 (wrap_mode_uses_border_color(state->wrap_s, linear_filter) || 2638 wrap_mode_uses_border_color(state->wrap_t, linear_filter) || 2639 wrap_mode_uses_border_color(state->wrap_r, linear_filter)); 2640} 2641 2642static void *si_create_sampler_state(struct pipe_context *ctx, 2643 const struct pipe_sampler_state *state) 2644{ 2645 struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state); 2646 unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0; 2647 unsigned border_color_type; 2648 2649 if (rstate == NULL) { 2650 return NULL; 2651 } 2652 2653 if (sampler_state_needs_border_color(state)) 2654 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER; 2655 else 2656 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; 2657 2658 rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) | 2659 S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) | 2660 S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) | 2661 r600_tex_aniso_filter(state->max_anisotropy) << 9 | 2662 S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) | 2663 S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) | 2664 S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map)); 2665 rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | 2666 S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8))); 2667 rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | 2668 S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter) | aniso_flag_offset) | 2669 S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter) | aniso_flag_offset) | 2670 S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter))); 2671 rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type); 2672 2673 if (border_color_type == V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) { 2674 memcpy(rstate->border_color, state->border_color.ui, 2675 sizeof(rstate->border_color)); 2676 } 2677 2678 return rstate; 2679} 2680 2681/* Upload border colors and update the pointers in resource descriptors. 2682 * There can only be 4096 border colors per context. 2683 * 2684 * XXX: This is broken if the buffer gets reallocated. 2685 */ 2686static void si_set_border_colors(struct si_context *sctx, unsigned count, 2687 void **states) 2688{ 2689 struct si_sampler_state **rstates = (struct si_sampler_state **)states; 2690 uint32_t *border_color_table = NULL; 2691 int i, j; 2692 2693 for (i = 0; i < count; i++) { 2694 if (rstates[i] && 2695 G_008F3C_BORDER_COLOR_TYPE(rstates[i]->val[3]) == 2696 V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) { 2697 if (!sctx->border_color_table || 2698 ((sctx->border_color_offset + count - i) & 2699 C_008F3C_BORDER_COLOR_PTR)) { 2700 r600_resource_reference(&sctx->border_color_table, NULL); 2701 sctx->border_color_offset = 0; 2702 2703 sctx->border_color_table = 2704 si_resource_create_custom(&sctx->screen->b.b, 2705 PIPE_USAGE_DYNAMIC, 2706 4096 * 4 * 4); 2707 } 2708 2709 if (!border_color_table) { 2710 border_color_table = 2711 sctx->b.ws->buffer_map(sctx->border_color_table->cs_buf, 2712 sctx->b.rings.gfx.cs, 2713 PIPE_TRANSFER_WRITE | 2714 PIPE_TRANSFER_UNSYNCHRONIZED); 2715 } 2716 2717 for (j = 0; j < 4; j++) { 2718 border_color_table[4 * sctx->border_color_offset + j] = 2719 util_le32_to_cpu(rstates[i]->border_color[j]); 2720 } 2721 2722 rstates[i]->val[3] &= C_008F3C_BORDER_COLOR_PTR; 2723 rstates[i]->val[3] |= S_008F3C_BORDER_COLOR_PTR(sctx->border_color_offset++); 2724 } 2725 } 2726 2727 if (border_color_table) { 2728 struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); 2729 2730 uint64_t va_offset = sctx->border_color_table->gpu_address; 2731 2732 si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va_offset >> 8); 2733 if (sctx->b.chip_class >= CIK) 2734 si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va_offset >> 40); 2735 si_pm4_add_bo(pm4, sctx->border_color_table, RADEON_USAGE_READ, 2736 RADEON_PRIO_SHADER_DATA); 2737 si_pm4_set_state(sctx, ta_bordercolor_base, pm4); 2738 } 2739} 2740 2741static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader, 2742 unsigned start, unsigned count, 2743 void **states) 2744{ 2745 struct si_context *sctx = (struct si_context *)ctx; 2746 2747 if (!count || shader >= SI_NUM_SHADERS) 2748 return; 2749 2750 si_set_border_colors(sctx, count, states); 2751 si_set_sampler_descriptors(sctx, shader, start, count, states); 2752} 2753 2754static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) 2755{ 2756 struct si_context *sctx = (struct si_context *)ctx; 2757 struct si_state_sample_mask *state = CALLOC_STRUCT(si_state_sample_mask); 2758 struct si_pm4_state *pm4 = &state->pm4; 2759 uint16_t mask = sample_mask; 2760 2761 if (state == NULL) 2762 return; 2763 2764 state->sample_mask = mask; 2765 si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, mask | (mask << 16)); 2766 si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, mask | (mask << 16)); 2767 2768 si_pm4_set_state(sctx, sample_mask, state); 2769} 2770 2771static void si_delete_sampler_state(struct pipe_context *ctx, void *state) 2772{ 2773 free(state); 2774} 2775 2776/* 2777 * Vertex elements & buffers 2778 */ 2779 2780static void *si_create_vertex_elements(struct pipe_context *ctx, 2781 unsigned count, 2782 const struct pipe_vertex_element *elements) 2783{ 2784 struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element); 2785 int i; 2786 2787 assert(count < PIPE_MAX_ATTRIBS); 2788 if (!v) 2789 return NULL; 2790 2791 v->count = count; 2792 for (i = 0; i < count; ++i) { 2793 const struct util_format_description *desc; 2794 unsigned data_format, num_format; 2795 int first_non_void; 2796 2797 desc = util_format_description(elements[i].src_format); 2798 first_non_void = util_format_get_first_non_void_channel(elements[i].src_format); 2799 data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); 2800 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); 2801 2802 v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 2803 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 2804 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 2805 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | 2806 S_008F0C_NUM_FORMAT(num_format) | 2807 S_008F0C_DATA_FORMAT(data_format); 2808 v->format_size[i] = desc->block.bits / 8; 2809 } 2810 memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count); 2811 2812 return v; 2813} 2814 2815static void si_bind_vertex_elements(struct pipe_context *ctx, void *state) 2816{ 2817 struct si_context *sctx = (struct si_context *)ctx; 2818 struct si_vertex_element *v = (struct si_vertex_element*)state; 2819 2820 sctx->vertex_elements = v; 2821 sctx->vertex_buffers_dirty = true; 2822} 2823 2824static void si_delete_vertex_element(struct pipe_context *ctx, void *state) 2825{ 2826 struct si_context *sctx = (struct si_context *)ctx; 2827 2828 if (sctx->vertex_elements == state) 2829 sctx->vertex_elements = NULL; 2830 FREE(state); 2831} 2832 2833static void si_set_vertex_buffers(struct pipe_context *ctx, 2834 unsigned start_slot, unsigned count, 2835 const struct pipe_vertex_buffer *buffers) 2836{ 2837 struct si_context *sctx = (struct si_context *)ctx; 2838 struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot; 2839 int i; 2840 2841 assert(start_slot + count <= Elements(sctx->vertex_buffer)); 2842 2843 if (buffers) { 2844 for (i = 0; i < count; i++) { 2845 const struct pipe_vertex_buffer *src = buffers + i; 2846 struct pipe_vertex_buffer *dsti = dst + i; 2847 2848 pipe_resource_reference(&dsti->buffer, src->buffer); 2849 dsti->buffer_offset = src->buffer_offset; 2850 dsti->stride = src->stride; 2851 r600_context_add_resource_size(ctx, src->buffer); 2852 } 2853 } else { 2854 for (i = 0; i < count; i++) { 2855 pipe_resource_reference(&dst[i].buffer, NULL); 2856 } 2857 } 2858 sctx->vertex_buffers_dirty = true; 2859} 2860 2861static void si_set_index_buffer(struct pipe_context *ctx, 2862 const struct pipe_index_buffer *ib) 2863{ 2864 struct si_context *sctx = (struct si_context *)ctx; 2865 2866 if (ib) { 2867 pipe_resource_reference(&sctx->index_buffer.buffer, ib->buffer); 2868 memcpy(&sctx->index_buffer, ib, sizeof(*ib)); 2869 r600_context_add_resource_size(ctx, ib->buffer); 2870 } else { 2871 pipe_resource_reference(&sctx->index_buffer.buffer, NULL); 2872 } 2873} 2874 2875/* 2876 * Misc 2877 */ 2878static void si_set_polygon_stipple(struct pipe_context *ctx, 2879 const struct pipe_poly_stipple *state) 2880{ 2881 struct si_context *sctx = (struct si_context *)ctx; 2882 struct pipe_resource *tex; 2883 struct pipe_sampler_view *view; 2884 bool is_zero = true; 2885 bool is_one = true; 2886 int i; 2887 2888 /* The hardware obeys 0 and 1 swizzles in the descriptor even if 2889 * the resource is NULL/invalid. Take advantage of this fact and skip 2890 * texture allocation if the stipple pattern is constant. 2891 * 2892 * This is an optimization for the common case when stippling isn't 2893 * used but set_polygon_stipple is still called by st/mesa. 2894 */ 2895 for (i = 0; i < Elements(state->stipple); i++) { 2896 is_zero = is_zero && state->stipple[i] == 0; 2897 is_one = is_one && state->stipple[i] == 0xffffffff; 2898 } 2899 2900 if (is_zero || is_one) { 2901 struct pipe_sampler_view templ = {{0}}; 2902 2903 templ.swizzle_r = PIPE_SWIZZLE_ZERO; 2904 templ.swizzle_g = PIPE_SWIZZLE_ZERO; 2905 templ.swizzle_b = PIPE_SWIZZLE_ZERO; 2906 /* The pattern should be inverted in the texture. */ 2907 templ.swizzle_a = is_zero ? PIPE_SWIZZLE_ONE : PIPE_SWIZZLE_ZERO; 2908 2909 view = ctx->create_sampler_view(ctx, NULL, &templ); 2910 } else { 2911 /* Create a new texture. */ 2912 tex = util_pstipple_create_stipple_texture(ctx, state->stipple); 2913 if (!tex) 2914 return; 2915 2916 view = util_pstipple_create_sampler_view(ctx, tex); 2917 pipe_resource_reference(&tex, NULL); 2918 } 2919 2920 ctx->set_sampler_views(ctx, PIPE_SHADER_FRAGMENT, 2921 SI_POLY_STIPPLE_SAMPLER, 1, &view); 2922 pipe_sampler_view_reference(&view, NULL); 2923 2924 /* Bind the sampler state if needed. */ 2925 if (!sctx->pstipple_sampler_state) { 2926 sctx->pstipple_sampler_state = util_pstipple_create_sampler(ctx); 2927 ctx->bind_sampler_states(ctx, PIPE_SHADER_FRAGMENT, 2928 SI_POLY_STIPPLE_SAMPLER, 1, 2929 &sctx->pstipple_sampler_state); 2930 } 2931} 2932 2933static void si_set_tess_state(struct pipe_context *ctx, 2934 const float default_outer_level[4], 2935 const float default_inner_level[2]) 2936{ 2937 struct si_context *sctx = (struct si_context *)ctx; 2938 struct pipe_constant_buffer cb; 2939 float array[8]; 2940 2941 memcpy(array, default_outer_level, sizeof(float) * 4); 2942 memcpy(array+4, default_inner_level, sizeof(float) * 2); 2943 2944 cb.buffer = NULL; 2945 cb.user_buffer = NULL; 2946 cb.buffer_size = sizeof(array); 2947 2948 si_upload_const_buffer(sctx, (struct r600_resource**)&cb.buffer, 2949 (void*)array, sizeof(array), 2950 &cb.buffer_offset); 2951 2952 ctx->set_constant_buffer(ctx, PIPE_SHADER_TESS_CTRL, 2953 SI_DRIVER_STATE_CONST_BUF, &cb); 2954 pipe_resource_reference(&cb.buffer, NULL); 2955} 2956 2957static void si_texture_barrier(struct pipe_context *ctx) 2958{ 2959 struct si_context *sctx = (struct si_context *)ctx; 2960 2961 sctx->b.flags |= SI_CONTEXT_INV_TC_L1 | 2962 SI_CONTEXT_INV_TC_L2 | 2963 SI_CONTEXT_FLUSH_AND_INV_CB; 2964} 2965 2966static void *si_create_blend_custom(struct si_context *sctx, unsigned mode) 2967{ 2968 struct pipe_blend_state blend; 2969 2970 memset(&blend, 0, sizeof(blend)); 2971 blend.independent_blend_enable = true; 2972 blend.rt[0].colormask = 0xf; 2973 return si_create_blend_state_mode(&sctx->b.b, &blend, mode); 2974} 2975 2976static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw, 2977 bool include_draw_vbo) 2978{ 2979 si_need_cs_space((struct si_context*)ctx, num_dw, include_draw_vbo); 2980} 2981 2982static void si_init_config(struct si_context *sctx); 2983 2984void si_init_state_functions(struct si_context *sctx) 2985{ 2986 si_init_atom(&sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state, 0); 2987 si_init_atom(&sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state, 10); 2988 si_init_atom(&sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs, 6); 2989 2990 sctx->b.b.create_blend_state = si_create_blend_state; 2991 sctx->b.b.bind_blend_state = si_bind_blend_state; 2992 sctx->b.b.delete_blend_state = si_delete_blend_state; 2993 sctx->b.b.set_blend_color = si_set_blend_color; 2994 2995 sctx->b.b.create_rasterizer_state = si_create_rs_state; 2996 sctx->b.b.bind_rasterizer_state = si_bind_rs_state; 2997 sctx->b.b.delete_rasterizer_state = si_delete_rs_state; 2998 2999 sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state; 3000 sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state; 3001 sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state; 3002 3003 sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx); 3004 sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE); 3005 sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS); 3006 sctx->custom_blend_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR); 3007 3008 sctx->b.b.set_clip_state = si_set_clip_state; 3009 sctx->b.b.set_scissor_states = si_set_scissor_states; 3010 sctx->b.b.set_viewport_states = si_set_viewport_states; 3011 sctx->b.b.set_stencil_ref = si_set_pipe_stencil_ref; 3012 3013 sctx->b.b.set_framebuffer_state = si_set_framebuffer_state; 3014 sctx->b.b.get_sample_position = cayman_get_sample_position; 3015 3016 sctx->b.b.create_sampler_state = si_create_sampler_state; 3017 sctx->b.b.bind_sampler_states = si_bind_sampler_states; 3018 sctx->b.b.delete_sampler_state = si_delete_sampler_state; 3019 3020 sctx->b.b.create_sampler_view = si_create_sampler_view; 3021 sctx->b.b.sampler_view_destroy = si_sampler_view_destroy; 3022 3023 sctx->b.b.set_sample_mask = si_set_sample_mask; 3024 3025 sctx->b.b.create_vertex_elements_state = si_create_vertex_elements; 3026 sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements; 3027 sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element; 3028 sctx->b.b.set_vertex_buffers = si_set_vertex_buffers; 3029 sctx->b.b.set_index_buffer = si_set_index_buffer; 3030 3031 sctx->b.b.texture_barrier = si_texture_barrier; 3032 sctx->b.b.set_polygon_stipple = si_set_polygon_stipple; 3033 sctx->b.b.set_min_samples = si_set_min_samples; 3034 sctx->b.b.set_tess_state = si_set_tess_state; 3035 3036 sctx->b.set_occlusion_query_state = si_set_occlusion_query_state; 3037 sctx->b.need_gfx_cs_space = si_need_gfx_cs_space; 3038 3039 sctx->b.b.draw_vbo = si_draw_vbo; 3040 3041 if (sctx->b.chip_class >= CIK) { 3042 sctx->b.dma_copy = cik_sdma_copy; 3043 } else { 3044 sctx->b.dma_copy = si_dma_copy; 3045 } 3046 3047 si_init_config(sctx); 3048} 3049 3050static void 3051si_write_harvested_raster_configs(struct si_context *sctx, 3052 struct si_pm4_state *pm4, 3053 unsigned raster_config, 3054 unsigned raster_config_1) 3055{ 3056 unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1); 3057 unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1); 3058 unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask; 3059 unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16); 3060 unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2); 3061 unsigned rb_per_se = num_rb / num_se; 3062 unsigned se_mask[4]; 3063 unsigned se; 3064 3065 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask; 3066 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask; 3067 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask; 3068 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask; 3069 3070 assert(num_se == 1 || num_se == 2 || num_se == 4); 3071 assert(sh_per_se == 1 || sh_per_se == 2); 3072 assert(rb_per_pkr == 1 || rb_per_pkr == 2); 3073 3074 /* XXX: I can't figure out what the *_XSEL and *_YSEL 3075 * fields are for, so I'm leaving them as their default 3076 * values. */ 3077 3078 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || 3079 (!se_mask[2] && !se_mask[3]))) { 3080 raster_config_1 &= C_028354_SE_PAIR_MAP; 3081 3082 if (!se_mask[0] && !se_mask[1]) { 3083 raster_config_1 |= 3084 S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3); 3085 } else { 3086 raster_config_1 |= 3087 S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0); 3088 } 3089 } 3090 3091 for (se = 0; se < num_se; se++) { 3092 unsigned raster_config_se = raster_config; 3093 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); 3094 unsigned pkr1_mask = pkr0_mask << rb_per_pkr; 3095 int idx = (se / 2) * 2; 3096 3097 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { 3098 raster_config_se &= C_028350_SE_MAP; 3099 3100 if (!se_mask[idx]) { 3101 raster_config_se |= 3102 S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3); 3103 } else { 3104 raster_config_se |= 3105 S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0); 3106 } 3107 } 3108 3109 pkr0_mask &= rb_mask; 3110 pkr1_mask &= rb_mask; 3111 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { 3112 raster_config_se &= C_028350_PKR_MAP; 3113 3114 if (!pkr0_mask) { 3115 raster_config_se |= 3116 S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3); 3117 } else { 3118 raster_config_se |= 3119 S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0); 3120 } 3121 } 3122 3123 if (rb_per_se >= 2) { 3124 unsigned rb0_mask = 1 << (se * rb_per_se); 3125 unsigned rb1_mask = rb0_mask << 1; 3126 3127 rb0_mask &= rb_mask; 3128 rb1_mask &= rb_mask; 3129 if (!rb0_mask || !rb1_mask) { 3130 raster_config_se &= C_028350_RB_MAP_PKR0; 3131 3132 if (!rb0_mask) { 3133 raster_config_se |= 3134 S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3); 3135 } else { 3136 raster_config_se |= 3137 S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0); 3138 } 3139 } 3140 3141 if (rb_per_se > 2) { 3142 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); 3143 rb1_mask = rb0_mask << 1; 3144 rb0_mask &= rb_mask; 3145 rb1_mask &= rb_mask; 3146 if (!rb0_mask || !rb1_mask) { 3147 raster_config_se &= C_028350_RB_MAP_PKR1; 3148 3149 if (!rb0_mask) { 3150 raster_config_se |= 3151 S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3); 3152 } else { 3153 raster_config_se |= 3154 S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0); 3155 } 3156 } 3157 } 3158 } 3159 3160 /* GRBM_GFX_INDEX is privileged on VI */ 3161 if (sctx->b.chip_class <= CIK) 3162 si_pm4_set_reg(pm4, GRBM_GFX_INDEX, 3163 SE_INDEX(se) | SH_BROADCAST_WRITES | 3164 INSTANCE_BROADCAST_WRITES); 3165 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se); 3166 if (sctx->b.chip_class >= CIK) 3167 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); 3168 } 3169 3170 /* GRBM_GFX_INDEX is privileged on VI */ 3171 if (sctx->b.chip_class <= CIK) 3172 si_pm4_set_reg(pm4, GRBM_GFX_INDEX, 3173 SE_BROADCAST_WRITES | SH_BROADCAST_WRITES | 3174 INSTANCE_BROADCAST_WRITES); 3175} 3176 3177static void si_init_config(struct si_context *sctx) 3178{ 3179 unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16); 3180 unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask; 3181 unsigned raster_config, raster_config_1; 3182 struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); 3183 int i; 3184 3185 if (pm4 == NULL) 3186 return; 3187 3188 si_cmd_context_control(pm4); 3189 3190 si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); 3191 si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); 3192 3193 /* FIXME calculate these values somehow ??? */ 3194 si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, 0x80); 3195 si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40); 3196 si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2); 3197 3198 si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0); 3199 si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0); 3200 si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); 3201 3202 si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); 3203 si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0); 3204 si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0); 3205 if (sctx->b.chip_class < CIK) 3206 si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | 3207 S_008A14_CLIP_VTX_REORDER_ENA(1)); 3208 3209 si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210); 3210 si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98); 3211 3212 si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0); 3213 3214 for (i = 0; i < 16; i++) { 3215 si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, 0); 3216 si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, fui(1.0)); 3217 } 3218 3219 switch (sctx->screen->b.family) { 3220 case CHIP_TAHITI: 3221 case CHIP_PITCAIRN: 3222 raster_config = 0x2a00126a; 3223 raster_config_1 = 0x00000000; 3224 break; 3225 case CHIP_VERDE: 3226 raster_config = 0x0000124a; 3227 raster_config_1 = 0x00000000; 3228 break; 3229 case CHIP_OLAND: 3230 raster_config = 0x00000082; 3231 raster_config_1 = 0x00000000; 3232 break; 3233 case CHIP_HAINAN: 3234 raster_config = 0x00000000; 3235 raster_config_1 = 0x00000000; 3236 break; 3237 case CHIP_BONAIRE: 3238 raster_config = 0x16000012; 3239 raster_config_1 = 0x00000000; 3240 break; 3241 case CHIP_HAWAII: 3242 raster_config = 0x3a00161a; 3243 raster_config_1 = 0x0000002e; 3244 break; 3245 case CHIP_FIJI: 3246 /* Fiji should be same as Hawaii, but that causes corruption in some cases */ 3247 raster_config = 0x16000012; /* 0x3a00161a */ 3248 raster_config_1 = 0x0000002a; /* 0x0000002e */ 3249 break; 3250 case CHIP_TONGA: 3251 raster_config = 0x16000012; 3252 raster_config_1 = 0x0000002a; 3253 break; 3254 case CHIP_ICELAND: 3255 raster_config = 0x00000002; 3256 raster_config_1 = 0x00000000; 3257 break; 3258 case CHIP_CARRIZO: 3259 raster_config = 0x00000002; 3260 raster_config_1 = 0x00000000; 3261 break; 3262 case CHIP_KAVERI: 3263 /* KV should be 0x00000002, but that causes problems with radeon */ 3264 raster_config = 0x00000000; /* 0x00000002 */ 3265 raster_config_1 = 0x00000000; 3266 break; 3267 case CHIP_KABINI: 3268 case CHIP_MULLINS: 3269 raster_config = 0x00000000; 3270 raster_config_1 = 0x00000000; 3271 break; 3272 default: 3273 fprintf(stderr, 3274 "radeonsi: Unknown GPU, using 0 for raster_config\n"); 3275 raster_config = 0x00000000; 3276 raster_config_1 = 0x00000000; 3277 break; 3278 } 3279 3280 /* Always use the default config when all backends are enabled 3281 * (or when we failed to determine the enabled backends). 3282 */ 3283 if (!rb_mask || util_bitcount(rb_mask) >= num_rb) { 3284 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 3285 raster_config); 3286 if (sctx->b.chip_class >= CIK) 3287 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 3288 raster_config_1); 3289 } else { 3290 si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1); 3291 } 3292 3293 si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); 3294 si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); 3295 si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR, 3296 S_028244_BR_X(16384) | S_028244_BR_Y(16384)); 3297 si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0); 3298 si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR, 3299 S_028034_BR_X(16384) | S_028034_BR_Y(16384)); 3300 3301 si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); 3302 si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); 3303 /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */ 3304 si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); 3305 si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0); 3306 si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, fui(1.0)); 3307 si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0)); 3308 si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, fui(1.0)); 3309 si_pm4_set_reg(pm4, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, fui(1.0)); 3310 si_pm4_set_reg(pm4, R_028028_DB_STENCIL_CLEAR, 0); 3311 si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); 3312 si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); 3313 si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0); 3314 3315 /* There is a hang if stencil is used and fast stencil is enabled 3316 * regardless of whether HTILE is depth-only or not. 3317 */ 3318 si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 3319 S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | 3320 S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE) | 3321 S_02800C_FAST_STENCIL_DISABLE(1)); 3322 3323 si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0); 3324 si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0); 3325 si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0); 3326 3327 if (sctx->b.chip_class >= CIK) { 3328 si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xfffc)); 3329 si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0); 3330 si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xfffe)); 3331 si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff)); 3332 si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff)); 3333 si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(0)); 3334 si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff)); 3335 } 3336 3337 if (sctx->b.chip_class >= VI) { 3338 si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL, 3339 S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1)); 3340 si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30); 3341 si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32); 3342 } 3343 3344 sctx->init_config = pm4; 3345} 3346