si_state.c revision a75fee78c680054aeb1b96ec25e02dd36286fed5
1/* 2 * Copyright 2012 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Christian König <christian.koenig@amd.com> 25 */ 26 27#include "si_pipe.h" 28#include "si_shader.h" 29#include "sid.h" 30#include "radeon/r600_cs.h" 31 32#include "tgsi/tgsi_parse.h" 33#include "tgsi/tgsi_scan.h" 34#include "util/u_format.h" 35#include "util/u_format_s3tc.h" 36#include "util/u_framebuffer.h" 37#include "util/u_helpers.h" 38#include "util/u_memory.h" 39#include "util/u_simple_shaders.h" 40 41static void si_init_atom(struct r600_atom *atom, struct r600_atom **list_elem, 42 void (*emit)(struct si_context *ctx, struct r600_atom *state), 43 unsigned num_dw) 44{ 45 atom->emit = (void*)emit; 46 atom->num_dw = num_dw; 47 atom->dirty = false; 48 *list_elem = atom; 49} 50 51uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex) 52{ 53 if (sscreen->b.chip_class == CIK && 54 sscreen->b.info.cik_macrotile_mode_array_valid) { 55 unsigned index, tileb; 56 57 tileb = 8 * 8 * tex->surface.bpe; 58 tileb = MIN2(tex->surface.tile_split, tileb); 59 60 for (index = 0; tileb > 64; index++) { 61 tileb >>= 1; 62 } 63 assert(index < 16); 64 65 return (sscreen->b.info.cik_macrotile_mode_array[index] >> 6) & 0x3; 66 } 67 68 if (sscreen->b.chip_class == SI && 69 sscreen->b.info.si_tile_mode_array_valid) { 70 /* Don't use stencil_tiling_index, because num_banks is always 71 * read from the depth mode. */ 72 unsigned tile_mode_index = tex->surface.tiling_index[0]; 73 assert(tile_mode_index < 32); 74 75 return G_009910_NUM_BANKS(sscreen->b.info.si_tile_mode_array[tile_mode_index]); 76 } 77 78 /* The old way. */ 79 switch (sscreen->b.tiling_info.num_banks) { 80 case 2: 81 return V_02803C_ADDR_SURF_2_BANK; 82 case 4: 83 return V_02803C_ADDR_SURF_4_BANK; 84 case 8: 85 default: 86 return V_02803C_ADDR_SURF_8_BANK; 87 case 16: 88 return V_02803C_ADDR_SURF_16_BANK; 89 } 90} 91 92unsigned cik_tile_split(unsigned tile_split) 93{ 94 switch (tile_split) { 95 case 64: 96 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_64B; 97 break; 98 case 128: 99 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_128B; 100 break; 101 case 256: 102 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_256B; 103 break; 104 case 512: 105 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_512B; 106 break; 107 default: 108 case 1024: 109 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_1KB; 110 break; 111 case 2048: 112 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_2KB; 113 break; 114 case 4096: 115 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_4KB; 116 break; 117 } 118 return tile_split; 119} 120 121unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect) 122{ 123 switch (macro_tile_aspect) { 124 default: 125 case 1: 126 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_1; 127 break; 128 case 2: 129 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_2; 130 break; 131 case 4: 132 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_4; 133 break; 134 case 8: 135 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_8; 136 break; 137 } 138 return macro_tile_aspect; 139} 140 141unsigned cik_bank_wh(unsigned bankwh) 142{ 143 switch (bankwh) { 144 default: 145 case 1: 146 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_1; 147 break; 148 case 2: 149 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_2; 150 break; 151 case 4: 152 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_4; 153 break; 154 case 8: 155 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_8; 156 break; 157 } 158 return bankwh; 159} 160 161unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode) 162{ 163 if (sscreen->b.info.si_tile_mode_array_valid) { 164 uint32_t gb_tile_mode = sscreen->b.info.si_tile_mode_array[tile_mode]; 165 166 return G_009910_PIPE_CONFIG(gb_tile_mode); 167 } 168 169 /* This is probably broken for a lot of chips, but it's only used 170 * if the kernel cannot return the tile mode array for CIK. */ 171 switch (sscreen->b.info.r600_num_tile_pipes) { 172 case 16: 173 return V_02803C_X_ADDR_SURF_P16_32X32_16X16; 174 case 8: 175 return V_02803C_X_ADDR_SURF_P8_32X32_16X16; 176 case 4: 177 default: 178 if (sscreen->b.info.r600_num_backends == 4) 179 return V_02803C_X_ADDR_SURF_P4_16X16; 180 else 181 return V_02803C_X_ADDR_SURF_P4_8X16; 182 case 2: 183 return V_02803C_ADDR_SURF_P2; 184 } 185} 186 187static unsigned si_map_swizzle(unsigned swizzle) 188{ 189 switch (swizzle) { 190 case UTIL_FORMAT_SWIZZLE_Y: 191 return V_008F0C_SQ_SEL_Y; 192 case UTIL_FORMAT_SWIZZLE_Z: 193 return V_008F0C_SQ_SEL_Z; 194 case UTIL_FORMAT_SWIZZLE_W: 195 return V_008F0C_SQ_SEL_W; 196 case UTIL_FORMAT_SWIZZLE_0: 197 return V_008F0C_SQ_SEL_0; 198 case UTIL_FORMAT_SWIZZLE_1: 199 return V_008F0C_SQ_SEL_1; 200 default: /* UTIL_FORMAT_SWIZZLE_X */ 201 return V_008F0C_SQ_SEL_X; 202 } 203} 204 205static uint32_t S_FIXED(float value, uint32_t frac_bits) 206{ 207 return value * (1 << frac_bits); 208} 209 210/* 12.4 fixed-point */ 211static unsigned si_pack_float_12p4(float x) 212{ 213 return x <= 0 ? 0 : 214 x >= 4096 ? 0xffff : x * 16; 215} 216 217/* 218 * inferred framebuffer and blender state 219 */ 220static void si_update_fb_blend_state(struct si_context *sctx) 221{ 222 struct si_pm4_state *pm4; 223 struct si_state_blend *blend = sctx->queued.named.blend; 224 uint32_t mask; 225 226 if (blend == NULL) 227 return; 228 229 pm4 = si_pm4_alloc_state(sctx); 230 if (pm4 == NULL) 231 return; 232 233 mask = (1ULL << ((unsigned)sctx->framebuffer.state.nr_cbufs * 4)) - 1; 234 mask &= blend->cb_target_mask; 235 si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask); 236 237 si_pm4_set_state(sctx, fb_blend, pm4); 238} 239 240/* 241 * Blender functions 242 */ 243 244static uint32_t si_translate_blend_function(int blend_func) 245{ 246 switch (blend_func) { 247 case PIPE_BLEND_ADD: 248 return V_028780_COMB_DST_PLUS_SRC; 249 case PIPE_BLEND_SUBTRACT: 250 return V_028780_COMB_SRC_MINUS_DST; 251 case PIPE_BLEND_REVERSE_SUBTRACT: 252 return V_028780_COMB_DST_MINUS_SRC; 253 case PIPE_BLEND_MIN: 254 return V_028780_COMB_MIN_DST_SRC; 255 case PIPE_BLEND_MAX: 256 return V_028780_COMB_MAX_DST_SRC; 257 default: 258 R600_ERR("Unknown blend function %d\n", blend_func); 259 assert(0); 260 break; 261 } 262 return 0; 263} 264 265static uint32_t si_translate_blend_factor(int blend_fact) 266{ 267 switch (blend_fact) { 268 case PIPE_BLENDFACTOR_ONE: 269 return V_028780_BLEND_ONE; 270 case PIPE_BLENDFACTOR_SRC_COLOR: 271 return V_028780_BLEND_SRC_COLOR; 272 case PIPE_BLENDFACTOR_SRC_ALPHA: 273 return V_028780_BLEND_SRC_ALPHA; 274 case PIPE_BLENDFACTOR_DST_ALPHA: 275 return V_028780_BLEND_DST_ALPHA; 276 case PIPE_BLENDFACTOR_DST_COLOR: 277 return V_028780_BLEND_DST_COLOR; 278 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 279 return V_028780_BLEND_SRC_ALPHA_SATURATE; 280 case PIPE_BLENDFACTOR_CONST_COLOR: 281 return V_028780_BLEND_CONSTANT_COLOR; 282 case PIPE_BLENDFACTOR_CONST_ALPHA: 283 return V_028780_BLEND_CONSTANT_ALPHA; 284 case PIPE_BLENDFACTOR_ZERO: 285 return V_028780_BLEND_ZERO; 286 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 287 return V_028780_BLEND_ONE_MINUS_SRC_COLOR; 288 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 289 return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; 290 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 291 return V_028780_BLEND_ONE_MINUS_DST_ALPHA; 292 case PIPE_BLENDFACTOR_INV_DST_COLOR: 293 return V_028780_BLEND_ONE_MINUS_DST_COLOR; 294 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 295 return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR; 296 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 297 return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA; 298 case PIPE_BLENDFACTOR_SRC1_COLOR: 299 return V_028780_BLEND_SRC1_COLOR; 300 case PIPE_BLENDFACTOR_SRC1_ALPHA: 301 return V_028780_BLEND_SRC1_ALPHA; 302 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 303 return V_028780_BLEND_INV_SRC1_COLOR; 304 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 305 return V_028780_BLEND_INV_SRC1_ALPHA; 306 default: 307 R600_ERR("Bad blend factor %d not supported!\n", blend_fact); 308 assert(0); 309 break; 310 } 311 return 0; 312} 313 314static void *si_create_blend_state_mode(struct pipe_context *ctx, 315 const struct pipe_blend_state *state, 316 unsigned mode) 317{ 318 struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend); 319 struct si_pm4_state *pm4 = &blend->pm4; 320 321 uint32_t color_control = 0; 322 323 if (blend == NULL) 324 return NULL; 325 326 blend->alpha_to_one = state->alpha_to_one; 327 328 if (state->logicop_enable) { 329 color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4)); 330 } else { 331 color_control |= S_028808_ROP3(0xcc); 332 } 333 334 si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 335 S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 336 S_028B70_ALPHA_TO_MASK_OFFSET0(2) | 337 S_028B70_ALPHA_TO_MASK_OFFSET1(2) | 338 S_028B70_ALPHA_TO_MASK_OFFSET2(2) | 339 S_028B70_ALPHA_TO_MASK_OFFSET3(2)); 340 341 blend->cb_target_mask = 0; 342 for (int i = 0; i < 8; i++) { 343 /* state->rt entries > 0 only written if independent blending */ 344 const int j = state->independent_blend_enable ? i : 0; 345 346 unsigned eqRGB = state->rt[j].rgb_func; 347 unsigned srcRGB = state->rt[j].rgb_src_factor; 348 unsigned dstRGB = state->rt[j].rgb_dst_factor; 349 unsigned eqA = state->rt[j].alpha_func; 350 unsigned srcA = state->rt[j].alpha_src_factor; 351 unsigned dstA = state->rt[j].alpha_dst_factor; 352 353 unsigned blend_cntl = 0; 354 355 /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */ 356 blend->cb_target_mask |= state->rt[j].colormask << (4 * i); 357 358 if (!state->rt[j].blend_enable) { 359 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 360 continue; 361 } 362 363 blend_cntl |= S_028780_ENABLE(1); 364 blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB)); 365 blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB)); 366 blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB)); 367 368 if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { 369 blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1); 370 blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA)); 371 blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA)); 372 blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA)); 373 } 374 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 375 } 376 377 if (blend->cb_target_mask) { 378 color_control |= S_028808_MODE(mode); 379 } else { 380 color_control |= S_028808_MODE(V_028808_CB_DISABLE); 381 } 382 si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control); 383 384 return blend; 385} 386 387static void *si_create_blend_state(struct pipe_context *ctx, 388 const struct pipe_blend_state *state) 389{ 390 return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); 391} 392 393static void si_bind_blend_state(struct pipe_context *ctx, void *state) 394{ 395 struct si_context *sctx = (struct si_context *)ctx; 396 si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state); 397 si_update_fb_blend_state(sctx); 398} 399 400static void si_delete_blend_state(struct pipe_context *ctx, void *state) 401{ 402 struct si_context *sctx = (struct si_context *)ctx; 403 si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state); 404} 405 406static void si_set_blend_color(struct pipe_context *ctx, 407 const struct pipe_blend_color *state) 408{ 409 struct si_context *sctx = (struct si_context *)ctx; 410 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx); 411 412 if (pm4 == NULL) 413 return; 414 415 si_pm4_set_reg(pm4, R_028414_CB_BLEND_RED, fui(state->color[0])); 416 si_pm4_set_reg(pm4, R_028418_CB_BLEND_GREEN, fui(state->color[1])); 417 si_pm4_set_reg(pm4, R_02841C_CB_BLEND_BLUE, fui(state->color[2])); 418 si_pm4_set_reg(pm4, R_028420_CB_BLEND_ALPHA, fui(state->color[3])); 419 420 si_pm4_set_state(sctx, blend_color, pm4); 421} 422 423/* 424 * Clipping, scissors and viewport 425 */ 426 427static void si_set_clip_state(struct pipe_context *ctx, 428 const struct pipe_clip_state *state) 429{ 430 struct si_context *sctx = (struct si_context *)ctx; 431 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx); 432 struct pipe_constant_buffer cb; 433 434 if (pm4 == NULL) 435 return; 436 437 for (int i = 0; i < 6; i++) { 438 si_pm4_set_reg(pm4, R_0285BC_PA_CL_UCP_0_X + i * 16, 439 fui(state->ucp[i][0])); 440 si_pm4_set_reg(pm4, R_0285C0_PA_CL_UCP_0_Y + i * 16, 441 fui(state->ucp[i][1])); 442 si_pm4_set_reg(pm4, R_0285C4_PA_CL_UCP_0_Z + i * 16, 443 fui(state->ucp[i][2])); 444 si_pm4_set_reg(pm4, R_0285C8_PA_CL_UCP_0_W + i * 16, 445 fui(state->ucp[i][3])); 446 } 447 448 cb.buffer = NULL; 449 cb.user_buffer = state->ucp; 450 cb.buffer_offset = 0; 451 cb.buffer_size = 4*4*8; 452 ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, SI_DRIVER_STATE_CONST_BUF, &cb); 453 pipe_resource_reference(&cb.buffer, NULL); 454 455 si_pm4_set_state(sctx, clip, pm4); 456} 457 458static void si_set_scissor_states(struct pipe_context *ctx, 459 unsigned start_slot, 460 unsigned num_scissors, 461 const struct pipe_scissor_state *state) 462{ 463 struct si_context *sctx = (struct si_context *)ctx; 464 struct si_state_scissor *scissor = CALLOC_STRUCT(si_state_scissor); 465 struct si_pm4_state *pm4 = &scissor->pm4; 466 467 if (scissor == NULL) 468 return; 469 470 scissor->scissor = *state; 471 si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 472 S_028250_TL_X(state->minx) | S_028250_TL_Y(state->miny) | 473 S_028250_WINDOW_OFFSET_DISABLE(1)); 474 si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR, 475 S_028254_BR_X(state->maxx) | S_028254_BR_Y(state->maxy)); 476 477 si_pm4_set_state(sctx, scissor, scissor); 478} 479 480static void si_set_viewport_states(struct pipe_context *ctx, 481 unsigned start_slot, 482 unsigned num_viewports, 483 const struct pipe_viewport_state *state) 484{ 485 struct si_context *sctx = (struct si_context *)ctx; 486 struct si_state_viewport *viewport = CALLOC_STRUCT(si_state_viewport); 487 struct si_pm4_state *pm4 = &viewport->pm4; 488 489 if (viewport == NULL) 490 return; 491 492 viewport->viewport = *state; 493 si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0])); 494 si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0])); 495 si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1])); 496 si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1])); 497 si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2])); 498 si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2])); 499 500 si_pm4_set_state(sctx, viewport, viewport); 501} 502 503/* 504 * inferred state between framebuffer and rasterizer 505 */ 506static void si_update_fb_rs_state(struct si_context *sctx) 507{ 508 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 509 struct si_pm4_state *pm4; 510 float offset_units; 511 512 if (!rs || !sctx->framebuffer.state.zsbuf) 513 return; 514 515 offset_units = sctx->queued.named.rasterizer->offset_units; 516 switch (sctx->framebuffer.state.zsbuf->texture->format) { 517 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 518 case PIPE_FORMAT_X8Z24_UNORM: 519 case PIPE_FORMAT_Z24X8_UNORM: 520 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 521 offset_units *= 2.0f; 522 break; 523 case PIPE_FORMAT_Z32_FLOAT: 524 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 525 offset_units *= 1.0f; 526 break; 527 case PIPE_FORMAT_Z16_UNORM: 528 offset_units *= 4.0f; 529 break; 530 default: 531 return; 532 } 533 534 pm4 = si_pm4_alloc_state(sctx); 535 536 if (pm4 == NULL) 537 return; 538 539 /* FIXME some of those reg can be computed with cso */ 540 si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 541 fui(sctx->queued.named.rasterizer->offset_scale)); 542 si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units)); 543 si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, 544 fui(sctx->queued.named.rasterizer->offset_scale)); 545 si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units)); 546 547 si_pm4_set_state(sctx, fb_rs, pm4); 548} 549 550/* 551 * Rasterizer 552 */ 553 554static uint32_t si_translate_fill(uint32_t func) 555{ 556 switch(func) { 557 case PIPE_POLYGON_MODE_FILL: 558 return V_028814_X_DRAW_TRIANGLES; 559 case PIPE_POLYGON_MODE_LINE: 560 return V_028814_X_DRAW_LINES; 561 case PIPE_POLYGON_MODE_POINT: 562 return V_028814_X_DRAW_POINTS; 563 default: 564 assert(0); 565 return V_028814_X_DRAW_POINTS; 566 } 567} 568 569static void *si_create_rs_state(struct pipe_context *ctx, 570 const struct pipe_rasterizer_state *state) 571{ 572 struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer); 573 struct si_pm4_state *pm4 = &rs->pm4; 574 unsigned tmp; 575 unsigned prov_vtx = 1, polygon_dual_mode; 576 float psize_min, psize_max; 577 578 if (rs == NULL) { 579 return NULL; 580 } 581 582 rs->two_side = state->light_twoside; 583 rs->multisample_enable = state->multisample; 584 rs->clip_plane_enable = state->clip_plane_enable; 585 rs->line_stipple_enable = state->line_stipple_enable; 586 587 polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL || 588 state->fill_back != PIPE_POLYGON_MODE_FILL); 589 590 if (state->flatshade_first) 591 prov_vtx = 0; 592 593 rs->flatshade = state->flatshade; 594 rs->sprite_coord_enable = state->sprite_coord_enable; 595 rs->pa_sc_line_stipple = state->line_stipple_enable ? 596 S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | 597 S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0; 598 rs->pa_su_sc_mode_cntl = 599 S_028814_PROVOKING_VTX_LAST(prov_vtx) | 600 S_028814_CULL_FRONT(state->rasterizer_discard || (state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | 601 S_028814_CULL_BACK(state->rasterizer_discard || (state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | 602 S_028814_FACE(!state->front_ccw) | 603 S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | 604 S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | 605 S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) | 606 S_028814_POLY_MODE(polygon_dual_mode) | 607 S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) | 608 S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)); 609 rs->pa_cl_clip_cntl = 610 S_028810_PS_UCP_MODE(3) | 611 S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) | 612 S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) | 613 S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) | 614 S_028810_DX_LINEAR_ATTR_CLIP_ENA(1); 615 616 /* offset */ 617 rs->offset_units = state->offset_units; 618 rs->offset_scale = state->offset_scale * 12.0f; 619 620 tmp = S_0286D4_FLAT_SHADE_ENA(1); 621 if (state->sprite_coord_enable) { 622 tmp |= S_0286D4_PNT_SPRITE_ENA(1) | 623 S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | 624 S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) | 625 S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) | 626 S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1); 627 if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { 628 tmp |= S_0286D4_PNT_SPRITE_TOP_1(1); 629 } 630 } 631 si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0, tmp); 632 633 /* point size 12.4 fixed point */ 634 tmp = (unsigned)(state->point_size * 8.0); 635 si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp)); 636 637 if (state->point_size_per_vertex) { 638 psize_min = util_get_min_point_size(state); 639 psize_max = 8192; 640 } else { 641 /* Force the point size to be as if the vertex output was disabled. */ 642 psize_min = state->point_size; 643 psize_max = state->point_size; 644 } 645 /* Divide by two, because 0.5 = 1 pixel. */ 646 si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX, 647 S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) | 648 S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2))); 649 650 tmp = (unsigned)state->line_width * 8; 651 si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp)); 652 si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0, 653 S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | 654 S_028A48_MSAA_ENABLE(state->multisample) | 655 S_028A48_VPORT_SCISSOR_ENABLE(state->scissor)); 656 657 si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL, 658 S_028BE4_PIX_CENTER(state->half_pixel_center) | 659 S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH)); 660 661 si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp)); 662 663 return rs; 664} 665 666static void si_bind_rs_state(struct pipe_context *ctx, void *state) 667{ 668 struct si_context *sctx = (struct si_context *)ctx; 669 struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; 670 671 if (state == NULL) 672 return; 673 674 // TODO 675 sctx->sprite_coord_enable = rs->sprite_coord_enable; 676 sctx->pa_sc_line_stipple = rs->pa_sc_line_stipple; 677 sctx->pa_su_sc_mode_cntl = rs->pa_su_sc_mode_cntl; 678 679 si_pm4_bind_state(sctx, rasterizer, rs); 680 si_update_fb_rs_state(sctx); 681} 682 683static void si_delete_rs_state(struct pipe_context *ctx, void *state) 684{ 685 struct si_context *sctx = (struct si_context *)ctx; 686 si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state); 687} 688 689/* 690 * infeered state between dsa and stencil ref 691 */ 692static void si_update_dsa_stencil_ref(struct si_context *sctx) 693{ 694 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx); 695 struct pipe_stencil_ref *ref = &sctx->stencil_ref; 696 struct si_state_dsa *dsa = sctx->queued.named.dsa; 697 698 if (pm4 == NULL) 699 return; 700 701 si_pm4_set_reg(pm4, R_028430_DB_STENCILREFMASK, 702 S_028430_STENCILTESTVAL(ref->ref_value[0]) | 703 S_028430_STENCILMASK(dsa->valuemask[0]) | 704 S_028430_STENCILWRITEMASK(dsa->writemask[0]) | 705 S_028430_STENCILOPVAL(1)); 706 si_pm4_set_reg(pm4, R_028434_DB_STENCILREFMASK_BF, 707 S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) | 708 S_028434_STENCILMASK_BF(dsa->valuemask[1]) | 709 S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) | 710 S_028434_STENCILOPVAL_BF(1)); 711 712 si_pm4_set_state(sctx, dsa_stencil_ref, pm4); 713} 714 715static void si_set_pipe_stencil_ref(struct pipe_context *ctx, 716 const struct pipe_stencil_ref *state) 717{ 718 struct si_context *sctx = (struct si_context *)ctx; 719 sctx->stencil_ref = *state; 720 si_update_dsa_stencil_ref(sctx); 721} 722 723 724/* 725 * DSA 726 */ 727 728static uint32_t si_translate_stencil_op(int s_op) 729{ 730 switch (s_op) { 731 case PIPE_STENCIL_OP_KEEP: 732 return V_02842C_STENCIL_KEEP; 733 case PIPE_STENCIL_OP_ZERO: 734 return V_02842C_STENCIL_ZERO; 735 case PIPE_STENCIL_OP_REPLACE: 736 return V_02842C_STENCIL_REPLACE_TEST; 737 case PIPE_STENCIL_OP_INCR: 738 return V_02842C_STENCIL_ADD_CLAMP; 739 case PIPE_STENCIL_OP_DECR: 740 return V_02842C_STENCIL_SUB_CLAMP; 741 case PIPE_STENCIL_OP_INCR_WRAP: 742 return V_02842C_STENCIL_ADD_WRAP; 743 case PIPE_STENCIL_OP_DECR_WRAP: 744 return V_02842C_STENCIL_SUB_WRAP; 745 case PIPE_STENCIL_OP_INVERT: 746 return V_02842C_STENCIL_INVERT; 747 default: 748 R600_ERR("Unknown stencil op %d", s_op); 749 assert(0); 750 break; 751 } 752 return 0; 753} 754 755static void *si_create_dsa_state(struct pipe_context *ctx, 756 const struct pipe_depth_stencil_alpha_state *state) 757{ 758 struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa); 759 struct si_pm4_state *pm4 = &dsa->pm4; 760 unsigned db_depth_control; 761 uint32_t db_stencil_control = 0; 762 763 if (dsa == NULL) { 764 return NULL; 765 } 766 767 dsa->valuemask[0] = state->stencil[0].valuemask; 768 dsa->valuemask[1] = state->stencil[1].valuemask; 769 dsa->writemask[0] = state->stencil[0].writemask; 770 dsa->writemask[1] = state->stencil[1].writemask; 771 772 db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | 773 S_028800_Z_WRITE_ENABLE(state->depth.writemask) | 774 S_028800_ZFUNC(state->depth.func); 775 776 /* stencil */ 777 if (state->stencil[0].enabled) { 778 db_depth_control |= S_028800_STENCIL_ENABLE(1); 779 db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func); 780 db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op)); 781 db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op)); 782 db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op)); 783 784 if (state->stencil[1].enabled) { 785 db_depth_control |= S_028800_BACKFACE_ENABLE(1); 786 db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func); 787 db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op)); 788 db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op)); 789 db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op)); 790 } 791 } 792 793 /* alpha */ 794 if (state->alpha.enabled) { 795 dsa->alpha_func = state->alpha.func; 796 dsa->alpha_ref = state->alpha.ref_value; 797 798 si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 + 799 SI_SGPR_ALPHA_REF * 4, fui(dsa->alpha_ref)); 800 } else { 801 dsa->alpha_func = PIPE_FUNC_ALWAYS; 802 } 803 804 /* misc */ 805 si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control); 806 si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); 807 808 return dsa; 809} 810 811static void si_bind_dsa_state(struct pipe_context *ctx, void *state) 812{ 813 struct si_context *sctx = (struct si_context *)ctx; 814 struct si_state_dsa *dsa = state; 815 816 if (state == NULL) 817 return; 818 819 si_pm4_bind_state(sctx, dsa, dsa); 820 si_update_dsa_stencil_ref(sctx); 821} 822 823static void si_delete_dsa_state(struct pipe_context *ctx, void *state) 824{ 825 struct si_context *sctx = (struct si_context *)ctx; 826 si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state); 827} 828 829static void *si_create_db_flush_dsa(struct si_context *sctx) 830{ 831 struct pipe_depth_stencil_alpha_state dsa = {}; 832 833 return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa); 834} 835 836/* 837 * format translation 838 */ 839static uint32_t si_translate_colorformat(enum pipe_format format) 840{ 841 const struct util_format_description *desc = util_format_description(format); 842 843#define HAS_SIZE(x,y,z,w) \ 844 (desc->channel[0].size == (x) && desc->channel[1].size == (y) && \ 845 desc->channel[2].size == (z) && desc->channel[3].size == (w)) 846 847 if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */ 848 return V_028C70_COLOR_10_11_11; 849 850 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) 851 return V_028C70_COLOR_INVALID; 852 853 switch (desc->nr_channels) { 854 case 1: 855 switch (desc->channel[0].size) { 856 case 8: 857 return V_028C70_COLOR_8; 858 case 16: 859 return V_028C70_COLOR_16; 860 case 32: 861 return V_028C70_COLOR_32; 862 } 863 break; 864 case 2: 865 if (desc->channel[0].size == desc->channel[1].size) { 866 switch (desc->channel[0].size) { 867 case 8: 868 return V_028C70_COLOR_8_8; 869 case 16: 870 return V_028C70_COLOR_16_16; 871 case 32: 872 return V_028C70_COLOR_32_32; 873 } 874 } else if (HAS_SIZE(8,24,0,0)) { 875 return V_028C70_COLOR_24_8; 876 } else if (HAS_SIZE(24,8,0,0)) { 877 return V_028C70_COLOR_8_24; 878 } 879 break; 880 case 3: 881 if (HAS_SIZE(5,6,5,0)) { 882 return V_028C70_COLOR_5_6_5; 883 } else if (HAS_SIZE(32,8,24,0)) { 884 return V_028C70_COLOR_X24_8_32_FLOAT; 885 } 886 break; 887 case 4: 888 if (desc->channel[0].size == desc->channel[1].size && 889 desc->channel[0].size == desc->channel[2].size && 890 desc->channel[0].size == desc->channel[3].size) { 891 switch (desc->channel[0].size) { 892 case 4: 893 return V_028C70_COLOR_4_4_4_4; 894 case 8: 895 return V_028C70_COLOR_8_8_8_8; 896 case 16: 897 return V_028C70_COLOR_16_16_16_16; 898 case 32: 899 return V_028C70_COLOR_32_32_32_32; 900 } 901 } else if (HAS_SIZE(5,5,5,1)) { 902 return V_028C70_COLOR_1_5_5_5; 903 } else if (HAS_SIZE(10,10,10,2)) { 904 return V_028C70_COLOR_2_10_10_10; 905 } 906 break; 907 } 908 return V_028C70_COLOR_INVALID; 909} 910 911static uint32_t si_colorformat_endian_swap(uint32_t colorformat) 912{ 913 if (SI_BIG_ENDIAN) { 914 switch(colorformat) { 915 /* 8-bit buffers. */ 916 case V_028C70_COLOR_8: 917 return V_028C70_ENDIAN_NONE; 918 919 /* 16-bit buffers. */ 920 case V_028C70_COLOR_5_6_5: 921 case V_028C70_COLOR_1_5_5_5: 922 case V_028C70_COLOR_4_4_4_4: 923 case V_028C70_COLOR_16: 924 case V_028C70_COLOR_8_8: 925 return V_028C70_ENDIAN_8IN16; 926 927 /* 32-bit buffers. */ 928 case V_028C70_COLOR_8_8_8_8: 929 case V_028C70_COLOR_2_10_10_10: 930 case V_028C70_COLOR_8_24: 931 case V_028C70_COLOR_24_8: 932 case V_028C70_COLOR_16_16: 933 return V_028C70_ENDIAN_8IN32; 934 935 /* 64-bit buffers. */ 936 case V_028C70_COLOR_16_16_16_16: 937 return V_028C70_ENDIAN_8IN16; 938 939 case V_028C70_COLOR_32_32: 940 return V_028C70_ENDIAN_8IN32; 941 942 /* 128-bit buffers. */ 943 case V_028C70_COLOR_32_32_32_32: 944 return V_028C70_ENDIAN_8IN32; 945 default: 946 return V_028C70_ENDIAN_NONE; /* Unsupported. */ 947 } 948 } else { 949 return V_028C70_ENDIAN_NONE; 950 } 951} 952 953/* Returns the size in bits of the widest component of a CB format */ 954static unsigned si_colorformat_max_comp_size(uint32_t colorformat) 955{ 956 switch(colorformat) { 957 case V_028C70_COLOR_4_4_4_4: 958 return 4; 959 960 case V_028C70_COLOR_1_5_5_5: 961 case V_028C70_COLOR_5_5_5_1: 962 return 5; 963 964 case V_028C70_COLOR_5_6_5: 965 return 6; 966 967 case V_028C70_COLOR_8: 968 case V_028C70_COLOR_8_8: 969 case V_028C70_COLOR_8_8_8_8: 970 return 8; 971 972 case V_028C70_COLOR_10_10_10_2: 973 case V_028C70_COLOR_2_10_10_10: 974 return 10; 975 976 case V_028C70_COLOR_10_11_11: 977 case V_028C70_COLOR_11_11_10: 978 return 11; 979 980 case V_028C70_COLOR_16: 981 case V_028C70_COLOR_16_16: 982 case V_028C70_COLOR_16_16_16_16: 983 return 16; 984 985 case V_028C70_COLOR_8_24: 986 case V_028C70_COLOR_24_8: 987 return 24; 988 989 case V_028C70_COLOR_32: 990 case V_028C70_COLOR_32_32: 991 case V_028C70_COLOR_32_32_32_32: 992 case V_028C70_COLOR_X24_8_32_FLOAT: 993 return 32; 994 } 995 996 assert(!"Unknown maximum component size"); 997 return 0; 998} 999 1000static uint32_t si_translate_dbformat(enum pipe_format format) 1001{ 1002 switch (format) { 1003 case PIPE_FORMAT_Z16_UNORM: 1004 return V_028040_Z_16; 1005 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1006 case PIPE_FORMAT_X8Z24_UNORM: 1007 case PIPE_FORMAT_Z24X8_UNORM: 1008 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1009 return V_028040_Z_24; /* deprecated on SI */ 1010 case PIPE_FORMAT_Z32_FLOAT: 1011 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1012 return V_028040_Z_32_FLOAT; 1013 default: 1014 return V_028040_Z_INVALID; 1015 } 1016} 1017 1018/* 1019 * Texture translation 1020 */ 1021 1022static uint32_t si_translate_texformat(struct pipe_screen *screen, 1023 enum pipe_format format, 1024 const struct util_format_description *desc, 1025 int first_non_void) 1026{ 1027 struct si_screen *sscreen = (struct si_screen*)screen; 1028 bool enable_s3tc = sscreen->b.info.drm_minor >= 31; 1029 boolean uniform = TRUE; 1030 int i; 1031 1032 /* Colorspace (return non-RGB formats directly). */ 1033 switch (desc->colorspace) { 1034 /* Depth stencil formats */ 1035 case UTIL_FORMAT_COLORSPACE_ZS: 1036 switch (format) { 1037 case PIPE_FORMAT_Z16_UNORM: 1038 return V_008F14_IMG_DATA_FORMAT_16; 1039 case PIPE_FORMAT_X24S8_UINT: 1040 case PIPE_FORMAT_Z24X8_UNORM: 1041 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1042 return V_008F14_IMG_DATA_FORMAT_8_24; 1043 case PIPE_FORMAT_X8Z24_UNORM: 1044 case PIPE_FORMAT_S8X24_UINT: 1045 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1046 return V_008F14_IMG_DATA_FORMAT_24_8; 1047 case PIPE_FORMAT_S8_UINT: 1048 return V_008F14_IMG_DATA_FORMAT_8; 1049 case PIPE_FORMAT_Z32_FLOAT: 1050 return V_008F14_IMG_DATA_FORMAT_32; 1051 case PIPE_FORMAT_X32_S8X24_UINT: 1052 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1053 return V_008F14_IMG_DATA_FORMAT_X24_8_32; 1054 default: 1055 goto out_unknown; 1056 } 1057 1058 case UTIL_FORMAT_COLORSPACE_YUV: 1059 goto out_unknown; /* TODO */ 1060 1061 case UTIL_FORMAT_COLORSPACE_SRGB: 1062 if (desc->nr_channels != 4 && desc->nr_channels != 1) 1063 goto out_unknown; 1064 break; 1065 1066 default: 1067 break; 1068 } 1069 1070 if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { 1071 if (!enable_s3tc) 1072 goto out_unknown; 1073 1074 switch (format) { 1075 case PIPE_FORMAT_RGTC1_SNORM: 1076 case PIPE_FORMAT_LATC1_SNORM: 1077 case PIPE_FORMAT_RGTC1_UNORM: 1078 case PIPE_FORMAT_LATC1_UNORM: 1079 return V_008F14_IMG_DATA_FORMAT_BC4; 1080 case PIPE_FORMAT_RGTC2_SNORM: 1081 case PIPE_FORMAT_LATC2_SNORM: 1082 case PIPE_FORMAT_RGTC2_UNORM: 1083 case PIPE_FORMAT_LATC2_UNORM: 1084 return V_008F14_IMG_DATA_FORMAT_BC5; 1085 default: 1086 goto out_unknown; 1087 } 1088 } 1089 1090 if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { 1091 if (!enable_s3tc) 1092 goto out_unknown; 1093 1094 switch (format) { 1095 case PIPE_FORMAT_BPTC_RGBA_UNORM: 1096 case PIPE_FORMAT_BPTC_SRGBA: 1097 return V_008F14_IMG_DATA_FORMAT_BC7; 1098 case PIPE_FORMAT_BPTC_RGB_FLOAT: 1099 case PIPE_FORMAT_BPTC_RGB_UFLOAT: 1100 return V_008F14_IMG_DATA_FORMAT_BC6; 1101 default: 1102 goto out_unknown; 1103 } 1104 } 1105 1106 if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 1107 switch (format) { 1108 case PIPE_FORMAT_R8G8_B8G8_UNORM: 1109 case PIPE_FORMAT_G8R8_B8R8_UNORM: 1110 return V_008F14_IMG_DATA_FORMAT_GB_GR; 1111 case PIPE_FORMAT_G8R8_G8B8_UNORM: 1112 case PIPE_FORMAT_R8G8_R8B8_UNORM: 1113 return V_008F14_IMG_DATA_FORMAT_BG_RG; 1114 default: 1115 goto out_unknown; 1116 } 1117 } 1118 1119 if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { 1120 1121 if (!enable_s3tc) 1122 goto out_unknown; 1123 1124 if (!util_format_s3tc_enabled) { 1125 goto out_unknown; 1126 } 1127 1128 switch (format) { 1129 case PIPE_FORMAT_DXT1_RGB: 1130 case PIPE_FORMAT_DXT1_RGBA: 1131 case PIPE_FORMAT_DXT1_SRGB: 1132 case PIPE_FORMAT_DXT1_SRGBA: 1133 return V_008F14_IMG_DATA_FORMAT_BC1; 1134 case PIPE_FORMAT_DXT3_RGBA: 1135 case PIPE_FORMAT_DXT3_SRGBA: 1136 return V_008F14_IMG_DATA_FORMAT_BC2; 1137 case PIPE_FORMAT_DXT5_RGBA: 1138 case PIPE_FORMAT_DXT5_SRGBA: 1139 return V_008F14_IMG_DATA_FORMAT_BC3; 1140 default: 1141 goto out_unknown; 1142 } 1143 } 1144 1145 if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) { 1146 return V_008F14_IMG_DATA_FORMAT_5_9_9_9; 1147 } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) { 1148 return V_008F14_IMG_DATA_FORMAT_10_11_11; 1149 } 1150 1151 /* R8G8Bx_SNORM - TODO CxV8U8 */ 1152 1153 /* See whether the components are of the same size. */ 1154 for (i = 1; i < desc->nr_channels; i++) { 1155 uniform = uniform && desc->channel[0].size == desc->channel[i].size; 1156 } 1157 1158 /* Non-uniform formats. */ 1159 if (!uniform) { 1160 switch(desc->nr_channels) { 1161 case 3: 1162 if (desc->channel[0].size == 5 && 1163 desc->channel[1].size == 6 && 1164 desc->channel[2].size == 5) { 1165 return V_008F14_IMG_DATA_FORMAT_5_6_5; 1166 } 1167 goto out_unknown; 1168 case 4: 1169 if (desc->channel[0].size == 5 && 1170 desc->channel[1].size == 5 && 1171 desc->channel[2].size == 5 && 1172 desc->channel[3].size == 1) { 1173 return V_008F14_IMG_DATA_FORMAT_1_5_5_5; 1174 } 1175 if (desc->channel[0].size == 10 && 1176 desc->channel[1].size == 10 && 1177 desc->channel[2].size == 10 && 1178 desc->channel[3].size == 2) { 1179 return V_008F14_IMG_DATA_FORMAT_2_10_10_10; 1180 } 1181 goto out_unknown; 1182 } 1183 goto out_unknown; 1184 } 1185 1186 if (first_non_void < 0 || first_non_void > 3) 1187 goto out_unknown; 1188 1189 /* uniform formats */ 1190 switch (desc->channel[first_non_void].size) { 1191 case 4: 1192 switch (desc->nr_channels) { 1193#if 0 /* Not supported for render targets */ 1194 case 2: 1195 return V_008F14_IMG_DATA_FORMAT_4_4; 1196#endif 1197 case 4: 1198 return V_008F14_IMG_DATA_FORMAT_4_4_4_4; 1199 } 1200 break; 1201 case 8: 1202 switch (desc->nr_channels) { 1203 case 1: 1204 return V_008F14_IMG_DATA_FORMAT_8; 1205 case 2: 1206 return V_008F14_IMG_DATA_FORMAT_8_8; 1207 case 4: 1208 return V_008F14_IMG_DATA_FORMAT_8_8_8_8; 1209 } 1210 break; 1211 case 16: 1212 switch (desc->nr_channels) { 1213 case 1: 1214 return V_008F14_IMG_DATA_FORMAT_16; 1215 case 2: 1216 return V_008F14_IMG_DATA_FORMAT_16_16; 1217 case 4: 1218 return V_008F14_IMG_DATA_FORMAT_16_16_16_16; 1219 } 1220 break; 1221 case 32: 1222 switch (desc->nr_channels) { 1223 case 1: 1224 return V_008F14_IMG_DATA_FORMAT_32; 1225 case 2: 1226 return V_008F14_IMG_DATA_FORMAT_32_32; 1227#if 0 /* Not supported for render targets */ 1228 case 3: 1229 return V_008F14_IMG_DATA_FORMAT_32_32_32; 1230#endif 1231 case 4: 1232 return V_008F14_IMG_DATA_FORMAT_32_32_32_32; 1233 } 1234 } 1235 1236out_unknown: 1237 /* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */ 1238 return ~0; 1239} 1240 1241static unsigned si_tex_wrap(unsigned wrap) 1242{ 1243 switch (wrap) { 1244 default: 1245 case PIPE_TEX_WRAP_REPEAT: 1246 return V_008F30_SQ_TEX_WRAP; 1247 case PIPE_TEX_WRAP_CLAMP: 1248 return V_008F30_SQ_TEX_CLAMP_HALF_BORDER; 1249 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 1250 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL; 1251 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 1252 return V_008F30_SQ_TEX_CLAMP_BORDER; 1253 case PIPE_TEX_WRAP_MIRROR_REPEAT: 1254 return V_008F30_SQ_TEX_MIRROR; 1255 case PIPE_TEX_WRAP_MIRROR_CLAMP: 1256 return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER; 1257 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 1258 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; 1259 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 1260 return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER; 1261 } 1262} 1263 1264static unsigned si_tex_filter(unsigned filter) 1265{ 1266 switch (filter) { 1267 default: 1268 case PIPE_TEX_FILTER_NEAREST: 1269 return V_008F38_SQ_TEX_XY_FILTER_POINT; 1270 case PIPE_TEX_FILTER_LINEAR: 1271 return V_008F38_SQ_TEX_XY_FILTER_BILINEAR; 1272 } 1273} 1274 1275static unsigned si_tex_mipfilter(unsigned filter) 1276{ 1277 switch (filter) { 1278 case PIPE_TEX_MIPFILTER_NEAREST: 1279 return V_008F38_SQ_TEX_Z_FILTER_POINT; 1280 case PIPE_TEX_MIPFILTER_LINEAR: 1281 return V_008F38_SQ_TEX_Z_FILTER_LINEAR; 1282 default: 1283 case PIPE_TEX_MIPFILTER_NONE: 1284 return V_008F38_SQ_TEX_Z_FILTER_NONE; 1285 } 1286} 1287 1288static unsigned si_tex_compare(unsigned compare) 1289{ 1290 switch (compare) { 1291 default: 1292 case PIPE_FUNC_NEVER: 1293 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER; 1294 case PIPE_FUNC_LESS: 1295 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS; 1296 case PIPE_FUNC_EQUAL: 1297 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL; 1298 case PIPE_FUNC_LEQUAL: 1299 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; 1300 case PIPE_FUNC_GREATER: 1301 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER; 1302 case PIPE_FUNC_NOTEQUAL: 1303 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; 1304 case PIPE_FUNC_GEQUAL: 1305 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; 1306 case PIPE_FUNC_ALWAYS: 1307 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS; 1308 } 1309} 1310 1311static unsigned si_tex_dim(unsigned dim, unsigned nr_samples) 1312{ 1313 switch (dim) { 1314 default: 1315 case PIPE_TEXTURE_1D: 1316 return V_008F1C_SQ_RSRC_IMG_1D; 1317 case PIPE_TEXTURE_1D_ARRAY: 1318 return V_008F1C_SQ_RSRC_IMG_1D_ARRAY; 1319 case PIPE_TEXTURE_2D: 1320 case PIPE_TEXTURE_RECT: 1321 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA : 1322 V_008F1C_SQ_RSRC_IMG_2D; 1323 case PIPE_TEXTURE_2D_ARRAY: 1324 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : 1325 V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 1326 case PIPE_TEXTURE_3D: 1327 return V_008F1C_SQ_RSRC_IMG_3D; 1328 case PIPE_TEXTURE_CUBE: 1329 case PIPE_TEXTURE_CUBE_ARRAY: 1330 return V_008F1C_SQ_RSRC_IMG_CUBE; 1331 } 1332} 1333 1334/* 1335 * Format support testing 1336 */ 1337 1338static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) 1339{ 1340 return si_translate_texformat(screen, format, util_format_description(format), 1341 util_format_get_first_non_void_channel(format)) != ~0U; 1342} 1343 1344static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen, 1345 const struct util_format_description *desc, 1346 int first_non_void) 1347{ 1348 unsigned type = desc->channel[first_non_void].type; 1349 int i; 1350 1351 if (type == UTIL_FORMAT_TYPE_FIXED) 1352 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1353 1354 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 1355 return V_008F0C_BUF_DATA_FORMAT_10_11_11; 1356 1357 if (desc->nr_channels == 4 && 1358 desc->channel[0].size == 10 && 1359 desc->channel[1].size == 10 && 1360 desc->channel[2].size == 10 && 1361 desc->channel[3].size == 2) 1362 return V_008F0C_BUF_DATA_FORMAT_2_10_10_10; 1363 1364 /* See whether the components are of the same size. */ 1365 for (i = 0; i < desc->nr_channels; i++) { 1366 if (desc->channel[first_non_void].size != desc->channel[i].size) 1367 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1368 } 1369 1370 switch (desc->channel[first_non_void].size) { 1371 case 8: 1372 switch (desc->nr_channels) { 1373 case 1: 1374 return V_008F0C_BUF_DATA_FORMAT_8; 1375 case 2: 1376 return V_008F0C_BUF_DATA_FORMAT_8_8; 1377 case 3: 1378 case 4: 1379 return V_008F0C_BUF_DATA_FORMAT_8_8_8_8; 1380 } 1381 break; 1382 case 16: 1383 switch (desc->nr_channels) { 1384 case 1: 1385 return V_008F0C_BUF_DATA_FORMAT_16; 1386 case 2: 1387 return V_008F0C_BUF_DATA_FORMAT_16_16; 1388 case 3: 1389 case 4: 1390 return V_008F0C_BUF_DATA_FORMAT_16_16_16_16; 1391 } 1392 break; 1393 case 32: 1394 /* From the Southern Islands ISA documentation about MTBUF: 1395 * 'Memory reads of data in memory that is 32 or 64 bits do not 1396 * undergo any format conversion.' 1397 */ 1398 if (type != UTIL_FORMAT_TYPE_FLOAT && 1399 !desc->channel[first_non_void].pure_integer) 1400 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1401 1402 switch (desc->nr_channels) { 1403 case 1: 1404 return V_008F0C_BUF_DATA_FORMAT_32; 1405 case 2: 1406 return V_008F0C_BUF_DATA_FORMAT_32_32; 1407 case 3: 1408 return V_008F0C_BUF_DATA_FORMAT_32_32_32; 1409 case 4: 1410 return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 1411 } 1412 break; 1413 } 1414 1415 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1416} 1417 1418static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen, 1419 const struct util_format_description *desc, 1420 int first_non_void) 1421{ 1422 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 1423 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 1424 1425 switch (desc->channel[first_non_void].type) { 1426 case UTIL_FORMAT_TYPE_SIGNED: 1427 if (desc->channel[first_non_void].normalized) 1428 return V_008F0C_BUF_NUM_FORMAT_SNORM; 1429 else if (desc->channel[first_non_void].pure_integer) 1430 return V_008F0C_BUF_NUM_FORMAT_SINT; 1431 else 1432 return V_008F0C_BUF_NUM_FORMAT_SSCALED; 1433 break; 1434 case UTIL_FORMAT_TYPE_UNSIGNED: 1435 if (desc->channel[first_non_void].normalized) 1436 return V_008F0C_BUF_NUM_FORMAT_UNORM; 1437 else if (desc->channel[first_non_void].pure_integer) 1438 return V_008F0C_BUF_NUM_FORMAT_UINT; 1439 else 1440 return V_008F0C_BUF_NUM_FORMAT_USCALED; 1441 break; 1442 case UTIL_FORMAT_TYPE_FLOAT: 1443 default: 1444 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 1445 } 1446} 1447 1448static bool si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format) 1449{ 1450 const struct util_format_description *desc; 1451 int first_non_void; 1452 unsigned data_format; 1453 1454 desc = util_format_description(format); 1455 first_non_void = util_format_get_first_non_void_channel(format); 1456 data_format = si_translate_buffer_dataformat(screen, desc, first_non_void); 1457 return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID; 1458} 1459 1460static bool si_is_colorbuffer_format_supported(enum pipe_format format) 1461{ 1462 return si_translate_colorformat(format) != V_028C70_COLOR_INVALID && 1463 r600_translate_colorswap(format) != ~0U; 1464} 1465 1466static bool si_is_zs_format_supported(enum pipe_format format) 1467{ 1468 return si_translate_dbformat(format) != V_028040_Z_INVALID; 1469} 1470 1471boolean si_is_format_supported(struct pipe_screen *screen, 1472 enum pipe_format format, 1473 enum pipe_texture_target target, 1474 unsigned sample_count, 1475 unsigned usage) 1476{ 1477 struct si_screen *sscreen = (struct si_screen *)screen; 1478 unsigned retval = 0; 1479 1480 if (target >= PIPE_MAX_TEXTURE_TYPES) { 1481 R600_ERR("r600: unsupported texture type %d\n", target); 1482 return FALSE; 1483 } 1484 1485 if (!util_format_is_supported(format, usage)) 1486 return FALSE; 1487 1488 if (sample_count > 1) { 1489 /* 2D tiling on CIK is supported since DRM 2.35.0 */ 1490 if (sscreen->b.chip_class >= CIK && sscreen->b.info.drm_minor < 35) 1491 return FALSE; 1492 1493 switch (sample_count) { 1494 case 2: 1495 case 4: 1496 case 8: 1497 break; 1498 default: 1499 return FALSE; 1500 } 1501 } 1502 1503 if (usage & PIPE_BIND_SAMPLER_VIEW) { 1504 if (target == PIPE_BUFFER) { 1505 if (si_is_vertex_format_supported(screen, format)) 1506 retval |= PIPE_BIND_SAMPLER_VIEW; 1507 } else { 1508 if (si_is_sampler_format_supported(screen, format)) 1509 retval |= PIPE_BIND_SAMPLER_VIEW; 1510 } 1511 } 1512 1513 if ((usage & (PIPE_BIND_RENDER_TARGET | 1514 PIPE_BIND_DISPLAY_TARGET | 1515 PIPE_BIND_SCANOUT | 1516 PIPE_BIND_SHARED | 1517 PIPE_BIND_BLENDABLE)) && 1518 si_is_colorbuffer_format_supported(format)) { 1519 retval |= usage & 1520 (PIPE_BIND_RENDER_TARGET | 1521 PIPE_BIND_DISPLAY_TARGET | 1522 PIPE_BIND_SCANOUT | 1523 PIPE_BIND_SHARED); 1524 if (!util_format_is_pure_integer(format) && 1525 !util_format_is_depth_or_stencil(format)) 1526 retval |= usage & PIPE_BIND_BLENDABLE; 1527 } 1528 1529 if ((usage & PIPE_BIND_DEPTH_STENCIL) && 1530 si_is_zs_format_supported(format)) { 1531 retval |= PIPE_BIND_DEPTH_STENCIL; 1532 } 1533 1534 if ((usage & PIPE_BIND_VERTEX_BUFFER) && 1535 si_is_vertex_format_supported(screen, format)) { 1536 retval |= PIPE_BIND_VERTEX_BUFFER; 1537 } 1538 1539 if (usage & PIPE_BIND_TRANSFER_READ) 1540 retval |= PIPE_BIND_TRANSFER_READ; 1541 if (usage & PIPE_BIND_TRANSFER_WRITE) 1542 retval |= PIPE_BIND_TRANSFER_WRITE; 1543 1544 return retval == usage; 1545} 1546 1547unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil) 1548{ 1549 unsigned tile_mode_index = 0; 1550 1551 if (stencil) { 1552 tile_mode_index = rtex->surface.stencil_tiling_index[level]; 1553 } else { 1554 tile_mode_index = rtex->surface.tiling_index[level]; 1555 } 1556 return tile_mode_index; 1557} 1558 1559/* 1560 * framebuffer handling 1561 */ 1562 1563static void si_initialize_color_surface(struct si_context *sctx, 1564 struct r600_surface *surf) 1565{ 1566 struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; 1567 unsigned level = surf->base.u.tex.level; 1568 uint64_t offset = rtex->surface.level[level].offset; 1569 unsigned pitch, slice; 1570 unsigned color_info, color_attrib, color_pitch, color_view; 1571 unsigned tile_mode_index; 1572 unsigned format, swap, ntype, endian; 1573 const struct util_format_description *desc; 1574 int i; 1575 unsigned blend_clamp = 0, blend_bypass = 0; 1576 unsigned max_comp_size; 1577 1578 /* Layered rendering doesn't work with LINEAR_GENERAL. 1579 * (LINEAR_ALIGNED and others work) */ 1580 if (rtex->surface.level[level].mode == RADEON_SURF_MODE_LINEAR) { 1581 assert(surf->base.u.tex.first_layer == surf->base.u.tex.last_layer); 1582 offset += rtex->surface.level[level].slice_size * 1583 surf->base.u.tex.first_layer; 1584 color_view = 0; 1585 } else { 1586 color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) | 1587 S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer); 1588 } 1589 1590 pitch = (rtex->surface.level[level].nblk_x) / 8 - 1; 1591 slice = (rtex->surface.level[level].nblk_x * rtex->surface.level[level].nblk_y) / 64; 1592 if (slice) { 1593 slice = slice - 1; 1594 } 1595 1596 tile_mode_index = si_tile_mode_index(rtex, level, false); 1597 1598 desc = util_format_description(surf->base.format); 1599 for (i = 0; i < 4; i++) { 1600 if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { 1601 break; 1602 } 1603 } 1604 if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) { 1605 ntype = V_028C70_NUMBER_FLOAT; 1606 } else { 1607 ntype = V_028C70_NUMBER_UNORM; 1608 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) 1609 ntype = V_028C70_NUMBER_SRGB; 1610 else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 1611 if (desc->channel[i].pure_integer) { 1612 ntype = V_028C70_NUMBER_SINT; 1613 } else { 1614 assert(desc->channel[i].normalized); 1615 ntype = V_028C70_NUMBER_SNORM; 1616 } 1617 } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { 1618 if (desc->channel[i].pure_integer) { 1619 ntype = V_028C70_NUMBER_UINT; 1620 } else { 1621 assert(desc->channel[i].normalized); 1622 ntype = V_028C70_NUMBER_UNORM; 1623 } 1624 } 1625 } 1626 1627 format = si_translate_colorformat(surf->base.format); 1628 if (format == V_028C70_COLOR_INVALID) { 1629 R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format); 1630 } 1631 assert(format != V_028C70_COLOR_INVALID); 1632 swap = r600_translate_colorswap(surf->base.format); 1633 if (rtex->resource.b.b.usage == PIPE_USAGE_STAGING) { 1634 endian = V_028C70_ENDIAN_NONE; 1635 } else { 1636 endian = si_colorformat_endian_swap(format); 1637 } 1638 1639 /* blend clamp should be set for all NORM/SRGB types */ 1640 if (ntype == V_028C70_NUMBER_UNORM || 1641 ntype == V_028C70_NUMBER_SNORM || 1642 ntype == V_028C70_NUMBER_SRGB) 1643 blend_clamp = 1; 1644 1645 /* set blend bypass according to docs if SINT/UINT or 1646 8/24 COLOR variants */ 1647 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT || 1648 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 || 1649 format == V_028C70_COLOR_X24_8_32_FLOAT) { 1650 blend_clamp = 0; 1651 blend_bypass = 1; 1652 } 1653 1654 color_info = S_028C70_FORMAT(format) | 1655 S_028C70_COMP_SWAP(swap) | 1656 S_028C70_BLEND_CLAMP(blend_clamp) | 1657 S_028C70_BLEND_BYPASS(blend_bypass) | 1658 S_028C70_NUMBER_TYPE(ntype) | 1659 S_028C70_ENDIAN(endian); 1660 1661 color_pitch = S_028C64_TILE_MAX(pitch); 1662 1663 color_attrib = S_028C74_TILE_MODE_INDEX(tile_mode_index) | 1664 S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1); 1665 1666 if (rtex->resource.b.b.nr_samples > 1) { 1667 unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples); 1668 1669 color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | 1670 S_028C74_NUM_FRAGMENTS(log_samples); 1671 1672 if (rtex->fmask.size) { 1673 color_info |= S_028C70_COMPRESSION(1); 1674 unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height); 1675 1676 color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(rtex->fmask.tile_mode_index); 1677 1678 if (sctx->b.chip_class == SI) { 1679 /* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */ 1680 color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); 1681 } 1682 if (sctx->b.chip_class >= CIK) { 1683 color_pitch |= S_028C64_FMASK_TILE_MAX(rtex->fmask.pitch / 8 - 1); 1684 } 1685 } 1686 } 1687 1688 offset += rtex->resource.gpu_address; 1689 1690 surf->cb_color_base = offset >> 8; 1691 surf->cb_color_pitch = color_pitch; 1692 surf->cb_color_slice = S_028C68_TILE_MAX(slice); 1693 surf->cb_color_view = color_view; 1694 surf->cb_color_info = color_info; 1695 surf->cb_color_attrib = color_attrib; 1696 1697 if (rtex->fmask.size) { 1698 surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8; 1699 surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max); 1700 } else { 1701 /* This must be set for fast clear to work without FMASK. */ 1702 surf->cb_color_fmask = surf->cb_color_base; 1703 surf->cb_color_fmask_slice = surf->cb_color_slice; 1704 surf->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index); 1705 1706 if (sctx->b.chip_class == SI) { 1707 unsigned bankh = util_logbase2(rtex->surface.bankh); 1708 surf->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh); 1709 } 1710 1711 if (sctx->b.chip_class >= CIK) { 1712 surf->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch); 1713 } 1714 } 1715 1716 /* Determine pixel shader export format */ 1717 max_comp_size = si_colorformat_max_comp_size(format); 1718 if (ntype == V_028C70_NUMBER_SRGB || 1719 ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) && 1720 max_comp_size <= 10) || 1721 (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) { 1722 surf->export_16bpc = true; 1723 } 1724 1725 surf->color_initialized = true; 1726} 1727 1728static void si_init_depth_surface(struct si_context *sctx, 1729 struct r600_surface *surf) 1730{ 1731 struct si_screen *sscreen = sctx->screen; 1732 struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; 1733 unsigned level = surf->base.u.tex.level; 1734 struct radeon_surface_level *levelinfo = &rtex->surface.level[level]; 1735 unsigned format, tile_mode_index, array_mode; 1736 unsigned macro_aspect, tile_split, stile_split, bankh, bankw, nbanks, pipe_config; 1737 uint32_t z_info, s_info, db_depth_info; 1738 uint64_t z_offs, s_offs; 1739 uint32_t db_htile_data_base, db_htile_surface, pa_su_poly_offset_db_fmt_cntl = 0; 1740 1741 switch (sctx->framebuffer.state.zsbuf->texture->format) { 1742 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1743 case PIPE_FORMAT_X8Z24_UNORM: 1744 case PIPE_FORMAT_Z24X8_UNORM: 1745 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1746 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24); 1747 break; 1748 case PIPE_FORMAT_Z32_FLOAT: 1749 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1750 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | 1751 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); 1752 break; 1753 case PIPE_FORMAT_Z16_UNORM: 1754 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16); 1755 break; 1756 default: 1757 assert(0); 1758 } 1759 1760 format = si_translate_dbformat(rtex->resource.b.b.format); 1761 1762 if (format == V_028040_Z_INVALID) { 1763 R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format); 1764 } 1765 assert(format != V_028040_Z_INVALID); 1766 1767 s_offs = z_offs = rtex->resource.gpu_address; 1768 z_offs += rtex->surface.level[level].offset; 1769 s_offs += rtex->surface.stencil_level[level].offset; 1770 1771 db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1); 1772 1773 z_info = S_028040_FORMAT(format); 1774 if (rtex->resource.b.b.nr_samples > 1) { 1775 z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples)); 1776 } 1777 1778 if (rtex->surface.flags & RADEON_SURF_SBUFFER) 1779 s_info = S_028044_FORMAT(V_028044_STENCIL_8); 1780 else 1781 s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID); 1782 1783 if (sctx->b.chip_class >= CIK) { 1784 switch (rtex->surface.level[level].mode) { 1785 case RADEON_SURF_MODE_2D: 1786 array_mode = V_02803C_ARRAY_2D_TILED_THIN1; 1787 break; 1788 case RADEON_SURF_MODE_1D: 1789 case RADEON_SURF_MODE_LINEAR_ALIGNED: 1790 case RADEON_SURF_MODE_LINEAR: 1791 default: 1792 array_mode = V_02803C_ARRAY_1D_TILED_THIN1; 1793 break; 1794 } 1795 tile_split = rtex->surface.tile_split; 1796 stile_split = rtex->surface.stencil_tile_split; 1797 macro_aspect = rtex->surface.mtilea; 1798 bankw = rtex->surface.bankw; 1799 bankh = rtex->surface.bankh; 1800 tile_split = cik_tile_split(tile_split); 1801 stile_split = cik_tile_split(stile_split); 1802 macro_aspect = cik_macro_tile_aspect(macro_aspect); 1803 bankw = cik_bank_wh(bankw); 1804 bankh = cik_bank_wh(bankh); 1805 nbanks = si_num_banks(sscreen, rtex); 1806 tile_mode_index = si_tile_mode_index(rtex, level, false); 1807 pipe_config = cik_db_pipe_config(sscreen, tile_mode_index); 1808 1809 db_depth_info |= S_02803C_ARRAY_MODE(array_mode) | 1810 S_02803C_PIPE_CONFIG(pipe_config) | 1811 S_02803C_BANK_WIDTH(bankw) | 1812 S_02803C_BANK_HEIGHT(bankh) | 1813 S_02803C_MACRO_TILE_ASPECT(macro_aspect) | 1814 S_02803C_NUM_BANKS(nbanks); 1815 z_info |= S_028040_TILE_SPLIT(tile_split); 1816 s_info |= S_028044_TILE_SPLIT(stile_split); 1817 } else { 1818 tile_mode_index = si_tile_mode_index(rtex, level, false); 1819 z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index); 1820 tile_mode_index = si_tile_mode_index(rtex, level, true); 1821 s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index); 1822 } 1823 1824 /* HiZ aka depth buffer htile */ 1825 /* use htile only for first level */ 1826 if (rtex->htile_buffer && !level) { 1827 z_info |= S_028040_TILE_SURFACE_ENABLE(1) | 1828 S_028040_ALLOW_EXPCLEAR(1); 1829 1830 /* This is optimal for the clear value of 1.0 and using 1831 * the LESS and LEQUAL test functions. Set this to 0 1832 * for the opposite case. This can only be changed when 1833 * clearing. */ 1834 z_info |= S_028040_ZRANGE_PRECISION(1); 1835 1836 /* Use all of the htile_buffer for depth, because we don't 1837 * use HTILE for stencil because of FAST_STENCIL_DISABLE. */ 1838 s_info |= S_028044_TILE_STENCIL_DISABLE(1); 1839 1840 uint64_t va = rtex->htile_buffer->gpu_address; 1841 db_htile_data_base = va >> 8; 1842 db_htile_surface = S_028ABC_FULL_CACHE(1); 1843 } else { 1844 db_htile_data_base = 0; 1845 db_htile_surface = 0; 1846 } 1847 1848 assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0); 1849 1850 surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) | 1851 S_028008_SLICE_MAX(surf->base.u.tex.last_layer); 1852 surf->db_htile_data_base = db_htile_data_base; 1853 surf->db_depth_info = db_depth_info; 1854 surf->db_z_info = z_info; 1855 surf->db_stencil_info = s_info; 1856 surf->db_depth_base = z_offs >> 8; 1857 surf->db_stencil_base = s_offs >> 8; 1858 surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) | 1859 S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1); 1860 surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x * 1861 levelinfo->nblk_y) / 64 - 1); 1862 surf->db_htile_surface = db_htile_surface; 1863 surf->pa_su_poly_offset_db_fmt_cntl = pa_su_poly_offset_db_fmt_cntl; 1864 1865 surf->depth_initialized = true; 1866} 1867 1868static void si_set_framebuffer_state(struct pipe_context *ctx, 1869 const struct pipe_framebuffer_state *state) 1870{ 1871 struct si_context *sctx = (struct si_context *)ctx; 1872 struct pipe_constant_buffer constbuf = {0}; 1873 struct r600_surface *surf = NULL; 1874 struct r600_texture *rtex; 1875 int i; 1876 1877 if (sctx->framebuffer.state.nr_cbufs) { 1878 sctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_CB | 1879 R600_CONTEXT_FLUSH_AND_INV_CB_META; 1880 } 1881 if (sctx->framebuffer.state.zsbuf) { 1882 sctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_DB | 1883 R600_CONTEXT_FLUSH_AND_INV_DB_META; 1884 } 1885 1886 util_copy_framebuffer_state(&sctx->framebuffer.state, state); 1887 1888 sctx->framebuffer.export_16bpc = 0; 1889 sctx->framebuffer.compressed_cb_mask = 0; 1890 sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); 1891 sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples); 1892 sctx->framebuffer.cb0_is_integer = state->nr_cbufs && state->cbufs[0] && 1893 util_format_is_pure_integer(state->cbufs[0]->format); 1894 1895 for (i = 0; i < state->nr_cbufs; i++) { 1896 if (!state->cbufs[i]) 1897 continue; 1898 1899 surf = (struct r600_surface*)state->cbufs[i]; 1900 rtex = (struct r600_texture*)surf->base.texture; 1901 1902 if (!surf->color_initialized) { 1903 si_initialize_color_surface(sctx, surf); 1904 } 1905 1906 if (surf->export_16bpc) { 1907 sctx->framebuffer.export_16bpc |= 1 << i; 1908 } 1909 1910 if (rtex->fmask.size && rtex->cmask.size) { 1911 sctx->framebuffer.compressed_cb_mask |= 1 << i; 1912 } 1913 } 1914 /* Set the 16BPC export for possible dual-src blending. */ 1915 if (i == 1 && surf && surf->export_16bpc) { 1916 sctx->framebuffer.export_16bpc |= 1 << 1; 1917 } 1918 1919 assert(!(sctx->framebuffer.export_16bpc & ~0xff)); 1920 1921 if (state->zsbuf) { 1922 surf = (struct r600_surface*)state->zsbuf; 1923 1924 if (!surf->depth_initialized) { 1925 si_init_depth_surface(sctx, surf); 1926 } 1927 } 1928 1929 si_update_fb_rs_state(sctx); 1930 si_update_fb_blend_state(sctx); 1931 1932 sctx->framebuffer.atom.num_dw = state->nr_cbufs*15 + (8 - state->nr_cbufs)*3; 1933 sctx->framebuffer.atom.num_dw += state->zsbuf ? 26 : 4; 1934 sctx->framebuffer.atom.num_dw += 3; /* WINDOW_SCISSOR_BR */ 1935 sctx->framebuffer.atom.num_dw += 18; /* MSAA sample locations */ 1936 sctx->framebuffer.atom.dirty = true; 1937 sctx->msaa_config.dirty = true; 1938 1939 /* Set sample locations as fragment shader constants. */ 1940 switch (sctx->framebuffer.nr_samples) { 1941 case 1: 1942 constbuf.user_buffer = sctx->b.sample_locations_1x; 1943 break; 1944 case 2: 1945 constbuf.user_buffer = sctx->b.sample_locations_2x; 1946 break; 1947 case 4: 1948 constbuf.user_buffer = sctx->b.sample_locations_4x; 1949 break; 1950 case 8: 1951 constbuf.user_buffer = sctx->b.sample_locations_8x; 1952 break; 1953 case 16: 1954 constbuf.user_buffer = sctx->b.sample_locations_16x; 1955 break; 1956 default: 1957 assert(0); 1958 } 1959 constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; 1960 ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT, 1961 SI_DRIVER_STATE_CONST_BUF, &constbuf); 1962} 1963 1964static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom) 1965{ 1966 struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; 1967 struct pipe_framebuffer_state *state = &sctx->framebuffer.state; 1968 unsigned i, nr_cbufs = state->nr_cbufs; 1969 struct r600_texture *tex = NULL; 1970 struct r600_surface *cb = NULL; 1971 1972 /* Colorbuffers. */ 1973 for (i = 0; i < nr_cbufs; i++) { 1974 cb = (struct r600_surface*)state->cbufs[i]; 1975 if (!cb) { 1976 r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 1977 S_028C70_FORMAT(V_028C70_COLOR_INVALID)); 1978 continue; 1979 } 1980 1981 tex = (struct r600_texture *)cb->base.texture; 1982 r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, 1983 &tex->resource, RADEON_USAGE_READWRITE, 1984 tex->surface.nsamples > 1 ? 1985 RADEON_PRIO_COLOR_BUFFER_MSAA : 1986 RADEON_PRIO_COLOR_BUFFER); 1987 1988 if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) { 1989 r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, 1990 tex->cmask_buffer, RADEON_USAGE_READWRITE, 1991 RADEON_PRIO_COLOR_META); 1992 } 1993 1994 r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 13); 1995 radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */ 1996 radeon_emit(cs, cb->cb_color_pitch); /* R_028C64_CB_COLOR0_PITCH */ 1997 radeon_emit(cs, cb->cb_color_slice); /* R_028C68_CB_COLOR0_SLICE */ 1998 radeon_emit(cs, cb->cb_color_view); /* R_028C6C_CB_COLOR0_VIEW */ 1999 radeon_emit(cs, cb->cb_color_info | tex->cb_color_info); /* R_028C70_CB_COLOR0_INFO */ 2000 radeon_emit(cs, cb->cb_color_attrib); /* R_028C74_CB_COLOR0_ATTRIB */ 2001 radeon_emit(cs, 0); /* R_028C78 unused */ 2002 radeon_emit(cs, tex->cmask.base_address_reg); /* R_028C7C_CB_COLOR0_CMASK */ 2003 radeon_emit(cs, tex->cmask.slice_tile_max); /* R_028C80_CB_COLOR0_CMASK_SLICE */ 2004 radeon_emit(cs, cb->cb_color_fmask); /* R_028C84_CB_COLOR0_FMASK */ 2005 radeon_emit(cs, cb->cb_color_fmask_slice); /* R_028C88_CB_COLOR0_FMASK_SLICE */ 2006 radeon_emit(cs, tex->color_clear_value[0]); /* R_028C8C_CB_COLOR0_CLEAR_WORD0 */ 2007 radeon_emit(cs, tex->color_clear_value[1]); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */ 2008 } 2009 /* set CB_COLOR1_INFO for possible dual-src blending */ 2010 if (i == 1 && state->cbufs[0]) { 2011 r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C, 2012 cb->cb_color_info | tex->cb_color_info); 2013 i++; 2014 } 2015 for (; i < 8 ; i++) { 2016 r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); 2017 } 2018 2019 /* ZS buffer. */ 2020 if (state->zsbuf) { 2021 struct r600_surface *zb = (struct r600_surface*)state->zsbuf; 2022 struct r600_texture *rtex = (struct r600_texture*)zb->base.texture; 2023 2024 r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, 2025 &rtex->resource, RADEON_USAGE_READWRITE, 2026 zb->base.texture->nr_samples > 1 ? 2027 RADEON_PRIO_DEPTH_BUFFER_MSAA : 2028 RADEON_PRIO_DEPTH_BUFFER); 2029 2030 if (zb->db_htile_data_base) { 2031 r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, 2032 rtex->htile_buffer, RADEON_USAGE_READWRITE, 2033 RADEON_PRIO_DEPTH_META); 2034 } 2035 2036 r600_write_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view); 2037 r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base); 2038 2039 r600_write_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9); 2040 radeon_emit(cs, zb->db_depth_info); /* R_02803C_DB_DEPTH_INFO */ 2041 radeon_emit(cs, zb->db_z_info); /* R_028040_DB_Z_INFO */ 2042 radeon_emit(cs, zb->db_stencil_info); /* R_028044_DB_STENCIL_INFO */ 2043 radeon_emit(cs, zb->db_depth_base); /* R_028048_DB_Z_READ_BASE */ 2044 radeon_emit(cs, zb->db_stencil_base); /* R_02804C_DB_STENCIL_READ_BASE */ 2045 radeon_emit(cs, zb->db_depth_base); /* R_028050_DB_Z_WRITE_BASE */ 2046 radeon_emit(cs, zb->db_stencil_base); /* R_028054_DB_STENCIL_WRITE_BASE */ 2047 radeon_emit(cs, zb->db_depth_size); /* R_028058_DB_DEPTH_SIZE */ 2048 radeon_emit(cs, zb->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */ 2049 2050 r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface); 2051 r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value)); 2052 r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 2053 zb->pa_su_poly_offset_db_fmt_cntl); 2054 } else { 2055 r600_write_context_reg_seq(cs, R_028040_DB_Z_INFO, 2); 2056 radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */ 2057 radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */ 2058 } 2059 2060 /* Framebuffer dimensions. */ 2061 /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */ 2062 r600_write_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR, 2063 S_028208_BR_X(state->width) | S_028208_BR_Y(state->height)); 2064 2065 cayman_emit_msaa_sample_locs(cs, sctx->framebuffer.nr_samples); 2066} 2067 2068static void si_emit_msaa_config(struct r600_common_context *rctx, struct r600_atom *atom) 2069{ 2070 struct si_context *sctx = (struct si_context *)rctx; 2071 struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; 2072 2073 cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples, 2074 sctx->ps_iter_samples); 2075} 2076 2077const struct r600_atom si_atom_msaa_config = { si_emit_msaa_config, 10 }; /* number of CS dwords */ 2078 2079static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples) 2080{ 2081 struct si_context *sctx = (struct si_context *)ctx; 2082 2083 if (sctx->ps_iter_samples == min_samples) 2084 return; 2085 2086 sctx->ps_iter_samples = min_samples; 2087 2088 if (sctx->framebuffer.nr_samples > 1) 2089 sctx->msaa_config.dirty = true; 2090} 2091 2092/* 2093 * shaders 2094 */ 2095 2096/* Compute the key for the hw shader variant */ 2097static INLINE void si_shader_selector_key(struct pipe_context *ctx, 2098 struct si_pipe_shader_selector *sel, 2099 union si_shader_key *key) 2100{ 2101 struct si_context *sctx = (struct si_context *)ctx; 2102 memset(key, 0, sizeof(*key)); 2103 2104 if ((sel->type == PIPE_SHADER_VERTEX || sel->type == PIPE_SHADER_GEOMETRY) && 2105 sctx->queued.named.rasterizer) { 2106 if (sctx->queued.named.rasterizer->clip_plane_enable & 0xf0) 2107 key->vs.ucps_enabled |= 0x2; 2108 if (sctx->queued.named.rasterizer->clip_plane_enable & 0xf) 2109 key->vs.ucps_enabled |= 0x1; 2110 } 2111 2112 if (sel->type == PIPE_SHADER_VERTEX) { 2113 unsigned i; 2114 if (!sctx->vertex_elements) 2115 return; 2116 2117 for (i = 0; i < sctx->vertex_elements->count; ++i) 2118 key->vs.instance_divisors[i] = sctx->vertex_elements->elements[i].instance_divisor; 2119 2120 key->vs.as_es = sctx->gs_shader != NULL; 2121 } else if (sel->type == PIPE_SHADER_FRAGMENT) { 2122 if (sel->fs_write_all) 2123 key->ps.nr_cbufs = sctx->framebuffer.state.nr_cbufs; 2124 key->ps.export_16bpc = sctx->framebuffer.export_16bpc; 2125 2126 if (sctx->queued.named.rasterizer) { 2127 key->ps.color_two_side = sctx->queued.named.rasterizer->two_side; 2128 key->ps.flatshade = sctx->queued.named.rasterizer->flatshade; 2129 key->ps.interp_at_sample = sctx->framebuffer.nr_samples > 1 && 2130 sctx->ps_iter_samples == sctx->framebuffer.nr_samples; 2131 2132 if (sctx->queued.named.blend) { 2133 key->ps.alpha_to_one = sctx->queued.named.blend->alpha_to_one && 2134 sctx->queued.named.rasterizer->multisample_enable && 2135 !sctx->framebuffer.cb0_is_integer; 2136 } 2137 } 2138 if (sctx->queued.named.dsa) { 2139 key->ps.alpha_func = sctx->queued.named.dsa->alpha_func; 2140 2141 /* Alpha-test should be disabled if colorbuffer 0 is integer. */ 2142 if (sctx->framebuffer.cb0_is_integer) 2143 key->ps.alpha_func = PIPE_FUNC_ALWAYS; 2144 } else { 2145 key->ps.alpha_func = PIPE_FUNC_ALWAYS; 2146 } 2147 } 2148} 2149 2150/* Select the hw shader variant depending on the current state. */ 2151int si_shader_select(struct pipe_context *ctx, 2152 struct si_pipe_shader_selector *sel) 2153{ 2154 union si_shader_key key; 2155 struct si_pipe_shader * shader = NULL; 2156 int r; 2157 2158 si_shader_selector_key(ctx, sel, &key); 2159 2160 /* Check if we don't need to change anything. 2161 * This path is also used for most shaders that don't need multiple 2162 * variants, it will cost just a computation of the key and this 2163 * test. */ 2164 if (likely(sel->current && memcmp(&sel->current->key, &key, sizeof(key)) == 0)) { 2165 return 0; 2166 } 2167 2168 /* lookup if we have other variants in the list */ 2169 if (sel->num_shaders > 1) { 2170 struct si_pipe_shader *p = sel->current, *c = p->next_variant; 2171 2172 while (c && memcmp(&c->key, &key, sizeof(key)) != 0) { 2173 p = c; 2174 c = c->next_variant; 2175 } 2176 2177 if (c) { 2178 p->next_variant = c->next_variant; 2179 shader = c; 2180 } 2181 } 2182 2183 if (shader) { 2184 shader->next_variant = sel->current; 2185 sel->current = shader; 2186 } else { 2187 shader = CALLOC(1, sizeof(struct si_pipe_shader)); 2188 shader->selector = sel; 2189 shader->key = key; 2190 2191 shader->next_variant = sel->current; 2192 sel->current = shader; 2193 r = si_pipe_shader_create(ctx, shader); 2194 if (unlikely(r)) { 2195 R600_ERR("Failed to build shader variant (type=%u) %d\n", 2196 sel->type, r); 2197 sel->current = NULL; 2198 FREE(shader); 2199 return r; 2200 } 2201 sel->num_shaders++; 2202 } 2203 2204 return 0; 2205} 2206 2207static void *si_create_shader_state(struct pipe_context *ctx, 2208 const struct pipe_shader_state *state, 2209 unsigned pipe_shader_type) 2210{ 2211 struct si_pipe_shader_selector *sel = CALLOC_STRUCT(si_pipe_shader_selector); 2212 int r; 2213 2214 sel->type = pipe_shader_type; 2215 sel->tokens = tgsi_dup_tokens(state->tokens); 2216 sel->so = state->stream_output; 2217 2218 if (pipe_shader_type == PIPE_SHADER_FRAGMENT) { 2219 struct tgsi_shader_info info; 2220 2221 tgsi_scan_shader(state->tokens, &info); 2222 sel->fs_write_all = info.color0_writes_all_cbufs; 2223 } 2224 2225 r = si_shader_select(ctx, sel); 2226 if (r) { 2227 free(sel); 2228 return NULL; 2229 } 2230 2231 return sel; 2232} 2233 2234static void *si_create_fs_state(struct pipe_context *ctx, 2235 const struct pipe_shader_state *state) 2236{ 2237 return si_create_shader_state(ctx, state, PIPE_SHADER_FRAGMENT); 2238} 2239 2240static void *si_create_gs_state(struct pipe_context *ctx, 2241 const struct pipe_shader_state *state) 2242{ 2243 return si_create_shader_state(ctx, state, PIPE_SHADER_GEOMETRY); 2244} 2245 2246static void *si_create_vs_state(struct pipe_context *ctx, 2247 const struct pipe_shader_state *state) 2248{ 2249 return si_create_shader_state(ctx, state, PIPE_SHADER_VERTEX); 2250} 2251 2252static void si_bind_vs_shader(struct pipe_context *ctx, void *state) 2253{ 2254 struct si_context *sctx = (struct si_context *)ctx; 2255 struct si_pipe_shader_selector *sel = state; 2256 2257 if (sctx->vs_shader == sel) 2258 return; 2259 2260 if (!sel || !sel->current) 2261 return; 2262 2263 sctx->vs_shader = sel; 2264} 2265 2266static void si_bind_gs_shader(struct pipe_context *ctx, void *state) 2267{ 2268 struct si_context *sctx = (struct si_context *)ctx; 2269 struct si_pipe_shader_selector *sel = state; 2270 2271 if (sctx->gs_shader == sel) 2272 return; 2273 2274 sctx->gs_shader = sel; 2275} 2276 2277static void si_bind_ps_shader(struct pipe_context *ctx, void *state) 2278{ 2279 struct si_context *sctx = (struct si_context *)ctx; 2280 struct si_pipe_shader_selector *sel = state; 2281 2282 /* skip if supplied shader is one already in use */ 2283 if (sctx->ps_shader == sel) 2284 return; 2285 2286 /* use dummy shader if supplied shader is corrupt */ 2287 if (!sel || !sel->current) { 2288 if (!sctx->dummy_pixel_shader) { 2289 sctx->dummy_pixel_shader = 2290 util_make_fragment_cloneinput_shader(&sctx->b.b, 0, 2291 TGSI_SEMANTIC_GENERIC, 2292 TGSI_INTERPOLATE_CONSTANT); 2293 } 2294 2295 sel = sctx->dummy_pixel_shader; 2296 } 2297 2298 sctx->ps_shader = sel; 2299} 2300 2301static void si_delete_shader_selector(struct pipe_context *ctx, 2302 struct si_pipe_shader_selector *sel) 2303{ 2304 struct si_context *sctx = (struct si_context *)ctx; 2305 struct si_pipe_shader *p = sel->current, *c; 2306 2307 while (p) { 2308 c = p->next_variant; 2309 if (sel->type == PIPE_SHADER_GEOMETRY) 2310 si_pm4_delete_state(sctx, gs, p->pm4); 2311 else if (sel->type == PIPE_SHADER_FRAGMENT) 2312 si_pm4_delete_state(sctx, ps, p->pm4); 2313 else if (p->key.vs.as_es) 2314 si_pm4_delete_state(sctx, es, p->pm4); 2315 else 2316 si_pm4_delete_state(sctx, vs, p->pm4); 2317 si_pipe_shader_destroy(ctx, p); 2318 free(p); 2319 p = c; 2320 } 2321 2322 free(sel->tokens); 2323 free(sel); 2324 } 2325 2326static void si_delete_vs_shader(struct pipe_context *ctx, void *state) 2327{ 2328 struct si_context *sctx = (struct si_context *)ctx; 2329 struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state; 2330 2331 if (sctx->vs_shader == sel) { 2332 sctx->vs_shader = NULL; 2333 } 2334 2335 si_delete_shader_selector(ctx, sel); 2336} 2337 2338static void si_delete_gs_shader(struct pipe_context *ctx, void *state) 2339{ 2340 struct si_context *sctx = (struct si_context *)ctx; 2341 struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state; 2342 2343 if (sctx->gs_shader == sel) { 2344 sctx->gs_shader = NULL; 2345 } 2346 2347 si_delete_shader_selector(ctx, sel); 2348} 2349 2350static void si_delete_ps_shader(struct pipe_context *ctx, void *state) 2351{ 2352 struct si_context *sctx = (struct si_context *)ctx; 2353 struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state; 2354 2355 if (sctx->ps_shader == sel) { 2356 sctx->ps_shader = NULL; 2357 } 2358 2359 si_delete_shader_selector(ctx, sel); 2360} 2361 2362/* 2363 * Samplers 2364 */ 2365 2366static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx, 2367 struct pipe_resource *texture, 2368 const struct pipe_sampler_view *state) 2369{ 2370 struct si_context *sctx = (struct si_context*)ctx; 2371 struct si_pipe_sampler_view *view = CALLOC_STRUCT(si_pipe_sampler_view); 2372 struct r600_texture *tmp = (struct r600_texture*)texture; 2373 const struct util_format_description *desc; 2374 unsigned format, num_format; 2375 uint32_t pitch = 0; 2376 unsigned char state_swizzle[4], swizzle[4]; 2377 unsigned height, depth, width; 2378 enum pipe_format pipe_format = state->format; 2379 struct radeon_surface_level *surflevel; 2380 int first_non_void; 2381 uint64_t va; 2382 2383 if (view == NULL) 2384 return NULL; 2385 2386 /* initialize base object */ 2387 view->base = *state; 2388 view->base.texture = NULL; 2389 pipe_resource_reference(&view->base.texture, texture); 2390 view->base.reference.count = 1; 2391 view->base.context = ctx; 2392 view->resource = &tmp->resource; 2393 2394 /* Buffer resource. */ 2395 if (texture->target == PIPE_BUFFER) { 2396 unsigned stride; 2397 2398 desc = util_format_description(state->format); 2399 first_non_void = util_format_get_first_non_void_channel(state->format); 2400 stride = desc->block.bits / 8; 2401 va = tmp->resource.gpu_address + state->u.buf.first_element*stride; 2402 format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); 2403 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); 2404 2405 view->state[0] = va; 2406 view->state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | 2407 S_008F04_STRIDE(stride); 2408 view->state[2] = state->u.buf.last_element + 1 - state->u.buf.first_element; 2409 view->state[3] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 2410 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 2411 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 2412 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | 2413 S_008F0C_NUM_FORMAT(num_format) | 2414 S_008F0C_DATA_FORMAT(format); 2415 2416 LIST_ADDTAIL(&view->list, &sctx->b.texture_buffers); 2417 return &view->base; 2418 } 2419 2420 state_swizzle[0] = state->swizzle_r; 2421 state_swizzle[1] = state->swizzle_g; 2422 state_swizzle[2] = state->swizzle_b; 2423 state_swizzle[3] = state->swizzle_a; 2424 2425 surflevel = tmp->surface.level; 2426 2427 /* Texturing with separate depth and stencil. */ 2428 if (tmp->is_depth && !tmp->is_flushing_texture) { 2429 switch (pipe_format) { 2430 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 2431 pipe_format = PIPE_FORMAT_Z32_FLOAT; 2432 break; 2433 case PIPE_FORMAT_X8Z24_UNORM: 2434 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2435 /* Z24 is always stored like this. */ 2436 pipe_format = PIPE_FORMAT_Z24X8_UNORM; 2437 break; 2438 case PIPE_FORMAT_X24S8_UINT: 2439 case PIPE_FORMAT_S8X24_UINT: 2440 case PIPE_FORMAT_X32_S8X24_UINT: 2441 pipe_format = PIPE_FORMAT_S8_UINT; 2442 surflevel = tmp->surface.stencil_level; 2443 break; 2444 default:; 2445 } 2446 } 2447 2448 desc = util_format_description(pipe_format); 2449 2450 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 2451 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; 2452 const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1}; 2453 2454 switch (pipe_format) { 2455 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2456 case PIPE_FORMAT_X24S8_UINT: 2457 case PIPE_FORMAT_X32_S8X24_UINT: 2458 case PIPE_FORMAT_X8Z24_UNORM: 2459 util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); 2460 break; 2461 default: 2462 util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle); 2463 } 2464 } else { 2465 util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle); 2466 } 2467 2468 first_non_void = util_format_get_first_non_void_channel(pipe_format); 2469 2470 switch (pipe_format) { 2471 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2472 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2473 break; 2474 default: 2475 if (first_non_void < 0) { 2476 if (util_format_is_compressed(pipe_format)) { 2477 switch (pipe_format) { 2478 case PIPE_FORMAT_DXT1_SRGB: 2479 case PIPE_FORMAT_DXT1_SRGBA: 2480 case PIPE_FORMAT_DXT3_SRGBA: 2481 case PIPE_FORMAT_DXT5_SRGBA: 2482 case PIPE_FORMAT_BPTC_SRGBA: 2483 num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 2484 break; 2485 case PIPE_FORMAT_RGTC1_SNORM: 2486 case PIPE_FORMAT_LATC1_SNORM: 2487 case PIPE_FORMAT_RGTC2_SNORM: 2488 case PIPE_FORMAT_LATC2_SNORM: 2489 /* implies float, so use SNORM/UNORM to determine 2490 whether data is signed or not */ 2491 case PIPE_FORMAT_BPTC_RGB_FLOAT: 2492 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 2493 break; 2494 default: 2495 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2496 break; 2497 } 2498 } else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 2499 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2500 } else { 2501 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 2502 } 2503 } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { 2504 num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 2505 } else { 2506 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2507 2508 switch (desc->channel[first_non_void].type) { 2509 case UTIL_FORMAT_TYPE_FLOAT: 2510 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 2511 break; 2512 case UTIL_FORMAT_TYPE_SIGNED: 2513 if (desc->channel[first_non_void].normalized) 2514 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 2515 else if (desc->channel[first_non_void].pure_integer) 2516 num_format = V_008F14_IMG_NUM_FORMAT_SINT; 2517 else 2518 num_format = V_008F14_IMG_NUM_FORMAT_SSCALED; 2519 break; 2520 case UTIL_FORMAT_TYPE_UNSIGNED: 2521 if (desc->channel[first_non_void].normalized) 2522 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2523 else if (desc->channel[first_non_void].pure_integer) 2524 num_format = V_008F14_IMG_NUM_FORMAT_UINT; 2525 else 2526 num_format = V_008F14_IMG_NUM_FORMAT_USCALED; 2527 } 2528 } 2529 } 2530 2531 format = si_translate_texformat(ctx->screen, pipe_format, desc, first_non_void); 2532 if (format == ~0) { 2533 format = 0; 2534 } 2535 2536 /* not supported any more */ 2537 //endian = si_colorformat_endian_swap(format); 2538 2539 width = surflevel[0].npix_x; 2540 height = surflevel[0].npix_y; 2541 depth = surflevel[0].npix_z; 2542 pitch = surflevel[0].nblk_x * util_format_get_blockwidth(pipe_format); 2543 2544 if (texture->target == PIPE_TEXTURE_1D_ARRAY) { 2545 height = 1; 2546 depth = texture->array_size; 2547 } else if (texture->target == PIPE_TEXTURE_2D_ARRAY) { 2548 depth = texture->array_size; 2549 } else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY) 2550 depth = texture->array_size / 6; 2551 2552 va = tmp->resource.gpu_address + surflevel[0].offset; 2553 va += tmp->mipmap_shift * surflevel[texture->last_level].slice_size * tmp->surface.array_size; 2554 2555 view->state[0] = va >> 8; 2556 view->state[1] = (S_008F14_BASE_ADDRESS_HI(va >> 40) | 2557 S_008F14_DATA_FORMAT(format) | 2558 S_008F14_NUM_FORMAT(num_format)); 2559 view->state[2] = (S_008F18_WIDTH(width - 1) | 2560 S_008F18_HEIGHT(height - 1)); 2561 view->state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) | 2562 S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | 2563 S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | 2564 S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) | 2565 S_008F1C_BASE_LEVEL(texture->nr_samples > 1 ? 2566 0 : state->u.tex.first_level - tmp->mipmap_shift) | 2567 S_008F1C_LAST_LEVEL(texture->nr_samples > 1 ? 2568 util_logbase2(texture->nr_samples) : 2569 state->u.tex.last_level - tmp->mipmap_shift) | 2570 S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, 0, false)) | 2571 S_008F1C_POW2_PAD(texture->last_level > 0) | 2572 S_008F1C_TYPE(si_tex_dim(texture->target, texture->nr_samples))); 2573 view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1)); 2574 view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) | 2575 S_008F24_LAST_ARRAY(state->u.tex.last_layer)); 2576 view->state[6] = 0; 2577 view->state[7] = 0; 2578 2579 /* Initialize the sampler view for FMASK. */ 2580 if (tmp->fmask.size) { 2581 uint64_t va = tmp->resource.gpu_address + tmp->fmask.offset; 2582 uint32_t fmask_format; 2583 2584 switch (texture->nr_samples) { 2585 case 2: 2586 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2; 2587 break; 2588 case 4: 2589 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4; 2590 break; 2591 case 8: 2592 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8; 2593 break; 2594 default: 2595 assert(0); 2596 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID; 2597 } 2598 2599 view->fmask_state[0] = va >> 8; 2600 view->fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | 2601 S_008F14_DATA_FORMAT(fmask_format) | 2602 S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT); 2603 view->fmask_state[2] = S_008F18_WIDTH(width - 1) | 2604 S_008F18_HEIGHT(height - 1); 2605 view->fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | 2606 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | 2607 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | 2608 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | 2609 S_008F1C_TILING_INDEX(tmp->fmask.tile_mode_index) | 2610 S_008F1C_TYPE(si_tex_dim(texture->target, 0)); 2611 view->fmask_state[4] = S_008F20_DEPTH(depth - 1) | 2612 S_008F20_PITCH(tmp->fmask.pitch - 1); 2613 view->fmask_state[5] = S_008F24_BASE_ARRAY(state->u.tex.first_layer) | 2614 S_008F24_LAST_ARRAY(state->u.tex.last_layer); 2615 view->fmask_state[6] = 0; 2616 view->fmask_state[7] = 0; 2617 } 2618 2619 return &view->base; 2620} 2621 2622static void si_sampler_view_destroy(struct pipe_context *ctx, 2623 struct pipe_sampler_view *state) 2624{ 2625 struct si_pipe_sampler_view *view = (struct si_pipe_sampler_view *)state; 2626 2627 if (view->resource->b.b.target == PIPE_BUFFER) 2628 LIST_DELINIT(&view->list); 2629 2630 pipe_resource_reference(&state->texture, NULL); 2631 FREE(view); 2632} 2633 2634static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter) 2635{ 2636 return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER || 2637 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER || 2638 (linear_filter && 2639 (wrap == PIPE_TEX_WRAP_CLAMP || 2640 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP)); 2641} 2642 2643static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state) 2644{ 2645 bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST || 2646 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST; 2647 2648 return (state->border_color.ui[0] || state->border_color.ui[1] || 2649 state->border_color.ui[2] || state->border_color.ui[3]) && 2650 (wrap_mode_uses_border_color(state->wrap_s, linear_filter) || 2651 wrap_mode_uses_border_color(state->wrap_t, linear_filter) || 2652 wrap_mode_uses_border_color(state->wrap_r, linear_filter)); 2653} 2654 2655static void *si_create_sampler_state(struct pipe_context *ctx, 2656 const struct pipe_sampler_state *state) 2657{ 2658 struct si_pipe_sampler_state *rstate = CALLOC_STRUCT(si_pipe_sampler_state); 2659 unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0; 2660 unsigned border_color_type; 2661 2662 if (rstate == NULL) { 2663 return NULL; 2664 } 2665 2666 if (sampler_state_needs_border_color(state)) 2667 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER; 2668 else 2669 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; 2670 2671 rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) | 2672 S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) | 2673 S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) | 2674 r600_tex_aniso_filter(state->max_anisotropy) << 9 | 2675 S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) | 2676 S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) | 2677 S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map)); 2678 rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | 2679 S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8))); 2680 rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | 2681 S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter) | aniso_flag_offset) | 2682 S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter) | aniso_flag_offset) | 2683 S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter))); 2684 rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type); 2685 2686 if (border_color_type == V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) { 2687 memcpy(rstate->border_color, state->border_color.ui, 2688 sizeof(rstate->border_color)); 2689 } 2690 2691 return rstate; 2692} 2693 2694/* Upload border colors and update the pointers in resource descriptors. 2695 * There can only be 4096 border colors per context. 2696 * 2697 * XXX: This is broken if the buffer gets reallocated. 2698 */ 2699static void si_set_border_colors(struct si_context *sctx, unsigned count, 2700 void **states) 2701{ 2702 struct si_pipe_sampler_state **rstates = (struct si_pipe_sampler_state **)states; 2703 uint32_t *border_color_table = NULL; 2704 int i, j; 2705 2706 for (i = 0; i < count; i++) { 2707 if (rstates[i] && 2708 G_008F3C_BORDER_COLOR_TYPE(rstates[i]->val[3]) == 2709 V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) { 2710 if (!sctx->border_color_table || 2711 ((sctx->border_color_offset + count - i) & 2712 C_008F3C_BORDER_COLOR_PTR)) { 2713 r600_resource_reference(&sctx->border_color_table, NULL); 2714 sctx->border_color_offset = 0; 2715 2716 sctx->border_color_table = 2717 si_resource_create_custom(&sctx->screen->b.b, 2718 PIPE_USAGE_DYNAMIC, 2719 4096 * 4 * 4); 2720 } 2721 2722 if (!border_color_table) { 2723 border_color_table = 2724 sctx->b.ws->buffer_map(sctx->border_color_table->cs_buf, 2725 sctx->b.rings.gfx.cs, 2726 PIPE_TRANSFER_WRITE | 2727 PIPE_TRANSFER_UNSYNCHRONIZED); 2728 } 2729 2730 for (j = 0; j < 4; j++) { 2731 border_color_table[4 * sctx->border_color_offset + j] = 2732 util_le32_to_cpu(rstates[i]->border_color[j]); 2733 } 2734 2735 rstates[i]->val[3] &= C_008F3C_BORDER_COLOR_PTR; 2736 rstates[i]->val[3] |= S_008F3C_BORDER_COLOR_PTR(sctx->border_color_offset++); 2737 } 2738 } 2739 2740 if (border_color_table) { 2741 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx); 2742 2743 uint64_t va_offset = sctx->border_color_table->gpu_address; 2744 2745 si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va_offset >> 8); 2746 if (sctx->b.chip_class >= CIK) 2747 si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va_offset >> 40); 2748 si_pm4_add_bo(pm4, sctx->border_color_table, RADEON_USAGE_READ, 2749 RADEON_PRIO_SHADER_DATA); 2750 si_pm4_set_state(sctx, ta_bordercolor_base, pm4); 2751 } 2752} 2753 2754static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader, 2755 unsigned start, unsigned count, 2756 void **states) 2757{ 2758 struct si_context *sctx = (struct si_context *)ctx; 2759 2760 if (!count || shader >= SI_NUM_SHADERS) 2761 return; 2762 2763 si_set_border_colors(sctx, count, states); 2764 si_set_sampler_descriptors(sctx, shader, start, count, states); 2765} 2766 2767static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) 2768{ 2769 struct si_context *sctx = (struct si_context *)ctx; 2770 struct si_state_sample_mask *state = CALLOC_STRUCT(si_state_sample_mask); 2771 struct si_pm4_state *pm4 = &state->pm4; 2772 uint16_t mask = sample_mask; 2773 2774 if (state == NULL) 2775 return; 2776 2777 state->sample_mask = mask; 2778 si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, mask | (mask << 16)); 2779 si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, mask | (mask << 16)); 2780 2781 si_pm4_set_state(sctx, sample_mask, state); 2782} 2783 2784static void si_delete_sampler_state(struct pipe_context *ctx, void *state) 2785{ 2786 free(state); 2787} 2788 2789/* 2790 * Vertex elements & buffers 2791 */ 2792 2793static void *si_create_vertex_elements(struct pipe_context *ctx, 2794 unsigned count, 2795 const struct pipe_vertex_element *elements) 2796{ 2797 struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element); 2798 int i; 2799 2800 assert(count < PIPE_MAX_ATTRIBS); 2801 if (!v) 2802 return NULL; 2803 2804 v->count = count; 2805 for (i = 0; i < count; ++i) { 2806 const struct util_format_description *desc; 2807 unsigned data_format, num_format; 2808 int first_non_void; 2809 2810 desc = util_format_description(elements[i].src_format); 2811 first_non_void = util_format_get_first_non_void_channel(elements[i].src_format); 2812 data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); 2813 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); 2814 2815 v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 2816 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 2817 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 2818 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | 2819 S_008F0C_NUM_FORMAT(num_format) | 2820 S_008F0C_DATA_FORMAT(data_format); 2821 v->format_size[i] = desc->block.bits / 8; 2822 } 2823 memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count); 2824 2825 return v; 2826} 2827 2828static void si_bind_vertex_elements(struct pipe_context *ctx, void *state) 2829{ 2830 struct si_context *sctx = (struct si_context *)ctx; 2831 struct si_vertex_element *v = (struct si_vertex_element*)state; 2832 2833 sctx->vertex_elements = v; 2834 sctx->vertex_buffers_dirty = true; 2835} 2836 2837static void si_delete_vertex_element(struct pipe_context *ctx, void *state) 2838{ 2839 struct si_context *sctx = (struct si_context *)ctx; 2840 2841 if (sctx->vertex_elements == state) 2842 sctx->vertex_elements = NULL; 2843 FREE(state); 2844} 2845 2846static void si_set_vertex_buffers(struct pipe_context *ctx, 2847 unsigned start_slot, unsigned count, 2848 const struct pipe_vertex_buffer *buffers) 2849{ 2850 struct si_context *sctx = (struct si_context *)ctx; 2851 struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot; 2852 int i; 2853 2854 assert(start_slot + count <= Elements(sctx->vertex_buffer)); 2855 2856 if (buffers) { 2857 for (i = 0; i < count; i++) { 2858 const struct pipe_vertex_buffer *src = buffers + i; 2859 struct pipe_vertex_buffer *dsti = dst + i; 2860 2861 pipe_resource_reference(&dsti->buffer, src->buffer); 2862 dsti->buffer_offset = src->buffer_offset; 2863 dsti->stride = src->stride; 2864 } 2865 } else { 2866 for (i = 0; i < count; i++) { 2867 pipe_resource_reference(&dst[i].buffer, NULL); 2868 } 2869 } 2870 sctx->vertex_buffers_dirty = true; 2871} 2872 2873static void si_set_index_buffer(struct pipe_context *ctx, 2874 const struct pipe_index_buffer *ib) 2875{ 2876 struct si_context *sctx = (struct si_context *)ctx; 2877 2878 if (ib) { 2879 pipe_resource_reference(&sctx->index_buffer.buffer, ib->buffer); 2880 memcpy(&sctx->index_buffer, ib, sizeof(*ib)); 2881 } else { 2882 pipe_resource_reference(&sctx->index_buffer.buffer, NULL); 2883 } 2884} 2885 2886/* 2887 * Misc 2888 */ 2889static void si_set_polygon_stipple(struct pipe_context *ctx, 2890 const struct pipe_poly_stipple *state) 2891{ 2892} 2893 2894static void si_texture_barrier(struct pipe_context *ctx) 2895{ 2896 struct si_context *sctx = (struct si_context *)ctx; 2897 2898 sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE | 2899 R600_CONTEXT_FLUSH_AND_INV_CB; 2900} 2901 2902static void *si_create_blend_custom(struct si_context *sctx, unsigned mode) 2903{ 2904 struct pipe_blend_state blend; 2905 2906 memset(&blend, 0, sizeof(blend)); 2907 blend.independent_blend_enable = true; 2908 blend.rt[0].colormask = 0xf; 2909 return si_create_blend_state_mode(&sctx->b.b, &blend, mode); 2910} 2911 2912static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable) 2913{ 2914 /* XXX Turn this into a proper state. Right now the queries are 2915 * enabled in draw_vbo, which snoops r600_common_context to see 2916 * if any occlusion queries are active. */ 2917} 2918 2919static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw, 2920 bool include_draw_vbo) 2921{ 2922 si_need_cs_space((struct si_context*)ctx, num_dw, include_draw_vbo); 2923} 2924 2925void si_init_state_functions(struct si_context *sctx) 2926{ 2927 si_init_atom(&sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state, 0); 2928 2929 sctx->b.b.create_blend_state = si_create_blend_state; 2930 sctx->b.b.bind_blend_state = si_bind_blend_state; 2931 sctx->b.b.delete_blend_state = si_delete_blend_state; 2932 sctx->b.b.set_blend_color = si_set_blend_color; 2933 2934 sctx->b.b.create_rasterizer_state = si_create_rs_state; 2935 sctx->b.b.bind_rasterizer_state = si_bind_rs_state; 2936 sctx->b.b.delete_rasterizer_state = si_delete_rs_state; 2937 2938 sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state; 2939 sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state; 2940 sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state; 2941 2942 sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx); 2943 sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE); 2944 sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS); 2945 sctx->custom_blend_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR); 2946 2947 sctx->b.b.set_clip_state = si_set_clip_state; 2948 sctx->b.b.set_scissor_states = si_set_scissor_states; 2949 sctx->b.b.set_viewport_states = si_set_viewport_states; 2950 sctx->b.b.set_stencil_ref = si_set_pipe_stencil_ref; 2951 2952 sctx->b.b.set_framebuffer_state = si_set_framebuffer_state; 2953 sctx->b.b.get_sample_position = cayman_get_sample_position; 2954 2955 sctx->b.b.create_vs_state = si_create_vs_state; 2956 sctx->b.b.create_fs_state = si_create_fs_state; 2957 sctx->b.b.bind_vs_state = si_bind_vs_shader; 2958 sctx->b.b.bind_fs_state = si_bind_ps_shader; 2959 sctx->b.b.delete_vs_state = si_delete_vs_shader; 2960 sctx->b.b.delete_fs_state = si_delete_ps_shader; 2961 2962 sctx->b.b.create_gs_state = si_create_gs_state; 2963 sctx->b.b.bind_gs_state = si_bind_gs_shader; 2964 sctx->b.b.delete_gs_state = si_delete_gs_shader; 2965 2966 sctx->b.b.create_sampler_state = si_create_sampler_state; 2967 sctx->b.b.bind_sampler_states = si_bind_sampler_states; 2968 sctx->b.b.delete_sampler_state = si_delete_sampler_state; 2969 2970 sctx->b.b.create_sampler_view = si_create_sampler_view; 2971 sctx->b.b.sampler_view_destroy = si_sampler_view_destroy; 2972 2973 sctx->b.b.set_sample_mask = si_set_sample_mask; 2974 2975 sctx->b.b.create_vertex_elements_state = si_create_vertex_elements; 2976 sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements; 2977 sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element; 2978 sctx->b.b.set_vertex_buffers = si_set_vertex_buffers; 2979 sctx->b.b.set_index_buffer = si_set_index_buffer; 2980 2981 sctx->b.b.texture_barrier = si_texture_barrier; 2982 sctx->b.b.set_polygon_stipple = si_set_polygon_stipple; 2983 sctx->b.b.set_min_samples = si_set_min_samples; 2984 2985 sctx->b.dma_copy = si_dma_copy; 2986 sctx->b.set_occlusion_query_state = si_set_occlusion_query_state; 2987 sctx->b.need_gfx_cs_space = si_need_gfx_cs_space; 2988 2989 sctx->b.b.draw_vbo = si_draw_vbo; 2990} 2991 2992void si_init_config(struct si_context *sctx) 2993{ 2994 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx); 2995 2996 if (pm4 == NULL) 2997 return; 2998 2999 si_cmd_context_control(pm4); 3000 3001 si_pm4_set_reg(pm4, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0); 3002 si_pm4_set_reg(pm4, R_028A14_VGT_HOS_CNTL, 0x0); 3003 si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0); 3004 si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0); 3005 si_pm4_set_reg(pm4, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0); 3006 si_pm4_set_reg(pm4, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0); 3007 si_pm4_set_reg(pm4, R_028A28_VGT_GROUP_FIRST_DECR, 0x0); 3008 si_pm4_set_reg(pm4, R_028A2C_VGT_GROUP_DECR, 0x0); 3009 si_pm4_set_reg(pm4, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0); 3010 si_pm4_set_reg(pm4, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0); 3011 si_pm4_set_reg(pm4, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0); 3012 si_pm4_set_reg(pm4, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0); 3013 3014 /* FIXME calculate these values somehow ??? */ 3015 si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, 0x80); 3016 si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40); 3017 si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2); 3018 3019 si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0x0); 3020 si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0); 3021 si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0); 3022 si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); 3023 3024 si_pm4_set_reg(pm4, R_028B60_VGT_GS_VERT_ITEMSIZE_1, 0); 3025 si_pm4_set_reg(pm4, R_028B64_VGT_GS_VERT_ITEMSIZE_2, 0); 3026 si_pm4_set_reg(pm4, R_028B68_VGT_GS_VERT_ITEMSIZE_3, 0); 3027 si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT, 0); 3028 3029 si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); 3030 si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0x00000000); 3031 si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0); 3032 if (sctx->b.chip_class < CIK) 3033 si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | 3034 S_008A14_CLIP_VTX_REORDER_ENA(1)); 3035 3036 si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210); 3037 si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98); 3038 3039 si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0); 3040 3041 if (sctx->b.chip_class >= CIK) { 3042 switch (sctx->screen->b.family) { 3043 case CHIP_BONAIRE: 3044 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x16000012); 3045 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000); 3046 break; 3047 case CHIP_HAWAII: 3048 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x3a00161a); 3049 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x0000002e); 3050 break; 3051 case CHIP_KAVERI: 3052 /* XXX todo */ 3053 case CHIP_KABINI: 3054 /* XXX todo */ 3055 case CHIP_MULLINS: 3056 /* XXX todo */ 3057 default: 3058 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000); 3059 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000); 3060 break; 3061 } 3062 } else { 3063 switch (sctx->screen->b.family) { 3064 case CHIP_TAHITI: 3065 case CHIP_PITCAIRN: 3066 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x2a00126a); 3067 break; 3068 case CHIP_VERDE: 3069 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x0000124a); 3070 break; 3071 case CHIP_OLAND: 3072 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000082); 3073 break; 3074 case CHIP_HAINAN: 3075 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000); 3076 break; 3077 default: 3078 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000); 3079 break; 3080 } 3081 } 3082 3083 si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); 3084 si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); 3085 si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR, 3086 S_028244_BR_X(16384) | S_028244_BR_Y(16384)); 3087 si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0); 3088 si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR, 3089 S_028034_BR_X(16384) | S_028034_BR_Y(16384)); 3090 3091 si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); 3092 si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); 3093 si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000); 3094 si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000); 3095 si_pm4_set_reg(pm4, R_028818_PA_CL_VTE_CNTL, 0x0000043F); 3096 si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0x00000000); 3097 si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000); 3098 si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000); 3099 si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000); 3100 si_pm4_set_reg(pm4, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000); 3101 si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, 0x00000000); 3102 si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, 0x00000000); 3103 si_pm4_set_reg(pm4, R_028028_DB_STENCIL_CLEAR, 0x00000000); 3104 si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); 3105 si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); 3106 si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0); 3107 3108 /* There is a hang if stencil is used and fast stencil is enabled 3109 * regardless of whether HTILE is depth-only or not. 3110 */ 3111 si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 3112 S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | 3113 S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE) | 3114 S_02800C_FAST_STENCIL_DISABLE(1)); 3115 3116 si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0); 3117 si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0); 3118 si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0); 3119 3120 if (sctx->b.chip_class >= CIK) { 3121 si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff)); 3122 si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(0)); 3123 si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff)); 3124 } 3125 3126 si_pm4_set_state(sctx, init, pm4); 3127} 3128