si_state.c revision d808de31bd3bac26cdea4d1d9464ad7f010d77d1
1/* 2 * Copyright 2012 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Christian König <christian.koenig@amd.com> 25 */ 26 27#include "si_pipe.h" 28#include "si_shader.h" 29#include "sid.h" 30#include "../radeon/r600_cs.h" 31 32#include "tgsi/tgsi_parse.h" 33#include "tgsi/tgsi_scan.h" 34#include "util/u_format.h" 35#include "util/u_format_s3tc.h" 36#include "util/u_framebuffer.h" 37#include "util/u_helpers.h" 38#include "util/u_memory.h" 39 40static void si_init_atom(struct r600_atom *atom, struct r600_atom **list_elem, 41 void (*emit)(struct si_context *ctx, struct r600_atom *state), 42 unsigned num_dw) 43{ 44 atom->emit = (void*)emit; 45 atom->num_dw = num_dw; 46 atom->dirty = false; 47 *list_elem = atom; 48} 49 50uint32_t si_num_banks(struct si_screen *sscreen, unsigned bpe, unsigned tile_split, 51 unsigned tile_mode_index) 52{ 53 if ((sscreen->b.chip_class == CIK) && 54 sscreen->b.info.cik_macrotile_mode_array_valid) { 55 unsigned index, tileb; 56 57 tileb = 8 * 8 * bpe; 58 tileb = MIN2(tile_split, tileb); 59 60 for (index = 0; tileb > 64; index++) { 61 tileb >>= 1; 62 } 63 assert(index < 16); 64 65 return (sscreen->b.info.cik_macrotile_mode_array[index] >> 6) & 0x3; 66 } 67 68 if ((sscreen->b.chip_class == SI) && 69 sscreen->b.info.si_tile_mode_array_valid) { 70 assert(tile_mode_index < 32); 71 72 return (sscreen->b.info.si_tile_mode_array[tile_mode_index] >> 20) & 0x3; 73 } 74 75 /* The old way. */ 76 switch (sscreen->b.tiling_info.num_banks) { 77 case 2: 78 return V_02803C_ADDR_SURF_2_BANK; 79 case 4: 80 return V_02803C_ADDR_SURF_4_BANK; 81 case 8: 82 default: 83 return V_02803C_ADDR_SURF_8_BANK; 84 case 16: 85 return V_02803C_ADDR_SURF_16_BANK; 86 } 87} 88 89unsigned cik_tile_split(unsigned tile_split) 90{ 91 switch (tile_split) { 92 case 64: 93 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_64B; 94 break; 95 case 128: 96 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_128B; 97 break; 98 case 256: 99 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_256B; 100 break; 101 case 512: 102 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_512B; 103 break; 104 default: 105 case 1024: 106 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_1KB; 107 break; 108 case 2048: 109 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_2KB; 110 break; 111 case 4096: 112 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_4KB; 113 break; 114 } 115 return tile_split; 116} 117 118unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect) 119{ 120 switch (macro_tile_aspect) { 121 default: 122 case 1: 123 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_1; 124 break; 125 case 2: 126 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_2; 127 break; 128 case 4: 129 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_4; 130 break; 131 case 8: 132 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_8; 133 break; 134 } 135 return macro_tile_aspect; 136} 137 138unsigned cik_bank_wh(unsigned bankwh) 139{ 140 switch (bankwh) { 141 default: 142 case 1: 143 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_1; 144 break; 145 case 2: 146 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_2; 147 break; 148 case 4: 149 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_4; 150 break; 151 case 8: 152 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_8; 153 break; 154 } 155 return bankwh; 156} 157 158unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode) 159{ 160 if (sscreen->b.info.si_tile_mode_array_valid) { 161 uint32_t gb_tile_mode = sscreen->b.info.si_tile_mode_array[tile_mode]; 162 163 return G_009910_PIPE_CONFIG(gb_tile_mode); 164 } 165 166 /* This is probably broken for a lot of chips, but it's only used 167 * if the kernel cannot return the tile mode array for CIK. */ 168 switch (sscreen->b.info.r600_num_tile_pipes) { 169 case 16: 170 return V_02803C_X_ADDR_SURF_P16_32X32_16X16; 171 case 8: 172 return V_02803C_X_ADDR_SURF_P8_32X32_16X16; 173 case 4: 174 default: 175 if (sscreen->b.info.r600_num_backends == 4) 176 return V_02803C_X_ADDR_SURF_P4_16X16; 177 else 178 return V_02803C_X_ADDR_SURF_P4_8X16; 179 case 2: 180 return V_02803C_ADDR_SURF_P2; 181 } 182} 183 184static unsigned si_map_swizzle(unsigned swizzle) 185{ 186 switch (swizzle) { 187 case UTIL_FORMAT_SWIZZLE_Y: 188 return V_008F0C_SQ_SEL_Y; 189 case UTIL_FORMAT_SWIZZLE_Z: 190 return V_008F0C_SQ_SEL_Z; 191 case UTIL_FORMAT_SWIZZLE_W: 192 return V_008F0C_SQ_SEL_W; 193 case UTIL_FORMAT_SWIZZLE_0: 194 return V_008F0C_SQ_SEL_0; 195 case UTIL_FORMAT_SWIZZLE_1: 196 return V_008F0C_SQ_SEL_1; 197 default: /* UTIL_FORMAT_SWIZZLE_X */ 198 return V_008F0C_SQ_SEL_X; 199 } 200} 201 202static uint32_t S_FIXED(float value, uint32_t frac_bits) 203{ 204 return value * (1 << frac_bits); 205} 206 207/* 12.4 fixed-point */ 208static unsigned si_pack_float_12p4(float x) 209{ 210 return x <= 0 ? 0 : 211 x >= 4096 ? 0xffff : x * 16; 212} 213 214/* 215 * inferred framebuffer and blender state 216 */ 217static void si_update_fb_blend_state(struct si_context *sctx) 218{ 219 struct si_pm4_state *pm4; 220 struct si_state_blend *blend = sctx->queued.named.blend; 221 uint32_t mask; 222 223 if (blend == NULL) 224 return; 225 226 pm4 = si_pm4_alloc_state(sctx); 227 if (pm4 == NULL) 228 return; 229 230 mask = (1ULL << ((unsigned)sctx->framebuffer.state.nr_cbufs * 4)) - 1; 231 mask &= blend->cb_target_mask; 232 si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask); 233 234 si_pm4_set_state(sctx, fb_blend, pm4); 235} 236 237/* 238 * Blender functions 239 */ 240 241static uint32_t si_translate_blend_function(int blend_func) 242{ 243 switch (blend_func) { 244 case PIPE_BLEND_ADD: 245 return V_028780_COMB_DST_PLUS_SRC; 246 case PIPE_BLEND_SUBTRACT: 247 return V_028780_COMB_SRC_MINUS_DST; 248 case PIPE_BLEND_REVERSE_SUBTRACT: 249 return V_028780_COMB_DST_MINUS_SRC; 250 case PIPE_BLEND_MIN: 251 return V_028780_COMB_MIN_DST_SRC; 252 case PIPE_BLEND_MAX: 253 return V_028780_COMB_MAX_DST_SRC; 254 default: 255 R600_ERR("Unknown blend function %d\n", blend_func); 256 assert(0); 257 break; 258 } 259 return 0; 260} 261 262static uint32_t si_translate_blend_factor(int blend_fact) 263{ 264 switch (blend_fact) { 265 case PIPE_BLENDFACTOR_ONE: 266 return V_028780_BLEND_ONE; 267 case PIPE_BLENDFACTOR_SRC_COLOR: 268 return V_028780_BLEND_SRC_COLOR; 269 case PIPE_BLENDFACTOR_SRC_ALPHA: 270 return V_028780_BLEND_SRC_ALPHA; 271 case PIPE_BLENDFACTOR_DST_ALPHA: 272 return V_028780_BLEND_DST_ALPHA; 273 case PIPE_BLENDFACTOR_DST_COLOR: 274 return V_028780_BLEND_DST_COLOR; 275 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 276 return V_028780_BLEND_SRC_ALPHA_SATURATE; 277 case PIPE_BLENDFACTOR_CONST_COLOR: 278 return V_028780_BLEND_CONSTANT_COLOR; 279 case PIPE_BLENDFACTOR_CONST_ALPHA: 280 return V_028780_BLEND_CONSTANT_ALPHA; 281 case PIPE_BLENDFACTOR_ZERO: 282 return V_028780_BLEND_ZERO; 283 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 284 return V_028780_BLEND_ONE_MINUS_SRC_COLOR; 285 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 286 return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; 287 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 288 return V_028780_BLEND_ONE_MINUS_DST_ALPHA; 289 case PIPE_BLENDFACTOR_INV_DST_COLOR: 290 return V_028780_BLEND_ONE_MINUS_DST_COLOR; 291 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 292 return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR; 293 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 294 return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA; 295 case PIPE_BLENDFACTOR_SRC1_COLOR: 296 return V_028780_BLEND_SRC1_COLOR; 297 case PIPE_BLENDFACTOR_SRC1_ALPHA: 298 return V_028780_BLEND_SRC1_ALPHA; 299 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 300 return V_028780_BLEND_INV_SRC1_COLOR; 301 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 302 return V_028780_BLEND_INV_SRC1_ALPHA; 303 default: 304 R600_ERR("Bad blend factor %d not supported!\n", blend_fact); 305 assert(0); 306 break; 307 } 308 return 0; 309} 310 311static void *si_create_blend_state_mode(struct pipe_context *ctx, 312 const struct pipe_blend_state *state, 313 unsigned mode) 314{ 315 struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend); 316 struct si_pm4_state *pm4 = &blend->pm4; 317 318 uint32_t color_control = 0; 319 320 if (blend == NULL) 321 return NULL; 322 323 blend->alpha_to_one = state->alpha_to_one; 324 325 if (state->logicop_enable) { 326 color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4)); 327 } else { 328 color_control |= S_028808_ROP3(0xcc); 329 } 330 331 si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 332 S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 333 S_028B70_ALPHA_TO_MASK_OFFSET0(2) | 334 S_028B70_ALPHA_TO_MASK_OFFSET1(2) | 335 S_028B70_ALPHA_TO_MASK_OFFSET2(2) | 336 S_028B70_ALPHA_TO_MASK_OFFSET3(2)); 337 338 blend->cb_target_mask = 0; 339 for (int i = 0; i < 8; i++) { 340 /* state->rt entries > 0 only written if independent blending */ 341 const int j = state->independent_blend_enable ? i : 0; 342 343 unsigned eqRGB = state->rt[j].rgb_func; 344 unsigned srcRGB = state->rt[j].rgb_src_factor; 345 unsigned dstRGB = state->rt[j].rgb_dst_factor; 346 unsigned eqA = state->rt[j].alpha_func; 347 unsigned srcA = state->rt[j].alpha_src_factor; 348 unsigned dstA = state->rt[j].alpha_dst_factor; 349 350 unsigned blend_cntl = 0; 351 352 /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */ 353 blend->cb_target_mask |= state->rt[j].colormask << (4 * i); 354 355 if (!state->rt[j].blend_enable) { 356 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 357 continue; 358 } 359 360 blend_cntl |= S_028780_ENABLE(1); 361 blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB)); 362 blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB)); 363 blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB)); 364 365 if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { 366 blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1); 367 blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA)); 368 blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA)); 369 blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA)); 370 } 371 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 372 } 373 374 if (blend->cb_target_mask) { 375 color_control |= S_028808_MODE(mode); 376 } else { 377 color_control |= S_028808_MODE(V_028808_CB_DISABLE); 378 } 379 si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control); 380 381 return blend; 382} 383 384static void *si_create_blend_state(struct pipe_context *ctx, 385 const struct pipe_blend_state *state) 386{ 387 return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); 388} 389 390static void si_bind_blend_state(struct pipe_context *ctx, void *state) 391{ 392 struct si_context *sctx = (struct si_context *)ctx; 393 si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state); 394 si_update_fb_blend_state(sctx); 395} 396 397static void si_delete_blend_state(struct pipe_context *ctx, void *state) 398{ 399 struct si_context *sctx = (struct si_context *)ctx; 400 si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state); 401} 402 403static void si_set_blend_color(struct pipe_context *ctx, 404 const struct pipe_blend_color *state) 405{ 406 struct si_context *sctx = (struct si_context *)ctx; 407 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx); 408 409 if (pm4 == NULL) 410 return; 411 412 si_pm4_set_reg(pm4, R_028414_CB_BLEND_RED, fui(state->color[0])); 413 si_pm4_set_reg(pm4, R_028418_CB_BLEND_GREEN, fui(state->color[1])); 414 si_pm4_set_reg(pm4, R_02841C_CB_BLEND_BLUE, fui(state->color[2])); 415 si_pm4_set_reg(pm4, R_028420_CB_BLEND_ALPHA, fui(state->color[3])); 416 417 si_pm4_set_state(sctx, blend_color, pm4); 418} 419 420/* 421 * Clipping, scissors and viewport 422 */ 423 424static void si_set_clip_state(struct pipe_context *ctx, 425 const struct pipe_clip_state *state) 426{ 427 struct si_context *sctx = (struct si_context *)ctx; 428 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx); 429 struct pipe_constant_buffer cb; 430 431 if (pm4 == NULL) 432 return; 433 434 for (int i = 0; i < 6; i++) { 435 si_pm4_set_reg(pm4, R_0285BC_PA_CL_UCP_0_X + i * 16, 436 fui(state->ucp[i][0])); 437 si_pm4_set_reg(pm4, R_0285C0_PA_CL_UCP_0_Y + i * 16, 438 fui(state->ucp[i][1])); 439 si_pm4_set_reg(pm4, R_0285C4_PA_CL_UCP_0_Z + i * 16, 440 fui(state->ucp[i][2])); 441 si_pm4_set_reg(pm4, R_0285C8_PA_CL_UCP_0_W + i * 16, 442 fui(state->ucp[i][3])); 443 } 444 445 cb.buffer = NULL; 446 cb.user_buffer = state->ucp; 447 cb.buffer_offset = 0; 448 cb.buffer_size = 4*4*8; 449 ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, SI_DRIVER_STATE_CONST_BUF, &cb); 450 pipe_resource_reference(&cb.buffer, NULL); 451 452 si_pm4_set_state(sctx, clip, pm4); 453} 454 455static void si_set_scissor_states(struct pipe_context *ctx, 456 unsigned start_slot, 457 unsigned num_scissors, 458 const struct pipe_scissor_state *state) 459{ 460 struct si_context *sctx = (struct si_context *)ctx; 461 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx); 462 463 if (pm4 == NULL) 464 return; 465 466 si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 467 S_028250_TL_X(state->minx) | S_028250_TL_Y(state->miny) | 468 S_028250_WINDOW_OFFSET_DISABLE(1)); 469 si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR, 470 S_028254_BR_X(state->maxx) | S_028254_BR_Y(state->maxy)); 471 472 si_pm4_set_state(sctx, scissor, pm4); 473} 474 475static void si_set_viewport_states(struct pipe_context *ctx, 476 unsigned start_slot, 477 unsigned num_viewports, 478 const struct pipe_viewport_state *state) 479{ 480 struct si_context *sctx = (struct si_context *)ctx; 481 struct si_state_viewport *viewport = CALLOC_STRUCT(si_state_viewport); 482 struct si_pm4_state *pm4 = &viewport->pm4; 483 484 if (viewport == NULL) 485 return; 486 487 viewport->viewport = *state; 488 si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0])); 489 si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0])); 490 si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1])); 491 si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1])); 492 si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2])); 493 si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2])); 494 495 si_pm4_set_state(sctx, viewport, viewport); 496} 497 498/* 499 * inferred state between framebuffer and rasterizer 500 */ 501static void si_update_fb_rs_state(struct si_context *sctx) 502{ 503 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 504 struct si_pm4_state *pm4; 505 float offset_units; 506 507 if (!rs || !sctx->framebuffer.state.zsbuf) 508 return; 509 510 offset_units = sctx->queued.named.rasterizer->offset_units; 511 switch (sctx->framebuffer.state.zsbuf->texture->format) { 512 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 513 case PIPE_FORMAT_X8Z24_UNORM: 514 case PIPE_FORMAT_Z24X8_UNORM: 515 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 516 offset_units *= 2.0f; 517 break; 518 case PIPE_FORMAT_Z32_FLOAT: 519 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 520 offset_units *= 1.0f; 521 break; 522 case PIPE_FORMAT_Z16_UNORM: 523 offset_units *= 4.0f; 524 break; 525 default: 526 return; 527 } 528 529 pm4 = si_pm4_alloc_state(sctx); 530 531 if (pm4 == NULL) 532 return; 533 534 /* FIXME some of those reg can be computed with cso */ 535 si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 536 fui(sctx->queued.named.rasterizer->offset_scale)); 537 si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units)); 538 si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, 539 fui(sctx->queued.named.rasterizer->offset_scale)); 540 si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units)); 541 542 si_pm4_set_state(sctx, fb_rs, pm4); 543} 544 545/* 546 * Rasterizer 547 */ 548 549static uint32_t si_translate_fill(uint32_t func) 550{ 551 switch(func) { 552 case PIPE_POLYGON_MODE_FILL: 553 return V_028814_X_DRAW_TRIANGLES; 554 case PIPE_POLYGON_MODE_LINE: 555 return V_028814_X_DRAW_LINES; 556 case PIPE_POLYGON_MODE_POINT: 557 return V_028814_X_DRAW_POINTS; 558 default: 559 assert(0); 560 return V_028814_X_DRAW_POINTS; 561 } 562} 563 564static void *si_create_rs_state(struct pipe_context *ctx, 565 const struct pipe_rasterizer_state *state) 566{ 567 struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer); 568 struct si_pm4_state *pm4 = &rs->pm4; 569 unsigned tmp; 570 unsigned prov_vtx = 1, polygon_dual_mode; 571 float psize_min, psize_max; 572 573 if (rs == NULL) { 574 return NULL; 575 } 576 577 rs->two_side = state->light_twoside; 578 rs->multisample_enable = state->multisample; 579 rs->clip_plane_enable = state->clip_plane_enable; 580 rs->line_stipple_enable = state->line_stipple_enable; 581 582 polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL || 583 state->fill_back != PIPE_POLYGON_MODE_FILL); 584 585 if (state->flatshade_first) 586 prov_vtx = 0; 587 588 rs->flatshade = state->flatshade; 589 rs->sprite_coord_enable = state->sprite_coord_enable; 590 rs->pa_sc_line_stipple = state->line_stipple_enable ? 591 S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | 592 S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0; 593 rs->pa_su_sc_mode_cntl = 594 S_028814_PROVOKING_VTX_LAST(prov_vtx) | 595 S_028814_CULL_FRONT(state->rasterizer_discard || (state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | 596 S_028814_CULL_BACK(state->rasterizer_discard || (state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | 597 S_028814_FACE(!state->front_ccw) | 598 S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | 599 S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | 600 S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) | 601 S_028814_POLY_MODE(polygon_dual_mode) | 602 S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) | 603 S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)); 604 rs->pa_cl_clip_cntl = 605 S_028810_PS_UCP_MODE(3) | 606 S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) | 607 S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) | 608 S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) | 609 S_028810_DX_LINEAR_ATTR_CLIP_ENA(1); 610 611 /* offset */ 612 rs->offset_units = state->offset_units; 613 rs->offset_scale = state->offset_scale * 12.0f; 614 615 tmp = S_0286D4_FLAT_SHADE_ENA(1); 616 if (state->sprite_coord_enable) { 617 tmp |= S_0286D4_PNT_SPRITE_ENA(1) | 618 S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | 619 S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) | 620 S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) | 621 S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1); 622 if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { 623 tmp |= S_0286D4_PNT_SPRITE_TOP_1(1); 624 } 625 } 626 si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0, tmp); 627 628 /* point size 12.4 fixed point */ 629 tmp = (unsigned)(state->point_size * 8.0); 630 si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp)); 631 632 if (state->point_size_per_vertex) { 633 psize_min = util_get_min_point_size(state); 634 psize_max = 8192; 635 } else { 636 /* Force the point size to be as if the vertex output was disabled. */ 637 psize_min = state->point_size; 638 psize_max = state->point_size; 639 } 640 /* Divide by two, because 0.5 = 1 pixel. */ 641 si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX, 642 S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) | 643 S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2))); 644 645 tmp = (unsigned)state->line_width * 8; 646 si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp)); 647 si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0, 648 S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | 649 S_028A48_MSAA_ENABLE(state->multisample) | 650 S_028A48_VPORT_SCISSOR_ENABLE(state->scissor)); 651 652 si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL, 653 S_028BE4_PIX_CENTER(state->half_pixel_center) | 654 S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH)); 655 656 si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp)); 657 658 return rs; 659} 660 661static void si_bind_rs_state(struct pipe_context *ctx, void *state) 662{ 663 struct si_context *sctx = (struct si_context *)ctx; 664 struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; 665 666 if (state == NULL) 667 return; 668 669 // TODO 670 sctx->sprite_coord_enable = rs->sprite_coord_enable; 671 sctx->pa_sc_line_stipple = rs->pa_sc_line_stipple; 672 sctx->pa_su_sc_mode_cntl = rs->pa_su_sc_mode_cntl; 673 674 si_pm4_bind_state(sctx, rasterizer, rs); 675 si_update_fb_rs_state(sctx); 676} 677 678static void si_delete_rs_state(struct pipe_context *ctx, void *state) 679{ 680 struct si_context *sctx = (struct si_context *)ctx; 681 si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state); 682} 683 684/* 685 * infeered state between dsa and stencil ref 686 */ 687static void si_update_dsa_stencil_ref(struct si_context *sctx) 688{ 689 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx); 690 struct pipe_stencil_ref *ref = &sctx->stencil_ref; 691 struct si_state_dsa *dsa = sctx->queued.named.dsa; 692 693 if (pm4 == NULL) 694 return; 695 696 si_pm4_set_reg(pm4, R_028430_DB_STENCILREFMASK, 697 S_028430_STENCILTESTVAL(ref->ref_value[0]) | 698 S_028430_STENCILMASK(dsa->valuemask[0]) | 699 S_028430_STENCILWRITEMASK(dsa->writemask[0]) | 700 S_028430_STENCILOPVAL(1)); 701 si_pm4_set_reg(pm4, R_028434_DB_STENCILREFMASK_BF, 702 S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) | 703 S_028434_STENCILMASK_BF(dsa->valuemask[1]) | 704 S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) | 705 S_028434_STENCILOPVAL_BF(1)); 706 707 si_pm4_set_state(sctx, dsa_stencil_ref, pm4); 708} 709 710static void si_set_pipe_stencil_ref(struct pipe_context *ctx, 711 const struct pipe_stencil_ref *state) 712{ 713 struct si_context *sctx = (struct si_context *)ctx; 714 sctx->stencil_ref = *state; 715 si_update_dsa_stencil_ref(sctx); 716} 717 718 719/* 720 * DSA 721 */ 722 723static uint32_t si_translate_stencil_op(int s_op) 724{ 725 switch (s_op) { 726 case PIPE_STENCIL_OP_KEEP: 727 return V_02842C_STENCIL_KEEP; 728 case PIPE_STENCIL_OP_ZERO: 729 return V_02842C_STENCIL_ZERO; 730 case PIPE_STENCIL_OP_REPLACE: 731 return V_02842C_STENCIL_REPLACE_TEST; 732 case PIPE_STENCIL_OP_INCR: 733 return V_02842C_STENCIL_ADD_CLAMP; 734 case PIPE_STENCIL_OP_DECR: 735 return V_02842C_STENCIL_SUB_CLAMP; 736 case PIPE_STENCIL_OP_INCR_WRAP: 737 return V_02842C_STENCIL_ADD_WRAP; 738 case PIPE_STENCIL_OP_DECR_WRAP: 739 return V_02842C_STENCIL_SUB_WRAP; 740 case PIPE_STENCIL_OP_INVERT: 741 return V_02842C_STENCIL_INVERT; 742 default: 743 R600_ERR("Unknown stencil op %d", s_op); 744 assert(0); 745 break; 746 } 747 return 0; 748} 749 750static void *si_create_dsa_state(struct pipe_context *ctx, 751 const struct pipe_depth_stencil_alpha_state *state) 752{ 753 struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa); 754 struct si_pm4_state *pm4 = &dsa->pm4; 755 unsigned db_depth_control; 756 unsigned db_render_control; 757 uint32_t db_stencil_control = 0; 758 759 if (dsa == NULL) { 760 return NULL; 761 } 762 763 dsa->valuemask[0] = state->stencil[0].valuemask; 764 dsa->valuemask[1] = state->stencil[1].valuemask; 765 dsa->writemask[0] = state->stencil[0].writemask; 766 dsa->writemask[1] = state->stencil[1].writemask; 767 768 db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | 769 S_028800_Z_WRITE_ENABLE(state->depth.writemask) | 770 S_028800_ZFUNC(state->depth.func); 771 772 /* stencil */ 773 if (state->stencil[0].enabled) { 774 db_depth_control |= S_028800_STENCIL_ENABLE(1); 775 db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func); 776 db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op)); 777 db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op)); 778 db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op)); 779 780 if (state->stencil[1].enabled) { 781 db_depth_control |= S_028800_BACKFACE_ENABLE(1); 782 db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func); 783 db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op)); 784 db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op)); 785 db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op)); 786 } 787 } 788 789 /* alpha */ 790 if (state->alpha.enabled) { 791 dsa->alpha_func = state->alpha.func; 792 dsa->alpha_ref = state->alpha.ref_value; 793 794 si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 + 795 SI_SGPR_ALPHA_REF * 4, fui(dsa->alpha_ref)); 796 } else { 797 dsa->alpha_func = PIPE_FUNC_ALWAYS; 798 } 799 800 /* misc */ 801 db_render_control = 0; 802 si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control); 803 si_pm4_set_reg(pm4, R_028000_DB_RENDER_CONTROL, db_render_control); 804 si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); 805 806 return dsa; 807} 808 809static void si_bind_dsa_state(struct pipe_context *ctx, void *state) 810{ 811 struct si_context *sctx = (struct si_context *)ctx; 812 struct si_state_dsa *dsa = state; 813 814 if (state == NULL) 815 return; 816 817 si_pm4_bind_state(sctx, dsa, dsa); 818 si_update_dsa_stencil_ref(sctx); 819} 820 821static void si_delete_dsa_state(struct pipe_context *ctx, void *state) 822{ 823 struct si_context *sctx = (struct si_context *)ctx; 824 si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state); 825} 826 827static void *si_create_db_flush_dsa(struct si_context *sctx, bool copy_depth, 828 bool copy_stencil, int sample) 829{ 830 struct pipe_depth_stencil_alpha_state dsa; 831 struct si_state_dsa *state; 832 833 memset(&dsa, 0, sizeof(dsa)); 834 835 state = sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa); 836 if (copy_depth || copy_stencil) { 837 si_pm4_set_reg(&state->pm4, R_028000_DB_RENDER_CONTROL, 838 S_028000_DEPTH_COPY(copy_depth) | 839 S_028000_STENCIL_COPY(copy_stencil) | 840 S_028000_COPY_CENTROID(1) | 841 S_028000_COPY_SAMPLE(sample)); 842 } else { 843 si_pm4_set_reg(&state->pm4, R_028000_DB_RENDER_CONTROL, 844 S_028000_DEPTH_COMPRESS_DISABLE(1) | 845 S_028000_STENCIL_COMPRESS_DISABLE(1)); 846 } 847 848 return state; 849} 850 851/* 852 * format translation 853 */ 854static uint32_t si_translate_colorformat(enum pipe_format format) 855{ 856 const struct util_format_description *desc = util_format_description(format); 857 858#define HAS_SIZE(x,y,z,w) \ 859 (desc->channel[0].size == (x) && desc->channel[1].size == (y) && \ 860 desc->channel[2].size == (z) && desc->channel[3].size == (w)) 861 862 if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */ 863 return V_028C70_COLOR_10_11_11; 864 865 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) 866 return V_028C70_COLOR_INVALID; 867 868 switch (desc->nr_channels) { 869 case 1: 870 switch (desc->channel[0].size) { 871 case 8: 872 return V_028C70_COLOR_8; 873 case 16: 874 return V_028C70_COLOR_16; 875 case 32: 876 return V_028C70_COLOR_32; 877 } 878 break; 879 case 2: 880 if (desc->channel[0].size == desc->channel[1].size) { 881 switch (desc->channel[0].size) { 882 case 8: 883 return V_028C70_COLOR_8_8; 884 case 16: 885 return V_028C70_COLOR_16_16; 886 case 32: 887 return V_028C70_COLOR_32_32; 888 } 889 } else if (HAS_SIZE(8,24,0,0)) { 890 return V_028C70_COLOR_24_8; 891 } else if (HAS_SIZE(24,8,0,0)) { 892 return V_028C70_COLOR_8_24; 893 } 894 break; 895 case 3: 896 if (HAS_SIZE(5,6,5,0)) { 897 return V_028C70_COLOR_5_6_5; 898 } else if (HAS_SIZE(32,8,24,0)) { 899 return V_028C70_COLOR_X24_8_32_FLOAT; 900 } 901 break; 902 case 4: 903 if (desc->channel[0].size == desc->channel[1].size && 904 desc->channel[0].size == desc->channel[2].size && 905 desc->channel[0].size == desc->channel[3].size) { 906 switch (desc->channel[0].size) { 907 case 4: 908 return V_028C70_COLOR_4_4_4_4; 909 case 8: 910 return V_028C70_COLOR_8_8_8_8; 911 case 16: 912 return V_028C70_COLOR_16_16_16_16; 913 case 32: 914 return V_028C70_COLOR_32_32_32_32; 915 } 916 } else if (HAS_SIZE(5,5,5,1)) { 917 return V_028C70_COLOR_1_5_5_5; 918 } else if (HAS_SIZE(10,10,10,2)) { 919 return V_028C70_COLOR_2_10_10_10; 920 } 921 break; 922 } 923 return V_028C70_COLOR_INVALID; 924} 925 926static uint32_t si_colorformat_endian_swap(uint32_t colorformat) 927{ 928 if (SI_BIG_ENDIAN) { 929 switch(colorformat) { 930 /* 8-bit buffers. */ 931 case V_028C70_COLOR_8: 932 return V_028C70_ENDIAN_NONE; 933 934 /* 16-bit buffers. */ 935 case V_028C70_COLOR_5_6_5: 936 case V_028C70_COLOR_1_5_5_5: 937 case V_028C70_COLOR_4_4_4_4: 938 case V_028C70_COLOR_16: 939 case V_028C70_COLOR_8_8: 940 return V_028C70_ENDIAN_8IN16; 941 942 /* 32-bit buffers. */ 943 case V_028C70_COLOR_8_8_8_8: 944 case V_028C70_COLOR_2_10_10_10: 945 case V_028C70_COLOR_8_24: 946 case V_028C70_COLOR_24_8: 947 case V_028C70_COLOR_16_16: 948 return V_028C70_ENDIAN_8IN32; 949 950 /* 64-bit buffers. */ 951 case V_028C70_COLOR_16_16_16_16: 952 return V_028C70_ENDIAN_8IN16; 953 954 case V_028C70_COLOR_32_32: 955 return V_028C70_ENDIAN_8IN32; 956 957 /* 128-bit buffers. */ 958 case V_028C70_COLOR_32_32_32_32: 959 return V_028C70_ENDIAN_8IN32; 960 default: 961 return V_028C70_ENDIAN_NONE; /* Unsupported. */ 962 } 963 } else { 964 return V_028C70_ENDIAN_NONE; 965 } 966} 967 968/* Returns the size in bits of the widest component of a CB format */ 969static unsigned si_colorformat_max_comp_size(uint32_t colorformat) 970{ 971 switch(colorformat) { 972 case V_028C70_COLOR_4_4_4_4: 973 return 4; 974 975 case V_028C70_COLOR_1_5_5_5: 976 case V_028C70_COLOR_5_5_5_1: 977 return 5; 978 979 case V_028C70_COLOR_5_6_5: 980 return 6; 981 982 case V_028C70_COLOR_8: 983 case V_028C70_COLOR_8_8: 984 case V_028C70_COLOR_8_8_8_8: 985 return 8; 986 987 case V_028C70_COLOR_10_10_10_2: 988 case V_028C70_COLOR_2_10_10_10: 989 return 10; 990 991 case V_028C70_COLOR_10_11_11: 992 case V_028C70_COLOR_11_11_10: 993 return 11; 994 995 case V_028C70_COLOR_16: 996 case V_028C70_COLOR_16_16: 997 case V_028C70_COLOR_16_16_16_16: 998 return 16; 999 1000 case V_028C70_COLOR_8_24: 1001 case V_028C70_COLOR_24_8: 1002 return 24; 1003 1004 case V_028C70_COLOR_32: 1005 case V_028C70_COLOR_32_32: 1006 case V_028C70_COLOR_32_32_32_32: 1007 case V_028C70_COLOR_X24_8_32_FLOAT: 1008 return 32; 1009 } 1010 1011 assert(!"Unknown maximum component size"); 1012 return 0; 1013} 1014 1015static uint32_t si_translate_dbformat(enum pipe_format format) 1016{ 1017 switch (format) { 1018 case PIPE_FORMAT_Z16_UNORM: 1019 return V_028040_Z_16; 1020 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1021 case PIPE_FORMAT_X8Z24_UNORM: 1022 case PIPE_FORMAT_Z24X8_UNORM: 1023 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1024 return V_028040_Z_24; /* deprecated on SI */ 1025 case PIPE_FORMAT_Z32_FLOAT: 1026 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1027 return V_028040_Z_32_FLOAT; 1028 default: 1029 return V_028040_Z_INVALID; 1030 } 1031} 1032 1033/* 1034 * Texture translation 1035 */ 1036 1037static uint32_t si_translate_texformat(struct pipe_screen *screen, 1038 enum pipe_format format, 1039 const struct util_format_description *desc, 1040 int first_non_void) 1041{ 1042 struct si_screen *sscreen = (struct si_screen*)screen; 1043 bool enable_s3tc = sscreen->b.info.drm_minor >= 31; 1044 boolean uniform = TRUE; 1045 int i; 1046 1047 /* Colorspace (return non-RGB formats directly). */ 1048 switch (desc->colorspace) { 1049 /* Depth stencil formats */ 1050 case UTIL_FORMAT_COLORSPACE_ZS: 1051 switch (format) { 1052 case PIPE_FORMAT_Z16_UNORM: 1053 return V_008F14_IMG_DATA_FORMAT_16; 1054 case PIPE_FORMAT_X24S8_UINT: 1055 case PIPE_FORMAT_Z24X8_UNORM: 1056 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1057 return V_008F14_IMG_DATA_FORMAT_8_24; 1058 case PIPE_FORMAT_X8Z24_UNORM: 1059 case PIPE_FORMAT_S8X24_UINT: 1060 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1061 return V_008F14_IMG_DATA_FORMAT_24_8; 1062 case PIPE_FORMAT_S8_UINT: 1063 return V_008F14_IMG_DATA_FORMAT_8; 1064 case PIPE_FORMAT_Z32_FLOAT: 1065 return V_008F14_IMG_DATA_FORMAT_32; 1066 case PIPE_FORMAT_X32_S8X24_UINT: 1067 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1068 return V_008F14_IMG_DATA_FORMAT_X24_8_32; 1069 default: 1070 goto out_unknown; 1071 } 1072 1073 case UTIL_FORMAT_COLORSPACE_YUV: 1074 goto out_unknown; /* TODO */ 1075 1076 case UTIL_FORMAT_COLORSPACE_SRGB: 1077 if (desc->nr_channels != 4 && desc->nr_channels != 1) 1078 goto out_unknown; 1079 break; 1080 1081 default: 1082 break; 1083 } 1084 1085 if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { 1086 if (!enable_s3tc) 1087 goto out_unknown; 1088 1089 switch (format) { 1090 case PIPE_FORMAT_RGTC1_SNORM: 1091 case PIPE_FORMAT_LATC1_SNORM: 1092 case PIPE_FORMAT_RGTC1_UNORM: 1093 case PIPE_FORMAT_LATC1_UNORM: 1094 return V_008F14_IMG_DATA_FORMAT_BC4; 1095 case PIPE_FORMAT_RGTC2_SNORM: 1096 case PIPE_FORMAT_LATC2_SNORM: 1097 case PIPE_FORMAT_RGTC2_UNORM: 1098 case PIPE_FORMAT_LATC2_UNORM: 1099 return V_008F14_IMG_DATA_FORMAT_BC5; 1100 default: 1101 goto out_unknown; 1102 } 1103 } 1104 1105 if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 1106 switch (format) { 1107 case PIPE_FORMAT_R8G8_B8G8_UNORM: 1108 case PIPE_FORMAT_G8R8_B8R8_UNORM: 1109 return V_008F14_IMG_DATA_FORMAT_GB_GR; 1110 case PIPE_FORMAT_G8R8_G8B8_UNORM: 1111 case PIPE_FORMAT_R8G8_R8B8_UNORM: 1112 return V_008F14_IMG_DATA_FORMAT_BG_RG; 1113 default: 1114 goto out_unknown; 1115 } 1116 } 1117 1118 if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { 1119 1120 if (!enable_s3tc) 1121 goto out_unknown; 1122 1123 if (!util_format_s3tc_enabled) { 1124 goto out_unknown; 1125 } 1126 1127 switch (format) { 1128 case PIPE_FORMAT_DXT1_RGB: 1129 case PIPE_FORMAT_DXT1_RGBA: 1130 case PIPE_FORMAT_DXT1_SRGB: 1131 case PIPE_FORMAT_DXT1_SRGBA: 1132 return V_008F14_IMG_DATA_FORMAT_BC1; 1133 case PIPE_FORMAT_DXT3_RGBA: 1134 case PIPE_FORMAT_DXT3_SRGBA: 1135 return V_008F14_IMG_DATA_FORMAT_BC2; 1136 case PIPE_FORMAT_DXT5_RGBA: 1137 case PIPE_FORMAT_DXT5_SRGBA: 1138 return V_008F14_IMG_DATA_FORMAT_BC3; 1139 default: 1140 goto out_unknown; 1141 } 1142 } 1143 1144 if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) { 1145 return V_008F14_IMG_DATA_FORMAT_5_9_9_9; 1146 } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) { 1147 return V_008F14_IMG_DATA_FORMAT_10_11_11; 1148 } 1149 1150 /* R8G8Bx_SNORM - TODO CxV8U8 */ 1151 1152 /* See whether the components are of the same size. */ 1153 for (i = 1; i < desc->nr_channels; i++) { 1154 uniform = uniform && desc->channel[0].size == desc->channel[i].size; 1155 } 1156 1157 /* Non-uniform formats. */ 1158 if (!uniform) { 1159 switch(desc->nr_channels) { 1160 case 3: 1161 if (desc->channel[0].size == 5 && 1162 desc->channel[1].size == 6 && 1163 desc->channel[2].size == 5) { 1164 return V_008F14_IMG_DATA_FORMAT_5_6_5; 1165 } 1166 goto out_unknown; 1167 case 4: 1168 if (desc->channel[0].size == 5 && 1169 desc->channel[1].size == 5 && 1170 desc->channel[2].size == 5 && 1171 desc->channel[3].size == 1) { 1172 return V_008F14_IMG_DATA_FORMAT_1_5_5_5; 1173 } 1174 if (desc->channel[0].size == 10 && 1175 desc->channel[1].size == 10 && 1176 desc->channel[2].size == 10 && 1177 desc->channel[3].size == 2) { 1178 return V_008F14_IMG_DATA_FORMAT_2_10_10_10; 1179 } 1180 goto out_unknown; 1181 } 1182 goto out_unknown; 1183 } 1184 1185 if (first_non_void < 0 || first_non_void > 3) 1186 goto out_unknown; 1187 1188 /* uniform formats */ 1189 switch (desc->channel[first_non_void].size) { 1190 case 4: 1191 switch (desc->nr_channels) { 1192#if 0 /* Not supported for render targets */ 1193 case 2: 1194 return V_008F14_IMG_DATA_FORMAT_4_4; 1195#endif 1196 case 4: 1197 return V_008F14_IMG_DATA_FORMAT_4_4_4_4; 1198 } 1199 break; 1200 case 8: 1201 switch (desc->nr_channels) { 1202 case 1: 1203 return V_008F14_IMG_DATA_FORMAT_8; 1204 case 2: 1205 return V_008F14_IMG_DATA_FORMAT_8_8; 1206 case 4: 1207 return V_008F14_IMG_DATA_FORMAT_8_8_8_8; 1208 } 1209 break; 1210 case 16: 1211 switch (desc->nr_channels) { 1212 case 1: 1213 return V_008F14_IMG_DATA_FORMAT_16; 1214 case 2: 1215 return V_008F14_IMG_DATA_FORMAT_16_16; 1216 case 4: 1217 return V_008F14_IMG_DATA_FORMAT_16_16_16_16; 1218 } 1219 break; 1220 case 32: 1221 switch (desc->nr_channels) { 1222 case 1: 1223 return V_008F14_IMG_DATA_FORMAT_32; 1224 case 2: 1225 return V_008F14_IMG_DATA_FORMAT_32_32; 1226#if 0 /* Not supported for render targets */ 1227 case 3: 1228 return V_008F14_IMG_DATA_FORMAT_32_32_32; 1229#endif 1230 case 4: 1231 return V_008F14_IMG_DATA_FORMAT_32_32_32_32; 1232 } 1233 } 1234 1235out_unknown: 1236 /* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */ 1237 return ~0; 1238} 1239 1240static unsigned si_tex_wrap(unsigned wrap) 1241{ 1242 switch (wrap) { 1243 default: 1244 case PIPE_TEX_WRAP_REPEAT: 1245 return V_008F30_SQ_TEX_WRAP; 1246 case PIPE_TEX_WRAP_CLAMP: 1247 return V_008F30_SQ_TEX_CLAMP_HALF_BORDER; 1248 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 1249 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL; 1250 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 1251 return V_008F30_SQ_TEX_CLAMP_BORDER; 1252 case PIPE_TEX_WRAP_MIRROR_REPEAT: 1253 return V_008F30_SQ_TEX_MIRROR; 1254 case PIPE_TEX_WRAP_MIRROR_CLAMP: 1255 return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER; 1256 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 1257 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; 1258 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 1259 return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER; 1260 } 1261} 1262 1263static unsigned si_tex_filter(unsigned filter) 1264{ 1265 switch (filter) { 1266 default: 1267 case PIPE_TEX_FILTER_NEAREST: 1268 return V_008F38_SQ_TEX_XY_FILTER_POINT; 1269 case PIPE_TEX_FILTER_LINEAR: 1270 return V_008F38_SQ_TEX_XY_FILTER_BILINEAR; 1271 } 1272} 1273 1274static unsigned si_tex_mipfilter(unsigned filter) 1275{ 1276 switch (filter) { 1277 case PIPE_TEX_MIPFILTER_NEAREST: 1278 return V_008F38_SQ_TEX_Z_FILTER_POINT; 1279 case PIPE_TEX_MIPFILTER_LINEAR: 1280 return V_008F38_SQ_TEX_Z_FILTER_LINEAR; 1281 default: 1282 case PIPE_TEX_MIPFILTER_NONE: 1283 return V_008F38_SQ_TEX_Z_FILTER_NONE; 1284 } 1285} 1286 1287static unsigned si_tex_compare(unsigned compare) 1288{ 1289 switch (compare) { 1290 default: 1291 case PIPE_FUNC_NEVER: 1292 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER; 1293 case PIPE_FUNC_LESS: 1294 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS; 1295 case PIPE_FUNC_EQUAL: 1296 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL; 1297 case PIPE_FUNC_LEQUAL: 1298 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; 1299 case PIPE_FUNC_GREATER: 1300 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER; 1301 case PIPE_FUNC_NOTEQUAL: 1302 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; 1303 case PIPE_FUNC_GEQUAL: 1304 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; 1305 case PIPE_FUNC_ALWAYS: 1306 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS; 1307 } 1308} 1309 1310static unsigned si_tex_dim(unsigned dim, unsigned nr_samples) 1311{ 1312 switch (dim) { 1313 default: 1314 case PIPE_TEXTURE_1D: 1315 return V_008F1C_SQ_RSRC_IMG_1D; 1316 case PIPE_TEXTURE_1D_ARRAY: 1317 return V_008F1C_SQ_RSRC_IMG_1D_ARRAY; 1318 case PIPE_TEXTURE_2D: 1319 case PIPE_TEXTURE_RECT: 1320 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA : 1321 V_008F1C_SQ_RSRC_IMG_2D; 1322 case PIPE_TEXTURE_2D_ARRAY: 1323 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : 1324 V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 1325 case PIPE_TEXTURE_3D: 1326 return V_008F1C_SQ_RSRC_IMG_3D; 1327 case PIPE_TEXTURE_CUBE: 1328 case PIPE_TEXTURE_CUBE_ARRAY: 1329 return V_008F1C_SQ_RSRC_IMG_CUBE; 1330 } 1331} 1332 1333/* 1334 * Format support testing 1335 */ 1336 1337static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) 1338{ 1339 return si_translate_texformat(screen, format, util_format_description(format), 1340 util_format_get_first_non_void_channel(format)) != ~0U; 1341} 1342 1343static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen, 1344 const struct util_format_description *desc, 1345 int first_non_void) 1346{ 1347 unsigned type = desc->channel[first_non_void].type; 1348 int i; 1349 1350 if (type == UTIL_FORMAT_TYPE_FIXED) 1351 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1352 1353 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 1354 return V_008F0C_BUF_DATA_FORMAT_10_11_11; 1355 1356 if (desc->nr_channels == 4 && 1357 desc->channel[0].size == 10 && 1358 desc->channel[1].size == 10 && 1359 desc->channel[2].size == 10 && 1360 desc->channel[3].size == 2) 1361 return V_008F0C_BUF_DATA_FORMAT_2_10_10_10; 1362 1363 /* See whether the components are of the same size. */ 1364 for (i = 0; i < desc->nr_channels; i++) { 1365 if (desc->channel[first_non_void].size != desc->channel[i].size) 1366 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1367 } 1368 1369 switch (desc->channel[first_non_void].size) { 1370 case 8: 1371 switch (desc->nr_channels) { 1372 case 1: 1373 return V_008F0C_BUF_DATA_FORMAT_8; 1374 case 2: 1375 return V_008F0C_BUF_DATA_FORMAT_8_8; 1376 case 3: 1377 case 4: 1378 return V_008F0C_BUF_DATA_FORMAT_8_8_8_8; 1379 } 1380 break; 1381 case 16: 1382 switch (desc->nr_channels) { 1383 case 1: 1384 return V_008F0C_BUF_DATA_FORMAT_16; 1385 case 2: 1386 return V_008F0C_BUF_DATA_FORMAT_16_16; 1387 case 3: 1388 case 4: 1389 return V_008F0C_BUF_DATA_FORMAT_16_16_16_16; 1390 } 1391 break; 1392 case 32: 1393 /* From the Southern Islands ISA documentation about MTBUF: 1394 * 'Memory reads of data in memory that is 32 or 64 bits do not 1395 * undergo any format conversion.' 1396 */ 1397 if (type != UTIL_FORMAT_TYPE_FLOAT && 1398 !desc->channel[first_non_void].pure_integer) 1399 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1400 1401 switch (desc->nr_channels) { 1402 case 1: 1403 return V_008F0C_BUF_DATA_FORMAT_32; 1404 case 2: 1405 return V_008F0C_BUF_DATA_FORMAT_32_32; 1406 case 3: 1407 return V_008F0C_BUF_DATA_FORMAT_32_32_32; 1408 case 4: 1409 return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 1410 } 1411 break; 1412 } 1413 1414 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1415} 1416 1417static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen, 1418 const struct util_format_description *desc, 1419 int first_non_void) 1420{ 1421 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 1422 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 1423 1424 switch (desc->channel[first_non_void].type) { 1425 case UTIL_FORMAT_TYPE_SIGNED: 1426 if (desc->channel[first_non_void].normalized) 1427 return V_008F0C_BUF_NUM_FORMAT_SNORM; 1428 else if (desc->channel[first_non_void].pure_integer) 1429 return V_008F0C_BUF_NUM_FORMAT_SINT; 1430 else 1431 return V_008F0C_BUF_NUM_FORMAT_SSCALED; 1432 break; 1433 case UTIL_FORMAT_TYPE_UNSIGNED: 1434 if (desc->channel[first_non_void].normalized) 1435 return V_008F0C_BUF_NUM_FORMAT_UNORM; 1436 else if (desc->channel[first_non_void].pure_integer) 1437 return V_008F0C_BUF_NUM_FORMAT_UINT; 1438 else 1439 return V_008F0C_BUF_NUM_FORMAT_USCALED; 1440 break; 1441 case UTIL_FORMAT_TYPE_FLOAT: 1442 default: 1443 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 1444 } 1445} 1446 1447static bool si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format) 1448{ 1449 const struct util_format_description *desc; 1450 int first_non_void; 1451 unsigned data_format; 1452 1453 desc = util_format_description(format); 1454 first_non_void = util_format_get_first_non_void_channel(format); 1455 data_format = si_translate_buffer_dataformat(screen, desc, first_non_void); 1456 return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID; 1457} 1458 1459static bool si_is_colorbuffer_format_supported(enum pipe_format format) 1460{ 1461 return si_translate_colorformat(format) != V_028C70_COLOR_INVALID && 1462 r600_translate_colorswap(format) != ~0U; 1463} 1464 1465static bool si_is_zs_format_supported(enum pipe_format format) 1466{ 1467 return si_translate_dbformat(format) != V_028040_Z_INVALID; 1468} 1469 1470boolean si_is_format_supported(struct pipe_screen *screen, 1471 enum pipe_format format, 1472 enum pipe_texture_target target, 1473 unsigned sample_count, 1474 unsigned usage) 1475{ 1476 struct si_screen *sscreen = (struct si_screen *)screen; 1477 unsigned retval = 0; 1478 1479 if (target >= PIPE_MAX_TEXTURE_TYPES) { 1480 R600_ERR("r600: unsupported texture type %d\n", target); 1481 return FALSE; 1482 } 1483 1484 if (!util_format_is_supported(format, usage)) 1485 return FALSE; 1486 1487 if (sample_count > 1) { 1488 /* 2D tiling on CIK is supported since DRM 2.35.0 */ 1489 if (sscreen->b.chip_class >= CIK && sscreen->b.info.drm_minor < 35) 1490 return FALSE; 1491 1492 switch (sample_count) { 1493 case 2: 1494 case 4: 1495 case 8: 1496 break; 1497 default: 1498 return FALSE; 1499 } 1500 } 1501 1502 if (usage & PIPE_BIND_SAMPLER_VIEW) { 1503 if (target == PIPE_BUFFER) { 1504 if (si_is_vertex_format_supported(screen, format)) 1505 retval |= PIPE_BIND_SAMPLER_VIEW; 1506 } else { 1507 if (si_is_sampler_format_supported(screen, format)) 1508 retval |= PIPE_BIND_SAMPLER_VIEW; 1509 } 1510 } 1511 1512 if ((usage & (PIPE_BIND_RENDER_TARGET | 1513 PIPE_BIND_DISPLAY_TARGET | 1514 PIPE_BIND_SCANOUT | 1515 PIPE_BIND_SHARED)) && 1516 si_is_colorbuffer_format_supported(format)) { 1517 retval |= usage & 1518 (PIPE_BIND_RENDER_TARGET | 1519 PIPE_BIND_DISPLAY_TARGET | 1520 PIPE_BIND_SCANOUT | 1521 PIPE_BIND_SHARED); 1522 } 1523 1524 if ((usage & PIPE_BIND_DEPTH_STENCIL) && 1525 si_is_zs_format_supported(format)) { 1526 retval |= PIPE_BIND_DEPTH_STENCIL; 1527 } 1528 1529 if ((usage & PIPE_BIND_VERTEX_BUFFER) && 1530 si_is_vertex_format_supported(screen, format)) { 1531 retval |= PIPE_BIND_VERTEX_BUFFER; 1532 } 1533 1534 if (usage & PIPE_BIND_TRANSFER_READ) 1535 retval |= PIPE_BIND_TRANSFER_READ; 1536 if (usage & PIPE_BIND_TRANSFER_WRITE) 1537 retval |= PIPE_BIND_TRANSFER_WRITE; 1538 1539 return retval == usage; 1540} 1541 1542unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil) 1543{ 1544 unsigned tile_mode_index = 0; 1545 1546 if (stencil) { 1547 tile_mode_index = rtex->surface.stencil_tiling_index[level]; 1548 } else { 1549 tile_mode_index = rtex->surface.tiling_index[level]; 1550 } 1551 return tile_mode_index; 1552} 1553 1554/* 1555 * framebuffer handling 1556 */ 1557 1558static void si_initialize_color_surface(struct si_context *sctx, 1559 struct r600_surface *surf) 1560{ 1561 struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; 1562 unsigned level = surf->base.u.tex.level; 1563 uint64_t offset = rtex->surface.level[level].offset; 1564 unsigned pitch, slice; 1565 unsigned color_info, color_attrib, color_pitch, color_view; 1566 unsigned tile_mode_index; 1567 unsigned format, swap, ntype, endian; 1568 const struct util_format_description *desc; 1569 int i; 1570 unsigned blend_clamp = 0, blend_bypass = 0; 1571 unsigned max_comp_size; 1572 1573 /* Layered rendering doesn't work with LINEAR_GENERAL. 1574 * (LINEAR_ALIGNED and others work) */ 1575 if (rtex->surface.level[level].mode == RADEON_SURF_MODE_LINEAR) { 1576 assert(surf->base.u.tex.first_layer == surf->base.u.tex.last_layer); 1577 offset += rtex->surface.level[level].slice_size * 1578 surf->base.u.tex.first_layer; 1579 color_view = 0; 1580 } else { 1581 color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) | 1582 S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer); 1583 } 1584 1585 pitch = (rtex->surface.level[level].nblk_x) / 8 - 1; 1586 slice = (rtex->surface.level[level].nblk_x * rtex->surface.level[level].nblk_y) / 64; 1587 if (slice) { 1588 slice = slice - 1; 1589 } 1590 1591 tile_mode_index = si_tile_mode_index(rtex, level, false); 1592 1593 desc = util_format_description(surf->base.format); 1594 for (i = 0; i < 4; i++) { 1595 if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { 1596 break; 1597 } 1598 } 1599 if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) { 1600 ntype = V_028C70_NUMBER_FLOAT; 1601 } else { 1602 ntype = V_028C70_NUMBER_UNORM; 1603 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) 1604 ntype = V_028C70_NUMBER_SRGB; 1605 else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 1606 if (desc->channel[i].pure_integer) { 1607 ntype = V_028C70_NUMBER_SINT; 1608 } else { 1609 assert(desc->channel[i].normalized); 1610 ntype = V_028C70_NUMBER_SNORM; 1611 } 1612 } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { 1613 if (desc->channel[i].pure_integer) { 1614 ntype = V_028C70_NUMBER_UINT; 1615 } else { 1616 assert(desc->channel[i].normalized); 1617 ntype = V_028C70_NUMBER_UNORM; 1618 } 1619 } 1620 } 1621 1622 format = si_translate_colorformat(surf->base.format); 1623 if (format == V_028C70_COLOR_INVALID) { 1624 R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format); 1625 } 1626 assert(format != V_028C70_COLOR_INVALID); 1627 swap = r600_translate_colorswap(surf->base.format); 1628 if (rtex->resource.b.b.usage == PIPE_USAGE_STAGING) { 1629 endian = V_028C70_ENDIAN_NONE; 1630 } else { 1631 endian = si_colorformat_endian_swap(format); 1632 } 1633 1634 /* blend clamp should be set for all NORM/SRGB types */ 1635 if (ntype == V_028C70_NUMBER_UNORM || 1636 ntype == V_028C70_NUMBER_SNORM || 1637 ntype == V_028C70_NUMBER_SRGB) 1638 blend_clamp = 1; 1639 1640 /* set blend bypass according to docs if SINT/UINT or 1641 8/24 COLOR variants */ 1642 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT || 1643 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 || 1644 format == V_028C70_COLOR_X24_8_32_FLOAT) { 1645 blend_clamp = 0; 1646 blend_bypass = 1; 1647 } 1648 1649 color_info = S_028C70_FORMAT(format) | 1650 S_028C70_COMP_SWAP(swap) | 1651 S_028C70_BLEND_CLAMP(blend_clamp) | 1652 S_028C70_BLEND_BYPASS(blend_bypass) | 1653 S_028C70_NUMBER_TYPE(ntype) | 1654 S_028C70_ENDIAN(endian); 1655 1656 color_pitch = S_028C64_TILE_MAX(pitch); 1657 1658 color_attrib = S_028C74_TILE_MODE_INDEX(tile_mode_index) | 1659 S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1); 1660 1661 if (rtex->resource.b.b.nr_samples > 1) { 1662 unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples); 1663 1664 color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | 1665 S_028C74_NUM_FRAGMENTS(log_samples); 1666 1667 if (rtex->fmask.size) { 1668 color_info |= S_028C70_COMPRESSION(1); 1669 unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height); 1670 1671 color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(rtex->fmask.tile_mode_index); 1672 1673 if (sctx->b.chip_class == SI) { 1674 /* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */ 1675 color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); 1676 } 1677 if (sctx->b.chip_class >= CIK) { 1678 color_pitch |= S_028C64_FMASK_TILE_MAX(rtex->fmask.pitch / 8 - 1); 1679 } 1680 } 1681 } 1682 1683 offset += r600_resource_va(sctx->b.b.screen, surf->base.texture); 1684 1685 surf->cb_color_base = offset >> 8; 1686 surf->cb_color_pitch = color_pitch; 1687 surf->cb_color_slice = S_028C68_TILE_MAX(slice); 1688 surf->cb_color_view = color_view; 1689 surf->cb_color_info = color_info; 1690 surf->cb_color_attrib = color_attrib; 1691 1692 if (rtex->fmask.size) { 1693 surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8; 1694 surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max); 1695 } else { 1696 /* This must be set for fast clear to work without FMASK. */ 1697 surf->cb_color_fmask = surf->cb_color_base; 1698 surf->cb_color_fmask_slice = surf->cb_color_slice; 1699 surf->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index); 1700 1701 if (sctx->b.chip_class == SI) { 1702 unsigned bankh = util_logbase2(rtex->surface.bankh); 1703 surf->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh); 1704 } 1705 1706 if (sctx->b.chip_class >= CIK) { 1707 surf->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch); 1708 } 1709 } 1710 1711 /* Determine pixel shader export format */ 1712 max_comp_size = si_colorformat_max_comp_size(format); 1713 if (ntype == V_028C70_NUMBER_SRGB || 1714 ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) && 1715 max_comp_size <= 10) || 1716 (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) { 1717 surf->export_16bpc = true; 1718 } 1719 1720 surf->color_initialized = true; 1721} 1722 1723static void si_init_depth_surface(struct si_context *sctx, 1724 struct r600_surface *surf) 1725{ 1726 struct si_screen *sscreen = sctx->screen; 1727 struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; 1728 unsigned level = surf->base.u.tex.level; 1729 unsigned pitch, slice, format, tile_mode_index, array_mode; 1730 unsigned macro_aspect, tile_split, stile_split, bankh, bankw, nbanks, pipe_config; 1731 uint32_t z_info, s_info, db_depth_info; 1732 uint64_t z_offs, s_offs; 1733 uint32_t db_htile_data_base, db_htile_surface, pa_su_poly_offset_db_fmt_cntl = 0; 1734 1735 switch (sctx->framebuffer.state.zsbuf->texture->format) { 1736 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1737 case PIPE_FORMAT_X8Z24_UNORM: 1738 case PIPE_FORMAT_Z24X8_UNORM: 1739 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1740 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24); 1741 break; 1742 case PIPE_FORMAT_Z32_FLOAT: 1743 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1744 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | 1745 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); 1746 break; 1747 case PIPE_FORMAT_Z16_UNORM: 1748 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16); 1749 break; 1750 default: 1751 assert(0); 1752 } 1753 1754 format = si_translate_dbformat(rtex->resource.b.b.format); 1755 1756 if (format == V_028040_Z_INVALID) { 1757 R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format); 1758 } 1759 assert(format != V_028040_Z_INVALID); 1760 1761 s_offs = z_offs = r600_resource_va(sctx->b.b.screen, surf->base.texture); 1762 z_offs += rtex->surface.level[level].offset; 1763 s_offs += rtex->surface.stencil_level[level].offset; 1764 1765 pitch = (rtex->surface.level[level].nblk_x / 8) - 1; 1766 slice = (rtex->surface.level[level].nblk_x * rtex->surface.level[level].nblk_y) / 64; 1767 if (slice) { 1768 slice = slice - 1; 1769 } 1770 1771 db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1); 1772 1773 z_info = S_028040_FORMAT(format); 1774 if (rtex->resource.b.b.nr_samples > 1) { 1775 z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples)); 1776 } 1777 1778 if (rtex->surface.flags & RADEON_SURF_SBUFFER) 1779 s_info = S_028044_FORMAT(V_028044_STENCIL_8); 1780 else 1781 s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID); 1782 1783 if (sctx->b.chip_class >= CIK) { 1784 switch (rtex->surface.level[level].mode) { 1785 case RADEON_SURF_MODE_2D: 1786 array_mode = V_02803C_ARRAY_2D_TILED_THIN1; 1787 break; 1788 case RADEON_SURF_MODE_1D: 1789 case RADEON_SURF_MODE_LINEAR_ALIGNED: 1790 case RADEON_SURF_MODE_LINEAR: 1791 default: 1792 array_mode = V_02803C_ARRAY_1D_TILED_THIN1; 1793 break; 1794 } 1795 tile_split = rtex->surface.tile_split; 1796 stile_split = rtex->surface.stencil_tile_split; 1797 macro_aspect = rtex->surface.mtilea; 1798 bankw = rtex->surface.bankw; 1799 bankh = rtex->surface.bankh; 1800 tile_split = cik_tile_split(tile_split); 1801 stile_split = cik_tile_split(stile_split); 1802 macro_aspect = cik_macro_tile_aspect(macro_aspect); 1803 bankw = cik_bank_wh(bankw); 1804 bankh = cik_bank_wh(bankh); 1805 nbanks = si_num_banks(sscreen, rtex->surface.bpe, rtex->surface.tile_split, 1806 ~0); 1807 tile_mode_index = si_tile_mode_index(rtex, level, false); 1808 pipe_config = cik_db_pipe_config(sscreen, tile_mode_index); 1809 1810 db_depth_info |= S_02803C_ARRAY_MODE(array_mode) | 1811 S_02803C_PIPE_CONFIG(pipe_config) | 1812 S_02803C_BANK_WIDTH(bankw) | 1813 S_02803C_BANK_HEIGHT(bankh) | 1814 S_02803C_MACRO_TILE_ASPECT(macro_aspect) | 1815 S_02803C_NUM_BANKS(nbanks); 1816 z_info |= S_028040_TILE_SPLIT(tile_split); 1817 s_info |= S_028044_TILE_SPLIT(stile_split); 1818 } else { 1819 tile_mode_index = si_tile_mode_index(rtex, level, false); 1820 z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index); 1821 tile_mode_index = si_tile_mode_index(rtex, level, true); 1822 s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index); 1823 } 1824 1825 /* HiZ aka depth buffer htile */ 1826 /* use htile only for first level */ 1827 if (rtex->htile_buffer && !level) { 1828 const struct util_format_description *fmt_desc; 1829 1830 z_info |= S_028040_TILE_SURFACE_ENABLE(1); 1831 1832 /* This is optimal for the clear value of 1.0 and using 1833 * the LESS and LEQUAL test functions. Set this to 0 1834 * for the opposite case. This can only be changed when 1835 * clearing. */ 1836 z_info |= S_028040_ZRANGE_PRECISION(1); 1837 1838 fmt_desc = util_format_description(rtex->resource.b.b.format); 1839 if (!util_format_has_stencil(fmt_desc)) { 1840 /* Use all of the htile_buffer for depth */ 1841 s_info |= S_028044_TILE_STENCIL_DISABLE(1); 1842 } 1843 1844 uint64_t va = r600_resource_va(&sctx->screen->b.b, &rtex->htile_buffer->b.b); 1845 db_htile_data_base = va >> 8; 1846 db_htile_surface = S_028ABC_FULL_CACHE(1); 1847 } else { 1848 db_htile_data_base = 0; 1849 db_htile_surface = 0; 1850 } 1851 1852 surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) | 1853 S_028008_SLICE_MAX(surf->base.u.tex.last_layer); 1854 surf->db_htile_data_base = db_htile_data_base; 1855 surf->db_depth_info = db_depth_info; 1856 surf->db_z_info = z_info; 1857 surf->db_stencil_info = s_info; 1858 surf->db_depth_base = z_offs >> 8; 1859 surf->db_stencil_base = s_offs >> 8; 1860 surf->db_depth_size = S_028058_PITCH_TILE_MAX(pitch); 1861 surf->db_depth_slice = S_02805C_SLICE_TILE_MAX(slice); 1862 surf->db_htile_surface = db_htile_surface; 1863 surf->pa_su_poly_offset_db_fmt_cntl = pa_su_poly_offset_db_fmt_cntl; 1864 1865 surf->depth_initialized = true; 1866} 1867 1868static void si_set_framebuffer_state(struct pipe_context *ctx, 1869 const struct pipe_framebuffer_state *state) 1870{ 1871 struct si_context *sctx = (struct si_context *)ctx; 1872 struct pipe_constant_buffer constbuf = {0}; 1873 struct r600_surface *surf = NULL; 1874 struct r600_texture *rtex; 1875 int i; 1876 1877 if (sctx->framebuffer.state.nr_cbufs) { 1878 sctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_CB | 1879 R600_CONTEXT_FLUSH_AND_INV_CB_META; 1880 } 1881 if (sctx->framebuffer.state.zsbuf) { 1882 sctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_DB | 1883 R600_CONTEXT_FLUSH_AND_INV_DB_META; 1884 } 1885 1886 util_copy_framebuffer_state(&sctx->framebuffer.state, state); 1887 1888 sctx->framebuffer.export_16bpc = 0; 1889 sctx->framebuffer.compressed_cb_mask = 0; 1890 sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); 1891 sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples); 1892 sctx->framebuffer.cb0_is_integer = state->nr_cbufs && state->cbufs[0] && 1893 util_format_is_pure_integer(state->cbufs[0]->format); 1894 1895 for (i = 0; i < state->nr_cbufs; i++) { 1896 if (!state->cbufs[i]) 1897 continue; 1898 1899 surf = (struct r600_surface*)state->cbufs[i]; 1900 rtex = (struct r600_texture*)surf->base.texture; 1901 1902 if (!surf->color_initialized) { 1903 si_initialize_color_surface(sctx, surf); 1904 } 1905 1906 if (surf->export_16bpc) { 1907 sctx->framebuffer.export_16bpc |= 1 << i; 1908 } 1909 1910 if (rtex->fmask.size && rtex->cmask.size) { 1911 sctx->framebuffer.compressed_cb_mask |= 1 << i; 1912 } 1913 } 1914 /* Set the 16BPC export for possible dual-src blending. */ 1915 if (i == 1 && surf && surf->export_16bpc) { 1916 sctx->framebuffer.export_16bpc |= 1 << 1; 1917 } 1918 1919 assert(!(sctx->framebuffer.export_16bpc & ~0xff)); 1920 1921 if (state->zsbuf) { 1922 surf = (struct r600_surface*)state->zsbuf; 1923 1924 if (!surf->depth_initialized) { 1925 si_init_depth_surface(sctx, surf); 1926 } 1927 } 1928 1929 si_update_fb_rs_state(sctx); 1930 si_update_fb_blend_state(sctx); 1931 1932 sctx->framebuffer.atom.num_dw = state->nr_cbufs*15 + (8 - state->nr_cbufs)*3; 1933 sctx->framebuffer.atom.num_dw += state->zsbuf ? 23 : 4; 1934 sctx->framebuffer.atom.num_dw += 3; /* WINDOW_SCISSOR_BR */ 1935 sctx->framebuffer.atom.num_dw += 18; /* MSAA sample locations */ 1936 sctx->framebuffer.atom.dirty = true; 1937 sctx->msaa_config.dirty = true; 1938 1939 /* Set sample locations as fragment shader constants. */ 1940 switch (sctx->framebuffer.nr_samples) { 1941 case 1: 1942 constbuf.user_buffer = sctx->b.sample_locations_1x; 1943 break; 1944 case 2: 1945 constbuf.user_buffer = sctx->b.sample_locations_2x; 1946 break; 1947 case 4: 1948 constbuf.user_buffer = sctx->b.sample_locations_4x; 1949 break; 1950 case 8: 1951 constbuf.user_buffer = sctx->b.sample_locations_8x; 1952 break; 1953 case 16: 1954 constbuf.user_buffer = sctx->b.sample_locations_16x; 1955 break; 1956 default: 1957 assert(0); 1958 } 1959 constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; 1960 ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT, 1961 SI_DRIVER_STATE_CONST_BUF, &constbuf); 1962} 1963 1964static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom) 1965{ 1966 struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; 1967 struct pipe_framebuffer_state *state = &sctx->framebuffer.state; 1968 unsigned i, nr_cbufs = state->nr_cbufs; 1969 struct r600_texture *tex = NULL; 1970 struct r600_surface *cb = NULL; 1971 1972 /* Colorbuffers. */ 1973 for (i = 0; i < nr_cbufs; i++) { 1974 cb = (struct r600_surface*)state->cbufs[i]; 1975 if (!cb) { 1976 r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 1977 S_028C70_FORMAT(V_028C70_COLOR_INVALID)); 1978 continue; 1979 } 1980 1981 tex = (struct r600_texture *)cb->base.texture; 1982 r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, 1983 &tex->resource, RADEON_USAGE_READWRITE, 1984 tex->surface.nsamples > 1 ? 1985 RADEON_PRIO_COLOR_BUFFER_MSAA : 1986 RADEON_PRIO_COLOR_BUFFER); 1987 1988 if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) { 1989 r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, 1990 tex->cmask_buffer, RADEON_USAGE_READWRITE, 1991 RADEON_PRIO_COLOR_META); 1992 } 1993 1994 r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 13); 1995 radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */ 1996 radeon_emit(cs, cb->cb_color_pitch); /* R_028C64_CB_COLOR0_PITCH */ 1997 radeon_emit(cs, cb->cb_color_slice); /* R_028C68_CB_COLOR0_SLICE */ 1998 radeon_emit(cs, cb->cb_color_view); /* R_028C6C_CB_COLOR0_VIEW */ 1999 radeon_emit(cs, cb->cb_color_info | tex->cb_color_info); /* R_028C70_CB_COLOR0_INFO */ 2000 radeon_emit(cs, cb->cb_color_attrib); /* R_028C74_CB_COLOR0_ATTRIB */ 2001 radeon_emit(cs, 0); /* R_028C78 unused */ 2002 radeon_emit(cs, tex->cmask.base_address_reg); /* R_028C7C_CB_COLOR0_CMASK */ 2003 radeon_emit(cs, tex->cmask.slice_tile_max); /* R_028C80_CB_COLOR0_CMASK_SLICE */ 2004 radeon_emit(cs, cb->cb_color_fmask); /* R_028C84_CB_COLOR0_FMASK */ 2005 radeon_emit(cs, cb->cb_color_fmask_slice); /* R_028C88_CB_COLOR0_FMASK_SLICE */ 2006 radeon_emit(cs, tex->color_clear_value[0]); /* R_028C8C_CB_COLOR0_CLEAR_WORD0 */ 2007 radeon_emit(cs, tex->color_clear_value[1]); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */ 2008 } 2009 /* set CB_COLOR1_INFO for possible dual-src blending */ 2010 if (i == 1 && state->cbufs[0]) { 2011 r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C, 2012 cb->cb_color_info | tex->cb_color_info); 2013 i++; 2014 } 2015 for (; i < 8 ; i++) { 2016 r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); 2017 } 2018 2019 /* ZS buffer. */ 2020 if (state->zsbuf) { 2021 struct r600_surface *zb = (struct r600_surface*)state->zsbuf; 2022 struct r600_texture *rtex = (struct r600_texture*)zb->base.texture; 2023 2024 r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, 2025 &rtex->resource, RADEON_USAGE_READWRITE, 2026 zb->base.texture->nr_samples > 1 ? 2027 RADEON_PRIO_DEPTH_BUFFER_MSAA : 2028 RADEON_PRIO_DEPTH_BUFFER); 2029 2030 if (zb->db_htile_data_base) { 2031 r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, 2032 rtex->htile_buffer, RADEON_USAGE_READWRITE, 2033 RADEON_PRIO_DEPTH_META); 2034 } 2035 2036 r600_write_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view); 2037 r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base); 2038 2039 r600_write_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9); 2040 radeon_emit(cs, zb->db_depth_info); /* R_02803C_DB_DEPTH_INFO */ 2041 radeon_emit(cs, zb->db_z_info); /* R_028040_DB_Z_INFO */ 2042 radeon_emit(cs, zb->db_stencil_info); /* R_028044_DB_STENCIL_INFO */ 2043 radeon_emit(cs, zb->db_depth_base); /* R_028048_DB_Z_READ_BASE */ 2044 radeon_emit(cs, zb->db_stencil_base); /* R_02804C_DB_STENCIL_READ_BASE */ 2045 radeon_emit(cs, zb->db_depth_base); /* R_028050_DB_Z_WRITE_BASE */ 2046 radeon_emit(cs, zb->db_stencil_base); /* R_028054_DB_STENCIL_WRITE_BASE */ 2047 radeon_emit(cs, zb->db_depth_size); /* R_028058_DB_DEPTH_SIZE */ 2048 radeon_emit(cs, zb->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */ 2049 2050 r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface); 2051 r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 2052 zb->pa_su_poly_offset_db_fmt_cntl); 2053 } else { 2054 r600_write_context_reg_seq(cs, R_028040_DB_Z_INFO, 2); 2055 radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */ 2056 radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */ 2057 } 2058 2059 /* Framebuffer dimensions. */ 2060 /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */ 2061 r600_write_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR, 2062 S_028208_BR_X(state->width) | S_028208_BR_Y(state->height)); 2063 2064 cayman_emit_msaa_sample_locs(cs, sctx->framebuffer.nr_samples); 2065} 2066 2067static void si_emit_msaa_config(struct r600_common_context *rctx, struct r600_atom *atom) 2068{ 2069 struct si_context *sctx = (struct si_context *)rctx; 2070 struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; 2071 2072 cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples, 2073 sctx->ps_iter_samples); 2074} 2075 2076const struct r600_atom si_atom_msaa_config = { si_emit_msaa_config, 10 }; /* number of CS dwords */ 2077 2078static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples) 2079{ 2080 struct si_context *sctx = (struct si_context *)ctx; 2081 2082 if (sctx->ps_iter_samples == min_samples) 2083 return; 2084 2085 sctx->ps_iter_samples = min_samples; 2086 2087 if (sctx->framebuffer.nr_samples > 1) 2088 sctx->msaa_config.dirty = true; 2089} 2090 2091/* 2092 * shaders 2093 */ 2094 2095/* Compute the key for the hw shader variant */ 2096static INLINE void si_shader_selector_key(struct pipe_context *ctx, 2097 struct si_pipe_shader_selector *sel, 2098 union si_shader_key *key) 2099{ 2100 struct si_context *sctx = (struct si_context *)ctx; 2101 memset(key, 0, sizeof(*key)); 2102 2103 if ((sel->type == PIPE_SHADER_VERTEX || sel->type == PIPE_SHADER_GEOMETRY) && 2104 sctx->queued.named.rasterizer) { 2105 if (sctx->queued.named.rasterizer->clip_plane_enable & 0xf0) 2106 key->vs.ucps_enabled |= 0x2; 2107 if (sctx->queued.named.rasterizer->clip_plane_enable & 0xf) 2108 key->vs.ucps_enabled |= 0x1; 2109 } 2110 2111 if (sel->type == PIPE_SHADER_VERTEX) { 2112 unsigned i; 2113 if (!sctx->vertex_elements) 2114 return; 2115 2116 for (i = 0; i < sctx->vertex_elements->count; ++i) 2117 key->vs.instance_divisors[i] = sctx->vertex_elements->elements[i].instance_divisor; 2118 2119 key->vs.as_es = sctx->gs_shader != NULL; 2120 } else if (sel->type == PIPE_SHADER_FRAGMENT) { 2121 if (sel->fs_write_all) 2122 key->ps.nr_cbufs = sctx->framebuffer.state.nr_cbufs; 2123 key->ps.export_16bpc = sctx->framebuffer.export_16bpc; 2124 2125 if (sctx->queued.named.rasterizer) { 2126 key->ps.color_two_side = sctx->queued.named.rasterizer->two_side; 2127 key->ps.flatshade = sctx->queued.named.rasterizer->flatshade; 2128 key->ps.interp_at_sample = sctx->framebuffer.nr_samples > 1 && 2129 sctx->ps_iter_samples == sctx->framebuffer.nr_samples; 2130 2131 if (sctx->queued.named.blend) { 2132 key->ps.alpha_to_one = sctx->queued.named.blend->alpha_to_one && 2133 sctx->queued.named.rasterizer->multisample_enable && 2134 !sctx->framebuffer.cb0_is_integer; 2135 } 2136 } 2137 if (sctx->queued.named.dsa) { 2138 key->ps.alpha_func = sctx->queued.named.dsa->alpha_func; 2139 2140 /* Alpha-test should be disabled if colorbuffer 0 is integer. */ 2141 if (sctx->framebuffer.cb0_is_integer) 2142 key->ps.alpha_func = PIPE_FUNC_ALWAYS; 2143 } else { 2144 key->ps.alpha_func = PIPE_FUNC_ALWAYS; 2145 } 2146 } 2147} 2148 2149/* Select the hw shader variant depending on the current state. */ 2150int si_shader_select(struct pipe_context *ctx, 2151 struct si_pipe_shader_selector *sel) 2152{ 2153 union si_shader_key key; 2154 struct si_pipe_shader * shader = NULL; 2155 int r; 2156 2157 si_shader_selector_key(ctx, sel, &key); 2158 2159 /* Check if we don't need to change anything. 2160 * This path is also used for most shaders that don't need multiple 2161 * variants, it will cost just a computation of the key and this 2162 * test. */ 2163 if (likely(sel->current && memcmp(&sel->current->key, &key, sizeof(key)) == 0)) { 2164 return 0; 2165 } 2166 2167 /* lookup if we have other variants in the list */ 2168 if (sel->num_shaders > 1) { 2169 struct si_pipe_shader *p = sel->current, *c = p->next_variant; 2170 2171 while (c && memcmp(&c->key, &key, sizeof(key)) != 0) { 2172 p = c; 2173 c = c->next_variant; 2174 } 2175 2176 if (c) { 2177 p->next_variant = c->next_variant; 2178 shader = c; 2179 } 2180 } 2181 2182 if (shader) { 2183 shader->next_variant = sel->current; 2184 sel->current = shader; 2185 } else { 2186 shader = CALLOC(1, sizeof(struct si_pipe_shader)); 2187 shader->selector = sel; 2188 shader->key = key; 2189 2190 shader->next_variant = sel->current; 2191 sel->current = shader; 2192 r = si_pipe_shader_create(ctx, shader); 2193 if (unlikely(r)) { 2194 R600_ERR("Failed to build shader variant (type=%u) %d\n", 2195 sel->type, r); 2196 sel->current = NULL; 2197 FREE(shader); 2198 return r; 2199 } 2200 sel->num_shaders++; 2201 } 2202 2203 return 0; 2204} 2205 2206static void *si_create_shader_state(struct pipe_context *ctx, 2207 const struct pipe_shader_state *state, 2208 unsigned pipe_shader_type) 2209{ 2210 struct si_pipe_shader_selector *sel = CALLOC_STRUCT(si_pipe_shader_selector); 2211 int r; 2212 2213 sel->type = pipe_shader_type; 2214 sel->tokens = tgsi_dup_tokens(state->tokens); 2215 sel->so = state->stream_output; 2216 2217 if (pipe_shader_type == PIPE_SHADER_FRAGMENT) { 2218 struct tgsi_shader_info info; 2219 2220 tgsi_scan_shader(state->tokens, &info); 2221 sel->fs_write_all = info.color0_writes_all_cbufs; 2222 } 2223 2224 r = si_shader_select(ctx, sel); 2225 if (r) { 2226 free(sel); 2227 return NULL; 2228 } 2229 2230 return sel; 2231} 2232 2233static void *si_create_fs_state(struct pipe_context *ctx, 2234 const struct pipe_shader_state *state) 2235{ 2236 return si_create_shader_state(ctx, state, PIPE_SHADER_FRAGMENT); 2237} 2238 2239static void *si_create_gs_state(struct pipe_context *ctx, 2240 const struct pipe_shader_state *state) 2241{ 2242 return si_create_shader_state(ctx, state, PIPE_SHADER_GEOMETRY); 2243} 2244 2245static void *si_create_vs_state(struct pipe_context *ctx, 2246 const struct pipe_shader_state *state) 2247{ 2248 return si_create_shader_state(ctx, state, PIPE_SHADER_VERTEX); 2249} 2250 2251static void si_bind_vs_shader(struct pipe_context *ctx, void *state) 2252{ 2253 struct si_context *sctx = (struct si_context *)ctx; 2254 struct si_pipe_shader_selector *sel = state; 2255 2256 if (sctx->vs_shader == sel) 2257 return; 2258 2259 if (!sel || !sel->current) 2260 return; 2261 2262 sctx->vs_shader = sel; 2263} 2264 2265static void si_bind_gs_shader(struct pipe_context *ctx, void *state) 2266{ 2267 struct si_context *sctx = (struct si_context *)ctx; 2268 struct si_pipe_shader_selector *sel = state; 2269 2270 if (sctx->gs_shader == sel) 2271 return; 2272 2273 sctx->gs_shader = sel; 2274} 2275 2276static void si_bind_ps_shader(struct pipe_context *ctx, void *state) 2277{ 2278 struct si_context *sctx = (struct si_context *)ctx; 2279 struct si_pipe_shader_selector *sel = state; 2280 2281 /* skip if supplied shader is one already in use */ 2282 if (sctx->ps_shader == sel) 2283 return; 2284 2285 /* use dummy shader if supplied shader is corrupt */ 2286 if (!sel || !sel->current) 2287 sel = sctx->dummy_pixel_shader; 2288 2289 sctx->ps_shader = sel; 2290} 2291 2292static void si_delete_shader_selector(struct pipe_context *ctx, 2293 struct si_pipe_shader_selector *sel) 2294{ 2295 struct si_context *sctx = (struct si_context *)ctx; 2296 struct si_pipe_shader *p = sel->current, *c; 2297 2298 while (p) { 2299 c = p->next_variant; 2300 if (sel->type == PIPE_SHADER_GEOMETRY) 2301 si_pm4_delete_state(sctx, gs, p->pm4); 2302 else if (sel->type == PIPE_SHADER_FRAGMENT) 2303 si_pm4_delete_state(sctx, ps, p->pm4); 2304 else if (p->key.vs.as_es) 2305 si_pm4_delete_state(sctx, es, p->pm4); 2306 else 2307 si_pm4_delete_state(sctx, vs, p->pm4); 2308 si_pipe_shader_destroy(ctx, p); 2309 free(p); 2310 p = c; 2311 } 2312 2313 free(sel->tokens); 2314 free(sel); 2315 } 2316 2317static void si_delete_vs_shader(struct pipe_context *ctx, void *state) 2318{ 2319 struct si_context *sctx = (struct si_context *)ctx; 2320 struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state; 2321 2322 if (sctx->vs_shader == sel) { 2323 sctx->vs_shader = NULL; 2324 } 2325 2326 si_delete_shader_selector(ctx, sel); 2327} 2328 2329static void si_delete_gs_shader(struct pipe_context *ctx, void *state) 2330{ 2331 struct si_context *sctx = (struct si_context *)ctx; 2332 struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state; 2333 2334 if (sctx->gs_shader == sel) { 2335 sctx->gs_shader = NULL; 2336 } 2337 2338 si_delete_shader_selector(ctx, sel); 2339} 2340 2341static void si_delete_ps_shader(struct pipe_context *ctx, void *state) 2342{ 2343 struct si_context *sctx = (struct si_context *)ctx; 2344 struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state; 2345 2346 if (sctx->ps_shader == sel) { 2347 sctx->ps_shader = NULL; 2348 } 2349 2350 si_delete_shader_selector(ctx, sel); 2351} 2352 2353/* 2354 * Samplers 2355 */ 2356 2357static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx, 2358 struct pipe_resource *texture, 2359 const struct pipe_sampler_view *state) 2360{ 2361 struct si_pipe_sampler_view *view = CALLOC_STRUCT(si_pipe_sampler_view); 2362 struct r600_texture *tmp = (struct r600_texture*)texture; 2363 const struct util_format_description *desc; 2364 unsigned format, num_format; 2365 uint32_t pitch = 0; 2366 unsigned char state_swizzle[4], swizzle[4]; 2367 unsigned height, depth, width; 2368 enum pipe_format pipe_format = state->format; 2369 struct radeon_surface_level *surflevel; 2370 int first_non_void; 2371 uint64_t va; 2372 2373 if (view == NULL) 2374 return NULL; 2375 2376 /* initialize base object */ 2377 view->base = *state; 2378 view->base.texture = NULL; 2379 pipe_resource_reference(&view->base.texture, texture); 2380 view->base.reference.count = 1; 2381 view->base.context = ctx; 2382 view->resource = &tmp->resource; 2383 2384 /* Buffer resource. */ 2385 if (texture->target == PIPE_BUFFER) { 2386 unsigned stride; 2387 2388 desc = util_format_description(state->format); 2389 first_non_void = util_format_get_first_non_void_channel(state->format); 2390 stride = desc->block.bits / 8; 2391 va = r600_resource_va(ctx->screen, texture) + state->u.buf.first_element*stride; 2392 format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); 2393 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); 2394 2395 view->state[0] = va; 2396 view->state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | 2397 S_008F04_STRIDE(stride); 2398 view->state[2] = state->u.buf.last_element + 1 - state->u.buf.first_element; 2399 view->state[3] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 2400 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 2401 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 2402 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | 2403 S_008F0C_NUM_FORMAT(num_format) | 2404 S_008F0C_DATA_FORMAT(format); 2405 return &view->base; 2406 } 2407 2408 state_swizzle[0] = state->swizzle_r; 2409 state_swizzle[1] = state->swizzle_g; 2410 state_swizzle[2] = state->swizzle_b; 2411 state_swizzle[3] = state->swizzle_a; 2412 2413 surflevel = tmp->surface.level; 2414 2415 /* Texturing with separate depth and stencil. */ 2416 if (tmp->is_depth && !tmp->is_flushing_texture) { 2417 switch (pipe_format) { 2418 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 2419 pipe_format = PIPE_FORMAT_Z32_FLOAT; 2420 break; 2421 case PIPE_FORMAT_X8Z24_UNORM: 2422 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2423 /* Z24 is always stored like this. */ 2424 pipe_format = PIPE_FORMAT_Z24X8_UNORM; 2425 break; 2426 case PIPE_FORMAT_X24S8_UINT: 2427 case PIPE_FORMAT_S8X24_UINT: 2428 case PIPE_FORMAT_X32_S8X24_UINT: 2429 pipe_format = PIPE_FORMAT_S8_UINT; 2430 surflevel = tmp->surface.stencil_level; 2431 break; 2432 default:; 2433 } 2434 } 2435 2436 desc = util_format_description(pipe_format); 2437 2438 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 2439 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; 2440 const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1}; 2441 2442 switch (pipe_format) { 2443 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2444 case PIPE_FORMAT_X24S8_UINT: 2445 case PIPE_FORMAT_X32_S8X24_UINT: 2446 case PIPE_FORMAT_X8Z24_UNORM: 2447 util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); 2448 break; 2449 default: 2450 util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle); 2451 } 2452 } else { 2453 util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle); 2454 } 2455 2456 first_non_void = util_format_get_first_non_void_channel(pipe_format); 2457 2458 switch (pipe_format) { 2459 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2460 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2461 break; 2462 default: 2463 if (first_non_void < 0) { 2464 if (util_format_is_compressed(pipe_format)) { 2465 switch (pipe_format) { 2466 case PIPE_FORMAT_DXT1_SRGB: 2467 case PIPE_FORMAT_DXT1_SRGBA: 2468 case PIPE_FORMAT_DXT3_SRGBA: 2469 case PIPE_FORMAT_DXT5_SRGBA: 2470 num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 2471 break; 2472 case PIPE_FORMAT_RGTC1_SNORM: 2473 case PIPE_FORMAT_LATC1_SNORM: 2474 case PIPE_FORMAT_RGTC2_SNORM: 2475 case PIPE_FORMAT_LATC2_SNORM: 2476 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 2477 break; 2478 default: 2479 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2480 break; 2481 } 2482 } else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 2483 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2484 } else { 2485 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 2486 } 2487 } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { 2488 num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 2489 } else { 2490 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2491 2492 switch (desc->channel[first_non_void].type) { 2493 case UTIL_FORMAT_TYPE_FLOAT: 2494 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 2495 break; 2496 case UTIL_FORMAT_TYPE_SIGNED: 2497 if (desc->channel[first_non_void].normalized) 2498 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 2499 else if (desc->channel[first_non_void].pure_integer) 2500 num_format = V_008F14_IMG_NUM_FORMAT_SINT; 2501 else 2502 num_format = V_008F14_IMG_NUM_FORMAT_SSCALED; 2503 break; 2504 case UTIL_FORMAT_TYPE_UNSIGNED: 2505 if (desc->channel[first_non_void].normalized) 2506 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2507 else if (desc->channel[first_non_void].pure_integer) 2508 num_format = V_008F14_IMG_NUM_FORMAT_UINT; 2509 else 2510 num_format = V_008F14_IMG_NUM_FORMAT_USCALED; 2511 } 2512 } 2513 } 2514 2515 format = si_translate_texformat(ctx->screen, pipe_format, desc, first_non_void); 2516 if (format == ~0) { 2517 format = 0; 2518 } 2519 2520 /* not supported any more */ 2521 //endian = si_colorformat_endian_swap(format); 2522 2523 width = surflevel[0].npix_x; 2524 height = surflevel[0].npix_y; 2525 depth = surflevel[0].npix_z; 2526 pitch = surflevel[0].nblk_x * util_format_get_blockwidth(pipe_format); 2527 2528 if (texture->target == PIPE_TEXTURE_1D_ARRAY) { 2529 height = 1; 2530 depth = texture->array_size; 2531 } else if (texture->target == PIPE_TEXTURE_2D_ARRAY) { 2532 depth = texture->array_size; 2533 } else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY) 2534 depth = texture->array_size / 6; 2535 2536 va = r600_resource_va(ctx->screen, texture); 2537 va += surflevel[0].offset; 2538 va += tmp->mipmap_shift * surflevel[texture->last_level].slice_size * tmp->surface.array_size; 2539 2540 view->state[0] = va >> 8; 2541 view->state[1] = (S_008F14_BASE_ADDRESS_HI(va >> 40) | 2542 S_008F14_DATA_FORMAT(format) | 2543 S_008F14_NUM_FORMAT(num_format)); 2544 view->state[2] = (S_008F18_WIDTH(width - 1) | 2545 S_008F18_HEIGHT(height - 1)); 2546 view->state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) | 2547 S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | 2548 S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | 2549 S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) | 2550 S_008F1C_BASE_LEVEL(texture->nr_samples > 1 ? 2551 0 : state->u.tex.first_level - tmp->mipmap_shift) | 2552 S_008F1C_LAST_LEVEL(texture->nr_samples > 1 ? 2553 util_logbase2(texture->nr_samples) : 2554 state->u.tex.last_level - tmp->mipmap_shift) | 2555 S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, 0, false)) | 2556 S_008F1C_POW2_PAD(texture->last_level > 0) | 2557 S_008F1C_TYPE(si_tex_dim(texture->target, texture->nr_samples))); 2558 view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1)); 2559 view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) | 2560 S_008F24_LAST_ARRAY(state->u.tex.last_layer)); 2561 view->state[6] = 0; 2562 view->state[7] = 0; 2563 2564 /* Initialize the sampler view for FMASK. */ 2565 if (tmp->fmask.size) { 2566 uint64_t va = r600_resource_va(ctx->screen, texture) + tmp->fmask.offset; 2567 uint32_t fmask_format; 2568 2569 switch (texture->nr_samples) { 2570 case 2: 2571 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2; 2572 break; 2573 case 4: 2574 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4; 2575 break; 2576 case 8: 2577 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8; 2578 break; 2579 default: 2580 assert(0); 2581 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID; 2582 } 2583 2584 view->fmask_state[0] = va >> 8; 2585 view->fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | 2586 S_008F14_DATA_FORMAT(fmask_format) | 2587 S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT); 2588 view->fmask_state[2] = S_008F18_WIDTH(width - 1) | 2589 S_008F18_HEIGHT(height - 1); 2590 view->fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | 2591 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | 2592 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | 2593 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | 2594 S_008F1C_TILING_INDEX(tmp->fmask.tile_mode_index) | 2595 S_008F1C_TYPE(si_tex_dim(texture->target, 0)); 2596 view->fmask_state[4] = S_008F20_DEPTH(depth - 1) | 2597 S_008F20_PITCH(tmp->fmask.pitch - 1); 2598 view->fmask_state[5] = S_008F24_BASE_ARRAY(state->u.tex.first_layer) | 2599 S_008F24_LAST_ARRAY(state->u.tex.last_layer); 2600 view->fmask_state[6] = 0; 2601 view->fmask_state[7] = 0; 2602 } 2603 2604 return &view->base; 2605} 2606 2607static void si_sampler_view_destroy(struct pipe_context *ctx, 2608 struct pipe_sampler_view *state) 2609{ 2610 struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state; 2611 2612 pipe_resource_reference(&state->texture, NULL); 2613 FREE(resource); 2614} 2615 2616static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter) 2617{ 2618 return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER || 2619 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER || 2620 (linear_filter && 2621 (wrap == PIPE_TEX_WRAP_CLAMP || 2622 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP)); 2623} 2624 2625static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state) 2626{ 2627 bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST || 2628 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST; 2629 2630 return (state->border_color.ui[0] || state->border_color.ui[1] || 2631 state->border_color.ui[2] || state->border_color.ui[3]) && 2632 (wrap_mode_uses_border_color(state->wrap_s, linear_filter) || 2633 wrap_mode_uses_border_color(state->wrap_t, linear_filter) || 2634 wrap_mode_uses_border_color(state->wrap_r, linear_filter)); 2635} 2636 2637static void *si_create_sampler_state(struct pipe_context *ctx, 2638 const struct pipe_sampler_state *state) 2639{ 2640 struct si_pipe_sampler_state *rstate = CALLOC_STRUCT(si_pipe_sampler_state); 2641 unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0; 2642 unsigned border_color_type; 2643 2644 if (rstate == NULL) { 2645 return NULL; 2646 } 2647 2648 if (sampler_state_needs_border_color(state)) 2649 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER; 2650 else 2651 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; 2652 2653 rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) | 2654 S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) | 2655 S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) | 2656 r600_tex_aniso_filter(state->max_anisotropy) << 9 | 2657 S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) | 2658 S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) | 2659 S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map)); 2660 rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | 2661 S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8))); 2662 rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | 2663 S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter) | aniso_flag_offset) | 2664 S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter) | aniso_flag_offset) | 2665 S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter))); 2666 rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type); 2667 2668 if (border_color_type == V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) { 2669 memcpy(rstate->border_color, state->border_color.ui, 2670 sizeof(rstate->border_color)); 2671 } 2672 2673 return rstate; 2674} 2675 2676/* Upload border colors and update the pointers in resource descriptors. 2677 * There can only be 4096 border colors per context. 2678 * 2679 * XXX: This is broken if the buffer gets reallocated. 2680 */ 2681static void si_set_border_colors(struct si_context *sctx, unsigned count, 2682 void **states) 2683{ 2684 struct si_pipe_sampler_state **rstates = (struct si_pipe_sampler_state **)states; 2685 uint32_t *border_color_table = NULL; 2686 int i, j; 2687 2688 for (i = 0; i < count; i++) { 2689 if (rstates[i] && 2690 G_008F3C_BORDER_COLOR_TYPE(rstates[i]->val[3]) == 2691 V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) { 2692 if (!sctx->border_color_table || 2693 ((sctx->border_color_offset + count - i) & 2694 C_008F3C_BORDER_COLOR_PTR)) { 2695 r600_resource_reference(&sctx->border_color_table, NULL); 2696 sctx->border_color_offset = 0; 2697 2698 sctx->border_color_table = 2699 si_resource_create_custom(&sctx->screen->b.b, 2700 PIPE_USAGE_STAGING, 2701 4096 * 4 * 4); 2702 } 2703 2704 if (!border_color_table) { 2705 border_color_table = 2706 sctx->b.ws->buffer_map(sctx->border_color_table->cs_buf, 2707 sctx->b.rings.gfx.cs, 2708 PIPE_TRANSFER_WRITE | 2709 PIPE_TRANSFER_UNSYNCHRONIZED); 2710 } 2711 2712 for (j = 0; j < 4; j++) { 2713 border_color_table[4 * sctx->border_color_offset + j] = 2714 util_le32_to_cpu(rstates[i]->border_color[j]); 2715 } 2716 2717 rstates[i]->val[3] &= C_008F3C_BORDER_COLOR_PTR; 2718 rstates[i]->val[3] |= S_008F3C_BORDER_COLOR_PTR(sctx->border_color_offset++); 2719 } 2720 } 2721 2722 if (border_color_table) { 2723 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx); 2724 2725 uint64_t va_offset = 2726 r600_resource_va(&sctx->screen->b.b, 2727 (void*)sctx->border_color_table); 2728 2729 si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va_offset >> 8); 2730 if (sctx->b.chip_class >= CIK) 2731 si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va_offset >> 40); 2732 si_pm4_add_bo(pm4, sctx->border_color_table, RADEON_USAGE_READ, 2733 RADEON_PRIO_SHADER_DATA); 2734 si_pm4_set_state(sctx, ta_bordercolor_base, pm4); 2735 } 2736} 2737 2738static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader, 2739 unsigned start, unsigned count, 2740 void **states) 2741{ 2742 struct si_context *sctx = (struct si_context *)ctx; 2743 2744 if (!count || shader >= SI_NUM_SHADERS) 2745 return; 2746 2747 si_set_border_colors(sctx, count, states); 2748 si_set_sampler_descriptors(sctx, shader, start, count, states); 2749} 2750 2751static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) 2752{ 2753 struct si_context *sctx = (struct si_context *)ctx; 2754 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx); 2755 uint16_t mask = sample_mask; 2756 2757 if (pm4 == NULL) 2758 return; 2759 2760 si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, mask | (mask << 16)); 2761 si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, mask | (mask << 16)); 2762 2763 si_pm4_set_state(sctx, sample_mask, pm4); 2764} 2765 2766static void si_delete_sampler_state(struct pipe_context *ctx, void *state) 2767{ 2768 free(state); 2769} 2770 2771/* 2772 * Vertex elements & buffers 2773 */ 2774 2775static void *si_create_vertex_elements(struct pipe_context *ctx, 2776 unsigned count, 2777 const struct pipe_vertex_element *elements) 2778{ 2779 struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element); 2780 int i; 2781 2782 assert(count < PIPE_MAX_ATTRIBS); 2783 if (!v) 2784 return NULL; 2785 2786 v->count = count; 2787 for (i = 0; i < count; ++i) { 2788 const struct util_format_description *desc; 2789 unsigned data_format, num_format; 2790 int first_non_void; 2791 2792 desc = util_format_description(elements[i].src_format); 2793 first_non_void = util_format_get_first_non_void_channel(elements[i].src_format); 2794 data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); 2795 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); 2796 2797 v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 2798 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 2799 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 2800 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | 2801 S_008F0C_NUM_FORMAT(num_format) | 2802 S_008F0C_DATA_FORMAT(data_format); 2803 v->format_size[i] = desc->block.bits / 8; 2804 } 2805 memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count); 2806 2807 return v; 2808} 2809 2810static void si_bind_vertex_elements(struct pipe_context *ctx, void *state) 2811{ 2812 struct si_context *sctx = (struct si_context *)ctx; 2813 struct si_vertex_element *v = (struct si_vertex_element*)state; 2814 2815 sctx->vertex_elements = v; 2816 sctx->vertex_buffers_dirty = true; 2817} 2818 2819static void si_delete_vertex_element(struct pipe_context *ctx, void *state) 2820{ 2821 struct si_context *sctx = (struct si_context *)ctx; 2822 2823 if (sctx->vertex_elements == state) 2824 sctx->vertex_elements = NULL; 2825 FREE(state); 2826} 2827 2828static void si_set_vertex_buffers(struct pipe_context *ctx, 2829 unsigned start_slot, unsigned count, 2830 const struct pipe_vertex_buffer *buffers) 2831{ 2832 struct si_context *sctx = (struct si_context *)ctx; 2833 struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot; 2834 int i; 2835 2836 assert(start_slot + count <= Elements(sctx->vertex_buffer)); 2837 2838 if (buffers) { 2839 for (i = 0; i < count; i++) { 2840 const struct pipe_vertex_buffer *src = buffers + i; 2841 struct pipe_vertex_buffer *dsti = dst + i; 2842 2843 pipe_resource_reference(&dsti->buffer, src->buffer); 2844 dsti->buffer_offset = src->buffer_offset; 2845 dsti->stride = src->stride; 2846 } 2847 } else { 2848 for (i = 0; i < count; i++) { 2849 pipe_resource_reference(&dst[i].buffer, NULL); 2850 } 2851 } 2852 sctx->vertex_buffers_dirty = true; 2853} 2854 2855static void si_set_index_buffer(struct pipe_context *ctx, 2856 const struct pipe_index_buffer *ib) 2857{ 2858 struct si_context *sctx = (struct si_context *)ctx; 2859 2860 if (ib) { 2861 pipe_resource_reference(&sctx->index_buffer.buffer, ib->buffer); 2862 memcpy(&sctx->index_buffer, ib, sizeof(*ib)); 2863 } else { 2864 pipe_resource_reference(&sctx->index_buffer.buffer, NULL); 2865 } 2866} 2867 2868/* 2869 * Misc 2870 */ 2871static void si_set_polygon_stipple(struct pipe_context *ctx, 2872 const struct pipe_poly_stipple *state) 2873{ 2874} 2875 2876static void si_texture_barrier(struct pipe_context *ctx) 2877{ 2878 struct si_context *sctx = (struct si_context *)ctx; 2879 2880 sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE | 2881 R600_CONTEXT_FLUSH_AND_INV_CB; 2882} 2883 2884static void *si_create_blend_custom(struct si_context *sctx, unsigned mode) 2885{ 2886 struct pipe_blend_state blend; 2887 2888 memset(&blend, 0, sizeof(blend)); 2889 blend.independent_blend_enable = true; 2890 blend.rt[0].colormask = 0xf; 2891 return si_create_blend_state_mode(&sctx->b.b, &blend, mode); 2892} 2893 2894static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable) 2895{ 2896 /* XXX Turn this into a proper state. Right now the queries are 2897 * enabled in draw_vbo, which snoops r600_common_context to see 2898 * if any occlusion queries are active. */ 2899} 2900 2901static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw, 2902 bool include_draw_vbo) 2903{ 2904 si_need_cs_space((struct si_context*)ctx, num_dw, include_draw_vbo); 2905} 2906 2907void si_init_state_functions(struct si_context *sctx) 2908{ 2909 int i; 2910 2911 si_init_atom(&sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state, 0); 2912 2913 sctx->b.b.create_blend_state = si_create_blend_state; 2914 sctx->b.b.bind_blend_state = si_bind_blend_state; 2915 sctx->b.b.delete_blend_state = si_delete_blend_state; 2916 sctx->b.b.set_blend_color = si_set_blend_color; 2917 2918 sctx->b.b.create_rasterizer_state = si_create_rs_state; 2919 sctx->b.b.bind_rasterizer_state = si_bind_rs_state; 2920 sctx->b.b.delete_rasterizer_state = si_delete_rs_state; 2921 2922 sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state; 2923 sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state; 2924 sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state; 2925 2926 for (i = 0; i < 8; i++) { 2927 sctx->custom_dsa_flush_depth_stencil[i] = si_create_db_flush_dsa(sctx, true, true, i); 2928 sctx->custom_dsa_flush_depth[i] = si_create_db_flush_dsa(sctx, true, false, i); 2929 sctx->custom_dsa_flush_stencil[i] = si_create_db_flush_dsa(sctx, false, true, i); 2930 } 2931 sctx->custom_dsa_flush_inplace = si_create_db_flush_dsa(sctx, false, false, 0); 2932 sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE); 2933 sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS); 2934 sctx->custom_blend_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR); 2935 2936 sctx->b.b.set_clip_state = si_set_clip_state; 2937 sctx->b.b.set_scissor_states = si_set_scissor_states; 2938 sctx->b.b.set_viewport_states = si_set_viewport_states; 2939 sctx->b.b.set_stencil_ref = si_set_pipe_stencil_ref; 2940 2941 sctx->b.b.set_framebuffer_state = si_set_framebuffer_state; 2942 sctx->b.b.get_sample_position = cayman_get_sample_position; 2943 2944 sctx->b.b.create_vs_state = si_create_vs_state; 2945 sctx->b.b.create_fs_state = si_create_fs_state; 2946 sctx->b.b.bind_vs_state = si_bind_vs_shader; 2947 sctx->b.b.bind_fs_state = si_bind_ps_shader; 2948 sctx->b.b.delete_vs_state = si_delete_vs_shader; 2949 sctx->b.b.delete_fs_state = si_delete_ps_shader; 2950 2951 sctx->b.b.create_gs_state = si_create_gs_state; 2952 sctx->b.b.bind_gs_state = si_bind_gs_shader; 2953 sctx->b.b.delete_gs_state = si_delete_gs_shader; 2954 2955 sctx->b.b.create_sampler_state = si_create_sampler_state; 2956 sctx->b.b.bind_sampler_states = si_bind_sampler_states; 2957 sctx->b.b.delete_sampler_state = si_delete_sampler_state; 2958 2959 sctx->b.b.create_sampler_view = si_create_sampler_view; 2960 sctx->b.b.sampler_view_destroy = si_sampler_view_destroy; 2961 2962 sctx->b.b.set_sample_mask = si_set_sample_mask; 2963 2964 sctx->b.b.create_vertex_elements_state = si_create_vertex_elements; 2965 sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements; 2966 sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element; 2967 sctx->b.b.set_vertex_buffers = si_set_vertex_buffers; 2968 sctx->b.b.set_index_buffer = si_set_index_buffer; 2969 2970 sctx->b.b.texture_barrier = si_texture_barrier; 2971 sctx->b.b.set_polygon_stipple = si_set_polygon_stipple; 2972 sctx->b.b.set_min_samples = si_set_min_samples; 2973 2974 sctx->b.dma_copy = si_dma_copy; 2975 sctx->b.set_occlusion_query_state = si_set_occlusion_query_state; 2976 sctx->b.need_gfx_cs_space = si_need_gfx_cs_space; 2977 2978 sctx->b.b.draw_vbo = si_draw_vbo; 2979} 2980 2981void si_init_config(struct si_context *sctx) 2982{ 2983 struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx); 2984 2985 if (pm4 == NULL) 2986 return; 2987 2988 si_cmd_context_control(pm4); 2989 2990 si_pm4_set_reg(pm4, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0); 2991 si_pm4_set_reg(pm4, R_028A14_VGT_HOS_CNTL, 0x0); 2992 si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0); 2993 si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0); 2994 si_pm4_set_reg(pm4, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0); 2995 si_pm4_set_reg(pm4, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0); 2996 si_pm4_set_reg(pm4, R_028A28_VGT_GROUP_FIRST_DECR, 0x0); 2997 si_pm4_set_reg(pm4, R_028A2C_VGT_GROUP_DECR, 0x0); 2998 si_pm4_set_reg(pm4, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0); 2999 si_pm4_set_reg(pm4, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0); 3000 si_pm4_set_reg(pm4, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0); 3001 si_pm4_set_reg(pm4, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0); 3002 3003 /* FIXME calculate these values somehow ??? */ 3004 si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, 0x80); 3005 si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40); 3006 si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2); 3007 3008 si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0x0); 3009 si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0); 3010 si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0); 3011 si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); 3012 3013 si_pm4_set_reg(pm4, R_028B60_VGT_GS_VERT_ITEMSIZE_1, 0); 3014 si_pm4_set_reg(pm4, R_028B64_VGT_GS_VERT_ITEMSIZE_2, 0); 3015 si_pm4_set_reg(pm4, R_028B68_VGT_GS_VERT_ITEMSIZE_3, 0); 3016 si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT, 0); 3017 3018 si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); 3019 if (sctx->b.chip_class == SI) { 3020 si_pm4_set_reg(pm4, R_028AA8_IA_MULTI_VGT_PARAM, 3021 S_028AA8_SWITCH_ON_EOP(1) | 3022 S_028AA8_PARTIAL_VS_WAVE_ON(1) | 3023 S_028AA8_PRIMGROUP_SIZE(63)); 3024 } 3025 si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0x00000000); 3026 si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0); 3027 if (sctx->b.chip_class < CIK) 3028 si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | 3029 S_008A14_CLIP_VTX_REORDER_ENA(1)); 3030 3031 si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210); 3032 si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98); 3033 3034 si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0); 3035 3036 if (sctx->b.chip_class >= CIK) { 3037 switch (sctx->screen->b.family) { 3038 case CHIP_BONAIRE: 3039 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x16000012); 3040 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000); 3041 break; 3042 case CHIP_HAWAII: 3043 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x3a00161a); 3044 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x0000002e); 3045 break; 3046 case CHIP_KAVERI: 3047 /* XXX todo */ 3048 case CHIP_KABINI: 3049 /* XXX todo */ 3050 case CHIP_MULLINS: 3051 /* XXX todo */ 3052 default: 3053 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000); 3054 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000); 3055 break; 3056 } 3057 } else { 3058 switch (sctx->screen->b.family) { 3059 case CHIP_TAHITI: 3060 case CHIP_PITCAIRN: 3061 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x2a00126a); 3062 break; 3063 case CHIP_VERDE: 3064 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x0000124a); 3065 break; 3066 case CHIP_OLAND: 3067 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000082); 3068 break; 3069 case CHIP_HAINAN: 3070 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000); 3071 break; 3072 default: 3073 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000); 3074 break; 3075 } 3076 } 3077 3078 si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); 3079 si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); 3080 si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR, 3081 S_028244_BR_X(16384) | S_028244_BR_Y(16384)); 3082 si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0); 3083 si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR, 3084 S_028034_BR_X(16384) | S_028034_BR_Y(16384)); 3085 3086 si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); 3087 si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); 3088 si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000); 3089 si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000); 3090 si_pm4_set_reg(pm4, R_028818_PA_CL_VTE_CNTL, 0x0000043F); 3091 si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0x00000000); 3092 si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000); 3093 si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000); 3094 si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000); 3095 si_pm4_set_reg(pm4, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000); 3096 si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, 0x00000000); 3097 si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, 0x00000000); 3098 si_pm4_set_reg(pm4, R_028028_DB_STENCIL_CLEAR, 0x00000000); 3099 si_pm4_set_reg(pm4, R_02802C_DB_DEPTH_CLEAR, 0x3F800000); 3100 si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); 3101 si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); 3102 si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0); 3103 si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 3104 S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | 3105 S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE)); 3106 si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0); 3107 si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0); 3108 si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0); 3109 3110 if (sctx->b.chip_class >= CIK) { 3111 si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff)); 3112 si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(0)); 3113 si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff)); 3114 } 3115 3116 si_pm4_set_state(sctx, init, pm4); 3117} 3118