si_state.c revision 21d9a1b5ef51ce449e9a82641d0d605c5448b41c
1/* 2 * Copyright 2012 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Christian König <christian.koenig@amd.com> 25 */ 26 27#include "util/u_memory.h" 28#include "util/u_framebuffer.h" 29#include "util/u_blitter.h" 30#include "util/u_helpers.h" 31#include "util/u_math.h" 32#include "util/u_pack_color.h" 33#include "util/u_upload_mgr.h" 34#include "util/u_format_s3tc.h" 35#include "tgsi/tgsi_parse.h" 36#include "radeonsi_pipe.h" 37#include "radeonsi_shader.h" 38#include "si_state.h" 39#include "sid.h" 40 41static uint32_t cik_num_banks(uint32_t nbanks) 42{ 43 switch (nbanks) { 44 case 2: 45 return V_02803C_ADDR_SURF_2_BANK; 46 case 4: 47 return V_02803C_ADDR_SURF_4_BANK; 48 case 8: 49 default: 50 return V_02803C_ADDR_SURF_8_BANK; 51 case 16: 52 return V_02803C_ADDR_SURF_16_BANK; 53 } 54} 55 56 57static unsigned cik_tile_split(unsigned tile_split) 58{ 59 switch (tile_split) { 60 case 64: 61 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_64B; 62 break; 63 case 128: 64 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_128B; 65 break; 66 case 256: 67 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_256B; 68 break; 69 case 512: 70 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_512B; 71 break; 72 default: 73 case 1024: 74 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_1KB; 75 break; 76 case 2048: 77 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_2KB; 78 break; 79 case 4096: 80 tile_split = V_028040_ADDR_SURF_TILE_SPLIT_4KB; 81 break; 82 } 83 return tile_split; 84} 85 86static unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect) 87{ 88 switch (macro_tile_aspect) { 89 default: 90 case 1: 91 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_1; 92 break; 93 case 2: 94 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_2; 95 break; 96 case 4: 97 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_4; 98 break; 99 case 8: 100 macro_tile_aspect = V_02803C_ADDR_SURF_MACRO_ASPECT_8; 101 break; 102 } 103 return macro_tile_aspect; 104} 105 106static unsigned cik_bank_wh(unsigned bankwh) 107{ 108 switch (bankwh) { 109 default: 110 case 1: 111 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_1; 112 break; 113 case 2: 114 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_2; 115 break; 116 case 4: 117 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_4; 118 break; 119 case 8: 120 bankwh = V_02803C_ADDR_SURF_BANK_WIDTH_8; 121 break; 122 } 123 return bankwh; 124} 125 126static unsigned cik_db_pipe_config(unsigned tile_pipes, 127 unsigned num_rbs) 128{ 129 unsigned pipe_config; 130 131 switch (tile_pipes) { 132 case 8: 133 pipe_config = V_02803C_X_ADDR_SURF_P8_32X32_16X16; 134 break; 135 case 4: 136 default: 137 if (num_rbs == 4) 138 pipe_config = V_02803C_X_ADDR_SURF_P4_16X16; 139 else 140 pipe_config = V_02803C_X_ADDR_SURF_P4_8X16; 141 break; 142 case 2: 143 pipe_config = V_02803C_ADDR_SURF_P2; 144 break; 145 } 146 return pipe_config; 147} 148 149/* 150 * inferred framebuffer and blender state 151 */ 152static void si_update_fb_blend_state(struct r600_context *rctx) 153{ 154 struct si_pm4_state *pm4; 155 struct si_state_blend *blend = rctx->queued.named.blend; 156 uint32_t mask; 157 158 if (blend == NULL) 159 return; 160 161 pm4 = si_pm4_alloc_state(rctx); 162 if (pm4 == NULL) 163 return; 164 165 mask = (1ULL << ((unsigned)rctx->framebuffer.nr_cbufs * 4)) - 1; 166 mask &= blend->cb_target_mask; 167 si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask); 168 169 si_pm4_set_state(rctx, fb_blend, pm4); 170} 171 172/* 173 * Blender functions 174 */ 175 176static uint32_t si_translate_blend_function(int blend_func) 177{ 178 switch (blend_func) { 179 case PIPE_BLEND_ADD: 180 return V_028780_COMB_DST_PLUS_SRC; 181 case PIPE_BLEND_SUBTRACT: 182 return V_028780_COMB_SRC_MINUS_DST; 183 case PIPE_BLEND_REVERSE_SUBTRACT: 184 return V_028780_COMB_DST_MINUS_SRC; 185 case PIPE_BLEND_MIN: 186 return V_028780_COMB_MIN_DST_SRC; 187 case PIPE_BLEND_MAX: 188 return V_028780_COMB_MAX_DST_SRC; 189 default: 190 R600_ERR("Unknown blend function %d\n", blend_func); 191 assert(0); 192 break; 193 } 194 return 0; 195} 196 197static uint32_t si_translate_blend_factor(int blend_fact) 198{ 199 switch (blend_fact) { 200 case PIPE_BLENDFACTOR_ONE: 201 return V_028780_BLEND_ONE; 202 case PIPE_BLENDFACTOR_SRC_COLOR: 203 return V_028780_BLEND_SRC_COLOR; 204 case PIPE_BLENDFACTOR_SRC_ALPHA: 205 return V_028780_BLEND_SRC_ALPHA; 206 case PIPE_BLENDFACTOR_DST_ALPHA: 207 return V_028780_BLEND_DST_ALPHA; 208 case PIPE_BLENDFACTOR_DST_COLOR: 209 return V_028780_BLEND_DST_COLOR; 210 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 211 return V_028780_BLEND_SRC_ALPHA_SATURATE; 212 case PIPE_BLENDFACTOR_CONST_COLOR: 213 return V_028780_BLEND_CONSTANT_COLOR; 214 case PIPE_BLENDFACTOR_CONST_ALPHA: 215 return V_028780_BLEND_CONSTANT_ALPHA; 216 case PIPE_BLENDFACTOR_ZERO: 217 return V_028780_BLEND_ZERO; 218 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 219 return V_028780_BLEND_ONE_MINUS_SRC_COLOR; 220 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 221 return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; 222 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 223 return V_028780_BLEND_ONE_MINUS_DST_ALPHA; 224 case PIPE_BLENDFACTOR_INV_DST_COLOR: 225 return V_028780_BLEND_ONE_MINUS_DST_COLOR; 226 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 227 return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR; 228 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 229 return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA; 230 case PIPE_BLENDFACTOR_SRC1_COLOR: 231 return V_028780_BLEND_SRC1_COLOR; 232 case PIPE_BLENDFACTOR_SRC1_ALPHA: 233 return V_028780_BLEND_SRC1_ALPHA; 234 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 235 return V_028780_BLEND_INV_SRC1_COLOR; 236 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 237 return V_028780_BLEND_INV_SRC1_ALPHA; 238 default: 239 R600_ERR("Bad blend factor %d not supported!\n", blend_fact); 240 assert(0); 241 break; 242 } 243 return 0; 244} 245 246static void *si_create_blend_state_mode(struct pipe_context *ctx, 247 const struct pipe_blend_state *state, 248 unsigned mode) 249{ 250 struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend); 251 struct si_pm4_state *pm4 = &blend->pm4; 252 253 uint32_t color_control; 254 255 if (blend == NULL) 256 return NULL; 257 258 blend->alpha_to_one = state->alpha_to_one; 259 260 color_control = S_028808_MODE(mode); 261 if (state->logicop_enable) { 262 color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4)); 263 } else { 264 color_control |= S_028808_ROP3(0xcc); 265 } 266 si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control); 267 268 si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 269 S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 270 S_028B70_ALPHA_TO_MASK_OFFSET0(2) | 271 S_028B70_ALPHA_TO_MASK_OFFSET1(2) | 272 S_028B70_ALPHA_TO_MASK_OFFSET2(2) | 273 S_028B70_ALPHA_TO_MASK_OFFSET3(2)); 274 275 blend->cb_target_mask = 0; 276 for (int i = 0; i < 8; i++) { 277 /* state->rt entries > 0 only written if independent blending */ 278 const int j = state->independent_blend_enable ? i : 0; 279 280 unsigned eqRGB = state->rt[j].rgb_func; 281 unsigned srcRGB = state->rt[j].rgb_src_factor; 282 unsigned dstRGB = state->rt[j].rgb_dst_factor; 283 unsigned eqA = state->rt[j].alpha_func; 284 unsigned srcA = state->rt[j].alpha_src_factor; 285 unsigned dstA = state->rt[j].alpha_dst_factor; 286 287 unsigned blend_cntl = 0; 288 289 /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */ 290 blend->cb_target_mask |= state->rt[j].colormask << (4 * i); 291 292 if (!state->rt[j].blend_enable) { 293 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 294 continue; 295 } 296 297 blend_cntl |= S_028780_ENABLE(1); 298 blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB)); 299 blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB)); 300 blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB)); 301 302 if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { 303 blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1); 304 blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA)); 305 blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA)); 306 blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA)); 307 } 308 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 309 } 310 311 return blend; 312} 313 314static void *si_create_blend_state(struct pipe_context *ctx, 315 const struct pipe_blend_state *state) 316{ 317 return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); 318} 319 320static void si_bind_blend_state(struct pipe_context *ctx, void *state) 321{ 322 struct r600_context *rctx = (struct r600_context *)ctx; 323 si_pm4_bind_state(rctx, blend, (struct si_state_blend *)state); 324 si_update_fb_blend_state(rctx); 325} 326 327static void si_delete_blend_state(struct pipe_context *ctx, void *state) 328{ 329 struct r600_context *rctx = (struct r600_context *)ctx; 330 si_pm4_delete_state(rctx, blend, (struct si_state_blend *)state); 331} 332 333static void si_set_blend_color(struct pipe_context *ctx, 334 const struct pipe_blend_color *state) 335{ 336 struct r600_context *rctx = (struct r600_context *)ctx; 337 struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx); 338 339 if (pm4 == NULL) 340 return; 341 342 si_pm4_set_reg(pm4, R_028414_CB_BLEND_RED, fui(state->color[0])); 343 si_pm4_set_reg(pm4, R_028418_CB_BLEND_GREEN, fui(state->color[1])); 344 si_pm4_set_reg(pm4, R_02841C_CB_BLEND_BLUE, fui(state->color[2])); 345 si_pm4_set_reg(pm4, R_028420_CB_BLEND_ALPHA, fui(state->color[3])); 346 347 si_pm4_set_state(rctx, blend_color, pm4); 348} 349 350/* 351 * Clipping, scissors and viewport 352 */ 353 354static void si_set_clip_state(struct pipe_context *ctx, 355 const struct pipe_clip_state *state) 356{ 357 struct r600_context *rctx = (struct r600_context *)ctx; 358 struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx); 359 struct pipe_constant_buffer cb; 360 361 if (pm4 == NULL) 362 return; 363 364 for (int i = 0; i < 6; i++) { 365 si_pm4_set_reg(pm4, R_0285BC_PA_CL_UCP_0_X + i * 16, 366 fui(state->ucp[i][0])); 367 si_pm4_set_reg(pm4, R_0285C0_PA_CL_UCP_0_Y + i * 16, 368 fui(state->ucp[i][1])); 369 si_pm4_set_reg(pm4, R_0285C4_PA_CL_UCP_0_Z + i * 16, 370 fui(state->ucp[i][2])); 371 si_pm4_set_reg(pm4, R_0285C8_PA_CL_UCP_0_W + i * 16, 372 fui(state->ucp[i][3])); 373 } 374 375 cb.buffer = NULL; 376 cb.user_buffer = state->ucp; 377 cb.buffer_offset = 0; 378 cb.buffer_size = 4*4*8; 379 ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, 1, &cb); 380 pipe_resource_reference(&cb.buffer, NULL); 381 382 si_pm4_set_state(rctx, clip, pm4); 383} 384 385static void si_set_scissor_states(struct pipe_context *ctx, 386 unsigned start_slot, 387 unsigned num_scissors, 388 const struct pipe_scissor_state *state) 389{ 390 struct r600_context *rctx = (struct r600_context *)ctx; 391 struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx); 392 uint32_t tl, br; 393 394 if (pm4 == NULL) 395 return; 396 397 tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny); 398 br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); 399 si_pm4_set_reg(pm4, R_028210_PA_SC_CLIPRECT_0_TL, tl); 400 si_pm4_set_reg(pm4, R_028214_PA_SC_CLIPRECT_0_BR, br); 401 si_pm4_set_reg(pm4, R_028218_PA_SC_CLIPRECT_1_TL, tl); 402 si_pm4_set_reg(pm4, R_02821C_PA_SC_CLIPRECT_1_BR, br); 403 si_pm4_set_reg(pm4, R_028220_PA_SC_CLIPRECT_2_TL, tl); 404 si_pm4_set_reg(pm4, R_028224_PA_SC_CLIPRECT_2_BR, br); 405 si_pm4_set_reg(pm4, R_028228_PA_SC_CLIPRECT_3_TL, tl); 406 si_pm4_set_reg(pm4, R_02822C_PA_SC_CLIPRECT_3_BR, br); 407 408 si_pm4_set_state(rctx, scissor, pm4); 409} 410 411static void si_set_viewport_states(struct pipe_context *ctx, 412 unsigned start_slot, 413 unsigned num_viewports, 414 const struct pipe_viewport_state *state) 415{ 416 struct r600_context *rctx = (struct r600_context *)ctx; 417 struct si_state_viewport *viewport = CALLOC_STRUCT(si_state_viewport); 418 struct si_pm4_state *pm4 = &viewport->pm4; 419 420 if (viewport == NULL) 421 return; 422 423 viewport->viewport = *state; 424 si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000); 425 si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000); 426 si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0])); 427 si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0])); 428 si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1])); 429 si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1])); 430 si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2])); 431 si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2])); 432 si_pm4_set_reg(pm4, R_028818_PA_CL_VTE_CNTL, 0x0000043F); 433 434 si_pm4_set_state(rctx, viewport, viewport); 435} 436 437/* 438 * inferred state between framebuffer and rasterizer 439 */ 440static void si_update_fb_rs_state(struct r600_context *rctx) 441{ 442 struct si_state_rasterizer *rs = rctx->queued.named.rasterizer; 443 struct si_pm4_state *pm4; 444 unsigned offset_db_fmt_cntl = 0, depth; 445 float offset_units; 446 447 if (!rs || !rctx->framebuffer.zsbuf) 448 return; 449 450 offset_units = rctx->queued.named.rasterizer->offset_units; 451 switch (rctx->framebuffer.zsbuf->texture->format) { 452 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 453 case PIPE_FORMAT_X8Z24_UNORM: 454 case PIPE_FORMAT_Z24X8_UNORM: 455 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 456 depth = -24; 457 offset_units *= 2.0f; 458 break; 459 case PIPE_FORMAT_Z32_FLOAT: 460 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 461 depth = -23; 462 offset_units *= 1.0f; 463 offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); 464 break; 465 case PIPE_FORMAT_Z16_UNORM: 466 depth = -16; 467 offset_units *= 4.0f; 468 break; 469 default: 470 return; 471 } 472 473 pm4 = si_pm4_alloc_state(rctx); 474 475 if (pm4 == NULL) 476 return; 477 478 /* FIXME some of those reg can be computed with cso */ 479 offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(depth); 480 si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 481 fui(rctx->queued.named.rasterizer->offset_scale)); 482 si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units)); 483 si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, 484 fui(rctx->queued.named.rasterizer->offset_scale)); 485 si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units)); 486 si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, offset_db_fmt_cntl); 487 488 si_pm4_set_state(rctx, fb_rs, pm4); 489} 490 491/* 492 * Rasterizer 493 */ 494 495static uint32_t si_translate_fill(uint32_t func) 496{ 497 switch(func) { 498 case PIPE_POLYGON_MODE_FILL: 499 return V_028814_X_DRAW_TRIANGLES; 500 case PIPE_POLYGON_MODE_LINE: 501 return V_028814_X_DRAW_LINES; 502 case PIPE_POLYGON_MODE_POINT: 503 return V_028814_X_DRAW_POINTS; 504 default: 505 assert(0); 506 return V_028814_X_DRAW_POINTS; 507 } 508} 509 510static void *si_create_rs_state(struct pipe_context *ctx, 511 const struct pipe_rasterizer_state *state) 512{ 513 struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer); 514 struct si_pm4_state *pm4 = &rs->pm4; 515 unsigned tmp; 516 unsigned prov_vtx = 1, polygon_dual_mode; 517 unsigned clip_rule; 518 float psize_min, psize_max; 519 520 if (rs == NULL) { 521 return NULL; 522 } 523 524 rs->two_side = state->light_twoside; 525 rs->multisample_enable = state->multisample; 526 rs->clip_plane_enable = state->clip_plane_enable; 527 528 polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL || 529 state->fill_back != PIPE_POLYGON_MODE_FILL); 530 531 if (state->flatshade_first) 532 prov_vtx = 0; 533 534 rs->flatshade = state->flatshade; 535 rs->sprite_coord_enable = state->sprite_coord_enable; 536 rs->pa_sc_line_stipple = state->line_stipple_enable ? 537 S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | 538 S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0; 539 rs->pa_su_sc_mode_cntl = 540 S_028814_PROVOKING_VTX_LAST(prov_vtx) | 541 S_028814_CULL_FRONT(state->rasterizer_discard || (state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | 542 S_028814_CULL_BACK(state->rasterizer_discard || (state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | 543 S_028814_FACE(!state->front_ccw) | 544 S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) | 545 S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) | 546 S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) | 547 S_028814_POLY_MODE(polygon_dual_mode) | 548 S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) | 549 S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)); 550 rs->pa_cl_clip_cntl = 551 S_028810_PS_UCP_MODE(3) | 552 S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) | 553 S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) | 554 S_028810_DX_LINEAR_ATTR_CLIP_ENA(1); 555 556 clip_rule = state->scissor ? 0xAAAA : 0xFFFF; 557 558 /* offset */ 559 rs->offset_units = state->offset_units; 560 rs->offset_scale = state->offset_scale * 12.0f; 561 562 tmp = S_0286D4_FLAT_SHADE_ENA(1); 563 if (state->sprite_coord_enable) { 564 tmp |= S_0286D4_PNT_SPRITE_ENA(1) | 565 S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | 566 S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) | 567 S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) | 568 S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1); 569 if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { 570 tmp |= S_0286D4_PNT_SPRITE_TOP_1(1); 571 } 572 } 573 si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0, tmp); 574 575 si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0x00000000); 576 /* point size 12.4 fixed point */ 577 tmp = (unsigned)(state->point_size * 8.0); 578 si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp)); 579 580 if (state->point_size_per_vertex) { 581 psize_min = util_get_min_point_size(state); 582 psize_max = 8192; 583 } else { 584 /* Force the point size to be as if the vertex output was disabled. */ 585 psize_min = state->point_size; 586 psize_max = state->point_size; 587 } 588 /* Divide by two, because 0.5 = 1 pixel. */ 589 si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX, 590 S_028A04_MIN_SIZE(r600_pack_float_12p4(psize_min/2)) | 591 S_028A04_MAX_SIZE(r600_pack_float_12p4(psize_max/2))); 592 593 tmp = (unsigned)state->line_width * 8; 594 si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp)); 595 si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0, 596 S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | 597 S_028A48_MSAA_ENABLE(state->multisample)); 598 599 si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL, 600 S_028BE4_PIX_CENTER(state->half_pixel_center) | 601 S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH)); 602 si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000); 603 si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000); 604 si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000); 605 si_pm4_set_reg(pm4, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000); 606 607 si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp)); 608 si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule); 609 610 return rs; 611} 612 613static void si_bind_rs_state(struct pipe_context *ctx, void *state) 614{ 615 struct r600_context *rctx = (struct r600_context *)ctx; 616 struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; 617 618 if (state == NULL) 619 return; 620 621 // TODO 622 rctx->sprite_coord_enable = rs->sprite_coord_enable; 623 rctx->pa_sc_line_stipple = rs->pa_sc_line_stipple; 624 rctx->pa_su_sc_mode_cntl = rs->pa_su_sc_mode_cntl; 625 626 si_pm4_bind_state(rctx, rasterizer, rs); 627 si_update_fb_rs_state(rctx); 628} 629 630static void si_delete_rs_state(struct pipe_context *ctx, void *state) 631{ 632 struct r600_context *rctx = (struct r600_context *)ctx; 633 si_pm4_delete_state(rctx, rasterizer, (struct si_state_rasterizer *)state); 634} 635 636/* 637 * infeered state between dsa and stencil ref 638 */ 639static void si_update_dsa_stencil_ref(struct r600_context *rctx) 640{ 641 struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx); 642 struct pipe_stencil_ref *ref = &rctx->stencil_ref; 643 struct si_state_dsa *dsa = rctx->queued.named.dsa; 644 645 if (pm4 == NULL) 646 return; 647 648 si_pm4_set_reg(pm4, R_028430_DB_STENCILREFMASK, 649 S_028430_STENCILTESTVAL(ref->ref_value[0]) | 650 S_028430_STENCILMASK(dsa->valuemask[0]) | 651 S_028430_STENCILWRITEMASK(dsa->writemask[0]) | 652 S_028430_STENCILOPVAL(1)); 653 si_pm4_set_reg(pm4, R_028434_DB_STENCILREFMASK_BF, 654 S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) | 655 S_028434_STENCILMASK_BF(dsa->valuemask[1]) | 656 S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) | 657 S_028434_STENCILOPVAL_BF(1)); 658 659 si_pm4_set_state(rctx, dsa_stencil_ref, pm4); 660} 661 662static void si_set_pipe_stencil_ref(struct pipe_context *ctx, 663 const struct pipe_stencil_ref *state) 664{ 665 struct r600_context *rctx = (struct r600_context *)ctx; 666 rctx->stencil_ref = *state; 667 si_update_dsa_stencil_ref(rctx); 668} 669 670 671/* 672 * DSA 673 */ 674 675static uint32_t si_translate_stencil_op(int s_op) 676{ 677 switch (s_op) { 678 case PIPE_STENCIL_OP_KEEP: 679 return V_02842C_STENCIL_KEEP; 680 case PIPE_STENCIL_OP_ZERO: 681 return V_02842C_STENCIL_ZERO; 682 case PIPE_STENCIL_OP_REPLACE: 683 return V_02842C_STENCIL_REPLACE_TEST; 684 case PIPE_STENCIL_OP_INCR: 685 return V_02842C_STENCIL_ADD_CLAMP; 686 case PIPE_STENCIL_OP_DECR: 687 return V_02842C_STENCIL_SUB_CLAMP; 688 case PIPE_STENCIL_OP_INCR_WRAP: 689 return V_02842C_STENCIL_ADD_WRAP; 690 case PIPE_STENCIL_OP_DECR_WRAP: 691 return V_02842C_STENCIL_SUB_WRAP; 692 case PIPE_STENCIL_OP_INVERT: 693 return V_02842C_STENCIL_INVERT; 694 default: 695 R600_ERR("Unknown stencil op %d", s_op); 696 assert(0); 697 break; 698 } 699 return 0; 700} 701 702static void *si_create_dsa_state(struct pipe_context *ctx, 703 const struct pipe_depth_stencil_alpha_state *state) 704{ 705 struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa); 706 struct si_pm4_state *pm4 = &dsa->pm4; 707 unsigned db_depth_control; 708 unsigned db_render_override, db_render_control; 709 uint32_t db_stencil_control = 0; 710 711 if (dsa == NULL) { 712 return NULL; 713 } 714 715 dsa->valuemask[0] = state->stencil[0].valuemask; 716 dsa->valuemask[1] = state->stencil[1].valuemask; 717 dsa->writemask[0] = state->stencil[0].writemask; 718 dsa->writemask[1] = state->stencil[1].writemask; 719 720 db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | 721 S_028800_Z_WRITE_ENABLE(state->depth.writemask) | 722 S_028800_ZFUNC(state->depth.func); 723 724 /* stencil */ 725 if (state->stencil[0].enabled) { 726 db_depth_control |= S_028800_STENCIL_ENABLE(1); 727 db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func); 728 db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op)); 729 db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op)); 730 db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op)); 731 732 if (state->stencil[1].enabled) { 733 db_depth_control |= S_028800_BACKFACE_ENABLE(1); 734 db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func); 735 db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op)); 736 db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op)); 737 db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op)); 738 } 739 } 740 741 /* alpha */ 742 if (state->alpha.enabled) { 743 dsa->alpha_func = state->alpha.func; 744 dsa->alpha_ref = state->alpha.ref_value; 745 } else { 746 dsa->alpha_func = PIPE_FUNC_ALWAYS; 747 } 748 749 /* misc */ 750 db_render_control = 0; 751 db_render_override = S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE) | 752 S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | 753 S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE); 754 /* TODO db_render_override depends on query */ 755 si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, 0x00000000); 756 si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, 0x00000000); 757 si_pm4_set_reg(pm4, R_028028_DB_STENCIL_CLEAR, 0x00000000); 758 si_pm4_set_reg(pm4, R_02802C_DB_DEPTH_CLEAR, 0x3F800000); 759 //si_pm4_set_reg(pm4, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control); 760 si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control); 761 si_pm4_set_reg(pm4, R_028000_DB_RENDER_CONTROL, db_render_control); 762 si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, db_render_override); 763 si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); 764 si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); 765 si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); 766 si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0); 767 dsa->db_render_override = db_render_override; 768 769 return dsa; 770} 771 772static void si_bind_dsa_state(struct pipe_context *ctx, void *state) 773{ 774 struct r600_context *rctx = (struct r600_context *)ctx; 775 struct si_state_dsa *dsa = state; 776 777 if (state == NULL) 778 return; 779 780 si_pm4_bind_state(rctx, dsa, dsa); 781 si_update_dsa_stencil_ref(rctx); 782} 783 784static void si_delete_dsa_state(struct pipe_context *ctx, void *state) 785{ 786 struct r600_context *rctx = (struct r600_context *)ctx; 787 si_pm4_delete_state(rctx, dsa, (struct si_state_dsa *)state); 788} 789 790static void *si_create_db_flush_dsa(struct r600_context *rctx, bool copy_depth, 791 bool copy_stencil, int sample) 792{ 793 struct pipe_depth_stencil_alpha_state dsa; 794 struct si_state_dsa *state; 795 796 memset(&dsa, 0, sizeof(dsa)); 797 798 state = rctx->context.create_depth_stencil_alpha_state(&rctx->context, &dsa); 799 if (copy_depth || copy_stencil) { 800 si_pm4_set_reg(&state->pm4, R_028000_DB_RENDER_CONTROL, 801 S_028000_DEPTH_COPY(copy_depth) | 802 S_028000_STENCIL_COPY(copy_stencil) | 803 S_028000_COPY_CENTROID(1) | 804 S_028000_COPY_SAMPLE(sample)); 805 } else { 806 si_pm4_set_reg(&state->pm4, R_028000_DB_RENDER_CONTROL, 807 S_028000_DEPTH_COMPRESS_DISABLE(1) | 808 S_028000_STENCIL_COMPRESS_DISABLE(1)); 809 si_pm4_set_reg(&state->pm4, R_02800C_DB_RENDER_OVERRIDE, 810 S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE) | 811 S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | 812 S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE) | 813 S_02800C_DISABLE_TILE_RATE_TILES(1)); 814 } 815 816 return state; 817} 818 819/* 820 * format translation 821 */ 822static uint32_t si_translate_colorformat(enum pipe_format format) 823{ 824 switch (format) { 825 /* 8-bit buffers. */ 826 case PIPE_FORMAT_A8_UNORM: 827 case PIPE_FORMAT_A8_SNORM: 828 case PIPE_FORMAT_A8_UINT: 829 case PIPE_FORMAT_A8_SINT: 830 case PIPE_FORMAT_I8_UNORM: 831 case PIPE_FORMAT_I8_SNORM: 832 case PIPE_FORMAT_I8_UINT: 833 case PIPE_FORMAT_I8_SINT: 834 case PIPE_FORMAT_L8_UNORM: 835 case PIPE_FORMAT_L8_SNORM: 836 case PIPE_FORMAT_L8_UINT: 837 case PIPE_FORMAT_L8_SINT: 838 case PIPE_FORMAT_L8_SRGB: 839 case PIPE_FORMAT_R8_UNORM: 840 case PIPE_FORMAT_R8_SNORM: 841 case PIPE_FORMAT_R8_UINT: 842 case PIPE_FORMAT_R8_SINT: 843 return V_028C70_COLOR_8; 844 845 /* 16-bit buffers. */ 846 case PIPE_FORMAT_B5G6R5_UNORM: 847 return V_028C70_COLOR_5_6_5; 848 849 case PIPE_FORMAT_B5G5R5A1_UNORM: 850 case PIPE_FORMAT_B5G5R5X1_UNORM: 851 return V_028C70_COLOR_1_5_5_5; 852 853 case PIPE_FORMAT_B4G4R4A4_UNORM: 854 case PIPE_FORMAT_B4G4R4X4_UNORM: 855 return V_028C70_COLOR_4_4_4_4; 856 857 case PIPE_FORMAT_L8A8_UNORM: 858 case PIPE_FORMAT_L8A8_SNORM: 859 case PIPE_FORMAT_L8A8_UINT: 860 case PIPE_FORMAT_L8A8_SINT: 861 case PIPE_FORMAT_R8G8_SNORM: 862 case PIPE_FORMAT_R8G8_UNORM: 863 case PIPE_FORMAT_R8G8_UINT: 864 case PIPE_FORMAT_R8G8_SINT: 865 return V_028C70_COLOR_8_8; 866 867 case PIPE_FORMAT_Z16_UNORM: 868 case PIPE_FORMAT_R16_UNORM: 869 case PIPE_FORMAT_R16_SNORM: 870 case PIPE_FORMAT_R16_UINT: 871 case PIPE_FORMAT_R16_SINT: 872 case PIPE_FORMAT_R16_FLOAT: 873 case PIPE_FORMAT_L16_UNORM: 874 case PIPE_FORMAT_L16_SNORM: 875 case PIPE_FORMAT_L16_FLOAT: 876 case PIPE_FORMAT_I16_UNORM: 877 case PIPE_FORMAT_I16_SNORM: 878 case PIPE_FORMAT_I16_FLOAT: 879 case PIPE_FORMAT_A16_UNORM: 880 case PIPE_FORMAT_A16_SNORM: 881 case PIPE_FORMAT_A16_FLOAT: 882 return V_028C70_COLOR_16; 883 884 /* 32-bit buffers. */ 885 case PIPE_FORMAT_A8B8G8R8_SRGB: 886 case PIPE_FORMAT_A8B8G8R8_UNORM: 887 case PIPE_FORMAT_A8R8G8B8_UNORM: 888 case PIPE_FORMAT_B8G8R8A8_SRGB: 889 case PIPE_FORMAT_B8G8R8A8_UNORM: 890 case PIPE_FORMAT_B8G8R8X8_UNORM: 891 case PIPE_FORMAT_R8G8B8A8_SNORM: 892 case PIPE_FORMAT_R8G8B8A8_UNORM: 893 case PIPE_FORMAT_R8G8B8X8_UNORM: 894 case PIPE_FORMAT_R8G8B8X8_SNORM: 895 case PIPE_FORMAT_R8G8B8X8_SRGB: 896 case PIPE_FORMAT_R8G8B8X8_UINT: 897 case PIPE_FORMAT_R8G8B8X8_SINT: 898 case PIPE_FORMAT_R8SG8SB8UX8U_NORM: 899 case PIPE_FORMAT_X8B8G8R8_UNORM: 900 case PIPE_FORMAT_X8R8G8B8_UNORM: 901 case PIPE_FORMAT_R8G8B8A8_SSCALED: 902 case PIPE_FORMAT_R8G8B8A8_USCALED: 903 case PIPE_FORMAT_R8G8B8A8_SINT: 904 case PIPE_FORMAT_R8G8B8A8_UINT: 905 return V_028C70_COLOR_8_8_8_8; 906 907 case PIPE_FORMAT_R10G10B10A2_UNORM: 908 case PIPE_FORMAT_R10G10B10X2_SNORM: 909 case PIPE_FORMAT_B10G10R10A2_UNORM: 910 case PIPE_FORMAT_B10G10R10A2_UINT: 911 case PIPE_FORMAT_B10G10R10X2_UNORM: 912 case PIPE_FORMAT_R10SG10SB10SA2U_NORM: 913 return V_028C70_COLOR_2_10_10_10; 914 915 case PIPE_FORMAT_Z24X8_UNORM: 916 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 917 return V_028C70_COLOR_8_24; 918 919 case PIPE_FORMAT_S8X24_UINT: 920 case PIPE_FORMAT_X8Z24_UNORM: 921 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 922 return V_028C70_COLOR_24_8; 923 924 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 925 return V_028C70_COLOR_X24_8_32_FLOAT; 926 927 case PIPE_FORMAT_I32_FLOAT: 928 case PIPE_FORMAT_L32_FLOAT: 929 case PIPE_FORMAT_R32_FLOAT: 930 case PIPE_FORMAT_A32_FLOAT: 931 case PIPE_FORMAT_Z32_FLOAT: 932 return V_028C70_COLOR_32; 933 934 case PIPE_FORMAT_L16A16_UNORM: 935 case PIPE_FORMAT_L16A16_SNORM: 936 case PIPE_FORMAT_L16A16_FLOAT: 937 case PIPE_FORMAT_R16G16_SSCALED: 938 case PIPE_FORMAT_R16G16_UNORM: 939 case PIPE_FORMAT_R16G16_SNORM: 940 case PIPE_FORMAT_R16G16_UINT: 941 case PIPE_FORMAT_R16G16_SINT: 942 case PIPE_FORMAT_R16G16_FLOAT: 943 return V_028C70_COLOR_16_16; 944 945 case PIPE_FORMAT_R11G11B10_FLOAT: 946 return V_028C70_COLOR_10_11_11; 947 948 /* 64-bit buffers. */ 949 case PIPE_FORMAT_R16G16B16A16_UINT: 950 case PIPE_FORMAT_R16G16B16A16_SINT: 951 case PIPE_FORMAT_R16G16B16A16_USCALED: 952 case PIPE_FORMAT_R16G16B16A16_SSCALED: 953 case PIPE_FORMAT_R16G16B16A16_UNORM: 954 case PIPE_FORMAT_R16G16B16A16_SNORM: 955 case PIPE_FORMAT_R16G16B16A16_FLOAT: 956 case PIPE_FORMAT_R16G16B16X16_UNORM: 957 case PIPE_FORMAT_R16G16B16X16_SNORM: 958 case PIPE_FORMAT_R16G16B16X16_FLOAT: 959 case PIPE_FORMAT_R16G16B16X16_UINT: 960 case PIPE_FORMAT_R16G16B16X16_SINT: 961 return V_028C70_COLOR_16_16_16_16; 962 963 case PIPE_FORMAT_L32A32_FLOAT: 964 case PIPE_FORMAT_L32A32_UINT: 965 case PIPE_FORMAT_L32A32_SINT: 966 case PIPE_FORMAT_R32G32_FLOAT: 967 case PIPE_FORMAT_R32G32_USCALED: 968 case PIPE_FORMAT_R32G32_SSCALED: 969 case PIPE_FORMAT_R32G32_SINT: 970 case PIPE_FORMAT_R32G32_UINT: 971 return V_028C70_COLOR_32_32; 972 973 /* 128-bit buffers. */ 974 case PIPE_FORMAT_R32G32B32A32_SNORM: 975 case PIPE_FORMAT_R32G32B32A32_UNORM: 976 case PIPE_FORMAT_R32G32B32A32_SSCALED: 977 case PIPE_FORMAT_R32G32B32A32_USCALED: 978 case PIPE_FORMAT_R32G32B32A32_SINT: 979 case PIPE_FORMAT_R32G32B32A32_UINT: 980 case PIPE_FORMAT_R32G32B32A32_FLOAT: 981 case PIPE_FORMAT_R32G32B32X32_FLOAT: 982 case PIPE_FORMAT_R32G32B32X32_UINT: 983 case PIPE_FORMAT_R32G32B32X32_SINT: 984 return V_028C70_COLOR_32_32_32_32; 985 986 /* YUV buffers. */ 987 case PIPE_FORMAT_UYVY: 988 case PIPE_FORMAT_YUYV: 989 /* 96-bit buffers. */ 990 case PIPE_FORMAT_R32G32B32_FLOAT: 991 /* 8-bit buffers. */ 992 case PIPE_FORMAT_L4A4_UNORM: 993 case PIPE_FORMAT_R4A4_UNORM: 994 case PIPE_FORMAT_A4R4_UNORM: 995 default: 996 return V_028C70_COLOR_INVALID; /* Unsupported. */ 997 } 998} 999 1000static uint32_t si_translate_colorswap(enum pipe_format format) 1001{ 1002 switch (format) { 1003 /* 8-bit buffers. */ 1004 case PIPE_FORMAT_L4A4_UNORM: 1005 case PIPE_FORMAT_A4R4_UNORM: 1006 return V_028C70_SWAP_ALT; 1007 1008 case PIPE_FORMAT_A8_UNORM: 1009 case PIPE_FORMAT_A8_SNORM: 1010 case PIPE_FORMAT_A8_UINT: 1011 case PIPE_FORMAT_A8_SINT: 1012 case PIPE_FORMAT_R4A4_UNORM: 1013 return V_028C70_SWAP_ALT_REV; 1014 case PIPE_FORMAT_I8_UNORM: 1015 case PIPE_FORMAT_I8_SNORM: 1016 case PIPE_FORMAT_L8_UNORM: 1017 case PIPE_FORMAT_L8_SNORM: 1018 case PIPE_FORMAT_I8_UINT: 1019 case PIPE_FORMAT_I8_SINT: 1020 case PIPE_FORMAT_L8_UINT: 1021 case PIPE_FORMAT_L8_SINT: 1022 case PIPE_FORMAT_L8_SRGB: 1023 case PIPE_FORMAT_R8_UNORM: 1024 case PIPE_FORMAT_R8_SNORM: 1025 case PIPE_FORMAT_R8_UINT: 1026 case PIPE_FORMAT_R8_SINT: 1027 return V_028C70_SWAP_STD; 1028 1029 /* 16-bit buffers. */ 1030 case PIPE_FORMAT_B5G6R5_UNORM: 1031 return V_028C70_SWAP_STD_REV; 1032 1033 case PIPE_FORMAT_B5G5R5A1_UNORM: 1034 case PIPE_FORMAT_B5G5R5X1_UNORM: 1035 return V_028C70_SWAP_ALT; 1036 1037 case PIPE_FORMAT_B4G4R4A4_UNORM: 1038 case PIPE_FORMAT_B4G4R4X4_UNORM: 1039 return V_028C70_SWAP_ALT; 1040 1041 case PIPE_FORMAT_Z16_UNORM: 1042 return V_028C70_SWAP_STD; 1043 1044 case PIPE_FORMAT_L8A8_UNORM: 1045 case PIPE_FORMAT_L8A8_SNORM: 1046 case PIPE_FORMAT_L8A8_UINT: 1047 case PIPE_FORMAT_L8A8_SINT: 1048 return V_028C70_SWAP_ALT; 1049 case PIPE_FORMAT_R8G8_SNORM: 1050 case PIPE_FORMAT_R8G8_UNORM: 1051 case PIPE_FORMAT_R8G8_UINT: 1052 case PIPE_FORMAT_R8G8_SINT: 1053 return V_028C70_SWAP_STD; 1054 1055 case PIPE_FORMAT_I16_UNORM: 1056 case PIPE_FORMAT_I16_SNORM: 1057 case PIPE_FORMAT_I16_FLOAT: 1058 case PIPE_FORMAT_L16_UNORM: 1059 case PIPE_FORMAT_L16_SNORM: 1060 case PIPE_FORMAT_L16_FLOAT: 1061 case PIPE_FORMAT_R16_UNORM: 1062 case PIPE_FORMAT_R16_SNORM: 1063 case PIPE_FORMAT_R16_UINT: 1064 case PIPE_FORMAT_R16_SINT: 1065 case PIPE_FORMAT_R16_FLOAT: 1066 return V_028C70_SWAP_STD; 1067 1068 case PIPE_FORMAT_A16_UNORM: 1069 case PIPE_FORMAT_A16_SNORM: 1070 case PIPE_FORMAT_A16_FLOAT: 1071 return V_028C70_SWAP_ALT_REV; 1072 1073 /* 32-bit buffers. */ 1074 case PIPE_FORMAT_A8B8G8R8_SRGB: 1075 return V_028C70_SWAP_STD_REV; 1076 case PIPE_FORMAT_B8G8R8A8_SRGB: 1077 return V_028C70_SWAP_ALT; 1078 1079 case PIPE_FORMAT_B8G8R8A8_UNORM: 1080 case PIPE_FORMAT_B8G8R8X8_UNORM: 1081 return V_028C70_SWAP_ALT; 1082 1083 case PIPE_FORMAT_A8R8G8B8_UNORM: 1084 case PIPE_FORMAT_X8R8G8B8_UNORM: 1085 return V_028C70_SWAP_ALT_REV; 1086 case PIPE_FORMAT_R8G8B8A8_SNORM: 1087 case PIPE_FORMAT_R8G8B8A8_UNORM: 1088 case PIPE_FORMAT_R8G8B8A8_SSCALED: 1089 case PIPE_FORMAT_R8G8B8A8_USCALED: 1090 case PIPE_FORMAT_R8G8B8A8_SINT: 1091 case PIPE_FORMAT_R8G8B8A8_UINT: 1092 case PIPE_FORMAT_R8G8B8X8_UNORM: 1093 case PIPE_FORMAT_R8G8B8X8_SNORM: 1094 case PIPE_FORMAT_R8G8B8X8_SRGB: 1095 case PIPE_FORMAT_R8G8B8X8_UINT: 1096 case PIPE_FORMAT_R8G8B8X8_SINT: 1097 return V_028C70_SWAP_STD; 1098 1099 case PIPE_FORMAT_A8B8G8R8_UNORM: 1100 case PIPE_FORMAT_X8B8G8R8_UNORM: 1101 /* case PIPE_FORMAT_R8SG8SB8UX8U_NORM: */ 1102 return V_028C70_SWAP_STD_REV; 1103 1104 case PIPE_FORMAT_Z24X8_UNORM: 1105 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1106 return V_028C70_SWAP_STD; 1107 1108 case PIPE_FORMAT_S8X24_UINT: 1109 case PIPE_FORMAT_X8Z24_UNORM: 1110 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1111 return V_028C70_SWAP_STD_REV; 1112 1113 case PIPE_FORMAT_R10G10B10A2_UNORM: 1114 case PIPE_FORMAT_R10G10B10X2_SNORM: 1115 case PIPE_FORMAT_R10SG10SB10SA2U_NORM: 1116 return V_028C70_SWAP_STD; 1117 1118 case PIPE_FORMAT_B10G10R10A2_UNORM: 1119 case PIPE_FORMAT_B10G10R10A2_UINT: 1120 case PIPE_FORMAT_B10G10R10X2_UNORM: 1121 return V_028C70_SWAP_ALT; 1122 1123 case PIPE_FORMAT_R11G11B10_FLOAT: 1124 case PIPE_FORMAT_I32_FLOAT: 1125 case PIPE_FORMAT_L32_FLOAT: 1126 case PIPE_FORMAT_R32_FLOAT: 1127 case PIPE_FORMAT_R32_UINT: 1128 case PIPE_FORMAT_R32_SINT: 1129 case PIPE_FORMAT_Z32_FLOAT: 1130 case PIPE_FORMAT_R16G16_FLOAT: 1131 case PIPE_FORMAT_R16G16_UNORM: 1132 case PIPE_FORMAT_R16G16_SNORM: 1133 case PIPE_FORMAT_R16G16_UINT: 1134 case PIPE_FORMAT_R16G16_SINT: 1135 return V_028C70_SWAP_STD; 1136 1137 case PIPE_FORMAT_L16A16_UNORM: 1138 case PIPE_FORMAT_L16A16_SNORM: 1139 case PIPE_FORMAT_L16A16_FLOAT: 1140 return V_028C70_SWAP_ALT; 1141 1142 case PIPE_FORMAT_A32_FLOAT: 1143 return V_028C70_SWAP_ALT_REV; 1144 1145 /* 64-bit buffers. */ 1146 case PIPE_FORMAT_R32G32_FLOAT: 1147 case PIPE_FORMAT_R32G32_UINT: 1148 case PIPE_FORMAT_R32G32_SINT: 1149 case PIPE_FORMAT_R16G16B16A16_UNORM: 1150 case PIPE_FORMAT_R16G16B16A16_SNORM: 1151 case PIPE_FORMAT_R16G16B16A16_USCALED: 1152 case PIPE_FORMAT_R16G16B16A16_SSCALED: 1153 case PIPE_FORMAT_R16G16B16A16_UINT: 1154 case PIPE_FORMAT_R16G16B16A16_SINT: 1155 case PIPE_FORMAT_R16G16B16A16_FLOAT: 1156 case PIPE_FORMAT_R16G16B16X16_UNORM: 1157 case PIPE_FORMAT_R16G16B16X16_SNORM: 1158 case PIPE_FORMAT_R16G16B16X16_FLOAT: 1159 case PIPE_FORMAT_R16G16B16X16_UINT: 1160 case PIPE_FORMAT_R16G16B16X16_SINT: 1161 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1162 return V_028C70_SWAP_STD; 1163 1164 case PIPE_FORMAT_L32A32_FLOAT: 1165 case PIPE_FORMAT_L32A32_UINT: 1166 case PIPE_FORMAT_L32A32_SINT: 1167 return V_028C70_SWAP_ALT; 1168 1169 /* 128-bit buffers. */ 1170 case PIPE_FORMAT_R32G32B32A32_FLOAT: 1171 case PIPE_FORMAT_R32G32B32A32_SNORM: 1172 case PIPE_FORMAT_R32G32B32A32_UNORM: 1173 case PIPE_FORMAT_R32G32B32A32_SSCALED: 1174 case PIPE_FORMAT_R32G32B32A32_USCALED: 1175 case PIPE_FORMAT_R32G32B32A32_SINT: 1176 case PIPE_FORMAT_R32G32B32A32_UINT: 1177 case PIPE_FORMAT_R32G32B32X32_FLOAT: 1178 case PIPE_FORMAT_R32G32B32X32_UINT: 1179 case PIPE_FORMAT_R32G32B32X32_SINT: 1180 return V_028C70_SWAP_STD; 1181 default: 1182 R600_ERR("unsupported colorswap format %d\n", format); 1183 return ~0U; 1184 } 1185 return ~0U; 1186} 1187 1188static uint32_t si_colorformat_endian_swap(uint32_t colorformat) 1189{ 1190 if (R600_BIG_ENDIAN) { 1191 switch(colorformat) { 1192 /* 8-bit buffers. */ 1193 case V_028C70_COLOR_8: 1194 return V_028C70_ENDIAN_NONE; 1195 1196 /* 16-bit buffers. */ 1197 case V_028C70_COLOR_5_6_5: 1198 case V_028C70_COLOR_1_5_5_5: 1199 case V_028C70_COLOR_4_4_4_4: 1200 case V_028C70_COLOR_16: 1201 case V_028C70_COLOR_8_8: 1202 return V_028C70_ENDIAN_8IN16; 1203 1204 /* 32-bit buffers. */ 1205 case V_028C70_COLOR_8_8_8_8: 1206 case V_028C70_COLOR_2_10_10_10: 1207 case V_028C70_COLOR_8_24: 1208 case V_028C70_COLOR_24_8: 1209 case V_028C70_COLOR_16_16: 1210 return V_028C70_ENDIAN_8IN32; 1211 1212 /* 64-bit buffers. */ 1213 case V_028C70_COLOR_16_16_16_16: 1214 return V_028C70_ENDIAN_8IN16; 1215 1216 case V_028C70_COLOR_32_32: 1217 return V_028C70_ENDIAN_8IN32; 1218 1219 /* 128-bit buffers. */ 1220 case V_028C70_COLOR_32_32_32_32: 1221 return V_028C70_ENDIAN_8IN32; 1222 default: 1223 return V_028C70_ENDIAN_NONE; /* Unsupported. */ 1224 } 1225 } else { 1226 return V_028C70_ENDIAN_NONE; 1227 } 1228} 1229 1230/* Returns the size in bits of the widest component of a CB format */ 1231static unsigned si_colorformat_max_comp_size(uint32_t colorformat) 1232{ 1233 switch(colorformat) { 1234 case V_028C70_COLOR_4_4_4_4: 1235 return 4; 1236 1237 case V_028C70_COLOR_1_5_5_5: 1238 case V_028C70_COLOR_5_5_5_1: 1239 return 5; 1240 1241 case V_028C70_COLOR_5_6_5: 1242 return 6; 1243 1244 case V_028C70_COLOR_8: 1245 case V_028C70_COLOR_8_8: 1246 case V_028C70_COLOR_8_8_8_8: 1247 return 8; 1248 1249 case V_028C70_COLOR_10_10_10_2: 1250 case V_028C70_COLOR_2_10_10_10: 1251 return 10; 1252 1253 case V_028C70_COLOR_10_11_11: 1254 case V_028C70_COLOR_11_11_10: 1255 return 11; 1256 1257 case V_028C70_COLOR_16: 1258 case V_028C70_COLOR_16_16: 1259 case V_028C70_COLOR_16_16_16_16: 1260 return 16; 1261 1262 case V_028C70_COLOR_8_24: 1263 case V_028C70_COLOR_24_8: 1264 return 24; 1265 1266 case V_028C70_COLOR_32: 1267 case V_028C70_COLOR_32_32: 1268 case V_028C70_COLOR_32_32_32_32: 1269 case V_028C70_COLOR_X24_8_32_FLOAT: 1270 return 32; 1271 } 1272 1273 assert(!"Unknown maximum component size"); 1274 return 0; 1275} 1276 1277static uint32_t si_translate_dbformat(enum pipe_format format) 1278{ 1279 switch (format) { 1280 case PIPE_FORMAT_Z16_UNORM: 1281 return V_028040_Z_16; 1282 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1283 case PIPE_FORMAT_X8Z24_UNORM: 1284 case PIPE_FORMAT_Z24X8_UNORM: 1285 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1286 return V_028040_Z_24; /* deprecated on SI */ 1287 case PIPE_FORMAT_Z32_FLOAT: 1288 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1289 return V_028040_Z_32_FLOAT; 1290 default: 1291 return V_028040_Z_INVALID; 1292 } 1293} 1294 1295/* 1296 * Texture translation 1297 */ 1298 1299static uint32_t si_translate_texformat(struct pipe_screen *screen, 1300 enum pipe_format format, 1301 const struct util_format_description *desc, 1302 int first_non_void) 1303{ 1304 struct r600_screen *rscreen = (struct r600_screen*)screen; 1305 bool enable_s3tc = rscreen->info.drm_minor >= 31; 1306 boolean uniform = TRUE; 1307 int i; 1308 1309 /* Colorspace (return non-RGB formats directly). */ 1310 switch (desc->colorspace) { 1311 /* Depth stencil formats */ 1312 case UTIL_FORMAT_COLORSPACE_ZS: 1313 switch (format) { 1314 case PIPE_FORMAT_Z16_UNORM: 1315 return V_008F14_IMG_DATA_FORMAT_16; 1316 case PIPE_FORMAT_X24S8_UINT: 1317 case PIPE_FORMAT_Z24X8_UNORM: 1318 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1319 return V_008F14_IMG_DATA_FORMAT_8_24; 1320 case PIPE_FORMAT_X8Z24_UNORM: 1321 case PIPE_FORMAT_S8X24_UINT: 1322 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1323 return V_008F14_IMG_DATA_FORMAT_24_8; 1324 case PIPE_FORMAT_S8_UINT: 1325 return V_008F14_IMG_DATA_FORMAT_8; 1326 case PIPE_FORMAT_Z32_FLOAT: 1327 return V_008F14_IMG_DATA_FORMAT_32; 1328 case PIPE_FORMAT_X32_S8X24_UINT: 1329 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1330 return V_008F14_IMG_DATA_FORMAT_X24_8_32; 1331 default: 1332 goto out_unknown; 1333 } 1334 1335 case UTIL_FORMAT_COLORSPACE_YUV: 1336 goto out_unknown; /* TODO */ 1337 1338 case UTIL_FORMAT_COLORSPACE_SRGB: 1339 if (desc->nr_channels != 4 && desc->nr_channels != 1) 1340 goto out_unknown; 1341 break; 1342 1343 default: 1344 break; 1345 } 1346 1347 if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { 1348 if (!enable_s3tc) 1349 goto out_unknown; 1350 1351 switch (format) { 1352 case PIPE_FORMAT_RGTC1_SNORM: 1353 case PIPE_FORMAT_LATC1_SNORM: 1354 case PIPE_FORMAT_RGTC1_UNORM: 1355 case PIPE_FORMAT_LATC1_UNORM: 1356 return V_008F14_IMG_DATA_FORMAT_BC4; 1357 case PIPE_FORMAT_RGTC2_SNORM: 1358 case PIPE_FORMAT_LATC2_SNORM: 1359 case PIPE_FORMAT_RGTC2_UNORM: 1360 case PIPE_FORMAT_LATC2_UNORM: 1361 return V_008F14_IMG_DATA_FORMAT_BC5; 1362 default: 1363 goto out_unknown; 1364 } 1365 } 1366 1367 if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { 1368 1369 if (!enable_s3tc) 1370 goto out_unknown; 1371 1372 if (!util_format_s3tc_enabled) { 1373 goto out_unknown; 1374 } 1375 1376 switch (format) { 1377 case PIPE_FORMAT_DXT1_RGB: 1378 case PIPE_FORMAT_DXT1_RGBA: 1379 case PIPE_FORMAT_DXT1_SRGB: 1380 case PIPE_FORMAT_DXT1_SRGBA: 1381 return V_008F14_IMG_DATA_FORMAT_BC1; 1382 case PIPE_FORMAT_DXT3_RGBA: 1383 case PIPE_FORMAT_DXT3_SRGBA: 1384 return V_008F14_IMG_DATA_FORMAT_BC2; 1385 case PIPE_FORMAT_DXT5_RGBA: 1386 case PIPE_FORMAT_DXT5_SRGBA: 1387 return V_008F14_IMG_DATA_FORMAT_BC3; 1388 default: 1389 goto out_unknown; 1390 } 1391 } 1392 1393 if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) { 1394 return V_008F14_IMG_DATA_FORMAT_5_9_9_9; 1395 } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) { 1396 return V_008F14_IMG_DATA_FORMAT_10_11_11; 1397 } 1398 1399 /* R8G8Bx_SNORM - TODO CxV8U8 */ 1400 1401 /* See whether the components are of the same size. */ 1402 for (i = 1; i < desc->nr_channels; i++) { 1403 uniform = uniform && desc->channel[0].size == desc->channel[i].size; 1404 } 1405 1406 /* Non-uniform formats. */ 1407 if (!uniform) { 1408 switch(desc->nr_channels) { 1409 case 3: 1410 if (desc->channel[0].size == 5 && 1411 desc->channel[1].size == 6 && 1412 desc->channel[2].size == 5) { 1413 return V_008F14_IMG_DATA_FORMAT_5_6_5; 1414 } 1415 goto out_unknown; 1416 case 4: 1417 if (desc->channel[0].size == 5 && 1418 desc->channel[1].size == 5 && 1419 desc->channel[2].size == 5 && 1420 desc->channel[3].size == 1) { 1421 return V_008F14_IMG_DATA_FORMAT_1_5_5_5; 1422 } 1423 if (desc->channel[0].size == 10 && 1424 desc->channel[1].size == 10 && 1425 desc->channel[2].size == 10 && 1426 desc->channel[3].size == 2) { 1427 return V_008F14_IMG_DATA_FORMAT_2_10_10_10; 1428 } 1429 goto out_unknown; 1430 } 1431 goto out_unknown; 1432 } 1433 1434 if (first_non_void < 0 || first_non_void > 3) 1435 goto out_unknown; 1436 1437 /* uniform formats */ 1438 switch (desc->channel[first_non_void].size) { 1439 case 4: 1440 switch (desc->nr_channels) { 1441#if 0 /* Not supported for render targets */ 1442 case 2: 1443 return V_008F14_IMG_DATA_FORMAT_4_4; 1444#endif 1445 case 4: 1446 return V_008F14_IMG_DATA_FORMAT_4_4_4_4; 1447 } 1448 break; 1449 case 8: 1450 switch (desc->nr_channels) { 1451 case 1: 1452 return V_008F14_IMG_DATA_FORMAT_8; 1453 case 2: 1454 return V_008F14_IMG_DATA_FORMAT_8_8; 1455 case 4: 1456 return V_008F14_IMG_DATA_FORMAT_8_8_8_8; 1457 } 1458 break; 1459 case 16: 1460 switch (desc->nr_channels) { 1461 case 1: 1462 return V_008F14_IMG_DATA_FORMAT_16; 1463 case 2: 1464 return V_008F14_IMG_DATA_FORMAT_16_16; 1465 case 4: 1466 return V_008F14_IMG_DATA_FORMAT_16_16_16_16; 1467 } 1468 break; 1469 case 32: 1470 switch (desc->nr_channels) { 1471 case 1: 1472 return V_008F14_IMG_DATA_FORMAT_32; 1473 case 2: 1474 return V_008F14_IMG_DATA_FORMAT_32_32; 1475#if 0 /* Not supported for render targets */ 1476 case 3: 1477 return V_008F14_IMG_DATA_FORMAT_32_32_32; 1478#endif 1479 case 4: 1480 return V_008F14_IMG_DATA_FORMAT_32_32_32_32; 1481 } 1482 } 1483 1484out_unknown: 1485 /* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */ 1486 return ~0; 1487} 1488 1489static unsigned si_tex_wrap(unsigned wrap) 1490{ 1491 switch (wrap) { 1492 default: 1493 case PIPE_TEX_WRAP_REPEAT: 1494 return V_008F30_SQ_TEX_WRAP; 1495 case PIPE_TEX_WRAP_CLAMP: 1496 return V_008F30_SQ_TEX_CLAMP_HALF_BORDER; 1497 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 1498 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL; 1499 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 1500 return V_008F30_SQ_TEX_CLAMP_BORDER; 1501 case PIPE_TEX_WRAP_MIRROR_REPEAT: 1502 return V_008F30_SQ_TEX_MIRROR; 1503 case PIPE_TEX_WRAP_MIRROR_CLAMP: 1504 return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER; 1505 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 1506 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; 1507 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 1508 return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER; 1509 } 1510} 1511 1512static unsigned si_tex_filter(unsigned filter) 1513{ 1514 switch (filter) { 1515 default: 1516 case PIPE_TEX_FILTER_NEAREST: 1517 return V_008F38_SQ_TEX_XY_FILTER_POINT; 1518 case PIPE_TEX_FILTER_LINEAR: 1519 return V_008F38_SQ_TEX_XY_FILTER_BILINEAR; 1520 } 1521} 1522 1523static unsigned si_tex_mipfilter(unsigned filter) 1524{ 1525 switch (filter) { 1526 case PIPE_TEX_MIPFILTER_NEAREST: 1527 return V_008F38_SQ_TEX_Z_FILTER_POINT; 1528 case PIPE_TEX_MIPFILTER_LINEAR: 1529 return V_008F38_SQ_TEX_Z_FILTER_LINEAR; 1530 default: 1531 case PIPE_TEX_MIPFILTER_NONE: 1532 return V_008F38_SQ_TEX_Z_FILTER_NONE; 1533 } 1534} 1535 1536static unsigned si_tex_compare(unsigned compare) 1537{ 1538 switch (compare) { 1539 default: 1540 case PIPE_FUNC_NEVER: 1541 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER; 1542 case PIPE_FUNC_LESS: 1543 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS; 1544 case PIPE_FUNC_EQUAL: 1545 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL; 1546 case PIPE_FUNC_LEQUAL: 1547 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; 1548 case PIPE_FUNC_GREATER: 1549 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER; 1550 case PIPE_FUNC_NOTEQUAL: 1551 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; 1552 case PIPE_FUNC_GEQUAL: 1553 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; 1554 case PIPE_FUNC_ALWAYS: 1555 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS; 1556 } 1557} 1558 1559static unsigned si_tex_dim(unsigned dim, unsigned nr_samples) 1560{ 1561 switch (dim) { 1562 default: 1563 case PIPE_TEXTURE_1D: 1564 return V_008F1C_SQ_RSRC_IMG_1D; 1565 case PIPE_TEXTURE_1D_ARRAY: 1566 return V_008F1C_SQ_RSRC_IMG_1D_ARRAY; 1567 case PIPE_TEXTURE_2D: 1568 case PIPE_TEXTURE_RECT: 1569 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA : 1570 V_008F1C_SQ_RSRC_IMG_2D; 1571 case PIPE_TEXTURE_2D_ARRAY: 1572 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : 1573 V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 1574 case PIPE_TEXTURE_3D: 1575 return V_008F1C_SQ_RSRC_IMG_3D; 1576 case PIPE_TEXTURE_CUBE: 1577 return V_008F1C_SQ_RSRC_IMG_CUBE; 1578 } 1579} 1580 1581/* 1582 * Format support testing 1583 */ 1584 1585static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) 1586{ 1587 return si_translate_texformat(screen, format, util_format_description(format), 1588 util_format_get_first_non_void_channel(format)) != ~0U; 1589} 1590 1591static uint32_t si_translate_vertexformat(struct pipe_screen *screen, 1592 enum pipe_format format, 1593 const struct util_format_description *desc, 1594 int first_non_void) 1595{ 1596 unsigned type = desc->channel[first_non_void].type; 1597 int i; 1598 1599 if (type == UTIL_FORMAT_TYPE_FIXED) 1600 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1601 1602 /* See whether the components are of the same size. */ 1603 for (i = 0; i < desc->nr_channels; i++) { 1604 if (desc->channel[first_non_void].size != desc->channel[i].size) 1605 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1606 } 1607 1608 switch (desc->channel[first_non_void].size) { 1609 case 8: 1610 switch (desc->nr_channels) { 1611 case 1: 1612 return V_008F0C_BUF_DATA_FORMAT_8; 1613 case 2: 1614 return V_008F0C_BUF_DATA_FORMAT_8_8; 1615 case 3: 1616 case 4: 1617 return V_008F0C_BUF_DATA_FORMAT_8_8_8_8; 1618 } 1619 break; 1620 case 16: 1621 switch (desc->nr_channels) { 1622 case 1: 1623 return V_008F0C_BUF_DATA_FORMAT_16; 1624 case 2: 1625 return V_008F0C_BUF_DATA_FORMAT_16_16; 1626 case 3: 1627 case 4: 1628 return V_008F0C_BUF_DATA_FORMAT_16_16_16_16; 1629 } 1630 break; 1631 case 32: 1632 if (type != UTIL_FORMAT_TYPE_FLOAT) 1633 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1634 1635 switch (desc->nr_channels) { 1636 case 1: 1637 return V_008F0C_BUF_DATA_FORMAT_32; 1638 case 2: 1639 return V_008F0C_BUF_DATA_FORMAT_32_32; 1640 case 3: 1641 return V_008F0C_BUF_DATA_FORMAT_32_32_32; 1642 case 4: 1643 return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 1644 } 1645 break; 1646 } 1647 1648 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1649} 1650 1651static bool si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format) 1652{ 1653 const struct util_format_description *desc; 1654 int first_non_void; 1655 unsigned data_format; 1656 1657 desc = util_format_description(format); 1658 first_non_void = util_format_get_first_non_void_channel(format); 1659 data_format = si_translate_vertexformat(screen, format, desc, first_non_void); 1660 return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID; 1661} 1662 1663static bool si_is_colorbuffer_format_supported(enum pipe_format format) 1664{ 1665 return si_translate_colorformat(format) != V_028C70_COLOR_INVALID && 1666 si_translate_colorswap(format) != ~0U; 1667} 1668 1669static bool si_is_zs_format_supported(enum pipe_format format) 1670{ 1671 return si_translate_dbformat(format) != V_028040_Z_INVALID; 1672} 1673 1674boolean si_is_format_supported(struct pipe_screen *screen, 1675 enum pipe_format format, 1676 enum pipe_texture_target target, 1677 unsigned sample_count, 1678 unsigned usage) 1679{ 1680 struct r600_screen *rscreen = (struct r600_screen *)screen; 1681 unsigned retval = 0; 1682 1683 if (target >= PIPE_MAX_TEXTURE_TYPES) { 1684 R600_ERR("r600: unsupported texture type %d\n", target); 1685 return FALSE; 1686 } 1687 1688 if (!util_format_is_supported(format, usage)) 1689 return FALSE; 1690 1691 if (sample_count > 1) { 1692 if (HAVE_LLVM < 0x0304 || rscreen->chip_class != SI) 1693 return FALSE; 1694 1695 switch (sample_count) { 1696 case 2: 1697 case 4: 1698 case 8: 1699 break; 1700 default: 1701 return FALSE; 1702 } 1703 } 1704 1705 if ((usage & PIPE_BIND_SAMPLER_VIEW) && 1706 si_is_sampler_format_supported(screen, format)) { 1707 retval |= PIPE_BIND_SAMPLER_VIEW; 1708 } 1709 1710 if ((usage & (PIPE_BIND_RENDER_TARGET | 1711 PIPE_BIND_DISPLAY_TARGET | 1712 PIPE_BIND_SCANOUT | 1713 PIPE_BIND_SHARED)) && 1714 si_is_colorbuffer_format_supported(format)) { 1715 retval |= usage & 1716 (PIPE_BIND_RENDER_TARGET | 1717 PIPE_BIND_DISPLAY_TARGET | 1718 PIPE_BIND_SCANOUT | 1719 PIPE_BIND_SHARED); 1720 } 1721 1722 if ((usage & PIPE_BIND_DEPTH_STENCIL) && 1723 si_is_zs_format_supported(format)) { 1724 retval |= PIPE_BIND_DEPTH_STENCIL; 1725 } 1726 1727 if ((usage & PIPE_BIND_VERTEX_BUFFER) && 1728 si_is_vertex_format_supported(screen, format)) { 1729 retval |= PIPE_BIND_VERTEX_BUFFER; 1730 } 1731 1732 if (usage & PIPE_BIND_TRANSFER_READ) 1733 retval |= PIPE_BIND_TRANSFER_READ; 1734 if (usage & PIPE_BIND_TRANSFER_WRITE) 1735 retval |= PIPE_BIND_TRANSFER_WRITE; 1736 1737 return retval == usage; 1738} 1739 1740static unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil) 1741{ 1742 unsigned tile_mode_index = 0; 1743 1744 if (stencil) { 1745 tile_mode_index = rtex->surface.stencil_tiling_index[level]; 1746 } else { 1747 tile_mode_index = rtex->surface.tiling_index[level]; 1748 } 1749 return tile_mode_index; 1750} 1751 1752/* 1753 * framebuffer handling 1754 */ 1755 1756static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4, 1757 const struct pipe_framebuffer_state *state, int cb) 1758{ 1759 struct r600_texture *rtex; 1760 struct r600_surface *surf; 1761 unsigned level = state->cbufs[cb]->u.tex.level; 1762 unsigned pitch, slice; 1763 unsigned color_info, color_attrib; 1764 unsigned tile_mode_index; 1765 unsigned format, swap, ntype, endian; 1766 uint64_t offset; 1767 const struct util_format_description *desc; 1768 int i; 1769 unsigned blend_clamp = 0, blend_bypass = 0; 1770 unsigned max_comp_size; 1771 1772 surf = (struct r600_surface *)state->cbufs[cb]; 1773 rtex = (struct r600_texture*)state->cbufs[cb]->texture; 1774 1775 offset = rtex->surface.level[level].offset; 1776 if (rtex->surface.level[level].mode < RADEON_SURF_MODE_1D) { 1777 offset += rtex->surface.level[level].slice_size * 1778 state->cbufs[cb]->u.tex.first_layer; 1779 } 1780 pitch = (rtex->surface.level[level].nblk_x) / 8 - 1; 1781 slice = (rtex->surface.level[level].nblk_x * rtex->surface.level[level].nblk_y) / 64; 1782 if (slice) { 1783 slice = slice - 1; 1784 } 1785 1786 tile_mode_index = si_tile_mode_index(rtex, level, false); 1787 1788 desc = util_format_description(surf->base.format); 1789 for (i = 0; i < 4; i++) { 1790 if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { 1791 break; 1792 } 1793 } 1794 if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) { 1795 ntype = V_028C70_NUMBER_FLOAT; 1796 } else { 1797 ntype = V_028C70_NUMBER_UNORM; 1798 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) 1799 ntype = V_028C70_NUMBER_SRGB; 1800 else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 1801 if (desc->channel[i].pure_integer) { 1802 ntype = V_028C70_NUMBER_SINT; 1803 } else { 1804 assert(desc->channel[i].normalized); 1805 ntype = V_028C70_NUMBER_SNORM; 1806 } 1807 } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { 1808 if (desc->channel[i].pure_integer) { 1809 ntype = V_028C70_NUMBER_UINT; 1810 } else { 1811 assert(desc->channel[i].normalized); 1812 ntype = V_028C70_NUMBER_UNORM; 1813 } 1814 } 1815 } 1816 1817 format = si_translate_colorformat(surf->base.format); 1818 if (format == V_028C70_COLOR_INVALID) { 1819 R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format); 1820 } 1821 assert(format != V_028C70_COLOR_INVALID); 1822 swap = si_translate_colorswap(surf->base.format); 1823 if (rtex->resource.b.b.usage == PIPE_USAGE_STAGING) { 1824 endian = V_028C70_ENDIAN_NONE; 1825 } else { 1826 endian = si_colorformat_endian_swap(format); 1827 } 1828 1829 /* blend clamp should be set for all NORM/SRGB types */ 1830 if (ntype == V_028C70_NUMBER_UNORM || 1831 ntype == V_028C70_NUMBER_SNORM || 1832 ntype == V_028C70_NUMBER_SRGB) 1833 blend_clamp = 1; 1834 1835 /* set blend bypass according to docs if SINT/UINT or 1836 8/24 COLOR variants */ 1837 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT || 1838 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 || 1839 format == V_028C70_COLOR_X24_8_32_FLOAT) { 1840 blend_clamp = 0; 1841 blend_bypass = 1; 1842 } 1843 1844 color_info = S_028C70_FORMAT(format) | 1845 S_028C70_COMP_SWAP(swap) | 1846 S_028C70_BLEND_CLAMP(blend_clamp) | 1847 S_028C70_BLEND_BYPASS(blend_bypass) | 1848 S_028C70_NUMBER_TYPE(ntype) | 1849 S_028C70_ENDIAN(endian); 1850 1851 color_attrib = S_028C74_TILE_MODE_INDEX(tile_mode_index) | 1852 S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1); 1853 1854 if (rtex->resource.b.b.nr_samples > 1) { 1855 unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples); 1856 1857 color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | 1858 S_028C74_NUM_FRAGMENTS(log_samples); 1859 1860 if (rtex->fmask.size) { 1861 color_info |= S_028C70_COMPRESSION(1); 1862 unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height); 1863 1864 /* due to a bug in the hw, FMASK_BANK_HEIGHT must be set on SI too */ 1865 color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(rtex->fmask.tile_mode_index) | 1866 S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); 1867 } 1868 } 1869 1870 if (rtex->cmask.size) { 1871 color_info |= S_028C70_FAST_CLEAR(1); 1872 } 1873 1874 offset += r600_resource_va(rctx->context.screen, state->cbufs[cb]->texture); 1875 offset >>= 8; 1876 1877 /* FIXME handle enabling of CB beyond BASE8 which has different offset */ 1878 si_pm4_add_bo(pm4, &rtex->resource, RADEON_USAGE_READWRITE); 1879 si_pm4_set_reg(pm4, R_028C60_CB_COLOR0_BASE + cb * 0x3C, offset); 1880 si_pm4_set_reg(pm4, R_028C64_CB_COLOR0_PITCH + cb * 0x3C, S_028C64_TILE_MAX(pitch)); 1881 si_pm4_set_reg(pm4, R_028C68_CB_COLOR0_SLICE + cb * 0x3C, S_028C68_TILE_MAX(slice)); 1882 1883 if (rtex->surface.level[level].mode < RADEON_SURF_MODE_1D) { 1884 si_pm4_set_reg(pm4, R_028C6C_CB_COLOR0_VIEW + cb * 0x3C, 0x00000000); 1885 } else { 1886 si_pm4_set_reg(pm4, R_028C6C_CB_COLOR0_VIEW + cb * 0x3C, 1887 S_028C6C_SLICE_START(state->cbufs[cb]->u.tex.first_layer) | 1888 S_028C6C_SLICE_MAX(state->cbufs[cb]->u.tex.last_layer)); 1889 } 1890 si_pm4_set_reg(pm4, R_028C70_CB_COLOR0_INFO + cb * 0x3C, color_info); 1891 si_pm4_set_reg(pm4, R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C, color_attrib); 1892 1893 if (rtex->cmask.size) { 1894 si_pm4_set_reg(pm4, R_028C7C_CB_COLOR0_CMASK + cb * 0x3C, 1895 offset + (rtex->cmask.offset >> 8)); 1896 si_pm4_set_reg(pm4, R_028C80_CB_COLOR0_CMASK_SLICE + cb * 0x3C, 1897 S_028C80_TILE_MAX(rtex->cmask.slice_tile_max)); 1898 } 1899 if (rtex->fmask.size) { 1900 si_pm4_set_reg(pm4, R_028C84_CB_COLOR0_FMASK + cb * 0x3C, 1901 offset + (rtex->fmask.offset >> 8)); 1902 si_pm4_set_reg(pm4, R_028C88_CB_COLOR0_FMASK_SLICE + cb * 0x3C, 1903 S_028C88_TILE_MAX(rtex->fmask.slice_tile_max)); 1904 } 1905 1906 /* set CB_COLOR1_INFO for possible dual-src blending */ 1907 if (state->nr_cbufs == 1) { 1908 assert(cb == 0); 1909 si_pm4_set_reg(pm4, R_028C70_CB_COLOR0_INFO + 1 * 0x3C, color_info); 1910 } 1911 1912 /* Determine pixel shader export format */ 1913 max_comp_size = si_colorformat_max_comp_size(format); 1914 if (ntype == V_028C70_NUMBER_SRGB || 1915 ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) && 1916 max_comp_size <= 10) || 1917 (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) { 1918 rctx->export_16bpc |= 1 << cb; 1919 /* set SPI_SHADER_COL_FORMAT for possible dual-src blending */ 1920 if (state->nr_cbufs == 1) 1921 rctx->export_16bpc |= 1 << 1; 1922 } 1923} 1924 1925static void si_db(struct r600_context *rctx, struct si_pm4_state *pm4, 1926 const struct pipe_framebuffer_state *state) 1927{ 1928 struct r600_screen *rscreen = rctx->screen; 1929 struct r600_texture *rtex; 1930 struct r600_surface *surf; 1931 unsigned level, pitch, slice, format, tile_mode_index, array_mode; 1932 unsigned macro_aspect, tile_split, stile_split, bankh, bankw, nbanks, pipe_config; 1933 uint32_t z_info, s_info, db_depth_info; 1934 uint64_t z_offs, s_offs; 1935 1936 if (state->zsbuf == NULL) { 1937 si_pm4_set_reg(pm4, R_028040_DB_Z_INFO, S_028040_FORMAT(V_028040_Z_INVALID)); 1938 si_pm4_set_reg(pm4, R_028044_DB_STENCIL_INFO, S_028044_FORMAT(V_028044_STENCIL_INVALID)); 1939 return; 1940 } 1941 1942 surf = (struct r600_surface *)state->zsbuf; 1943 level = surf->base.u.tex.level; 1944 rtex = (struct r600_texture*)surf->base.texture; 1945 1946 format = si_translate_dbformat(rtex->resource.b.b.format); 1947 1948 if (format == V_028040_Z_INVALID) { 1949 R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format); 1950 } 1951 assert(format != V_028040_Z_INVALID); 1952 1953 s_offs = z_offs = r600_resource_va(rctx->context.screen, surf->base.texture); 1954 z_offs += rtex->surface.level[level].offset; 1955 s_offs += rtex->surface.stencil_level[level].offset; 1956 1957 z_offs >>= 8; 1958 s_offs >>= 8; 1959 1960 pitch = (rtex->surface.level[level].nblk_x / 8) - 1; 1961 slice = (rtex->surface.level[level].nblk_x * rtex->surface.level[level].nblk_y) / 64; 1962 if (slice) { 1963 slice = slice - 1; 1964 } 1965 1966 db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1); 1967 1968 z_info = S_028040_FORMAT(format); 1969 if (rtex->resource.b.b.nr_samples > 1) { 1970 z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples)); 1971 } 1972 1973 if (rtex->surface.flags & RADEON_SURF_SBUFFER) 1974 s_info = S_028044_FORMAT(V_028044_STENCIL_8); 1975 else 1976 s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID); 1977 1978 if (rctx->chip_class >= CIK) { 1979 switch (rtex->surface.level[level].mode) { 1980 case RADEON_SURF_MODE_2D: 1981 array_mode = V_02803C_ARRAY_2D_TILED_THIN1; 1982 break; 1983 case RADEON_SURF_MODE_1D: 1984 case RADEON_SURF_MODE_LINEAR_ALIGNED: 1985 case RADEON_SURF_MODE_LINEAR: 1986 default: 1987 array_mode = V_02803C_ARRAY_1D_TILED_THIN1; 1988 break; 1989 } 1990 tile_split = rtex->surface.tile_split; 1991 stile_split = rtex->surface.stencil_tile_split; 1992 macro_aspect = rtex->surface.mtilea; 1993 bankw = rtex->surface.bankw; 1994 bankh = rtex->surface.bankh; 1995 tile_split = cik_tile_split(tile_split); 1996 stile_split = cik_tile_split(stile_split); 1997 macro_aspect = cik_macro_tile_aspect(macro_aspect); 1998 bankw = cik_bank_wh(bankw); 1999 bankh = cik_bank_wh(bankh); 2000 nbanks = cik_num_banks(rscreen->tiling_info.num_banks); 2001 pipe_config = cik_db_pipe_config(rscreen->info.r600_num_tile_pipes, 2002 rscreen->info.r600_num_backends); 2003 2004 db_depth_info |= S_02803C_ARRAY_MODE(array_mode) | 2005 S_02803C_PIPE_CONFIG(pipe_config) | 2006 S_02803C_BANK_WIDTH(bankw) | 2007 S_02803C_BANK_HEIGHT(bankh) | 2008 S_02803C_MACRO_TILE_ASPECT(macro_aspect) | 2009 S_02803C_NUM_BANKS(nbanks); 2010 z_info |= S_028040_TILE_SPLIT(tile_split); 2011 s_info |= S_028044_TILE_SPLIT(stile_split); 2012 } else { 2013 tile_mode_index = si_tile_mode_index(rtex, level, false); 2014 z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index); 2015 tile_mode_index = si_tile_mode_index(rtex, level, true); 2016 s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index); 2017 } 2018 2019 si_pm4_set_reg(pm4, R_028008_DB_DEPTH_VIEW, 2020 S_028008_SLICE_START(state->zsbuf->u.tex.first_layer) | 2021 S_028008_SLICE_MAX(state->zsbuf->u.tex.last_layer)); 2022 2023 si_pm4_set_reg(pm4, R_02803C_DB_DEPTH_INFO, db_depth_info); 2024 si_pm4_set_reg(pm4, R_028040_DB_Z_INFO, z_info); 2025 si_pm4_set_reg(pm4, R_028044_DB_STENCIL_INFO, s_info); 2026 2027 si_pm4_add_bo(pm4, &rtex->resource, RADEON_USAGE_READWRITE); 2028 si_pm4_set_reg(pm4, R_028048_DB_Z_READ_BASE, z_offs); 2029 si_pm4_set_reg(pm4, R_02804C_DB_STENCIL_READ_BASE, s_offs); 2030 si_pm4_set_reg(pm4, R_028050_DB_Z_WRITE_BASE, z_offs); 2031 si_pm4_set_reg(pm4, R_028054_DB_STENCIL_WRITE_BASE, s_offs); 2032 2033 si_pm4_set_reg(pm4, R_028058_DB_DEPTH_SIZE, S_028058_PITCH_TILE_MAX(pitch)); 2034 si_pm4_set_reg(pm4, R_02805C_DB_DEPTH_SLICE, S_02805C_SLICE_TILE_MAX(slice)); 2035} 2036 2037#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \ 2038 (((s0x) & 0xf) | (((s0y) & 0xf) << 4) | \ 2039 (((s1x) & 0xf) << 8) | (((s1y) & 0xf) << 12) | \ 2040 (((s2x) & 0xf) << 16) | (((s2y) & 0xf) << 20) | \ 2041 (((s3x) & 0xf) << 24) | (((s3y) & 0xf) << 28)) 2042 2043/* 2xMSAA 2044 * There are two locations (-4, 4), (4, -4). */ 2045static uint32_t sample_locs_2x[] = { 2046 FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4), 2047 FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4), 2048 FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4), 2049 FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4), 2050}; 2051static unsigned max_dist_2x = 4; 2052/* 4xMSAA 2053 * There are 4 locations: (-2, -2), (2, 2), (-6, 6), (6, -6). */ 2054static uint32_t sample_locs_4x[] = { 2055 FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6), 2056 FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6), 2057 FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6), 2058 FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6), 2059}; 2060static unsigned max_dist_4x = 6; 2061/* Cayman/SI 8xMSAA */ 2062static uint32_t cm_sample_locs_8x[] = { 2063 FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2), 2064 FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2), 2065 FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2), 2066 FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2), 2067 FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4), 2068 FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4), 2069 FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4), 2070 FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4), 2071}; 2072static unsigned cm_max_dist_8x = 8; 2073/* Cayman/SI 16xMSAA */ 2074static uint32_t cm_sample_locs_16x[] = { 2075 FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5), 2076 FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5), 2077 FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5), 2078 FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5), 2079 FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1), 2080 FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1), 2081 FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1), 2082 FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1), 2083 FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6), 2084 FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6), 2085 FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6), 2086 FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6), 2087 FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0), 2088 FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0), 2089 FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0), 2090 FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0), 2091}; 2092static unsigned cm_max_dist_16x = 8; 2093 2094static void si_get_sample_position(struct pipe_context *ctx, 2095 unsigned sample_count, 2096 unsigned sample_index, 2097 float *out_value) 2098{ 2099 int offset, index; 2100 struct { 2101 int idx:4; 2102 } val; 2103 switch (sample_count) { 2104 case 1: 2105 default: 2106 out_value[0] = out_value[1] = 0.5; 2107 break; 2108 case 2: 2109 offset = 4 * (sample_index * 2); 2110 val.idx = (sample_locs_2x[0] >> offset) & 0xf; 2111 out_value[0] = (float)(val.idx + 8) / 16.0f; 2112 val.idx = (sample_locs_2x[0] >> (offset + 4)) & 0xf; 2113 out_value[1] = (float)(val.idx + 8) / 16.0f; 2114 break; 2115 case 4: 2116 offset = 4 * (sample_index * 2); 2117 val.idx = (sample_locs_4x[0] >> offset) & 0xf; 2118 out_value[0] = (float)(val.idx + 8) / 16.0f; 2119 val.idx = (sample_locs_4x[0] >> (offset + 4)) & 0xf; 2120 out_value[1] = (float)(val.idx + 8) / 16.0f; 2121 break; 2122 case 8: 2123 offset = 4 * (sample_index % 4 * 2); 2124 index = (sample_index / 4) * 4; 2125 val.idx = (cm_sample_locs_8x[index] >> offset) & 0xf; 2126 out_value[0] = (float)(val.idx + 8) / 16.0f; 2127 val.idx = (cm_sample_locs_8x[index] >> (offset + 4)) & 0xf; 2128 out_value[1] = (float)(val.idx + 8) / 16.0f; 2129 break; 2130 case 16: 2131 offset = 4 * (sample_index % 4 * 2); 2132 index = (sample_index / 4) * 4; 2133 val.idx = (cm_sample_locs_16x[index] >> offset) & 0xf; 2134 out_value[0] = (float)(val.idx + 8) / 16.0f; 2135 val.idx = (cm_sample_locs_16x[index] >> (offset + 4)) & 0xf; 2136 out_value[1] = (float)(val.idx + 8) / 16.0f; 2137 break; 2138 } 2139} 2140 2141static void si_set_msaa_state(struct r600_context *rctx, struct si_pm4_state *pm4, int nr_samples) 2142{ 2143 unsigned max_dist = 0; 2144 2145 switch (nr_samples) { 2146 default: 2147 nr_samples = 0; 2148 break; 2149 case 2: 2150 si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_2x[0]); 2151 si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_2x[1]); 2152 si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_2x[2]); 2153 si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_2x[3]); 2154 max_dist = max_dist_2x; 2155 break; 2156 case 4: 2157 si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_4x[0]); 2158 si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_4x[1]); 2159 si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_4x[2]); 2160 si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_4x[3]); 2161 max_dist = max_dist_4x; 2162 break; 2163 case 8: 2164 si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, cm_sample_locs_8x[0]); 2165 si_pm4_set_reg(pm4, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, cm_sample_locs_8x[4]); 2166 si_pm4_set_reg(pm4, R_028C00_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2, 0); 2167 si_pm4_set_reg(pm4, R_028C04_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3, 0); 2168 si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, cm_sample_locs_8x[1]); 2169 si_pm4_set_reg(pm4, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, cm_sample_locs_8x[5]); 2170 si_pm4_set_reg(pm4, R_028C10_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2, 0); 2171 si_pm4_set_reg(pm4, R_028C14_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3, 0); 2172 si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, cm_sample_locs_8x[2]); 2173 si_pm4_set_reg(pm4, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, cm_sample_locs_8x[6]); 2174 si_pm4_set_reg(pm4, R_028C20_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2, 0); 2175 si_pm4_set_reg(pm4, R_028C24_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3, 0); 2176 si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, cm_sample_locs_8x[3]); 2177 si_pm4_set_reg(pm4, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, cm_sample_locs_8x[7]); 2178 max_dist = cm_max_dist_8x; 2179 break; 2180 case 16: 2181 si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, cm_sample_locs_16x[0]); 2182 si_pm4_set_reg(pm4, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, cm_sample_locs_16x[4]); 2183 si_pm4_set_reg(pm4, R_028C00_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2, cm_sample_locs_16x[8]); 2184 si_pm4_set_reg(pm4, R_028C04_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3, cm_sample_locs_16x[12]); 2185 si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, cm_sample_locs_16x[1]); 2186 si_pm4_set_reg(pm4, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, cm_sample_locs_16x[5]); 2187 si_pm4_set_reg(pm4, R_028C10_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2, cm_sample_locs_16x[9]); 2188 si_pm4_set_reg(pm4, R_028C14_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3, cm_sample_locs_16x[13]); 2189 si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, cm_sample_locs_16x[2]); 2190 si_pm4_set_reg(pm4, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, cm_sample_locs_16x[6]); 2191 si_pm4_set_reg(pm4, R_028C20_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2, cm_sample_locs_16x[10]); 2192 si_pm4_set_reg(pm4, R_028C24_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3, cm_sample_locs_16x[14]); 2193 si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, cm_sample_locs_16x[3]); 2194 si_pm4_set_reg(pm4, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, cm_sample_locs_16x[7]); 2195 si_pm4_set_reg(pm4, R_028C30_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2, cm_sample_locs_16x[11]); 2196 si_pm4_set_reg(pm4, R_028C34_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3, cm_sample_locs_16x[15]); 2197 max_dist = cm_max_dist_16x; 2198 break; 2199 } 2200 2201 if (nr_samples > 1) { 2202 unsigned log_samples = util_logbase2(nr_samples); 2203 2204 si_pm4_set_reg(pm4, R_028BDC_PA_SC_LINE_CNTL, 2205 S_028BDC_LAST_PIXEL(1) | 2206 S_028BDC_EXPAND_LINE_WIDTH(1)); 2207 si_pm4_set_reg(pm4, R_028BE0_PA_SC_AA_CONFIG, 2208 S_028BE0_MSAA_NUM_SAMPLES(log_samples) | 2209 S_028BE0_MAX_SAMPLE_DIST(max_dist) | 2210 S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); 2211 2212 si_pm4_set_reg(pm4, R_028804_DB_EQAA, 2213 S_028804_MAX_ANCHOR_SAMPLES(log_samples) | 2214 S_028804_PS_ITER_SAMPLES(log_samples) | 2215 S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) | 2216 S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) | 2217 S_028804_HIGH_QUALITY_INTERSECTIONS(1) | 2218 S_028804_STATIC_ANCHOR_ASSOCIATIONS(1)); 2219 } else { 2220 si_pm4_set_reg(pm4, R_028BDC_PA_SC_LINE_CNTL, S_028BDC_LAST_PIXEL(1)); 2221 si_pm4_set_reg(pm4, R_028BE0_PA_SC_AA_CONFIG, 0); 2222 2223 si_pm4_set_reg(pm4, R_028804_DB_EQAA, 2224 S_028804_HIGH_QUALITY_INTERSECTIONS(1) | 2225 S_028804_STATIC_ANCHOR_ASSOCIATIONS(1)); 2226 } 2227} 2228 2229static void si_set_framebuffer_state(struct pipe_context *ctx, 2230 const struct pipe_framebuffer_state *state) 2231{ 2232 struct r600_context *rctx = (struct r600_context *)ctx; 2233 struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx); 2234 uint32_t tl, br; 2235 int tl_x, tl_y, br_x, br_y, nr_samples, i; 2236 2237 if (pm4 == NULL) 2238 return; 2239 2240 si_pm4_inval_fb_cache(pm4, state->nr_cbufs); 2241 rctx->flush_and_inv_cb_meta = true; 2242 2243 if (state->zsbuf) 2244 si_pm4_inval_zsbuf_cache(pm4); 2245 2246 util_copy_framebuffer_state(&rctx->framebuffer, state); 2247 2248 /* build states */ 2249 rctx->export_16bpc = 0; 2250 rctx->fb_compressed_cb_mask = 0; 2251 for (i = 0; i < state->nr_cbufs; i++) { 2252 struct r600_texture *rtex = 2253 (struct r600_texture*)state->cbufs[i]->texture; 2254 2255 si_cb(rctx, pm4, state, i); 2256 2257 if (rtex->fmask.size || rtex->cmask.size) { 2258 rctx->fb_compressed_cb_mask |= 1 << i; 2259 } 2260 } 2261 for (; i < 8; i++) { 2262 si_pm4_set_reg(pm4, R_028C70_CB_COLOR0_INFO + i * 0x3C, 2263 S_028C70_FORMAT(V_028C70_COLOR_INVALID)); 2264 } 2265 2266 assert(!(rctx->export_16bpc & ~0xff)); 2267 si_db(rctx, pm4, state); 2268 2269 tl_x = 0; 2270 tl_y = 0; 2271 br_x = state->width; 2272 br_y = state->height; 2273 2274 tl = S_028240_TL_X(tl_x) | S_028240_TL_Y(tl_y); 2275 br = S_028244_BR_X(br_x) | S_028244_BR_Y(br_y); 2276 2277 si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, tl); 2278 si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR, br); 2279 si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL, tl); 2280 si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR, br); 2281 si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, tl); 2282 si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR, br); 2283 si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, tl); 2284 si_pm4_set_reg(pm4, R_028208_PA_SC_WINDOW_SCISSOR_BR, br); 2285 si_pm4_set_reg(pm4, R_028200_PA_SC_WINDOW_OFFSET, 0x00000000); 2286 si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); 2287 2288 if (state->nr_cbufs) 2289 nr_samples = state->cbufs[0]->texture->nr_samples; 2290 else if (state->zsbuf) 2291 nr_samples = state->zsbuf->texture->nr_samples; 2292 else 2293 nr_samples = 0; 2294 2295 si_set_msaa_state(rctx, pm4, nr_samples); 2296 rctx->fb_log_samples = util_logbase2(nr_samples); 2297 rctx->fb_cb0_is_integer = state->nr_cbufs && 2298 util_format_is_pure_integer(state->cbufs[0]->format); 2299 2300 si_pm4_set_state(rctx, framebuffer, pm4); 2301 si_update_fb_rs_state(rctx); 2302 si_update_fb_blend_state(rctx); 2303} 2304 2305/* 2306 * shaders 2307 */ 2308 2309/* Compute the key for the hw shader variant */ 2310static INLINE void si_shader_selector_key(struct pipe_context *ctx, 2311 struct si_pipe_shader_selector *sel, 2312 union si_shader_key *key) 2313{ 2314 struct r600_context *rctx = (struct r600_context *)ctx; 2315 memset(key, 0, sizeof(*key)); 2316 2317 if (sel->type == PIPE_SHADER_VERTEX) { 2318 unsigned i; 2319 if (!rctx->vertex_elements) 2320 return; 2321 2322 for (i = 0; i < rctx->vertex_elements->count; ++i) 2323 key->vs.instance_divisors[i] = rctx->vertex_elements->elements[i].instance_divisor; 2324 2325 if (rctx->queued.named.rasterizer->clip_plane_enable & 0xf0) 2326 key->vs.ucps_enabled |= 0x2; 2327 if (rctx->queued.named.rasterizer->clip_plane_enable & 0xf) 2328 key->vs.ucps_enabled |= 0x1; 2329 } else if (sel->type == PIPE_SHADER_FRAGMENT) { 2330 if (sel->fs_write_all) 2331 key->ps.nr_cbufs = rctx->framebuffer.nr_cbufs; 2332 key->ps.export_16bpc = rctx->export_16bpc; 2333 2334 if (rctx->queued.named.rasterizer) { 2335 key->ps.color_two_side = rctx->queued.named.rasterizer->two_side; 2336 key->ps.flatshade = rctx->queued.named.rasterizer->flatshade; 2337 2338 if (rctx->queued.named.blend) { 2339 key->ps.alpha_to_one = rctx->queued.named.blend->alpha_to_one && 2340 rctx->queued.named.rasterizer->multisample_enable && 2341 !rctx->fb_cb0_is_integer; 2342 } 2343 } 2344 if (rctx->queued.named.dsa) { 2345 key->ps.alpha_func = rctx->queued.named.dsa->alpha_func; 2346 key->ps.alpha_ref = rctx->queued.named.dsa->alpha_ref; 2347 } else { 2348 key->ps.alpha_func = PIPE_FUNC_ALWAYS; 2349 } 2350 } 2351} 2352 2353/* Select the hw shader variant depending on the current state. 2354 * (*dirty) is set to 1 if current variant was changed */ 2355int si_shader_select(struct pipe_context *ctx, 2356 struct si_pipe_shader_selector *sel, 2357 unsigned *dirty) 2358{ 2359 union si_shader_key key; 2360 struct si_pipe_shader * shader = NULL; 2361 int r; 2362 2363 si_shader_selector_key(ctx, sel, &key); 2364 2365 /* Check if we don't need to change anything. 2366 * This path is also used for most shaders that don't need multiple 2367 * variants, it will cost just a computation of the key and this 2368 * test. */ 2369 if (likely(sel->current && memcmp(&sel->current->key, &key, sizeof(key)) == 0)) { 2370 return 0; 2371 } 2372 2373 /* lookup if we have other variants in the list */ 2374 if (sel->num_shaders > 1) { 2375 struct si_pipe_shader *p = sel->current, *c = p->next_variant; 2376 2377 while (c && memcmp(&c->key, &key, sizeof(key)) != 0) { 2378 p = c; 2379 c = c->next_variant; 2380 } 2381 2382 if (c) { 2383 p->next_variant = c->next_variant; 2384 shader = c; 2385 } 2386 } 2387 2388 if (unlikely(!shader)) { 2389 shader = CALLOC(1, sizeof(struct si_pipe_shader)); 2390 shader->selector = sel; 2391 shader->key = key; 2392 2393 r = si_pipe_shader_create(ctx, shader); 2394 if (unlikely(r)) { 2395 R600_ERR("Failed to build shader variant (type=%u) %d\n", 2396 sel->type, r); 2397 sel->current = NULL; 2398 FREE(shader); 2399 return r; 2400 } 2401 2402 /* We don't know the value of fs_write_all property until we built 2403 * at least one variant, so we may need to recompute the key (include 2404 * rctx->framebuffer.nr_cbufs) after building first variant. */ 2405 if (sel->type == PIPE_SHADER_FRAGMENT && 2406 sel->num_shaders == 0 && 2407 shader->shader.fs_write_all) { 2408 sel->fs_write_all = 1; 2409 si_shader_selector_key(ctx, sel, &shader->key); 2410 } 2411 2412 sel->num_shaders++; 2413 } 2414 2415 if (dirty) 2416 *dirty = 1; 2417 2418 shader->next_variant = sel->current; 2419 sel->current = shader; 2420 2421 return 0; 2422} 2423 2424static void *si_create_shader_state(struct pipe_context *ctx, 2425 const struct pipe_shader_state *state, 2426 unsigned pipe_shader_type) 2427{ 2428 struct si_pipe_shader_selector *sel = CALLOC_STRUCT(si_pipe_shader_selector); 2429 int r; 2430 2431 sel->type = pipe_shader_type; 2432 sel->tokens = tgsi_dup_tokens(state->tokens); 2433 sel->so = state->stream_output; 2434 2435 r = si_shader_select(ctx, sel, NULL); 2436 if (r) { 2437 free(sel); 2438 return NULL; 2439 } 2440 2441 return sel; 2442} 2443 2444static void *si_create_fs_state(struct pipe_context *ctx, 2445 const struct pipe_shader_state *state) 2446{ 2447 return si_create_shader_state(ctx, state, PIPE_SHADER_FRAGMENT); 2448} 2449 2450static void *si_create_vs_state(struct pipe_context *ctx, 2451 const struct pipe_shader_state *state) 2452{ 2453 return si_create_shader_state(ctx, state, PIPE_SHADER_VERTEX); 2454} 2455 2456static void si_bind_vs_shader(struct pipe_context *ctx, void *state) 2457{ 2458 struct r600_context *rctx = (struct r600_context *)ctx; 2459 struct si_pipe_shader_selector *sel = state; 2460 2461 if (rctx->vs_shader == sel) 2462 return; 2463 2464 rctx->vs_shader = sel; 2465 2466 if (sel && sel->current) 2467 si_pm4_bind_state(rctx, vs, sel->current->pm4); 2468 else 2469 si_pm4_bind_state(rctx, vs, rctx->dummy_pixel_shader->pm4); 2470} 2471 2472static void si_bind_ps_shader(struct pipe_context *ctx, void *state) 2473{ 2474 struct r600_context *rctx = (struct r600_context *)ctx; 2475 struct si_pipe_shader_selector *sel = state; 2476 2477 if (rctx->ps_shader == sel) 2478 return; 2479 2480 rctx->ps_shader = sel; 2481 2482 if (sel && sel->current) 2483 si_pm4_bind_state(rctx, ps, sel->current->pm4); 2484 else 2485 si_pm4_bind_state(rctx, ps, rctx->dummy_pixel_shader->pm4); 2486} 2487 2488static void si_delete_shader_selector(struct pipe_context *ctx, 2489 struct si_pipe_shader_selector *sel) 2490{ 2491 struct r600_context *rctx = (struct r600_context *)ctx; 2492 struct si_pipe_shader *p = sel->current, *c; 2493 2494 while (p) { 2495 c = p->next_variant; 2496 si_pm4_delete_state(rctx, vs, p->pm4); 2497 si_pipe_shader_destroy(ctx, p); 2498 free(p); 2499 p = c; 2500 } 2501 2502 free(sel->tokens); 2503 free(sel); 2504 } 2505 2506static void si_delete_vs_shader(struct pipe_context *ctx, void *state) 2507{ 2508 struct r600_context *rctx = (struct r600_context *)ctx; 2509 struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state; 2510 2511 if (rctx->vs_shader == sel) { 2512 rctx->vs_shader = NULL; 2513 } 2514 2515 si_delete_shader_selector(ctx, sel); 2516} 2517 2518static void si_delete_ps_shader(struct pipe_context *ctx, void *state) 2519{ 2520 struct r600_context *rctx = (struct r600_context *)ctx; 2521 struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state; 2522 2523 if (rctx->ps_shader == sel) { 2524 rctx->ps_shader = NULL; 2525 } 2526 2527 si_delete_shader_selector(ctx, sel); 2528} 2529 2530/* 2531 * Samplers 2532 */ 2533 2534static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx, 2535 struct pipe_resource *texture, 2536 const struct pipe_sampler_view *state) 2537{ 2538 struct si_pipe_sampler_view *view = CALLOC_STRUCT(si_pipe_sampler_view); 2539 struct r600_texture *tmp = (struct r600_texture*)texture; 2540 const struct util_format_description *desc; 2541 unsigned format, num_format; 2542 uint32_t pitch = 0; 2543 unsigned char state_swizzle[4], swizzle[4]; 2544 unsigned height, depth, width; 2545 enum pipe_format pipe_format = state->format; 2546 struct radeon_surface_level *surflevel; 2547 int first_non_void; 2548 uint64_t va; 2549 2550 if (view == NULL) 2551 return NULL; 2552 2553 /* initialize base object */ 2554 view->base = *state; 2555 view->base.texture = NULL; 2556 pipe_reference(NULL, &texture->reference); 2557 view->base.texture = texture; 2558 view->base.reference.count = 1; 2559 view->base.context = ctx; 2560 2561 state_swizzle[0] = state->swizzle_r; 2562 state_swizzle[1] = state->swizzle_g; 2563 state_swizzle[2] = state->swizzle_b; 2564 state_swizzle[3] = state->swizzle_a; 2565 2566 surflevel = tmp->surface.level; 2567 2568 /* Texturing with separate depth and stencil. */ 2569 if (tmp->is_depth && !tmp->is_flushing_texture) { 2570 switch (pipe_format) { 2571 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 2572 pipe_format = PIPE_FORMAT_Z32_FLOAT; 2573 break; 2574 case PIPE_FORMAT_X8Z24_UNORM: 2575 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2576 /* Z24 is always stored like this. */ 2577 pipe_format = PIPE_FORMAT_Z24X8_UNORM; 2578 break; 2579 case PIPE_FORMAT_X24S8_UINT: 2580 case PIPE_FORMAT_S8X24_UINT: 2581 case PIPE_FORMAT_X32_S8X24_UINT: 2582 pipe_format = PIPE_FORMAT_S8_UINT; 2583 surflevel = tmp->surface.stencil_level; 2584 break; 2585 default:; 2586 } 2587 } 2588 2589 desc = util_format_description(pipe_format); 2590 2591 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 2592 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; 2593 const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1}; 2594 2595 switch (pipe_format) { 2596 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2597 case PIPE_FORMAT_X24S8_UINT: 2598 case PIPE_FORMAT_X32_S8X24_UINT: 2599 case PIPE_FORMAT_X8Z24_UNORM: 2600 util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); 2601 break; 2602 default: 2603 util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle); 2604 } 2605 } else { 2606 util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle); 2607 } 2608 2609 first_non_void = util_format_get_first_non_void_channel(pipe_format); 2610 2611 switch (pipe_format) { 2612 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2613 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2614 break; 2615 default: 2616 if (first_non_void < 0) { 2617 if (util_format_is_compressed(pipe_format)) { 2618 switch (pipe_format) { 2619 case PIPE_FORMAT_DXT1_SRGB: 2620 case PIPE_FORMAT_DXT1_SRGBA: 2621 case PIPE_FORMAT_DXT3_SRGBA: 2622 case PIPE_FORMAT_DXT5_SRGBA: 2623 num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 2624 break; 2625 case PIPE_FORMAT_RGTC1_SNORM: 2626 case PIPE_FORMAT_LATC1_SNORM: 2627 case PIPE_FORMAT_RGTC2_SNORM: 2628 case PIPE_FORMAT_LATC2_SNORM: 2629 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 2630 break; 2631 default: 2632 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2633 break; 2634 } 2635 } else { 2636 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 2637 } 2638 } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { 2639 num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 2640 } else { 2641 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2642 2643 switch (desc->channel[first_non_void].type) { 2644 case UTIL_FORMAT_TYPE_FLOAT: 2645 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 2646 break; 2647 case UTIL_FORMAT_TYPE_SIGNED: 2648 if (desc->channel[first_non_void].normalized) 2649 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 2650 else if (desc->channel[first_non_void].pure_integer) 2651 num_format = V_008F14_IMG_NUM_FORMAT_SINT; 2652 else 2653 num_format = V_008F14_IMG_NUM_FORMAT_SSCALED; 2654 break; 2655 case UTIL_FORMAT_TYPE_UNSIGNED: 2656 if (desc->channel[first_non_void].normalized) 2657 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2658 else if (desc->channel[first_non_void].pure_integer) 2659 num_format = V_008F14_IMG_NUM_FORMAT_UINT; 2660 else 2661 num_format = V_008F14_IMG_NUM_FORMAT_USCALED; 2662 } 2663 } 2664 } 2665 2666 format = si_translate_texformat(ctx->screen, pipe_format, desc, first_non_void); 2667 if (format == ~0) { 2668 format = 0; 2669 } 2670 2671 view->resource = &tmp->resource; 2672 2673 /* not supported any more */ 2674 //endian = si_colorformat_endian_swap(format); 2675 2676 width = surflevel[0].npix_x; 2677 height = surflevel[0].npix_y; 2678 depth = surflevel[0].npix_z; 2679 pitch = surflevel[0].nblk_x * util_format_get_blockwidth(pipe_format); 2680 2681 if (texture->target == PIPE_TEXTURE_1D_ARRAY) { 2682 height = 1; 2683 depth = texture->array_size; 2684 } else if (texture->target == PIPE_TEXTURE_2D_ARRAY) { 2685 depth = texture->array_size; 2686 } 2687 2688 va = r600_resource_va(ctx->screen, texture); 2689 va += surflevel[0].offset; 2690 view->state[0] = va >> 8; 2691 view->state[1] = (S_008F14_BASE_ADDRESS_HI(va >> 40) | 2692 S_008F14_DATA_FORMAT(format) | 2693 S_008F14_NUM_FORMAT(num_format)); 2694 view->state[2] = (S_008F18_WIDTH(width - 1) | 2695 S_008F18_HEIGHT(height - 1)); 2696 view->state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) | 2697 S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | 2698 S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | 2699 S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) | 2700 S_008F1C_BASE_LEVEL(texture->nr_samples > 1 ? 2701 0 : state->u.tex.first_level) | 2702 S_008F1C_LAST_LEVEL(texture->nr_samples > 1 ? 2703 util_logbase2(texture->nr_samples) : 2704 state->u.tex.last_level) | 2705 S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, 0, false)) | 2706 S_008F1C_POW2_PAD(texture->last_level > 0) | 2707 S_008F1C_TYPE(si_tex_dim(texture->target, texture->nr_samples))); 2708 view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1)); 2709 view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) | 2710 S_008F24_LAST_ARRAY(state->u.tex.last_layer)); 2711 view->state[6] = 0; 2712 view->state[7] = 0; 2713 2714 /* Initialize the sampler view for FMASK. */ 2715 if (tmp->fmask.size) { 2716 uint64_t va = r600_resource_va(ctx->screen, texture) + tmp->fmask.offset; 2717 uint32_t fmask_format; 2718 2719 switch (texture->nr_samples) { 2720 case 2: 2721 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2; 2722 break; 2723 case 4: 2724 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4; 2725 break; 2726 case 8: 2727 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8; 2728 break; 2729 default: 2730 assert(0); 2731 } 2732 2733 view->fmask_state[0] = va >> 8; 2734 view->fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | 2735 S_008F14_DATA_FORMAT(fmask_format) | 2736 S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT); 2737 view->fmask_state[2] = S_008F18_WIDTH(width - 1) | 2738 S_008F18_HEIGHT(height - 1); 2739 view->fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | 2740 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | 2741 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | 2742 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | 2743 S_008F1C_TILING_INDEX(tmp->fmask.tile_mode_index) | 2744 S_008F1C_TYPE(si_tex_dim(texture->target, 0)); 2745 view->fmask_state[4] = S_008F20_PITCH(tmp->fmask.pitch - 1); 2746 view->fmask_state[5] = S_008F24_BASE_ARRAY(state->u.tex.first_layer) | 2747 S_008F24_LAST_ARRAY(state->u.tex.last_layer); 2748 view->fmask_state[6] = 0; 2749 view->fmask_state[7] = 0; 2750 } 2751 2752 return &view->base; 2753} 2754 2755static void si_sampler_view_destroy(struct pipe_context *ctx, 2756 struct pipe_sampler_view *state) 2757{ 2758 struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state; 2759 2760 pipe_resource_reference(&state->texture, NULL); 2761 FREE(resource); 2762} 2763 2764static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter) 2765{ 2766 return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER || 2767 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER || 2768 (linear_filter && 2769 (wrap == PIPE_TEX_WRAP_CLAMP || 2770 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP)); 2771} 2772 2773static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state) 2774{ 2775 bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST || 2776 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST; 2777 2778 return (state->border_color.ui[0] || state->border_color.ui[1] || 2779 state->border_color.ui[2] || state->border_color.ui[3]) && 2780 (wrap_mode_uses_border_color(state->wrap_s, linear_filter) || 2781 wrap_mode_uses_border_color(state->wrap_t, linear_filter) || 2782 wrap_mode_uses_border_color(state->wrap_r, linear_filter)); 2783} 2784 2785static void *si_create_sampler_state(struct pipe_context *ctx, 2786 const struct pipe_sampler_state *state) 2787{ 2788 struct si_pipe_sampler_state *rstate = CALLOC_STRUCT(si_pipe_sampler_state); 2789 unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 2 : 0; 2790 unsigned border_color_type; 2791 2792 if (rstate == NULL) { 2793 return NULL; 2794 } 2795 2796 if (sampler_state_needs_border_color(state)) 2797 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER; 2798 else 2799 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; 2800 2801 rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) | 2802 S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) | 2803 S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) | 2804 (state->max_anisotropy & 0x7) << 9 | /* XXX */ 2805 S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) | 2806 S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) | 2807 aniso_flag_offset << 16 | /* XXX */ 2808 S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map)); 2809 rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | 2810 S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8))); 2811 rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | 2812 S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter)) | 2813 S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter)) | 2814 S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter))); 2815 rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type); 2816 2817 if (border_color_type == V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) { 2818 memcpy(rstate->border_color, state->border_color.ui, 2819 sizeof(rstate->border_color)); 2820 } 2821 2822 return rstate; 2823} 2824 2825/* XXX consider moving this function to si_descriptors.c for gcc to inline 2826 * the si_set_sampler_view calls. LTO might help too. */ 2827static struct si_pm4_state *si_set_sampler_views(struct r600_context *rctx, 2828 unsigned shader, unsigned count, 2829 struct pipe_sampler_view **views) 2830{ 2831 struct r600_textures_info *samplers = &rctx->samplers[shader]; 2832 struct si_pipe_sampler_view **rviews = (struct si_pipe_sampler_view **)views; 2833 struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx); 2834 int i; 2835 2836 si_pm4_inval_texture_cache(pm4); 2837 2838 for (i = 0; i < count; i++) { 2839 if (views[i]) { 2840 struct r600_texture *rtex = 2841 (struct r600_texture*)views[i]->texture; 2842 2843 if (rtex->is_depth && !rtex->is_flushing_texture) { 2844 samplers->depth_texture_mask |= 1 << i; 2845 } else { 2846 samplers->depth_texture_mask &= ~(1 << i); 2847 } 2848 if (rtex->cmask.size || rtex->fmask.size) { 2849 samplers->compressed_colortex_mask |= 1 << i; 2850 } else { 2851 samplers->compressed_colortex_mask &= ~(1 << i); 2852 } 2853 2854 si_set_sampler_view(rctx, shader, i, views[i], rviews[i]->state); 2855 2856 if (rtex->fmask.size) { 2857 si_set_sampler_view(rctx, shader, FMASK_TEX_OFFSET + i, 2858 views[i], rviews[i]->fmask_state); 2859 } else { 2860 si_set_sampler_view(rctx, shader, FMASK_TEX_OFFSET + i, 2861 NULL, NULL); 2862 } 2863 } else { 2864 samplers->depth_texture_mask &= ~(1 << i); 2865 samplers->compressed_colortex_mask &= ~(1 << i); 2866 si_set_sampler_view(rctx, shader, i, NULL, NULL); 2867 si_set_sampler_view(rctx, shader, FMASK_TEX_OFFSET + i, 2868 NULL, NULL); 2869 } 2870 } 2871 for (; i < samplers->n_views; i++) { 2872 samplers->depth_texture_mask &= ~(1 << i); 2873 samplers->compressed_colortex_mask &= ~(1 << i); 2874 si_set_sampler_view(rctx, shader, i, NULL, NULL); 2875 si_set_sampler_view(rctx, shader, FMASK_TEX_OFFSET + i, 2876 NULL, NULL); 2877 } 2878 2879 samplers->n_views = count; 2880 return pm4; 2881} 2882 2883static void si_set_vs_sampler_views(struct pipe_context *ctx, unsigned count, 2884 struct pipe_sampler_view **views) 2885{ 2886 struct r600_context *rctx = (struct r600_context *)ctx; 2887 struct si_pm4_state *pm4; 2888 2889 pm4 = si_set_sampler_views(rctx, PIPE_SHADER_VERTEX, count, views); 2890 si_pm4_set_state(rctx, vs_sampler_views, pm4); 2891} 2892 2893static void si_set_ps_sampler_views(struct pipe_context *ctx, unsigned count, 2894 struct pipe_sampler_view **views) 2895{ 2896 struct r600_context *rctx = (struct r600_context *)ctx; 2897 struct si_pm4_state *pm4; 2898 2899 pm4 = si_set_sampler_views(rctx, PIPE_SHADER_FRAGMENT, count, views); 2900 si_pm4_set_state(rctx, ps_sampler_views, pm4); 2901} 2902 2903static struct si_pm4_state *si_bind_sampler_states(struct r600_context *rctx, unsigned count, 2904 void **states, 2905 struct r600_textures_info *samplers, 2906 unsigned user_data_reg) 2907{ 2908 struct si_pipe_sampler_state **rstates = (struct si_pipe_sampler_state **)states; 2909 struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx); 2910 uint32_t *border_color_table = NULL; 2911 int i, j; 2912 2913 if (!count) 2914 goto out; 2915 2916 si_pm4_inval_texture_cache(pm4); 2917 2918 si_pm4_sh_data_begin(pm4); 2919 for (i = 0; i < count; i++) { 2920 if (rstates[i] && 2921 G_008F3C_BORDER_COLOR_TYPE(rstates[i]->val[3]) == 2922 V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER) { 2923 if (!rctx->border_color_table || 2924 ((rctx->border_color_offset + count - i) & 2925 C_008F3C_BORDER_COLOR_PTR)) { 2926 si_resource_reference(&rctx->border_color_table, NULL); 2927 rctx->border_color_offset = 0; 2928 2929 rctx->border_color_table = 2930 si_resource_create_custom(&rctx->screen->screen, 2931 PIPE_USAGE_STAGING, 2932 4096 * 4 * 4); 2933 } 2934 2935 if (!border_color_table) { 2936 border_color_table = 2937 rctx->ws->buffer_map(rctx->border_color_table->cs_buf, 2938 rctx->cs, 2939 PIPE_TRANSFER_WRITE | 2940 PIPE_TRANSFER_UNSYNCHRONIZED); 2941 } 2942 2943 for (j = 0; j < 4; j++) { 2944 border_color_table[4 * rctx->border_color_offset + j] = 2945 util_le32_to_cpu(rstates[i]->border_color[j]); 2946 } 2947 2948 rstates[i]->val[3] &= C_008F3C_BORDER_COLOR_PTR; 2949 rstates[i]->val[3] |= S_008F3C_BORDER_COLOR_PTR(rctx->border_color_offset++); 2950 } 2951 2952 for (j = 0; j < Elements(rstates[i]->val); ++j) { 2953 si_pm4_sh_data_add(pm4, rstates[i] ? rstates[i]->val[j] : 0); 2954 } 2955 } 2956 si_pm4_sh_data_end(pm4, user_data_reg, SI_SGPR_SAMPLER); 2957 2958 if (border_color_table) { 2959 uint64_t va_offset = 2960 r600_resource_va(&rctx->screen->screen, 2961 (void*)rctx->border_color_table); 2962 2963 si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va_offset >> 8); 2964 if (rctx->chip_class >= CIK) 2965 si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va_offset >> 40); 2966 rctx->ws->buffer_unmap(rctx->border_color_table->cs_buf); 2967 si_pm4_add_bo(pm4, rctx->border_color_table, RADEON_USAGE_READ); 2968 } 2969 2970 memcpy(samplers->samplers, states, sizeof(void*) * count); 2971 2972out: 2973 samplers->n_samplers = count; 2974 return pm4; 2975} 2976 2977static void si_bind_vs_sampler_states(struct pipe_context *ctx, unsigned count, void **states) 2978{ 2979 struct r600_context *rctx = (struct r600_context *)ctx; 2980 struct si_pm4_state *pm4; 2981 2982 pm4 = si_bind_sampler_states(rctx, count, states, &rctx->samplers[PIPE_SHADER_VERTEX], 2983 R_00B130_SPI_SHADER_USER_DATA_VS_0); 2984 si_pm4_set_state(rctx, vs_sampler, pm4); 2985} 2986 2987static void si_bind_ps_sampler_states(struct pipe_context *ctx, unsigned count, void **states) 2988{ 2989 struct r600_context *rctx = (struct r600_context *)ctx; 2990 struct si_pm4_state *pm4; 2991 2992 pm4 = si_bind_sampler_states(rctx, count, states, &rctx->samplers[PIPE_SHADER_FRAGMENT], 2993 R_00B030_SPI_SHADER_USER_DATA_PS_0); 2994 si_pm4_set_state(rctx, ps_sampler, pm4); 2995} 2996 2997static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) 2998{ 2999 struct r600_context *rctx = (struct r600_context *)ctx; 3000 struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx); 3001 uint16_t mask = sample_mask; 3002 3003 if (pm4 == NULL) 3004 return; 3005 3006 si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, mask | (mask << 16)); 3007 si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, mask | (mask << 16)); 3008 3009 si_pm4_set_state(rctx, sample_mask, pm4); 3010} 3011 3012static void si_delete_sampler_state(struct pipe_context *ctx, void *state) 3013{ 3014 free(state); 3015} 3016 3017/* 3018 * Constants 3019 */ 3020static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, 3021 struct pipe_constant_buffer *input) 3022{ 3023 struct r600_context *rctx = (struct r600_context *)ctx; 3024 struct r600_constbuf_state *state = &rctx->constbuf_state[shader]; 3025 struct pipe_constant_buffer *cb; 3026 const uint8_t *ptr; 3027 3028 /* Note that the state tracker can unbind constant buffers by 3029 * passing NULL here. 3030 */ 3031 if (unlikely(!input || (!input->buffer && !input->user_buffer))) { 3032 state->enabled_mask &= ~(1 << index); 3033 state->dirty_mask &= ~(1 << index); 3034 pipe_resource_reference(&state->cb[index].buffer, NULL); 3035 return; 3036 } 3037 3038 cb = &state->cb[index]; 3039 cb->buffer_size = input->buffer_size; 3040 3041 ptr = input->user_buffer; 3042 3043 if (ptr) { 3044 r600_upload_const_buffer(rctx, 3045 (struct si_resource**)&cb->buffer, ptr, 3046 cb->buffer_size, &cb->buffer_offset); 3047 } else { 3048 /* Setup the hw buffer. */ 3049 cb->buffer_offset = input->buffer_offset; 3050 pipe_resource_reference(&cb->buffer, input->buffer); 3051 } 3052 3053 state->enabled_mask |= 1 << index; 3054 state->dirty_mask |= 1 << index; 3055} 3056 3057/* 3058 * Vertex elements & buffers 3059 */ 3060 3061static void *si_create_vertex_elements(struct pipe_context *ctx, 3062 unsigned count, 3063 const struct pipe_vertex_element *elements) 3064{ 3065 struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element); 3066 int i; 3067 3068 assert(count < PIPE_MAX_ATTRIBS); 3069 if (!v) 3070 return NULL; 3071 3072 v->count = count; 3073 for (i = 0; i < count; ++i) { 3074 const struct util_format_description *desc; 3075 unsigned data_format, num_format; 3076 int first_non_void; 3077 3078 desc = util_format_description(elements[i].src_format); 3079 first_non_void = util_format_get_first_non_void_channel(elements[i].src_format); 3080 data_format = si_translate_vertexformat(ctx->screen, elements[i].src_format, 3081 desc, first_non_void); 3082 3083 switch (desc->channel[first_non_void].type) { 3084 case UTIL_FORMAT_TYPE_FIXED: 3085 num_format = V_008F0C_BUF_NUM_FORMAT_USCALED; /* XXX */ 3086 break; 3087 case UTIL_FORMAT_TYPE_SIGNED: 3088 if (desc->channel[first_non_void].normalized) 3089 num_format = V_008F0C_BUF_NUM_FORMAT_SNORM; 3090 else if (desc->channel[first_non_void].pure_integer) 3091 num_format = V_008F0C_BUF_NUM_FORMAT_SINT; 3092 else 3093 num_format = V_008F0C_BUF_NUM_FORMAT_SSCALED; 3094 break; 3095 case UTIL_FORMAT_TYPE_UNSIGNED: 3096 if (desc->channel[first_non_void].normalized) 3097 num_format = V_008F0C_BUF_NUM_FORMAT_UNORM; 3098 else if (desc->channel[first_non_void].pure_integer) 3099 num_format = V_008F0C_BUF_NUM_FORMAT_UINT; 3100 else 3101 num_format = V_008F0C_BUF_NUM_FORMAT_USCALED; 3102 break; 3103 case UTIL_FORMAT_TYPE_FLOAT: 3104 default: 3105 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 3106 } 3107 3108 v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 3109 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 3110 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 3111 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | 3112 S_008F0C_NUM_FORMAT(num_format) | 3113 S_008F0C_DATA_FORMAT(data_format); 3114 } 3115 memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count); 3116 3117 return v; 3118} 3119 3120static void si_bind_vertex_elements(struct pipe_context *ctx, void *state) 3121{ 3122 struct r600_context *rctx = (struct r600_context *)ctx; 3123 struct si_vertex_element *v = (struct si_vertex_element*)state; 3124 3125 rctx->vertex_elements = v; 3126} 3127 3128static void si_delete_vertex_element(struct pipe_context *ctx, void *state) 3129{ 3130 struct r600_context *rctx = (struct r600_context *)ctx; 3131 3132 if (rctx->vertex_elements == state) 3133 rctx->vertex_elements = NULL; 3134 FREE(state); 3135} 3136 3137static void si_set_vertex_buffers(struct pipe_context *ctx, unsigned start_slot, unsigned count, 3138 const struct pipe_vertex_buffer *buffers) 3139{ 3140 struct r600_context *rctx = (struct r600_context *)ctx; 3141 3142 util_set_vertex_buffers_count(rctx->vertex_buffer, &rctx->nr_vertex_buffers, buffers, start_slot, count); 3143} 3144 3145static void si_set_index_buffer(struct pipe_context *ctx, 3146 const struct pipe_index_buffer *ib) 3147{ 3148 struct r600_context *rctx = (struct r600_context *)ctx; 3149 3150 if (ib) { 3151 pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer); 3152 memcpy(&rctx->index_buffer, ib, sizeof(*ib)); 3153 } else { 3154 pipe_resource_reference(&rctx->index_buffer.buffer, NULL); 3155 } 3156} 3157 3158/* 3159 * Misc 3160 */ 3161static void si_set_polygon_stipple(struct pipe_context *ctx, 3162 const struct pipe_poly_stipple *state) 3163{ 3164} 3165 3166static void si_texture_barrier(struct pipe_context *ctx) 3167{ 3168 struct r600_context *rctx = (struct r600_context *)ctx; 3169 struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx); 3170 3171 if (pm4 == NULL) 3172 return; 3173 3174 si_pm4_inval_texture_cache(pm4); 3175 si_pm4_inval_fb_cache(pm4, rctx->framebuffer.nr_cbufs); 3176 si_pm4_set_state(rctx, texture_barrier, pm4); 3177} 3178 3179static void *si_create_blend_custom(struct r600_context *rctx, unsigned mode) 3180{ 3181 struct pipe_blend_state blend; 3182 3183 memset(&blend, 0, sizeof(blend)); 3184 blend.independent_blend_enable = true; 3185 blend.rt[0].colormask = 0xf; 3186 return si_create_blend_state_mode(&rctx->context, &blend, mode); 3187} 3188 3189void si_init_state_functions(struct r600_context *rctx) 3190{ 3191 int i; 3192 3193 rctx->context.create_blend_state = si_create_blend_state; 3194 rctx->context.bind_blend_state = si_bind_blend_state; 3195 rctx->context.delete_blend_state = si_delete_blend_state; 3196 rctx->context.set_blend_color = si_set_blend_color; 3197 3198 rctx->context.create_rasterizer_state = si_create_rs_state; 3199 rctx->context.bind_rasterizer_state = si_bind_rs_state; 3200 rctx->context.delete_rasterizer_state = si_delete_rs_state; 3201 3202 rctx->context.create_depth_stencil_alpha_state = si_create_dsa_state; 3203 rctx->context.bind_depth_stencil_alpha_state = si_bind_dsa_state; 3204 rctx->context.delete_depth_stencil_alpha_state = si_delete_dsa_state; 3205 3206 for (i = 0; i < 8; i++) { 3207 rctx->custom_dsa_flush_depth_stencil[i] = si_create_db_flush_dsa(rctx, true, true, i); 3208 rctx->custom_dsa_flush_depth[i] = si_create_db_flush_dsa(rctx, true, false, i); 3209 rctx->custom_dsa_flush_stencil[i] = si_create_db_flush_dsa(rctx, false, true, i); 3210 } 3211 rctx->custom_dsa_flush_inplace = si_create_db_flush_dsa(rctx, false, false, 0); 3212 rctx->custom_blend_resolve = si_create_blend_custom(rctx, V_028808_CB_RESOLVE); 3213 rctx->custom_blend_decompress = si_create_blend_custom(rctx, V_028808_CB_FMASK_DECOMPRESS); 3214 3215 rctx->context.set_clip_state = si_set_clip_state; 3216 rctx->context.set_scissor_states = si_set_scissor_states; 3217 rctx->context.set_viewport_states = si_set_viewport_states; 3218 rctx->context.set_stencil_ref = si_set_pipe_stencil_ref; 3219 3220 rctx->context.set_framebuffer_state = si_set_framebuffer_state; 3221 rctx->context.get_sample_position = si_get_sample_position; 3222 3223 rctx->context.create_vs_state = si_create_vs_state; 3224 rctx->context.create_fs_state = si_create_fs_state; 3225 rctx->context.bind_vs_state = si_bind_vs_shader; 3226 rctx->context.bind_fs_state = si_bind_ps_shader; 3227 rctx->context.delete_vs_state = si_delete_vs_shader; 3228 rctx->context.delete_fs_state = si_delete_ps_shader; 3229 3230 rctx->context.create_sampler_state = si_create_sampler_state; 3231 rctx->context.bind_vertex_sampler_states = si_bind_vs_sampler_states; 3232 rctx->context.bind_fragment_sampler_states = si_bind_ps_sampler_states; 3233 rctx->context.delete_sampler_state = si_delete_sampler_state; 3234 3235 rctx->context.create_sampler_view = si_create_sampler_view; 3236 rctx->context.set_vertex_sampler_views = si_set_vs_sampler_views; 3237 rctx->context.set_fragment_sampler_views = si_set_ps_sampler_views; 3238 rctx->context.sampler_view_destroy = si_sampler_view_destroy; 3239 3240 rctx->context.set_sample_mask = si_set_sample_mask; 3241 3242 rctx->context.set_constant_buffer = si_set_constant_buffer; 3243 3244 rctx->context.create_vertex_elements_state = si_create_vertex_elements; 3245 rctx->context.bind_vertex_elements_state = si_bind_vertex_elements; 3246 rctx->context.delete_vertex_elements_state = si_delete_vertex_element; 3247 rctx->context.set_vertex_buffers = si_set_vertex_buffers; 3248 rctx->context.set_index_buffer = si_set_index_buffer; 3249 3250 rctx->context.create_stream_output_target = si_create_so_target; 3251 rctx->context.stream_output_target_destroy = si_so_target_destroy; 3252 rctx->context.set_stream_output_targets = si_set_so_targets; 3253 3254 rctx->context.texture_barrier = si_texture_barrier; 3255 rctx->context.set_polygon_stipple = si_set_polygon_stipple; 3256 3257 rctx->context.draw_vbo = si_draw_vbo; 3258} 3259 3260void si_init_config(struct r600_context *rctx) 3261{ 3262 struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx); 3263 3264 if (pm4 == NULL) 3265 return; 3266 3267 si_cmd_context_control(pm4); 3268 3269 si_pm4_set_reg(pm4, R_028A4C_PA_SC_MODE_CNTL_1, 0x0); 3270 3271 si_pm4_set_reg(pm4, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0); 3272 si_pm4_set_reg(pm4, R_028A14_VGT_HOS_CNTL, 0x0); 3273 si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0); 3274 si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0); 3275 si_pm4_set_reg(pm4, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0); 3276 si_pm4_set_reg(pm4, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0); 3277 si_pm4_set_reg(pm4, R_028A28_VGT_GROUP_FIRST_DECR, 0x0); 3278 si_pm4_set_reg(pm4, R_028A2C_VGT_GROUP_DECR, 0x0); 3279 si_pm4_set_reg(pm4, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0); 3280 si_pm4_set_reg(pm4, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0); 3281 si_pm4_set_reg(pm4, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0); 3282 si_pm4_set_reg(pm4, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0); 3283 si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE, 0x0); 3284 si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0x0); 3285 si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0); 3286 si_pm4_set_reg(pm4, R_028B94_VGT_STRMOUT_CONFIG, 0x0); 3287 si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); 3288 si_pm4_set_reg(pm4, R_028AA8_IA_MULTI_VGT_PARAM, 3289 S_028AA8_SWITCH_ON_EOP(1) | 3290 S_028AA8_PARTIAL_VS_WAVE_ON(1) | 3291 S_028AA8_PRIMGROUP_SIZE(63)); 3292 si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0x00000000); 3293 si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0); 3294 if (rctx->chip_class < CIK) 3295 si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | 3296 S_008A14_CLIP_VTX_REORDER_ENA(1)); 3297 3298 si_pm4_set_reg(pm4, R_028B54_VGT_SHADER_STAGES_EN, 0); 3299 si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210); 3300 si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98); 3301 3302 si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0); 3303 3304 if (rctx->chip_class >= CIK) { 3305 switch (rctx->screen->family) { 3306 case CHIP_BONAIRE: 3307 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x16000012); 3308 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000); 3309 break; 3310 case CHIP_KAVERI: 3311 /* XXX todo */ 3312 case CHIP_KABINI: 3313 /* XXX todo */ 3314 default: 3315 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000); 3316 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000); 3317 break; 3318 } 3319 } else { 3320 switch (rctx->screen->family) { 3321 case CHIP_TAHITI: 3322 case CHIP_PITCAIRN: 3323 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x2a00126a); 3324 break; 3325 case CHIP_VERDE: 3326 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x0000124a); 3327 break; 3328 case CHIP_OLAND: 3329 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000082); 3330 break; 3331 case CHIP_HAINAN: 3332 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000); 3333 break; 3334 default: 3335 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000); 3336 break; 3337 } 3338 } 3339 3340 si_pm4_set_state(rctx, init, pm4); 3341} 3342