r600_hw_context.c revision 8a56ed8d6dc37c23d4695d0fc292d7bcc2976f08
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Jerome Glisse 25 */ 26#include "r600_hw_context_priv.h" 27#include "r600_pipe.h" 28#include "r600d.h" 29#include "util/u_memory.h" 30#include <errno.h> 31 32/* Get backends mask */ 33void r600_get_backend_mask(struct r600_context *ctx) 34{ 35 struct radeon_winsys_cs *cs = ctx->cs; 36 struct r600_resource *buffer; 37 uint32_t *results; 38 unsigned num_backends = ctx->screen->info.r600_num_backends; 39 unsigned i, mask = 0; 40 41 /* if backend_map query is supported by the kernel */ 42 if (ctx->screen->info.r600_backend_map_valid) { 43 unsigned num_tile_pipes = ctx->screen->info.r600_num_tile_pipes; 44 unsigned backend_map = ctx->screen->info.r600_backend_map; 45 unsigned item_width, item_mask; 46 47 if (ctx->chip_class >= EVERGREEN) { 48 item_width = 4; 49 item_mask = 0x7; 50 } else { 51 item_width = 2; 52 item_mask = 0x3; 53 } 54 55 while(num_tile_pipes--) { 56 i = backend_map & item_mask; 57 mask |= (1<<i); 58 backend_map >>= item_width; 59 } 60 if (mask != 0) { 61 ctx->backend_mask = mask; 62 return; 63 } 64 } 65 66 /* otherwise backup path for older kernels */ 67 68 /* create buffer for event data */ 69 buffer = (struct r600_resource*) 70 pipe_buffer_create(&ctx->screen->screen, PIPE_BIND_CUSTOM, 71 PIPE_USAGE_STAGING, ctx->max_db*16); 72 if (!buffer) 73 goto err; 74 75 /* initialize buffer with zeroes */ 76 results = ctx->ws->buffer_map(buffer->buf, ctx->cs, PIPE_TRANSFER_WRITE); 77 if (results) { 78 memset(results, 0, ctx->max_db * 4 * 4); 79 ctx->ws->buffer_unmap(buffer->buf); 80 81 /* emit EVENT_WRITE for ZPASS_DONE */ 82 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); 83 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); 84 cs->buf[cs->cdw++] = 0; 85 cs->buf[cs->cdw++] = 0; 86 87 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 88 cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, buffer, RADEON_USAGE_WRITE); 89 90 /* analyze results */ 91 results = ctx->ws->buffer_map(buffer->buf, ctx->cs, PIPE_TRANSFER_READ); 92 if (results) { 93 for(i = 0; i < ctx->max_db; i++) { 94 /* at least highest bit will be set if backend is used */ 95 if (results[i*4 + 1]) 96 mask |= (1<<i); 97 } 98 ctx->ws->buffer_unmap(buffer->buf); 99 } 100 } 101 102 pipe_resource_reference((struct pipe_resource**)&buffer, NULL); 103 104 if (mask != 0) { 105 ctx->backend_mask = mask; 106 return; 107 } 108 109err: 110 /* fallback to old method - set num_backends lower bits to 1 */ 111 ctx->backend_mask = (~((uint32_t)0))>>(32-num_backends); 112 return; 113} 114 115void r600_context_ps_partial_flush(struct r600_context *ctx) 116{ 117 struct radeon_winsys_cs *cs = ctx->cs; 118 119 if (!(ctx->flags & R600_CONTEXT_DRAW_PENDING)) 120 return; 121 122 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); 123 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); 124 125 ctx->flags &= ~R600_CONTEXT_DRAW_PENDING; 126} 127 128static void r600_init_block(struct r600_context *ctx, 129 struct r600_block *block, 130 const struct r600_reg *reg, int index, int nreg, 131 unsigned opcode, unsigned offset_base) 132{ 133 int i = index; 134 int j, n = nreg; 135 136 /* initialize block */ 137 if (opcode == PKT3_SET_RESOURCE) { 138 block->flags = BLOCK_FLAG_RESOURCE; 139 block->status |= R600_BLOCK_STATUS_RESOURCE_DIRTY; /* dirty all blocks at start */ 140 } else { 141 block->flags = 0; 142 block->status |= R600_BLOCK_STATUS_DIRTY; /* dirty all blocks at start */ 143 } 144 block->start_offset = reg[i].offset; 145 block->pm4[block->pm4_ndwords++] = PKT3(opcode, n, 0); 146 block->pm4[block->pm4_ndwords++] = (block->start_offset - offset_base) >> 2; 147 block->reg = &block->pm4[block->pm4_ndwords]; 148 block->pm4_ndwords += n; 149 block->nreg = n; 150 block->nreg_dirty = n; 151 LIST_INITHEAD(&block->list); 152 LIST_INITHEAD(&block->enable_list); 153 154 for (j = 0; j < n; j++) { 155 if (reg[i+j].flags & REG_FLAG_DIRTY_ALWAYS) { 156 block->flags |= REG_FLAG_DIRTY_ALWAYS; 157 } 158 if (reg[i+j].flags & REG_FLAG_ENABLE_ALWAYS) { 159 if (!(block->status & R600_BLOCK_STATUS_ENABLED)) { 160 block->status |= R600_BLOCK_STATUS_ENABLED; 161 LIST_ADDTAIL(&block->enable_list, &ctx->enable_list); 162 LIST_ADDTAIL(&block->list,&ctx->dirty); 163 } 164 } 165 if (reg[i+j].flags & REG_FLAG_FLUSH_CHANGE) { 166 block->flags |= REG_FLAG_FLUSH_CHANGE; 167 } 168 169 if (reg[i+j].flags & REG_FLAG_NEED_BO) { 170 block->nbo++; 171 assert(block->nbo < R600_BLOCK_MAX_BO); 172 block->pm4_bo_index[j] = block->nbo; 173 block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0, 0); 174 block->pm4[block->pm4_ndwords++] = 0x00000000; 175 block->reloc[block->nbo].bo_pm4_index = block->pm4_ndwords - 1; 176 } 177 if ((ctx->family > CHIP_R600) && 178 (ctx->family < CHIP_RV770) && reg[i+j].flags & REG_FLAG_RV6XX_SBU) { 179 block->pm4[block->pm4_ndwords++] = PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0); 180 block->pm4[block->pm4_ndwords++] = reg[i+j].sbu_flags; 181 } 182 } 183 /* check that we stay in limit */ 184 assert(block->pm4_ndwords < R600_BLOCK_MAX_REG); 185} 186 187int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg, 188 unsigned opcode, unsigned offset_base) 189{ 190 struct r600_block *block; 191 struct r600_range *range; 192 int offset; 193 194 for (unsigned i = 0, n = 0; i < nreg; i += n) { 195 /* ignore new block balise */ 196 if (reg[i].offset == GROUP_FORCE_NEW_BLOCK) { 197 n = 1; 198 continue; 199 } 200 201 /* ignore regs not on R600 on R600 */ 202 if ((reg[i].flags & REG_FLAG_NOT_R600) && ctx->family == CHIP_R600) { 203 n = 1; 204 continue; 205 } 206 207 /* register that need relocation are in their own group */ 208 /* find number of consecutive registers */ 209 n = 0; 210 offset = reg[i].offset; 211 while (reg[i + n].offset == offset) { 212 n++; 213 offset += 4; 214 if ((n + i) >= nreg) 215 break; 216 if (n >= (R600_BLOCK_MAX_REG - 2)) 217 break; 218 } 219 220 /* allocate new block */ 221 block = calloc(1, sizeof(struct r600_block)); 222 if (block == NULL) { 223 return -ENOMEM; 224 } 225 ctx->nblocks++; 226 for (int j = 0; j < n; j++) { 227 range = &ctx->range[CTX_RANGE_ID(reg[i + j].offset)]; 228 /* create block table if it doesn't exist */ 229 if (!range->blocks) 230 range->blocks = calloc(1 << HASH_SHIFT, sizeof(void *)); 231 if (!range->blocks) 232 return -1; 233 234 range->blocks[CTX_BLOCK_ID(reg[i + j].offset)] = block; 235 } 236 237 r600_init_block(ctx, block, reg, i, n, opcode, offset_base); 238 239 } 240 return 0; 241} 242 243/* R600/R700 configuration */ 244static const struct r600_reg r600_config_reg_list[] = { 245 {R_008958_VGT_PRIMITIVE_TYPE, 0, 0}, 246 {R_008C04_SQ_GPR_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0}, 247 {R_009508_TA_CNTL_AUX, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0}, 248}; 249 250static const struct r600_reg r600_ctl_const_list[] = { 251 {R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0}, 252}; 253 254static const struct r600_reg r600_context_reg_list[] = { 255 {R_028A4C_PA_SC_MODE_CNTL, 0, 0}, 256 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 257 {R_028040_CB_COLOR0_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_COLOR(0)}, 258 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 259 {R_0280A0_CB_COLOR0_INFO, REG_FLAG_NEED_BO, 0}, 260 {R_028060_CB_COLOR0_SIZE, 0, 0}, 261 {R_028080_CB_COLOR0_VIEW, 0, 0}, 262 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 263 {R_0280E0_CB_COLOR0_FRAG, REG_FLAG_NEED_BO, 0}, 264 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 265 {R_0280C0_CB_COLOR0_TILE, REG_FLAG_NEED_BO, 0}, 266 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 267 {R_028044_CB_COLOR1_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_COLOR(1)}, 268 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 269 {R_0280A4_CB_COLOR1_INFO, REG_FLAG_NEED_BO, 0}, 270 {R_028064_CB_COLOR1_SIZE, 0, 0}, 271 {R_028084_CB_COLOR1_VIEW, 0, 0}, 272 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 273 {R_0280E4_CB_COLOR1_FRAG, REG_FLAG_NEED_BO, 0}, 274 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 275 {R_0280C4_CB_COLOR1_TILE, REG_FLAG_NEED_BO, 0}, 276 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 277 {R_028048_CB_COLOR2_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_COLOR(2)}, 278 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 279 {R_0280A8_CB_COLOR2_INFO, REG_FLAG_NEED_BO, 0}, 280 {R_028068_CB_COLOR2_SIZE, 0, 0}, 281 {R_028088_CB_COLOR2_VIEW, 0, 0}, 282 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 283 {R_0280E8_CB_COLOR2_FRAG, REG_FLAG_NEED_BO, 0}, 284 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 285 {R_0280C8_CB_COLOR2_TILE, REG_FLAG_NEED_BO, 0}, 286 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 287 {R_02804C_CB_COLOR3_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_COLOR(3)}, 288 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 289 {R_0280AC_CB_COLOR3_INFO, REG_FLAG_NEED_BO, 0}, 290 {R_02806C_CB_COLOR3_SIZE, 0, 0}, 291 {R_02808C_CB_COLOR3_VIEW, 0, 0}, 292 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 293 {R_0280EC_CB_COLOR3_FRAG, REG_FLAG_NEED_BO, 0}, 294 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 295 {R_0280CC_CB_COLOR3_TILE, REG_FLAG_NEED_BO, 0}, 296 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 297 {R_028050_CB_COLOR4_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_COLOR(4)}, 298 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 299 {R_0280B0_CB_COLOR4_INFO, REG_FLAG_NEED_BO, 0}, 300 {R_028070_CB_COLOR4_SIZE, 0, 0}, 301 {R_028090_CB_COLOR4_VIEW, 0, 0}, 302 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 303 {R_0280F0_CB_COLOR4_FRAG, REG_FLAG_NEED_BO, 0}, 304 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 305 {R_0280D0_CB_COLOR4_TILE, REG_FLAG_NEED_BO, 0}, 306 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 307 {R_028054_CB_COLOR5_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_COLOR(5)}, 308 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 309 {R_0280B4_CB_COLOR5_INFO, REG_FLAG_NEED_BO, 0}, 310 {R_028074_CB_COLOR5_SIZE, 0, 0}, 311 {R_028094_CB_COLOR5_VIEW, 0, 0}, 312 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 313 {R_0280F4_CB_COLOR5_FRAG, REG_FLAG_NEED_BO, 0}, 314 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 315 {R_0280D4_CB_COLOR5_TILE, REG_FLAG_NEED_BO, 0}, 316 {R_028058_CB_COLOR6_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_COLOR(6)}, 317 {R_0280B8_CB_COLOR6_INFO, REG_FLAG_NEED_BO, 0}, 318 {R_028078_CB_COLOR6_SIZE, 0, 0}, 319 {R_028098_CB_COLOR6_VIEW, 0, 0}, 320 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 321 {R_0280F8_CB_COLOR6_FRAG, REG_FLAG_NEED_BO, 0}, 322 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 323 {R_0280D8_CB_COLOR6_TILE, REG_FLAG_NEED_BO, 0}, 324 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 325 {R_02805C_CB_COLOR7_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_COLOR(7)}, 326 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 327 {R_0280BC_CB_COLOR7_INFO, REG_FLAG_NEED_BO, 0}, 328 {R_02807C_CB_COLOR7_SIZE, 0, 0}, 329 {R_02809C_CB_COLOR7_VIEW, 0, 0}, 330 {R_0280FC_CB_COLOR7_FRAG, REG_FLAG_NEED_BO, 0}, 331 {R_0280DC_CB_COLOR7_TILE, REG_FLAG_NEED_BO, 0}, 332 {R_028120_CB_CLEAR_RED, 0, 0}, 333 {R_028124_CB_CLEAR_GREEN, 0, 0}, 334 {R_028128_CB_CLEAR_BLUE, 0, 0}, 335 {R_02812C_CB_CLEAR_ALPHA, 0, 0}, 336 {R_028140_ALU_CONST_BUFFER_SIZE_PS_0, REG_FLAG_DIRTY_ALWAYS, 0}, 337 {R_028144_ALU_CONST_BUFFER_SIZE_PS_1, REG_FLAG_DIRTY_ALWAYS, 0}, 338 {R_028180_ALU_CONST_BUFFER_SIZE_VS_0, REG_FLAG_DIRTY_ALWAYS, 0}, 339 {R_028184_ALU_CONST_BUFFER_SIZE_VS_1, REG_FLAG_DIRTY_ALWAYS, 0}, 340 {R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, 0}, 341 {R_028944_ALU_CONST_CACHE_PS_1, REG_FLAG_NEED_BO, 0}, 342 {R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, 0}, 343 {R_028984_ALU_CONST_CACHE_VS_1, REG_FLAG_NEED_BO, 0}, 344 {R_02823C_CB_SHADER_MASK, 0, 0}, 345 {R_028238_CB_TARGET_MASK, 0, 0}, 346 {R_028410_SX_ALPHA_TEST_CONTROL, 0, 0}, 347 {R_028414_CB_BLEND_RED, 0, 0}, 348 {R_028418_CB_BLEND_GREEN, 0, 0}, 349 {R_02841C_CB_BLEND_BLUE, 0, 0}, 350 {R_028420_CB_BLEND_ALPHA, 0, 0}, 351 {R_028424_CB_FOG_RED, 0, 0}, 352 {R_028428_CB_FOG_GREEN, 0, 0}, 353 {R_02842C_CB_FOG_BLUE, 0, 0}, 354 {R_028430_DB_STENCILREFMASK, 0, 0}, 355 {R_028434_DB_STENCILREFMASK_BF, 0, 0}, 356 {R_028438_SX_ALPHA_REF, 0, 0}, 357 {R_028780_CB_BLEND0_CONTROL, REG_FLAG_NOT_R600, 0}, 358 {R_028784_CB_BLEND1_CONTROL, REG_FLAG_NOT_R600, 0}, 359 {R_028788_CB_BLEND2_CONTROL, REG_FLAG_NOT_R600, 0}, 360 {R_02878C_CB_BLEND3_CONTROL, REG_FLAG_NOT_R600, 0}, 361 {R_028790_CB_BLEND4_CONTROL, REG_FLAG_NOT_R600, 0}, 362 {R_028794_CB_BLEND5_CONTROL, REG_FLAG_NOT_R600, 0}, 363 {R_028798_CB_BLEND6_CONTROL, REG_FLAG_NOT_R600, 0}, 364 {R_02879C_CB_BLEND7_CONTROL, REG_FLAG_NOT_R600, 0}, 365 {R_0287A0_CB_SHADER_CONTROL, 0, 0}, 366 {R_028800_DB_DEPTH_CONTROL, 0, 0}, 367 {R_028804_CB_BLEND_CONTROL, 0, 0}, 368 {R_028808_CB_COLOR_CONTROL, 0, 0}, 369 {R_02880C_DB_SHADER_CONTROL, 0, 0}, 370 {R_02800C_DB_DEPTH_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_DEPTH}, 371 {R_028000_DB_DEPTH_SIZE, 0, 0}, 372 {R_028004_DB_DEPTH_VIEW, 0, 0}, 373 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 374 {R_028010_DB_DEPTH_INFO, REG_FLAG_NEED_BO, 0}, 375 {R_028A6C_VGT_GS_OUT_PRIM_TYPE, 0, 0}, 376 {R_028D24_DB_HTILE_SURFACE, 0, 0}, 377 {R_028D34_DB_PREFETCH_LIMIT, 0, 0}, 378 {R_028034_PA_SC_SCREEN_SCISSOR_BR, 0, 0}, 379 {R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0}, 380 {R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0}, 381 {R_028250_PA_SC_VPORT_SCISSOR_0_TL, 0, 0}, 382 {R_028254_PA_SC_VPORT_SCISSOR_0_BR, 0, 0}, 383 {R_02843C_PA_CL_VPORT_XSCALE_0, 0, 0}, 384 {R_028440_PA_CL_VPORT_XOFFSET_0, 0, 0}, 385 {R_028444_PA_CL_VPORT_YSCALE_0, 0, 0}, 386 {R_028448_PA_CL_VPORT_YOFFSET_0, 0, 0}, 387 {R_02844C_PA_CL_VPORT_ZSCALE_0, 0, 0}, 388 {R_028450_PA_CL_VPORT_ZOFFSET_0, 0, 0}, 389 {R_0286D4_SPI_INTERP_CONTROL_0, 0, 0}, 390 {R_028810_PA_CL_CLIP_CNTL, 0, 0}, 391 {R_028814_PA_SU_SC_MODE_CNTL, 0, 0}, 392 {R_02881C_PA_CL_VS_OUT_CNTL, 0, 0}, 393 {R_028A00_PA_SU_POINT_SIZE, 0, 0}, 394 {R_028A04_PA_SU_POINT_MINMAX, 0, 0}, 395 {R_028A08_PA_SU_LINE_CNTL, 0, 0}, 396 {R_028A0C_PA_SC_LINE_STIPPLE, 0, 0}, 397 {R_028C08_PA_SU_VTX_CNTL, 0, 0}, 398 {R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 0, 0}, 399 {R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0, 0}, 400 {R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, 0, 0}, 401 {R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET, 0, 0}, 402 {R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE, 0, 0}, 403 {R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET, 0, 0}, 404 {R_028E20_PA_CL_UCP0_X, 0, 0}, 405 {R_028E24_PA_CL_UCP0_Y, 0, 0}, 406 {R_028E28_PA_CL_UCP0_Z, 0, 0}, 407 {R_028E2C_PA_CL_UCP0_W, 0, 0}, 408 {R_028E30_PA_CL_UCP1_X, 0, 0}, 409 {R_028E34_PA_CL_UCP1_Y, 0, 0}, 410 {R_028E38_PA_CL_UCP1_Z, 0, 0}, 411 {R_028E3C_PA_CL_UCP1_W, 0, 0}, 412 {R_028E40_PA_CL_UCP2_X, 0, 0}, 413 {R_028E44_PA_CL_UCP2_Y, 0, 0}, 414 {R_028E48_PA_CL_UCP2_Z, 0, 0}, 415 {R_028E4C_PA_CL_UCP2_W, 0, 0}, 416 {R_028E50_PA_CL_UCP3_X, 0, 0}, 417 {R_028E54_PA_CL_UCP3_Y, 0, 0}, 418 {R_028E58_PA_CL_UCP3_Z, 0, 0}, 419 {R_028E5C_PA_CL_UCP3_W, 0, 0}, 420 {R_028E60_PA_CL_UCP4_X, 0, 0}, 421 {R_028E64_PA_CL_UCP4_Y, 0, 0}, 422 {R_028E68_PA_CL_UCP4_Z, 0, 0}, 423 {R_028E6C_PA_CL_UCP4_W, 0, 0}, 424 {R_028E70_PA_CL_UCP5_X, 0, 0}, 425 {R_028E74_PA_CL_UCP5_Y, 0, 0}, 426 {R_028E78_PA_CL_UCP5_Z, 0, 0}, 427 {R_028E7C_PA_CL_UCP5_W, 0, 0}, 428 {R_028380_SQ_VTX_SEMANTIC_0, 0, 0}, 429 {R_028384_SQ_VTX_SEMANTIC_1, 0, 0}, 430 {R_028388_SQ_VTX_SEMANTIC_2, 0, 0}, 431 {R_02838C_SQ_VTX_SEMANTIC_3, 0, 0}, 432 {R_028390_SQ_VTX_SEMANTIC_4, 0, 0}, 433 {R_028394_SQ_VTX_SEMANTIC_5, 0, 0}, 434 {R_028398_SQ_VTX_SEMANTIC_6, 0, 0}, 435 {R_02839C_SQ_VTX_SEMANTIC_7, 0, 0}, 436 {R_0283A0_SQ_VTX_SEMANTIC_8, 0, 0}, 437 {R_0283A4_SQ_VTX_SEMANTIC_9, 0, 0}, 438 {R_0283A8_SQ_VTX_SEMANTIC_10, 0, 0}, 439 {R_0283AC_SQ_VTX_SEMANTIC_11, 0, 0}, 440 {R_0283B0_SQ_VTX_SEMANTIC_12, 0, 0}, 441 {R_0283B4_SQ_VTX_SEMANTIC_13, 0, 0}, 442 {R_0283B8_SQ_VTX_SEMANTIC_14, 0, 0}, 443 {R_0283BC_SQ_VTX_SEMANTIC_15, 0, 0}, 444 {R_0283C0_SQ_VTX_SEMANTIC_16, 0, 0}, 445 {R_0283C4_SQ_VTX_SEMANTIC_17, 0, 0}, 446 {R_0283C8_SQ_VTX_SEMANTIC_18, 0, 0}, 447 {R_0283CC_SQ_VTX_SEMANTIC_19, 0, 0}, 448 {R_0283D0_SQ_VTX_SEMANTIC_20, 0, 0}, 449 {R_0283D4_SQ_VTX_SEMANTIC_21, 0, 0}, 450 {R_0283D8_SQ_VTX_SEMANTIC_22, 0, 0}, 451 {R_0283DC_SQ_VTX_SEMANTIC_23, 0, 0}, 452 {R_0283E0_SQ_VTX_SEMANTIC_24, 0, 0}, 453 {R_0283E4_SQ_VTX_SEMANTIC_25, 0, 0}, 454 {R_0283E8_SQ_VTX_SEMANTIC_26, 0, 0}, 455 {R_0283EC_SQ_VTX_SEMANTIC_27, 0, 0}, 456 {R_0283F0_SQ_VTX_SEMANTIC_28, 0, 0}, 457 {R_0283F4_SQ_VTX_SEMANTIC_29, 0, 0}, 458 {R_0283F8_SQ_VTX_SEMANTIC_30, 0, 0}, 459 {R_0283FC_SQ_VTX_SEMANTIC_31, 0, 0}, 460 {R_028614_SPI_VS_OUT_ID_0, 0, 0}, 461 {R_028618_SPI_VS_OUT_ID_1, 0, 0}, 462 {R_02861C_SPI_VS_OUT_ID_2, 0, 0}, 463 {R_028620_SPI_VS_OUT_ID_3, 0, 0}, 464 {R_028624_SPI_VS_OUT_ID_4, 0, 0}, 465 {R_028628_SPI_VS_OUT_ID_5, 0, 0}, 466 {R_02862C_SPI_VS_OUT_ID_6, 0, 0}, 467 {R_028630_SPI_VS_OUT_ID_7, 0, 0}, 468 {R_028634_SPI_VS_OUT_ID_8, 0, 0}, 469 {R_028638_SPI_VS_OUT_ID_9, 0, 0}, 470 {R_0286C4_SPI_VS_OUT_CONFIG, 0, 0}, 471 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 472 {R_028858_SQ_PGM_START_VS, REG_FLAG_NEED_BO, 0}, 473 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 474 {R_028868_SQ_PGM_RESOURCES_VS, 0, 0}, 475 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 476 {R_028894_SQ_PGM_START_FS, REG_FLAG_NEED_BO, 0}, 477 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 478 {R_0288A4_SQ_PGM_RESOURCES_FS, 0, 0}, 479 {R_0288DC_SQ_PGM_CF_OFFSET_FS, 0, 0}, 480 {R_028644_SPI_PS_INPUT_CNTL_0, 0, 0}, 481 {R_028648_SPI_PS_INPUT_CNTL_1, 0, 0}, 482 {R_02864C_SPI_PS_INPUT_CNTL_2, 0, 0}, 483 {R_028650_SPI_PS_INPUT_CNTL_3, 0, 0}, 484 {R_028654_SPI_PS_INPUT_CNTL_4, 0, 0}, 485 {R_028658_SPI_PS_INPUT_CNTL_5, 0, 0}, 486 {R_02865C_SPI_PS_INPUT_CNTL_6, 0, 0}, 487 {R_028660_SPI_PS_INPUT_CNTL_7, 0, 0}, 488 {R_028664_SPI_PS_INPUT_CNTL_8, 0, 0}, 489 {R_028668_SPI_PS_INPUT_CNTL_9, 0, 0}, 490 {R_02866C_SPI_PS_INPUT_CNTL_10, 0, 0}, 491 {R_028670_SPI_PS_INPUT_CNTL_11, 0, 0}, 492 {R_028674_SPI_PS_INPUT_CNTL_12, 0, 0}, 493 {R_028678_SPI_PS_INPUT_CNTL_13, 0, 0}, 494 {R_02867C_SPI_PS_INPUT_CNTL_14, 0, 0}, 495 {R_028680_SPI_PS_INPUT_CNTL_15, 0, 0}, 496 {R_028684_SPI_PS_INPUT_CNTL_16, 0, 0}, 497 {R_028688_SPI_PS_INPUT_CNTL_17, 0, 0}, 498 {R_02868C_SPI_PS_INPUT_CNTL_18, 0, 0}, 499 {R_028690_SPI_PS_INPUT_CNTL_19, 0, 0}, 500 {R_028694_SPI_PS_INPUT_CNTL_20, 0, 0}, 501 {R_028698_SPI_PS_INPUT_CNTL_21, 0, 0}, 502 {R_02869C_SPI_PS_INPUT_CNTL_22, 0, 0}, 503 {R_0286A0_SPI_PS_INPUT_CNTL_23, 0, 0}, 504 {R_0286A4_SPI_PS_INPUT_CNTL_24, 0, 0}, 505 {R_0286A8_SPI_PS_INPUT_CNTL_25, 0, 0}, 506 {R_0286AC_SPI_PS_INPUT_CNTL_26, 0, 0}, 507 {R_0286B0_SPI_PS_INPUT_CNTL_27, 0, 0}, 508 {R_0286B4_SPI_PS_INPUT_CNTL_28, 0, 0}, 509 {R_0286B8_SPI_PS_INPUT_CNTL_29, 0, 0}, 510 {R_0286BC_SPI_PS_INPUT_CNTL_30, 0, 0}, 511 {R_0286C0_SPI_PS_INPUT_CNTL_31, 0, 0}, 512 {R_0286CC_SPI_PS_IN_CONTROL_0, 0, 0}, 513 {R_0286D0_SPI_PS_IN_CONTROL_1, 0, 0}, 514 {R_0286D8_SPI_INPUT_Z, 0, 0}, 515 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 516 {R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, 0}, 517 {GROUP_FORCE_NEW_BLOCK, 0, 0}, 518 {R_028850_SQ_PGM_RESOURCES_PS, 0, 0}, 519 {R_028854_SQ_PGM_EXPORTS_PS, 0, 0}, 520 {R_028408_VGT_INDX_OFFSET, 0, 0}, 521 {R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0, 0}, 522 {R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0, 0}, 523}; 524 525/* SHADER RESOURCE R600/R700 */ 526int r600_resource_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride, struct r600_reg *reg, int nreg, unsigned offset_base) 527{ 528 int i; 529 struct r600_block *block; 530 range->blocks = calloc(nblocks, sizeof(struct r600_block *)); 531 if (range->blocks == NULL) 532 return -ENOMEM; 533 534 reg[0].offset += offset; 535 for (i = 0; i < nblocks; i++) { 536 block = calloc(1, sizeof(struct r600_block)); 537 if (block == NULL) { 538 return -ENOMEM; 539 } 540 ctx->nblocks++; 541 range->blocks[i] = block; 542 r600_init_block(ctx, block, reg, 0, nreg, PKT3_SET_RESOURCE, offset_base); 543 544 reg[0].offset += stride; 545 } 546 return 0; 547} 548 549 550static int r600_resource_range_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride) 551{ 552 struct r600_reg r600_shader_resource[] = { 553 {R_038000_RESOURCE0_WORD0, REG_FLAG_NEED_BO, 0}, 554 {R_038004_RESOURCE0_WORD1, REG_FLAG_NEED_BO, 0}, 555 {R_038008_RESOURCE0_WORD2, 0, 0}, 556 {R_03800C_RESOURCE0_WORD3, 0, 0}, 557 {R_038010_RESOURCE0_WORD4, 0, 0}, 558 {R_038014_RESOURCE0_WORD5, 0, 0}, 559 {R_038018_RESOURCE0_WORD6, 0, 0}, 560 }; 561 unsigned nreg = Elements(r600_shader_resource); 562 563 return r600_resource_init(ctx, range, offset, nblocks, stride, r600_shader_resource, nreg, R600_RESOURCE_OFFSET); 564} 565 566/* SHADER SAMPLER R600/R700/EG/CM */ 567int r600_state_sampler_init(struct r600_context *ctx, uint32_t offset) 568{ 569 struct r600_reg r600_shader_sampler[] = { 570 {R_03C000_SQ_TEX_SAMPLER_WORD0_0, 0, 0}, 571 {R_03C004_SQ_TEX_SAMPLER_WORD1_0, 0, 0}, 572 {R_03C008_SQ_TEX_SAMPLER_WORD2_0, 0, 0}, 573 }; 574 unsigned nreg = Elements(r600_shader_sampler); 575 576 for (int i = 0; i < nreg; i++) { 577 r600_shader_sampler[i].offset += offset; 578 } 579 return r600_context_add_block(ctx, r600_shader_sampler, nreg, PKT3_SET_SAMPLER, R600_SAMPLER_OFFSET); 580} 581 582/* SHADER SAMPLER BORDER R600/R700 */ 583static int r600_state_sampler_border_init(struct r600_context *ctx, uint32_t offset) 584{ 585 struct r600_reg r600_shader_sampler_border[] = { 586 {R_00A400_TD_PS_SAMPLER0_BORDER_RED, 0, 0}, 587 {R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, 0, 0}, 588 {R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, 0, 0}, 589 {R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, 0, 0}, 590 }; 591 unsigned nreg = Elements(r600_shader_sampler_border); 592 593 for (int i = 0; i < nreg; i++) { 594 r600_shader_sampler_border[i].offset += offset; 595 } 596 return r600_context_add_block(ctx, r600_shader_sampler_border, nreg, PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET); 597} 598 599static int r600_loop_const_init(struct r600_context *ctx, uint32_t offset) 600{ 601 unsigned nreg = 32; 602 struct r600_reg r600_loop_consts[32]; 603 int i; 604 605 for (i = 0; i < nreg; i++) { 606 r600_loop_consts[i].offset = R600_LOOP_CONST_OFFSET + ((offset + i) * 4); 607 r600_loop_consts[i].flags = REG_FLAG_DIRTY_ALWAYS; 608 r600_loop_consts[i].sbu_flags = 0; 609 } 610 return r600_context_add_block(ctx, r600_loop_consts, nreg, PKT3_SET_LOOP_CONST, R600_LOOP_CONST_OFFSET); 611} 612 613static void r600_free_resource_range(struct r600_context *ctx, struct r600_range *range, int nblocks) 614{ 615 struct r600_block *block; 616 int i; 617 618 if (!range->blocks) { 619 return; /* nothing to do */ 620 } 621 622 for (i = 0; i < nblocks; i++) { 623 block = range->blocks[i]; 624 if (block) { 625 for (int k = 1; k <= block->nbo; k++) 626 pipe_resource_reference((struct pipe_resource**)&block->reloc[k].bo, NULL); 627 free(block); 628 } 629 } 630 free(range->blocks); 631} 632 633/* initialize */ 634void r600_context_fini(struct r600_context *ctx) 635{ 636 struct r600_block *block; 637 struct r600_range *range; 638 639 if (ctx->range) { 640 for (int i = 0; i < NUM_RANGES; i++) { 641 if (!ctx->range[i].blocks) 642 continue; 643 for (int j = 0; j < (1 << HASH_SHIFT); j++) { 644 block = ctx->range[i].blocks[j]; 645 if (block) { 646 for (int k = 0, offset = block->start_offset; k < block->nreg; k++, offset += 4) { 647 range = &ctx->range[CTX_RANGE_ID(offset)]; 648 range->blocks[CTX_BLOCK_ID(offset)] = NULL; 649 } 650 for (int k = 1; k <= block->nbo; k++) { 651 pipe_resource_reference((struct pipe_resource**)&block->reloc[k].bo, NULL); 652 } 653 free(block); 654 } 655 } 656 free(ctx->range[i].blocks); 657 } 658 } 659 r600_free_resource_range(ctx, &ctx->ps_resources, ctx->num_ps_resources); 660 r600_free_resource_range(ctx, &ctx->vs_resources, ctx->num_vs_resources); 661 r600_free_resource_range(ctx, &ctx->fs_resources, ctx->num_fs_resources); 662 free(ctx->blocks); 663} 664 665static void r600_add_resource_block(struct r600_context *ctx, struct r600_range *range, int num_blocks, int *index) 666{ 667 int c = *index; 668 for (int j = 0; j < num_blocks; j++) { 669 if (!range->blocks[j]) 670 continue; 671 672 ctx->blocks[c++] = range->blocks[j]; 673 } 674 *index = c; 675} 676 677int r600_setup_block_table(struct r600_context *ctx) 678{ 679 /* setup block table */ 680 int c = 0; 681 ctx->blocks = calloc(ctx->nblocks, sizeof(void*)); 682 if (!ctx->blocks) 683 return -ENOMEM; 684 for (int i = 0; i < NUM_RANGES; i++) { 685 if (!ctx->range[i].blocks) 686 continue; 687 for (int j = 0, add; j < (1 << HASH_SHIFT); j++) { 688 if (!ctx->range[i].blocks[j]) 689 continue; 690 691 add = 1; 692 for (int k = 0; k < c; k++) { 693 if (ctx->blocks[k] == ctx->range[i].blocks[j]) { 694 add = 0; 695 break; 696 } 697 } 698 if (add) { 699 assert(c < ctx->nblocks); 700 ctx->blocks[c++] = ctx->range[i].blocks[j]; 701 j += (ctx->range[i].blocks[j]->nreg) - 1; 702 } 703 } 704 } 705 706 r600_add_resource_block(ctx, &ctx->ps_resources, ctx->num_ps_resources, &c); 707 r600_add_resource_block(ctx, &ctx->vs_resources, ctx->num_vs_resources, &c); 708 r600_add_resource_block(ctx, &ctx->fs_resources, ctx->num_fs_resources, &c); 709 return 0; 710} 711 712int r600_context_init(struct r600_context *ctx) 713{ 714 int r; 715 716 /* add blocks */ 717 r = r600_context_add_block(ctx, r600_config_reg_list, 718 Elements(r600_config_reg_list), PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET); 719 if (r) 720 goto out_err; 721 r = r600_context_add_block(ctx, r600_context_reg_list, 722 Elements(r600_context_reg_list), PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET); 723 if (r) 724 goto out_err; 725 r = r600_context_add_block(ctx, r600_ctl_const_list, 726 Elements(r600_ctl_const_list), PKT3_SET_CTL_CONST, R600_CTL_CONST_OFFSET); 727 if (r) 728 goto out_err; 729 730 /* PS SAMPLER BORDER */ 731 for (int j = 0, offset = 0; j < 18; j++, offset += 0x10) { 732 r = r600_state_sampler_border_init(ctx, offset); 733 if (r) 734 goto out_err; 735 } 736 737 /* VS SAMPLER BORDER */ 738 for (int j = 0, offset = 0x200; j < 18; j++, offset += 0x10) { 739 r = r600_state_sampler_border_init(ctx, offset); 740 if (r) 741 goto out_err; 742 } 743 /* PS SAMPLER */ 744 for (int j = 0, offset = 0; j < 18; j++, offset += 0xC) { 745 r = r600_state_sampler_init(ctx, offset); 746 if (r) 747 goto out_err; 748 } 749 /* VS SAMPLER */ 750 for (int j = 0, offset = 0xD8; j < 18; j++, offset += 0xC) { 751 r = r600_state_sampler_init(ctx, offset); 752 if (r) 753 goto out_err; 754 } 755 756 ctx->num_ps_resources = 160; 757 ctx->num_vs_resources = 160; 758 ctx->num_fs_resources = 16; 759 r = r600_resource_range_init(ctx, &ctx->ps_resources, 0, 160, 0x1c); 760 if (r) 761 goto out_err; 762 r = r600_resource_range_init(ctx, &ctx->vs_resources, 0x1180, 160, 0x1c); 763 if (r) 764 goto out_err; 765 r = r600_resource_range_init(ctx, &ctx->fs_resources, 0x2300, 16, 0x1c); 766 if (r) 767 goto out_err; 768 769 /* PS loop const */ 770 r600_loop_const_init(ctx, 0); 771 /* VS loop const */ 772 r600_loop_const_init(ctx, 32); 773 774 r = r600_setup_block_table(ctx); 775 if (r) 776 goto out_err; 777 778 ctx->max_db = 4; 779 return 0; 780out_err: 781 r600_context_fini(ctx); 782 return r; 783} 784 785void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, 786 boolean count_draw_in) 787{ 788 struct r600_atom *state; 789 790 /* The number of dwords we already used in the CS so far. */ 791 num_dw += ctx->cs->cdw; 792 793 if (count_draw_in) { 794 /* The number of dwords all the dirty states would take. */ 795 LIST_FOR_EACH_ENTRY(state, &ctx->dirty_states, head) { 796 num_dw += state->num_dw; 797 } 798 799 num_dw += ctx->pm4_dirty_cdwords; 800 801 /* The upper-bound of how much a draw command would take. */ 802 num_dw += R600_MAX_DRAW_CS_DWORDS; 803 } 804 805 /* Count in queries_suspend. */ 806 num_dw += ctx->num_cs_dw_nontimer_queries_suspend; 807 num_dw += ctx->num_cs_dw_timer_queries_suspend; 808 809 /* Count in streamout_end at the end of CS. */ 810 num_dw += ctx->num_cs_dw_streamout_end; 811 812 /* Count in render_condition(NULL) at the end of CS. */ 813 if (ctx->predicate_drawing) { 814 num_dw += 3; 815 } 816 817 /* Count in framebuffer cache flushes at the end of CS. */ 818 num_dw += 7; /* one SURFACE_SYNC and CACHE_FLUSH_AND_INV (r6xx-only) */ 819 820 /* Save 16 dwords for the fence mechanism. */ 821 num_dw += 16; 822 823 /* Flush if there's not enough space. */ 824 if (num_dw > RADEON_MAX_CMDBUF_DWORDS) { 825 r600_flush(&ctx->context, NULL, RADEON_FLUSH_ASYNC); 826 } 827} 828 829void r600_context_dirty_block(struct r600_context *ctx, 830 struct r600_block *block, 831 int dirty, int index) 832{ 833 if ((index + 1) > block->nreg_dirty) 834 block->nreg_dirty = index + 1; 835 836 if ((dirty != (block->status & R600_BLOCK_STATUS_DIRTY)) || !(block->status & R600_BLOCK_STATUS_ENABLED)) { 837 block->status |= R600_BLOCK_STATUS_DIRTY; 838 ctx->pm4_dirty_cdwords += block->pm4_ndwords; 839 if (!(block->status & R600_BLOCK_STATUS_ENABLED)) { 840 block->status |= R600_BLOCK_STATUS_ENABLED; 841 LIST_ADDTAIL(&block->enable_list, &ctx->enable_list); 842 } 843 LIST_ADDTAIL(&block->list,&ctx->dirty); 844 845 if (block->flags & REG_FLAG_FLUSH_CHANGE) { 846 r600_context_ps_partial_flush(ctx); 847 } 848 } 849} 850 851void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state) 852{ 853 struct r600_block *block; 854 int dirty; 855 for (int i = 0; i < state->nregs; i++) { 856 unsigned id, reloc_id; 857 struct r600_pipe_reg *reg = &state->regs[i]; 858 859 block = reg->block; 860 id = reg->id; 861 862 dirty = block->status & R600_BLOCK_STATUS_DIRTY; 863 864 if (reg->value != block->reg[id]) { 865 block->reg[id] = reg->value; 866 dirty |= R600_BLOCK_STATUS_DIRTY; 867 } 868 if (block->flags & REG_FLAG_DIRTY_ALWAYS) 869 dirty |= R600_BLOCK_STATUS_DIRTY; 870 if (block->pm4_bo_index[id]) { 871 /* find relocation */ 872 reloc_id = block->pm4_bo_index[id]; 873 pipe_resource_reference((struct pipe_resource**)&block->reloc[reloc_id].bo, ®->bo->b.b.b); 874 block->reloc[reloc_id].bo_usage = reg->bo_usage; 875 /* always force dirty for relocs for now */ 876 dirty |= R600_BLOCK_STATUS_DIRTY; 877 } 878 879 if (dirty) 880 r600_context_dirty_block(ctx, block, dirty, id); 881 } 882} 883 884static void r600_context_dirty_resource_block(struct r600_context *ctx, 885 struct r600_block *block, 886 int dirty, int index) 887{ 888 block->nreg_dirty = index + 1; 889 890 if ((dirty != (block->status & R600_BLOCK_STATUS_RESOURCE_DIRTY)) || !(block->status & R600_BLOCK_STATUS_ENABLED)) { 891 block->status |= R600_BLOCK_STATUS_RESOURCE_DIRTY; 892 ctx->pm4_dirty_cdwords += block->pm4_ndwords; 893 if (!(block->status & R600_BLOCK_STATUS_ENABLED)) { 894 block->status |= R600_BLOCK_STATUS_ENABLED; 895 LIST_ADDTAIL(&block->enable_list, &ctx->enable_list); 896 } 897 LIST_ADDTAIL(&block->list,&ctx->resource_dirty); 898 } 899} 900 901void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, struct r600_block *block) 902{ 903 int dirty; 904 int num_regs = ctx->chip_class >= EVERGREEN ? 8 : 7; 905 boolean is_vertex; 906 907 if (state == NULL) { 908 block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_RESOURCE_DIRTY); 909 pipe_resource_reference((struct pipe_resource**)&block->reloc[1].bo, NULL); 910 pipe_resource_reference((struct pipe_resource**)&block->reloc[2].bo, NULL); 911 LIST_DELINIT(&block->list); 912 LIST_DELINIT(&block->enable_list); 913 return; 914 } 915 916 is_vertex = ((state->val[num_regs-1] & 0xc0000000) == 0xc0000000); 917 dirty = block->status & R600_BLOCK_STATUS_RESOURCE_DIRTY; 918 919 if (memcmp(block->reg, state->val, num_regs*4)) { 920 memcpy(block->reg, state->val, num_regs * 4); 921 dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; 922 } 923 924 /* if no BOs on block, force dirty */ 925 if (!block->reloc[1].bo || !block->reloc[2].bo) 926 dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; 927 928 if (!dirty) { 929 if (is_vertex) { 930 if (block->reloc[1].bo->buf != state->bo[0]->buf) 931 dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; 932 } else { 933 if ((block->reloc[1].bo->buf != state->bo[0]->buf) || 934 (block->reloc[2].bo->buf != state->bo[1]->buf)) 935 dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; 936 } 937 } 938 939 if (dirty) { 940 if (is_vertex) { 941 /* VERTEX RESOURCE, we preted there is 2 bo to relocate so 942 * we have single case btw VERTEX & TEXTURE resource 943 */ 944 pipe_resource_reference((struct pipe_resource**)&block->reloc[1].bo, &state->bo[0]->b.b.b); 945 block->reloc[1].bo_usage = state->bo_usage[0]; 946 pipe_resource_reference((struct pipe_resource**)&block->reloc[2].bo, NULL); 947 } else { 948 /* TEXTURE RESOURCE */ 949 pipe_resource_reference((struct pipe_resource**)&block->reloc[1].bo, &state->bo[0]->b.b.b); 950 block->reloc[1].bo_usage = state->bo_usage[0]; 951 pipe_resource_reference((struct pipe_resource**)&block->reloc[2].bo, &state->bo[1]->b.b.b); 952 block->reloc[2].bo_usage = state->bo_usage[1]; 953 } 954 955 if (is_vertex) 956 block->status |= R600_BLOCK_STATUS_RESOURCE_VERTEX; 957 else 958 block->status &= ~R600_BLOCK_STATUS_RESOURCE_VERTEX; 959 960 r600_context_dirty_resource_block(ctx, block, dirty, num_regs - 1); 961 } 962} 963 964void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid) 965{ 966 struct r600_block *block = ctx->ps_resources.blocks[rid]; 967 968 r600_context_pipe_state_set_resource(ctx, state, block); 969} 970 971void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid) 972{ 973 struct r600_block *block = ctx->vs_resources.blocks[rid]; 974 975 r600_context_pipe_state_set_resource(ctx, state, block); 976} 977 978void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid) 979{ 980 struct r600_block *block = ctx->fs_resources.blocks[rid]; 981 982 r600_context_pipe_state_set_resource(ctx, state, block); 983} 984 985void r600_context_pipe_state_set_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) 986{ 987 struct r600_range *range; 988 struct r600_block *block; 989 int i; 990 int dirty; 991 992 range = &ctx->range[CTX_RANGE_ID(offset)]; 993 block = range->blocks[CTX_BLOCK_ID(offset)]; 994 if (state == NULL) { 995 block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); 996 LIST_DELINIT(&block->list); 997 LIST_DELINIT(&block->enable_list); 998 return; 999 } 1000 dirty = block->status & R600_BLOCK_STATUS_DIRTY; 1001 1002 for (i = 0; i < 3; i++) { 1003 if (block->reg[i] != state->regs[i].value) { 1004 block->reg[i] = state->regs[i].value; 1005 dirty |= R600_BLOCK_STATUS_DIRTY; 1006 } 1007 } 1008 1009 if (dirty) 1010 r600_context_dirty_block(ctx, block, dirty, 2); 1011} 1012 1013static inline void r600_context_pipe_state_set_sampler_border(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) 1014{ 1015 struct r600_range *range; 1016 struct r600_block *block; 1017 int i; 1018 int dirty; 1019 1020 range = &ctx->range[CTX_RANGE_ID(offset)]; 1021 block = range->blocks[CTX_BLOCK_ID(offset)]; 1022 if (state == NULL) { 1023 block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); 1024 LIST_DELINIT(&block->list); 1025 LIST_DELINIT(&block->enable_list); 1026 return; 1027 } 1028 if (state->nregs <= 3) { 1029 return; 1030 } 1031 dirty = block->status & R600_BLOCK_STATUS_DIRTY; 1032 for (i = 0; i < 4; i++) { 1033 if (block->reg[i] != state->regs[i + 3].value) { 1034 block->reg[i] = state->regs[i + 3].value; 1035 dirty |= R600_BLOCK_STATUS_DIRTY; 1036 } 1037 } 1038 1039 /* We have to flush the shaders before we change the border color 1040 * registers, or previous draw commands that haven't completed yet 1041 * will end up using the new border color. */ 1042 if (dirty & R600_BLOCK_STATUS_DIRTY) 1043 r600_context_ps_partial_flush(ctx); 1044 if (dirty) 1045 r600_context_dirty_block(ctx, block, dirty, 3); 1046} 1047 1048void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id) 1049{ 1050 unsigned offset; 1051 1052 offset = R_03C000_SQ_TEX_SAMPLER_WORD0_0 + 12*id; 1053 r600_context_pipe_state_set_sampler(ctx, state, offset); 1054 offset = R_00A400_TD_PS_SAMPLER0_BORDER_RED + 16*id; 1055 r600_context_pipe_state_set_sampler_border(ctx, state, offset); 1056} 1057 1058void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id) 1059{ 1060 unsigned offset; 1061 1062 offset = R_03C000_SQ_TEX_SAMPLER_WORD0_0 + 12*(id + 18); 1063 r600_context_pipe_state_set_sampler(ctx, state, offset); 1064 offset = R_00A600_TD_VS_SAMPLER0_BORDER_RED + 16*id; 1065 r600_context_pipe_state_set_sampler_border(ctx, state, offset); 1066} 1067 1068struct r600_resource *r600_context_reg_bo(struct r600_context *ctx, unsigned offset) 1069{ 1070 struct r600_range *range; 1071 struct r600_block *block; 1072 unsigned id; 1073 1074 range = &ctx->range[CTX_RANGE_ID(offset)]; 1075 block = range->blocks[CTX_BLOCK_ID(offset)]; 1076 offset -= block->start_offset; 1077 id = block->pm4_bo_index[offset >> 2]; 1078 if (block->reloc[id].bo) { 1079 return block->reloc[id].bo; 1080 } 1081 return NULL; 1082} 1083 1084void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *block) 1085{ 1086 struct radeon_winsys_cs *cs = ctx->cs; 1087 int optional = block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS); 1088 int cp_dwords = block->pm4_ndwords, start_dword = 0; 1089 int new_dwords = 0; 1090 int nbo = block->nbo; 1091 1092 if (block->nreg_dirty == 0 && optional) { 1093 goto out; 1094 } 1095 1096 if (nbo) { 1097 for (int j = 0; j < block->nreg; j++) { 1098 if (block->pm4_bo_index[j]) { 1099 /* find relocation */ 1100 struct r600_block_reloc *reloc = &block->reloc[block->pm4_bo_index[j]]; 1101 if (reloc->bo) { 1102 block->pm4[reloc->bo_pm4_index] = 1103 r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage); 1104 } else { 1105 block->pm4[reloc->bo_pm4_index] = 0; 1106 } 1107 nbo--; 1108 if (nbo == 0) 1109 break; 1110 1111 } 1112 } 1113 } 1114 1115 optional &= (block->nreg_dirty != block->nreg); 1116 if (optional) { 1117 new_dwords = block->nreg_dirty; 1118 start_dword = cs->cdw; 1119 cp_dwords = new_dwords + 2; 1120 } 1121 memcpy(&cs->buf[cs->cdw], block->pm4, cp_dwords * 4); 1122 cs->cdw += cp_dwords; 1123 1124 if (optional) { 1125 uint32_t newword; 1126 1127 newword = cs->buf[start_dword]; 1128 newword &= PKT_COUNT_C; 1129 newword |= PKT_COUNT_S(new_dwords); 1130 cs->buf[start_dword] = newword; 1131 } 1132out: 1133 block->status ^= R600_BLOCK_STATUS_DIRTY; 1134 block->nreg_dirty = 0; 1135 LIST_DELINIT(&block->list); 1136} 1137 1138void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r600_block *block) 1139{ 1140 struct radeon_winsys_cs *cs = ctx->cs; 1141 int cp_dwords = block->pm4_ndwords; 1142 int nbo = block->nbo; 1143 1144 if (block->status & R600_BLOCK_STATUS_RESOURCE_VERTEX) { 1145 nbo = 1; 1146 cp_dwords -= 2; /* don't copy the second NOP */ 1147 } 1148 1149 for (int j = 0; j < nbo; j++) { 1150 if (block->pm4_bo_index[j]) { 1151 /* find relocation */ 1152 struct r600_block_reloc *reloc = &block->reloc[block->pm4_bo_index[j]]; 1153 block->pm4[reloc->bo_pm4_index] = 1154 r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage); 1155 } 1156 } 1157 1158 memcpy(&cs->buf[cs->cdw], block->pm4, cp_dwords * 4); 1159 cs->cdw += cp_dwords; 1160 1161 block->status ^= R600_BLOCK_STATUS_RESOURCE_DIRTY; 1162 block->nreg_dirty = 0; 1163 LIST_DELINIT(&block->list); 1164} 1165 1166void r600_inval_shader_cache(struct r600_context *ctx) 1167{ 1168 ctx->atom_surface_sync.flush_flags |= S_0085F0_SH_ACTION_ENA(1); 1169 r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom); 1170} 1171 1172void r600_inval_texture_cache(struct r600_context *ctx) 1173{ 1174 ctx->atom_surface_sync.flush_flags |= S_0085F0_TC_ACTION_ENA(1); 1175 r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom); 1176} 1177 1178void r600_inval_vertex_cache(struct r600_context *ctx) 1179{ 1180 if (ctx->family == CHIP_RV610 || 1181 ctx->family == CHIP_RV620 || 1182 ctx->family == CHIP_RS780 || 1183 ctx->family == CHIP_RS880 || 1184 ctx->family == CHIP_RV710 || 1185 ctx->family == CHIP_CEDAR || 1186 ctx->family == CHIP_PALM || 1187 ctx->family == CHIP_SUMO || 1188 ctx->family == CHIP_SUMO2 || 1189 ctx->family == CHIP_CAICOS || 1190 ctx->family == CHIP_CAYMAN) { 1191 /* Some GPUs don't have the vertex cache and must use the texture cache instead. */ 1192 ctx->atom_surface_sync.flush_flags |= S_0085F0_TC_ACTION_ENA(1); 1193 } else { 1194 ctx->atom_surface_sync.flush_flags |= S_0085F0_VC_ACTION_ENA(1); 1195 } 1196 r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom); 1197} 1198 1199void r600_flush_framebuffer(struct r600_context *ctx, bool flush_now) 1200{ 1201 if (!(ctx->flags & R600_CONTEXT_DST_CACHES_DIRTY)) 1202 return; 1203 1204 ctx->atom_surface_sync.flush_flags |= 1205 r600_get_cb_flush_flags(ctx) | 1206 (ctx->framebuffer.zsbuf ? S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1) : 0); 1207 1208 if (flush_now) { 1209 r600_emit_atom(ctx, &ctx->atom_surface_sync.atom); 1210 } else { 1211 r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom); 1212 } 1213 1214 /* Also add a complete cache flush to work around broken flushing on R6xx. */ 1215 if (ctx->chip_class == R600) { 1216 if (flush_now) { 1217 r600_emit_atom(ctx, &ctx->atom_r6xx_flush_and_inv); 1218 } else { 1219 r600_atom_dirty(ctx, &ctx->atom_r6xx_flush_and_inv); 1220 } 1221 } 1222 1223 ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY; 1224} 1225 1226void r600_context_flush(struct r600_context *ctx, unsigned flags) 1227{ 1228 struct radeon_winsys_cs *cs = ctx->cs; 1229 struct r600_block *enable_block = NULL; 1230 bool timer_queries_suspended = false; 1231 bool nontimer_queries_suspended = false; 1232 bool streamout_suspended = false; 1233 1234 if (cs->cdw == ctx->atom_start_cs.atom.num_dw) 1235 return; 1236 1237 /* suspend queries */ 1238 if (ctx->num_cs_dw_timer_queries_suspend) { 1239 r600_suspend_timer_queries(ctx); 1240 timer_queries_suspended = true; 1241 } 1242 if (ctx->num_cs_dw_nontimer_queries_suspend) { 1243 r600_suspend_nontimer_queries(ctx); 1244 nontimer_queries_suspended = true; 1245 } 1246 1247 if (ctx->num_cs_dw_streamout_end) { 1248 r600_context_streamout_end(ctx); 1249 streamout_suspended = true; 1250 } 1251 1252 r600_flush_framebuffer(ctx, true); 1253 1254 /* partial flush is needed to avoid lockups on some chips with user fences */ 1255 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); 1256 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); 1257 1258 /* force to keep tiling flags */ 1259 flags |= RADEON_FLUSH_KEEP_TILING_FLAGS; 1260 1261 /* Flush the CS. */ 1262 ctx->ws->cs_flush(ctx->cs, flags); 1263 1264 ctx->pm4_dirty_cdwords = 0; 1265 ctx->flags = 0; 1266 1267 r600_emit_atom(ctx, &ctx->atom_start_cs.atom); 1268 r600_atom_dirty(ctx, &ctx->atom_db_misc_state.atom); 1269 if (ctx->chip_class >= EVERGREEN) 1270 r600_atom_dirty(ctx, &ctx->atom_eg_strmout_config.atom); 1271 1272 if (streamout_suspended) { 1273 ctx->streamout_start = TRUE; 1274 ctx->streamout_append_bitmask = ~0; 1275 } 1276 1277 /* resume queries */ 1278 if (timer_queries_suspended) { 1279 r600_resume_timer_queries(ctx); 1280 } 1281 if (nontimer_queries_suspended) { 1282 r600_resume_nontimer_queries(ctx); 1283 } 1284 1285 /* set all valid group as dirty so they get reemited on 1286 * next draw command 1287 */ 1288 LIST_FOR_EACH_ENTRY(enable_block, &ctx->enable_list, enable_list) { 1289 if (!(enable_block->flags & BLOCK_FLAG_RESOURCE)) { 1290 if(!(enable_block->status & R600_BLOCK_STATUS_DIRTY)) { 1291 LIST_ADDTAIL(&enable_block->list,&ctx->dirty); 1292 enable_block->status |= R600_BLOCK_STATUS_DIRTY; 1293 } 1294 } else { 1295 if(!(enable_block->status & R600_BLOCK_STATUS_RESOURCE_DIRTY)) { 1296 LIST_ADDTAIL(&enable_block->list,&ctx->resource_dirty); 1297 enable_block->status |= R600_BLOCK_STATUS_RESOURCE_DIRTY; 1298 } 1299 } 1300 ctx->pm4_dirty_cdwords += enable_block->pm4_ndwords; 1301 enable_block->nreg_dirty = enable_block->nreg; 1302 } 1303} 1304 1305void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fence_bo, unsigned offset, unsigned value) 1306{ 1307 struct radeon_winsys_cs *cs = ctx->cs; 1308 uint64_t va; 1309 1310 r600_need_cs_space(ctx, 10, FALSE); 1311 1312 va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo); 1313 va = va + (offset << 2); 1314 1315 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); 1316 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); 1317 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); 1318 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); 1319 cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL; /* ADDRESS_LO */ 1320 /* DATA_SEL | INT_EN | ADDRESS_HI */ 1321 cs->buf[cs->cdw++] = (1 << 29) | (0 << 24) | ((va >> 32UL) & 0xFF); 1322 cs->buf[cs->cdw++] = value; /* DATA_LO */ 1323 cs->buf[cs->cdw++] = 0; /* DATA_HI */ 1324 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 1325 cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, fence_bo, RADEON_USAGE_WRITE); 1326} 1327 1328static void r600_flush_vgt_streamout(struct r600_context *ctx) 1329{ 1330 struct radeon_winsys_cs *cs = ctx->cs; 1331 1332 cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0); 1333 cs->buf[cs->cdw++] = (R_008490_CP_STRMOUT_CNTL - R600_CONFIG_REG_OFFSET) >> 2; 1334 cs->buf[cs->cdw++] = 0; 1335 1336 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); 1337 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0); 1338 1339 cs->buf[cs->cdw++] = PKT3(PKT3_WAIT_REG_MEM, 5, 0); 1340 cs->buf[cs->cdw++] = WAIT_REG_MEM_EQUAL; /* wait until the register is equal to the reference value */ 1341 cs->buf[cs->cdw++] = R_008490_CP_STRMOUT_CNTL >> 2; /* register */ 1342 cs->buf[cs->cdw++] = 0; 1343 cs->buf[cs->cdw++] = S_008490_OFFSET_UPDATE_DONE(1); /* reference value */ 1344 cs->buf[cs->cdw++] = S_008490_OFFSET_UPDATE_DONE(1); /* mask */ 1345 cs->buf[cs->cdw++] = 4; /* poll interval */ 1346} 1347 1348static void r600_set_streamout_enable(struct r600_context *ctx, unsigned buffer_enable_bit) 1349{ 1350 struct radeon_winsys_cs *cs = ctx->cs; 1351 1352 if (buffer_enable_bit) { 1353 cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); 1354 cs->buf[cs->cdw++] = (R_028AB0_VGT_STRMOUT_EN - R600_CONTEXT_REG_OFFSET) >> 2; 1355 cs->buf[cs->cdw++] = S_028AB0_STREAMOUT(1); 1356 1357 cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); 1358 cs->buf[cs->cdw++] = (R_028B20_VGT_STRMOUT_BUFFER_EN - R600_CONTEXT_REG_OFFSET) >> 2; 1359 cs->buf[cs->cdw++] = buffer_enable_bit; 1360 } else { 1361 cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); 1362 cs->buf[cs->cdw++] = (R_028AB0_VGT_STRMOUT_EN - R600_CONTEXT_REG_OFFSET) >> 2; 1363 cs->buf[cs->cdw++] = S_028AB0_STREAMOUT(0); 1364 } 1365} 1366 1367void r600_context_streamout_begin(struct r600_context *ctx) 1368{ 1369 struct radeon_winsys_cs *cs = ctx->cs; 1370 struct r600_so_target **t = ctx->so_targets; 1371 unsigned *stride_in_dw = ctx->vs_shader->so.stride; 1372 unsigned buffer_en, i, update_flags = 0; 1373 uint64_t va; 1374 1375 buffer_en = (ctx->num_so_targets >= 1 && t[0] ? 1 : 0) | 1376 (ctx->num_so_targets >= 2 && t[1] ? 2 : 0) | 1377 (ctx->num_so_targets >= 3 && t[2] ? 4 : 0) | 1378 (ctx->num_so_targets >= 4 && t[3] ? 8 : 0); 1379 1380 ctx->num_cs_dw_streamout_end = 1381 12 + /* flush_vgt_streamout */ 1382 util_bitcount(buffer_en) * 8 + 1383 3; 1384 1385 r600_need_cs_space(ctx, 1386 12 + /* flush_vgt_streamout */ 1387 6 + /* enables */ 1388 util_bitcount(buffer_en & ctx->streamout_append_bitmask) * 8 + 1389 util_bitcount(buffer_en & ~ctx->streamout_append_bitmask) * 6 + 1390 (ctx->family > CHIP_R600 && ctx->family < CHIP_RV770 ? 2 : 0) + 1391 ctx->num_cs_dw_streamout_end, TRUE); 1392 1393 if (ctx->chip_class >= EVERGREEN) { 1394 evergreen_flush_vgt_streamout(ctx); 1395 evergreen_set_streamout_enable(ctx, buffer_en); 1396 } else { 1397 r600_flush_vgt_streamout(ctx); 1398 r600_set_streamout_enable(ctx, buffer_en); 1399 } 1400 1401 for (i = 0; i < ctx->num_so_targets; i++) { 1402 if (t[i]) { 1403 t[i]->stride_in_dw = stride_in_dw[i]; 1404 t[i]->so_index = i; 1405 va = r600_resource_va(&ctx->screen->screen, 1406 (void*)t[i]->b.buffer); 1407 1408 update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i); 1409 1410 cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 3, 0); 1411 cs->buf[cs->cdw++] = (R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 1412 16*i - R600_CONTEXT_REG_OFFSET) >> 2; 1413 cs->buf[cs->cdw++] = (t[i]->b.buffer_offset + 1414 t[i]->b.buffer_size) >> 2; /* BUFFER_SIZE (in DW) */ 1415 cs->buf[cs->cdw++] = stride_in_dw[i]; /* VTX_STRIDE (in DW) */ 1416 cs->buf[cs->cdw++] = va >> 8; /* BUFFER_BASE */ 1417 1418 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 1419 cs->buf[cs->cdw++] = 1420 r600_context_bo_reloc(ctx, r600_resource(t[i]->b.buffer), 1421 RADEON_USAGE_WRITE); 1422 1423 if (ctx->streamout_append_bitmask & (1 << i)) { 1424 va = r600_resource_va(&ctx->screen->screen, 1425 (void*)t[i]->filled_size); 1426 /* Append. */ 1427 cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0); 1428 cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) | 1429 STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM); /* control */ 1430 cs->buf[cs->cdw++] = 0; /* unused */ 1431 cs->buf[cs->cdw++] = 0; /* unused */ 1432 cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL; /* src address lo */ 1433 cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFUL; /* src address hi */ 1434 1435 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 1436 cs->buf[cs->cdw++] = 1437 r600_context_bo_reloc(ctx, t[i]->filled_size, 1438 RADEON_USAGE_READ); 1439 } else { 1440 /* Start from the beginning. */ 1441 cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0); 1442 cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) | 1443 STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET); /* control */ 1444 cs->buf[cs->cdw++] = 0; /* unused */ 1445 cs->buf[cs->cdw++] = 0; /* unused */ 1446 cs->buf[cs->cdw++] = t[i]->b.buffer_offset >> 2; /* buffer offset in DW */ 1447 cs->buf[cs->cdw++] = 0; /* unused */ 1448 } 1449 } 1450 } 1451 1452 if (ctx->family > CHIP_R600 && ctx->family < CHIP_RV770) { 1453 cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0); 1454 cs->buf[cs->cdw++] = update_flags; 1455 } 1456} 1457 1458void r600_context_streamout_end(struct r600_context *ctx) 1459{ 1460 struct radeon_winsys_cs *cs = ctx->cs; 1461 struct r600_so_target **t = ctx->so_targets; 1462 unsigned i, flush_flags = 0; 1463 uint64_t va; 1464 1465 if (ctx->chip_class >= EVERGREEN) { 1466 evergreen_flush_vgt_streamout(ctx); 1467 } else { 1468 r600_flush_vgt_streamout(ctx); 1469 } 1470 1471 for (i = 0; i < ctx->num_so_targets; i++) { 1472 if (t[i]) { 1473 va = r600_resource_va(&ctx->screen->screen, 1474 (void*)t[i]->filled_size); 1475 cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0); 1476 cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) | 1477 STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) | 1478 STRMOUT_STORE_BUFFER_FILLED_SIZE; /* control */ 1479 cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL; /* dst address lo */ 1480 cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFUL; /* dst address hi */ 1481 cs->buf[cs->cdw++] = 0; /* unused */ 1482 cs->buf[cs->cdw++] = 0; /* unused */ 1483 1484 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 1485 cs->buf[cs->cdw++] = 1486 r600_context_bo_reloc(ctx, t[i]->filled_size, 1487 RADEON_USAGE_WRITE); 1488 1489 flush_flags |= S_0085F0_SO0_DEST_BASE_ENA(1) << i; 1490 } 1491 } 1492 1493 if (ctx->chip_class >= EVERGREEN) { 1494 evergreen_set_streamout_enable(ctx, 0); 1495 } else { 1496 r600_set_streamout_enable(ctx, 0); 1497 } 1498 1499 /* This is needed to fix cache flushes on r600. */ 1500 if (ctx->chip_class == R600) { 1501 if (ctx->family == CHIP_RV670 || 1502 ctx->family == CHIP_RS780 || 1503 ctx->family == CHIP_RS880) { 1504 flush_flags |= S_0085F0_DEST_BASE_0_ENA(1); 1505 } 1506 1507 r600_atom_dirty(ctx, &ctx->atom_r6xx_flush_and_inv); 1508 } 1509 1510 /* Flush streamout caches. */ 1511 ctx->atom_surface_sync.flush_flags |= flush_flags; 1512 r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom); 1513 1514 ctx->num_cs_dw_streamout_end = 0; 1515 1516#if 0 1517 for (i = 0; i < ctx->num_so_targets; i++) { 1518 if (!t[i]) 1519 continue; 1520 1521 uint32_t *ptr = ctx->ws->buffer_map(t[i]->filled_size->buf, ctx->cs, RADEON_USAGE_READ); 1522 printf("FILLED_SIZE%i: %u\n", i, *ptr); 1523 ctx->ws->buffer_unmap(t[i]->filled_size->buf); 1524 } 1525#endif 1526} 1527 1528void r600_context_draw_opaque_count(struct r600_context *ctx, struct r600_so_target *t) 1529{ 1530 struct radeon_winsys_cs *cs = ctx->cs; 1531 uint64_t va = r600_resource_va(&ctx->screen->screen, 1532 (void*)t->filled_size); 1533 1534 r600_need_cs_space(ctx, 14 + 21, TRUE); 1535 1536 cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); 1537 cs->buf[cs->cdw++] = (R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET - R600_CONTEXT_REG_OFFSET) >> 2; 1538 cs->buf[cs->cdw++] = 0; 1539 1540 cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); 1541 cs->buf[cs->cdw++] = (R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE - R600_CONTEXT_REG_OFFSET) >> 2; 1542 cs->buf[cs->cdw++] = t->stride_in_dw; 1543 1544 cs->buf[cs->cdw++] = PKT3(PKT3_COPY_DW, 4, 0); 1545 cs->buf[cs->cdw++] = COPY_DW_SRC_IS_MEM | COPY_DW_DST_IS_REG; 1546 cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL; /* src address lo */ 1547 cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFUL; /* src address hi */ 1548 cs->buf[cs->cdw++] = R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2; /* dst register */ 1549 cs->buf[cs->cdw++] = 0; /* unused */ 1550 1551 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 1552 cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, t->filled_size, RADEON_USAGE_READ); 1553} 1554