/* r600_hw_context.c — Mesa Gallium r600 driver, revision 0813e58a3e41faf6f2072d034dfdc6198a3a1fee */
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 
22 * 23 * Authors: 24 * Jerome Glisse 25 */ 26#include "r600_hw_context_priv.h" 27#include "r600_pipe.h" 28#include "r600d.h" 29#include "util/u_memory.h" 30#include <errno.h> 31 32#define GROUP_FORCE_NEW_BLOCK 0 33 34/* Get backends mask */ 35void r600_get_backend_mask(struct r600_context *ctx) 36{ 37 struct radeon_winsys_cs *cs = ctx->cs; 38 struct r600_resource *buffer; 39 uint32_t *results; 40 unsigned num_backends = ctx->screen->info.r600_num_backends; 41 unsigned i, mask = 0; 42 43 /* if backend_map query is supported by the kernel */ 44 if (ctx->screen->info.r600_backend_map_valid) { 45 unsigned num_tile_pipes = ctx->screen->info.r600_num_tile_pipes; 46 unsigned backend_map = ctx->screen->info.r600_backend_map; 47 unsigned item_width, item_mask; 48 49 if (ctx->screen->chip_class >= EVERGREEN) { 50 item_width = 4; 51 item_mask = 0x7; 52 } else { 53 item_width = 2; 54 item_mask = 0x3; 55 } 56 57 while(num_tile_pipes--) { 58 i = backend_map & item_mask; 59 mask |= (1<<i); 60 backend_map >>= item_width; 61 } 62 if (mask != 0) { 63 ctx->backend_mask = mask; 64 return; 65 } 66 } 67 68 /* otherwise backup path for older kernels */ 69 70 /* create buffer for event data */ 71 buffer = (struct r600_resource*) 72 pipe_buffer_create(&ctx->screen->screen, PIPE_BIND_CUSTOM, 73 PIPE_USAGE_STAGING, ctx->max_db*16); 74 if (!buffer) 75 goto err; 76 77 /* initialize buffer with zeroes */ 78 results = ctx->ws->buffer_map(buffer->buf, ctx->cs, PIPE_TRANSFER_WRITE); 79 if (results) { 80 memset(results, 0, ctx->max_db * 4 * 4); 81 ctx->ws->buffer_unmap(buffer->buf); 82 83 /* emit EVENT_WRITE for ZPASS_DONE */ 84 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); 85 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); 86 cs->buf[cs->cdw++] = 0; 87 cs->buf[cs->cdw++] = 0; 88 89 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 90 cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, buffer, RADEON_USAGE_WRITE); 91 92 /* analyze results */ 93 results = 
ctx->ws->buffer_map(buffer->buf, ctx->cs, PIPE_TRANSFER_READ); 94 if (results) { 95 for(i = 0; i < ctx->max_db; i++) { 96 /* at least highest bit will be set if backend is used */ 97 if (results[i*4 + 1]) 98 mask |= (1<<i); 99 } 100 ctx->ws->buffer_unmap(buffer->buf); 101 } 102 } 103 104 pipe_resource_reference((struct pipe_resource**)&buffer, NULL); 105 106 if (mask != 0) { 107 ctx->backend_mask = mask; 108 return; 109 } 110 111err: 112 /* fallback to old method - set num_backends lower bits to 1 */ 113 ctx->backend_mask = (~((uint32_t)0))>>(32-num_backends); 114 return; 115} 116 117static inline void r600_context_ps_partial_flush(struct r600_context *ctx) 118{ 119 struct radeon_winsys_cs *cs = ctx->cs; 120 121 if (!(ctx->flags & R600_CONTEXT_DRAW_PENDING)) 122 return; 123 124 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); 125 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); 126 127 ctx->flags &= ~R600_CONTEXT_DRAW_PENDING; 128} 129 130void r600_init_cs(struct r600_context *ctx) 131{ 132 struct radeon_winsys_cs *cs = ctx->cs; 133 134 /* R6xx requires this packet at the start of each command buffer */ 135 if (ctx->screen->family < CHIP_RV770) { 136 cs->buf[cs->cdw++] = PKT3(PKT3_START_3D_CMDBUF, 0, 0); 137 cs->buf[cs->cdw++] = 0x00000000; 138 } 139 /* All asics require this one */ 140 cs->buf[cs->cdw++] = PKT3(PKT3_CONTEXT_CONTROL, 1, 0); 141 cs->buf[cs->cdw++] = 0x80000000; 142 cs->buf[cs->cdw++] = 0x80000000; 143 144 ctx->init_dwords = cs->cdw; 145} 146 147static void r600_init_block(struct r600_context *ctx, 148 struct r600_block *block, 149 const struct r600_reg *reg, int index, int nreg, 150 unsigned opcode, unsigned offset_base) 151{ 152 int i = index; 153 int j, n = nreg; 154 155 /* initialize block */ 156 if (opcode == PKT3_SET_RESOURCE) { 157 block->flags = BLOCK_FLAG_RESOURCE; 158 block->status |= R600_BLOCK_STATUS_RESOURCE_DIRTY; /* dirty all blocks at start */ 159 } else { 160 block->flags = 0; 161 block->status |= 
R600_BLOCK_STATUS_DIRTY; /* dirty all blocks at start */ 162 } 163 block->start_offset = reg[i].offset; 164 block->pm4[block->pm4_ndwords++] = PKT3(opcode, n, 0); 165 block->pm4[block->pm4_ndwords++] = (block->start_offset - offset_base) >> 2; 166 block->reg = &block->pm4[block->pm4_ndwords]; 167 block->pm4_ndwords += n; 168 block->nreg = n; 169 block->nreg_dirty = n; 170 LIST_INITHEAD(&block->list); 171 LIST_INITHEAD(&block->enable_list); 172 173 for (j = 0; j < n; j++) { 174 if (reg[i+j].flags & REG_FLAG_DIRTY_ALWAYS) { 175 block->flags |= REG_FLAG_DIRTY_ALWAYS; 176 } 177 if (reg[i+j].flags & REG_FLAG_ENABLE_ALWAYS) { 178 if (!(block->status & R600_BLOCK_STATUS_ENABLED)) { 179 block->status |= R600_BLOCK_STATUS_ENABLED; 180 LIST_ADDTAIL(&block->enable_list, &ctx->enable_list); 181 LIST_ADDTAIL(&block->list,&ctx->dirty); 182 } 183 } 184 if (reg[i+j].flags & REG_FLAG_FLUSH_CHANGE) { 185 block->flags |= REG_FLAG_FLUSH_CHANGE; 186 } 187 188 if (reg[i+j].flags & REG_FLAG_NEED_BO) { 189 block->nbo++; 190 assert(block->nbo < R600_BLOCK_MAX_BO); 191 block->pm4_bo_index[j] = block->nbo; 192 block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0, 0); 193 block->pm4[block->pm4_ndwords++] = 0x00000000; 194 if (reg[i+j].flags & REG_FLAG_RV6XX_SBU) { 195 block->reloc[block->nbo].flush_flags = 0; 196 block->reloc[block->nbo].flush_mask = 0; 197 } else { 198 block->reloc[block->nbo].flush_flags = reg[i+j].flush_flags; 199 block->reloc[block->nbo].flush_mask = reg[i+j].flush_mask; 200 } 201 block->reloc[block->nbo].bo_pm4_index = block->pm4_ndwords - 1; 202 } 203 if ((ctx->screen->family > CHIP_R600) && 204 (ctx->screen->family < CHIP_RV770) && reg[i+j].flags & REG_FLAG_RV6XX_SBU) { 205 block->pm4[block->pm4_ndwords++] = PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0); 206 block->pm4[block->pm4_ndwords++] = reg[i+j].flush_flags; 207 } 208 } 209 for (j = 0; j < n; j++) { 210 if (reg[i+j].flush_flags) { 211 block->pm4_flush_ndwords += 7; 212 } 213 } 214 /* check that we stay in limit */ 215 
assert(block->pm4_ndwords < R600_BLOCK_MAX_REG); 216} 217 218int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg, 219 unsigned opcode, unsigned offset_base) 220{ 221 struct r600_block *block; 222 struct r600_range *range; 223 int offset; 224 225 for (unsigned i = 0, n = 0; i < nreg; i += n) { 226 /* ignore new block balise */ 227 if (reg[i].offset == GROUP_FORCE_NEW_BLOCK) { 228 n = 1; 229 continue; 230 } 231 232 /* ignore regs not on R600 on R600 */ 233 if ((reg[i].flags & REG_FLAG_NOT_R600) && ctx->screen->family == CHIP_R600) { 234 n = 1; 235 continue; 236 } 237 238 /* register that need relocation are in their own group */ 239 /* find number of consecutive registers */ 240 n = 0; 241 offset = reg[i].offset; 242 while (reg[i + n].offset == offset) { 243 n++; 244 offset += 4; 245 if ((n + i) >= nreg) 246 break; 247 if (n >= (R600_BLOCK_MAX_REG - 2)) 248 break; 249 } 250 251 /* allocate new block */ 252 block = calloc(1, sizeof(struct r600_block)); 253 if (block == NULL) { 254 return -ENOMEM; 255 } 256 ctx->nblocks++; 257 for (int j = 0; j < n; j++) { 258 range = &ctx->range[CTX_RANGE_ID(reg[i + j].offset)]; 259 /* create block table if it doesn't exist */ 260 if (!range->blocks) 261 range->blocks = calloc(1 << HASH_SHIFT, sizeof(void *)); 262 if (!range->blocks) 263 return -1; 264 265 range->blocks[CTX_BLOCK_ID(reg[i + j].offset)] = block; 266 } 267 268 r600_init_block(ctx, block, reg, i, n, opcode, offset_base); 269 270 } 271 return 0; 272} 273 274/* R600/R700 configuration */ 275static const struct r600_reg r600_config_reg_list[] = { 276 {R_008958_VGT_PRIMITIVE_TYPE, 0, 0, 0}, 277 {R_008C00_SQ_CONFIG, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, 278 {R_008C04_SQ_GPR_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, 279 {R_008C08_SQ_GPR_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, 280 {R_008C0C_SQ_THREAD_RESOURCE_MGMT, REG_FLAG_ENABLE_ALWAYS | 
REG_FLAG_FLUSH_CHANGE, 0, 0}, 281 {R_008C10_SQ_STACK_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, 282 {R_008C14_SQ_STACK_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, 283 {R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, 284 {R_009508_TA_CNTL_AUX, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, 285 {R_009714_VC_ENHANCE, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, 286 {R_009830_DB_DEBUG, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, 287 {R_009838_DB_WATERMARKS, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, 288}; 289 290static const struct r600_reg r600_ctl_const_list[] = { 291 {R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0, 0}, 292 {R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0, 0}, 293}; 294 295static const struct r600_reg r600_context_reg_list[] = { 296 {R_028350_SX_MISC, 0, 0, 0}, 297 {R_0286C8_SPI_THREAD_GROUPING, 0, 0, 0}, 298 {R_0288A8_SQ_ESGS_RING_ITEMSIZE, 0, 0, 0}, 299 {R_0288AC_SQ_GSVS_RING_ITEMSIZE, 0, 0, 0}, 300 {R_0288B0_SQ_ESTMP_RING_ITEMSIZE, 0, 0, 0}, 301 {R_0288B4_SQ_GSTMP_RING_ITEMSIZE, 0, 0, 0}, 302 {R_0288B8_SQ_VSTMP_RING_ITEMSIZE, 0, 0, 0}, 303 {R_0288BC_SQ_PSTMP_RING_ITEMSIZE, 0, 0, 0}, 304 {R_0288C0_SQ_FBUF_RING_ITEMSIZE, 0, 0, 0}, 305 {R_0288C4_SQ_REDUC_RING_ITEMSIZE, 0, 0, 0}, 306 {R_0288C8_SQ_GS_VERT_ITEMSIZE, 0, 0, 0}, 307 {R_028A10_VGT_OUTPUT_PATH_CNTL, 0, 0, 0}, 308 {R_028A14_VGT_HOS_CNTL, 0, 0, 0}, 309 {R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0, 0, 0}, 310 {R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0, 0, 0}, 311 {R_028A20_VGT_HOS_REUSE_DEPTH, 0, 0, 0}, 312 {R_028A24_VGT_GROUP_PRIM_TYPE, 0, 0, 0}, 313 {R_028A28_VGT_GROUP_FIRST_DECR, 0, 0, 0}, 314 {R_028A2C_VGT_GROUP_DECR, 0, 0, 0}, 315 {R_028A30_VGT_GROUP_VECT_0_CNTL, 0, 0, 0}, 316 {R_028A34_VGT_GROUP_VECT_1_CNTL, 0, 0, 0}, 317 {R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0, 0, 0}, 318 {R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0, 0, 0}, 319 {R_028A40_VGT_GS_MODE, 0, 0, 0}, 320 {R_028A4C_PA_SC_MODE_CNTL, 
0, 0, 0}, 321 {R_028AB0_VGT_STRMOUT_EN, 0, 0, 0}, 322 {R_028AB4_VGT_REUSE_OFF, 0, 0, 0}, 323 {R_028AB8_VGT_VTX_CNT_EN, 0, 0, 0}, 324 {R_028B20_VGT_STRMOUT_BUFFER_EN, 0, 0, 0}, 325 {R_028028_DB_STENCIL_CLEAR, 0, 0, 0}, 326 {R_02802C_DB_DEPTH_CLEAR, 0, 0, 0}, 327 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 328 {R_028040_CB_COLOR0_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_COLOR(0), 0}, 329 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 330 {R_0280A0_CB_COLOR0_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, 331 {R_028060_CB_COLOR0_SIZE, 0, 0, 0}, 332 {R_028080_CB_COLOR0_VIEW, 0, 0, 0}, 333 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 334 {R_0280E0_CB_COLOR0_FRAG, REG_FLAG_NEED_BO, 0, 0}, 335 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 336 {R_0280C0_CB_COLOR0_TILE, REG_FLAG_NEED_BO, 0, 0}, 337 {R_028100_CB_COLOR0_MASK, 0, 0, 0}, 338 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 339 {R_028044_CB_COLOR1_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_COLOR(1), 0}, 340 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 341 {R_0280A4_CB_COLOR1_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, 342 {R_028064_CB_COLOR1_SIZE, 0, 0, 0}, 343 {R_028084_CB_COLOR1_VIEW, 0, 0, 0}, 344 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 345 {R_0280E4_CB_COLOR1_FRAG, REG_FLAG_NEED_BO, 0, 0}, 346 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 347 {R_0280C4_CB_COLOR1_TILE, REG_FLAG_NEED_BO, 0, 0}, 348 {R_028104_CB_COLOR1_MASK, 0, 0, 0}, 349 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 350 {R_028048_CB_COLOR2_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_COLOR(2), 0}, 351 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 352 {R_0280A8_CB_COLOR2_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, 353 {R_028068_CB_COLOR2_SIZE, 0, 0, 0}, 354 {R_028088_CB_COLOR2_VIEW, 0, 0, 0}, 355 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 356 {R_0280E8_CB_COLOR2_FRAG, REG_FLAG_NEED_BO, 0, 0}, 357 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 358 {R_0280C8_CB_COLOR2_TILE, REG_FLAG_NEED_BO, 0, 0}, 359 {R_028108_CB_COLOR2_MASK, 0, 0, 0}, 360 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 361 {R_02804C_CB_COLOR3_BASE, 
REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_COLOR(3), 0}, 362 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 363 {R_0280AC_CB_COLOR3_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, 364 {R_02806C_CB_COLOR3_SIZE, 0, 0, 0}, 365 {R_02808C_CB_COLOR3_VIEW, 0, 0, 0}, 366 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 367 {R_0280EC_CB_COLOR3_FRAG, REG_FLAG_NEED_BO, 0, 0}, 368 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 369 {R_0280CC_CB_COLOR3_TILE, REG_FLAG_NEED_BO, 0, 0}, 370 {R_02810C_CB_COLOR3_MASK, 0, 0, 0}, 371 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 372 {R_028050_CB_COLOR4_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_COLOR(4), 0}, 373 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 374 {R_0280B0_CB_COLOR4_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, 375 {R_028070_CB_COLOR4_SIZE, 0, 0, 0}, 376 {R_028090_CB_COLOR4_VIEW, 0, 0, 0}, 377 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 378 {R_0280F0_CB_COLOR4_FRAG, REG_FLAG_NEED_BO, 0, 0}, 379 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 380 {R_0280D0_CB_COLOR4_TILE, REG_FLAG_NEED_BO, 0, 0}, 381 {R_028110_CB_COLOR4_MASK, 0, 0, 0}, 382 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 383 {R_028054_CB_COLOR5_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_COLOR(5), 0}, 384 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 385 {R_0280B4_CB_COLOR5_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, 386 {R_028074_CB_COLOR5_SIZE, 0, 0, 0}, 387 {R_028094_CB_COLOR5_VIEW, 0, 0, 0}, 388 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 389 {R_0280F4_CB_COLOR5_FRAG, REG_FLAG_NEED_BO, 0, 0}, 390 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 391 {R_0280D4_CB_COLOR5_TILE, REG_FLAG_NEED_BO, 0, 0}, 392 {R_028114_CB_COLOR5_MASK, 0, 0, 0}, 393 {R_028058_CB_COLOR6_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_COLOR(6), 0}, 394 {R_0280B8_CB_COLOR6_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, 395 {R_028078_CB_COLOR6_SIZE, 0, 0, 0}, 396 {R_028098_CB_COLOR6_VIEW, 0, 0, 0}, 397 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 398 {R_0280F8_CB_COLOR6_FRAG, REG_FLAG_NEED_BO, 0, 0}, 399 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 400 {R_0280D8_CB_COLOR6_TILE, REG_FLAG_NEED_BO, 0, 
0}, 401 {R_028118_CB_COLOR6_MASK, 0, 0, 0}, 402 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 403 {R_02805C_CB_COLOR7_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_COLOR(7), 0}, 404 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 405 {R_0280BC_CB_COLOR7_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, 406 {R_02807C_CB_COLOR7_SIZE, 0, 0, 0}, 407 {R_02809C_CB_COLOR7_VIEW, 0, 0, 0}, 408 {R_0280FC_CB_COLOR7_FRAG, REG_FLAG_NEED_BO, 0, 0}, 409 {R_0280DC_CB_COLOR7_TILE, REG_FLAG_NEED_BO, 0, 0}, 410 {R_02811C_CB_COLOR7_MASK, 0, 0, 0}, 411 {R_028120_CB_CLEAR_RED, 0, 0, 0}, 412 {R_028124_CB_CLEAR_GREEN, 0, 0, 0}, 413 {R_028128_CB_CLEAR_BLUE, 0, 0, 0}, 414 {R_02812C_CB_CLEAR_ALPHA, 0, 0, 0}, 415 {R_028140_ALU_CONST_BUFFER_SIZE_PS_0, REG_FLAG_DIRTY_ALWAYS, 0, 0}, 416 {R_028144_ALU_CONST_BUFFER_SIZE_PS_1, REG_FLAG_DIRTY_ALWAYS, 0, 0}, 417 {R_028180_ALU_CONST_BUFFER_SIZE_VS_0, REG_FLAG_DIRTY_ALWAYS, 0, 0}, 418 {R_028184_ALU_CONST_BUFFER_SIZE_VS_1, REG_FLAG_DIRTY_ALWAYS, 0, 0}, 419 {R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, 0, 0}, 420 {R_028944_ALU_CONST_CACHE_PS_1, REG_FLAG_NEED_BO, 0, 0}, 421 {R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, 0, 0}, 422 {R_028984_ALU_CONST_CACHE_VS_1, REG_FLAG_NEED_BO, 0, 0}, 423 {R_02823C_CB_SHADER_MASK, 0, 0, 0}, 424 {R_028238_CB_TARGET_MASK, 0, 0, 0}, 425 {R_028410_SX_ALPHA_TEST_CONTROL, 0, 0, 0}, 426 {R_028414_CB_BLEND_RED, 0, 0, 0}, 427 {R_028418_CB_BLEND_GREEN, 0, 0, 0}, 428 {R_02841C_CB_BLEND_BLUE, 0, 0, 0}, 429 {R_028420_CB_BLEND_ALPHA, 0, 0, 0}, 430 {R_028424_CB_FOG_RED, 0, 0, 0}, 431 {R_028428_CB_FOG_GREEN, 0, 0, 0}, 432 {R_02842C_CB_FOG_BLUE, 0, 0, 0}, 433 {R_028430_DB_STENCILREFMASK, 0, 0, 0}, 434 {R_028434_DB_STENCILREFMASK_BF, 0, 0, 0}, 435 {R_028438_SX_ALPHA_REF, 0, 0, 0}, 436 {R_0286DC_SPI_FOG_CNTL, 0, 0, 0}, 437 {R_0286E0_SPI_FOG_FUNC_SCALE, 0, 0, 0}, 438 {R_0286E4_SPI_FOG_FUNC_BIAS, 0, 0, 0}, 439 {R_028780_CB_BLEND0_CONTROL, REG_FLAG_NOT_R600, 0, 0}, 440 {R_028784_CB_BLEND1_CONTROL, REG_FLAG_NOT_R600, 0, 0}, 441 
{R_028788_CB_BLEND2_CONTROL, REG_FLAG_NOT_R600, 0, 0}, 442 {R_02878C_CB_BLEND3_CONTROL, REG_FLAG_NOT_R600, 0, 0}, 443 {R_028790_CB_BLEND4_CONTROL, REG_FLAG_NOT_R600, 0, 0}, 444 {R_028794_CB_BLEND5_CONTROL, REG_FLAG_NOT_R600, 0, 0}, 445 {R_028798_CB_BLEND6_CONTROL, REG_FLAG_NOT_R600, 0, 0}, 446 {R_02879C_CB_BLEND7_CONTROL, REG_FLAG_NOT_R600, 0, 0}, 447 {R_0287A0_CB_SHADER_CONTROL, 0, 0, 0}, 448 {R_028800_DB_DEPTH_CONTROL, 0, 0, 0}, 449 {R_028804_CB_BLEND_CONTROL, 0, 0, 0}, 450 {R_028808_CB_COLOR_CONTROL, 0, 0, 0}, 451 {R_02880C_DB_SHADER_CONTROL, 0, 0, 0}, 452 {R_028C04_PA_SC_AA_CONFIG, 0, 0, 0}, 453 {R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 0, 0, 0}, 454 {R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX, 0, 0, 0}, 455 {R_028C30_CB_CLRCMP_CONTROL, 0, 0, 0}, 456 {R_028C34_CB_CLRCMP_SRC, 0, 0, 0}, 457 {R_028C38_CB_CLRCMP_DST, 0, 0, 0}, 458 {R_028C3C_CB_CLRCMP_MSK, 0, 0, 0}, 459 {R_028C48_PA_SC_AA_MASK, 0, 0, 0}, 460 {R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0, 0, 0}, 461 {R_028D44_DB_ALPHA_TO_MASK, 0, 0, 0}, 462 {R_02800C_DB_DEPTH_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_DEPTH, 0}, 463 {R_028000_DB_DEPTH_SIZE, 0, 0, 0}, 464 {R_028004_DB_DEPTH_VIEW, 0, 0, 0}, 465 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 466 {R_028010_DB_DEPTH_INFO, REG_FLAG_NEED_BO, 0, 0}, 467 {R_028D0C_DB_RENDER_CONTROL, 0, 0, 0}, 468 {R_028D10_DB_RENDER_OVERRIDE, 0, 0, 0}, 469 {R_028D24_DB_HTILE_SURFACE, 0, 0, 0}, 470 {R_028D30_DB_PRELOAD_CONTROL, 0, 0, 0}, 471 {R_028D34_DB_PREFETCH_LIMIT, 0, 0, 0}, 472 {R_028030_PA_SC_SCREEN_SCISSOR_TL, 0, 0, 0}, 473 {R_028034_PA_SC_SCREEN_SCISSOR_BR, 0, 0, 0}, 474 {R_028200_PA_SC_WINDOW_OFFSET, 0, 0, 0}, 475 {R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0, 0}, 476 {R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0, 0}, 477 {R_02820C_PA_SC_CLIPRECT_RULE, 0, 0, 0}, 478 {R_028210_PA_SC_CLIPRECT_0_TL, 0, 0, 0}, 479 {R_028214_PA_SC_CLIPRECT_0_BR, 0, 0, 0}, 480 {R_028218_PA_SC_CLIPRECT_1_TL, 0, 0, 0}, 481 {R_02821C_PA_SC_CLIPRECT_1_BR, 0, 0, 0}, 482 {R_028220_PA_SC_CLIPRECT_2_TL, 0, 0, 0}, 
483 {R_028224_PA_SC_CLIPRECT_2_BR, 0, 0, 0}, 484 {R_028228_PA_SC_CLIPRECT_3_TL, 0, 0, 0}, 485 {R_02822C_PA_SC_CLIPRECT_3_BR, 0, 0, 0}, 486 {R_028230_PA_SC_EDGERULE, 0, 0, 0}, 487 {R_028240_PA_SC_GENERIC_SCISSOR_TL, 0, 0, 0}, 488 {R_028244_PA_SC_GENERIC_SCISSOR_BR, 0, 0, 0}, 489 {R_028250_PA_SC_VPORT_SCISSOR_0_TL, 0, 0, 0}, 490 {R_028254_PA_SC_VPORT_SCISSOR_0_BR, 0, 0, 0}, 491 {R_0282D0_PA_SC_VPORT_ZMIN_0, 0, 0, 0}, 492 {R_0282D4_PA_SC_VPORT_ZMAX_0, 0, 0, 0}, 493 {R_02843C_PA_CL_VPORT_XSCALE_0, 0, 0, 0}, 494 {R_028440_PA_CL_VPORT_XOFFSET_0, 0, 0, 0}, 495 {R_028444_PA_CL_VPORT_YSCALE_0, 0, 0, 0}, 496 {R_028448_PA_CL_VPORT_YOFFSET_0, 0, 0, 0}, 497 {R_02844C_PA_CL_VPORT_ZSCALE_0, 0, 0, 0}, 498 {R_028450_PA_CL_VPORT_ZOFFSET_0, 0, 0, 0}, 499 {R_0286D4_SPI_INTERP_CONTROL_0, 0, 0, 0}, 500 {R_028810_PA_CL_CLIP_CNTL, 0, 0, 0}, 501 {R_028814_PA_SU_SC_MODE_CNTL, 0, 0, 0}, 502 {R_028818_PA_CL_VTE_CNTL, 0, 0, 0}, 503 {R_02881C_PA_CL_VS_OUT_CNTL, 0, 0, 0}, 504 {R_028820_PA_CL_NANINF_CNTL, 0, 0, 0}, 505 {R_028A00_PA_SU_POINT_SIZE, 0, 0, 0}, 506 {R_028A04_PA_SU_POINT_MINMAX, 0, 0, 0}, 507 {R_028A08_PA_SU_LINE_CNTL, 0, 0, 0}, 508 {R_028A0C_PA_SC_LINE_STIPPLE, 0, 0, 0}, 509 {R_028A48_PA_SC_MPASS_PS_CNTL, 0, 0, 0}, 510 {R_028C00_PA_SC_LINE_CNTL, 0, 0, 0}, 511 {R_028C08_PA_SU_VTX_CNTL, 0, 0, 0}, 512 {R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0, 0, 0}, 513 {R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0, 0, 0}, 514 {R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0, 0, 0}, 515 {R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0, 0, 0}, 516 {R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 0, 0, 0}, 517 {R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0, 0, 0}, 518 {R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, 0, 0, 0}, 519 {R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET, 0, 0, 0}, 520 {R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE, 0, 0, 0}, 521 {R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET, 0, 0, 0}, 522 {R_028E20_PA_CL_UCP0_X, 0, 0, 0}, 523 {R_028E24_PA_CL_UCP0_Y, 0, 0, 0}, 524 {R_028E28_PA_CL_UCP0_Z, 0, 0, 0}, 525 {R_028E2C_PA_CL_UCP0_W, 0, 0, 0}, 526 {R_028E30_PA_CL_UCP1_X, 0, 0, 
0}, 527 {R_028E34_PA_CL_UCP1_Y, 0, 0, 0}, 528 {R_028E38_PA_CL_UCP1_Z, 0, 0, 0}, 529 {R_028E3C_PA_CL_UCP1_W, 0, 0, 0}, 530 {R_028E40_PA_CL_UCP2_X, 0, 0, 0}, 531 {R_028E44_PA_CL_UCP2_Y, 0, 0, 0}, 532 {R_028E48_PA_CL_UCP2_Z, 0, 0, 0}, 533 {R_028E4C_PA_CL_UCP2_W, 0, 0, 0}, 534 {R_028E50_PA_CL_UCP3_X, 0, 0, 0}, 535 {R_028E54_PA_CL_UCP3_Y, 0, 0, 0}, 536 {R_028E58_PA_CL_UCP3_Z, 0, 0, 0}, 537 {R_028E5C_PA_CL_UCP3_W, 0, 0, 0}, 538 {R_028E60_PA_CL_UCP4_X, 0, 0, 0}, 539 {R_028E64_PA_CL_UCP4_Y, 0, 0, 0}, 540 {R_028E68_PA_CL_UCP4_Z, 0, 0, 0}, 541 {R_028E6C_PA_CL_UCP4_W, 0, 0, 0}, 542 {R_028E70_PA_CL_UCP5_X, 0, 0, 0}, 543 {R_028E74_PA_CL_UCP5_Y, 0, 0, 0}, 544 {R_028E78_PA_CL_UCP5_Z, 0, 0, 0}, 545 {R_028E7C_PA_CL_UCP5_W, 0, 0, 0}, 546 {R_028380_SQ_VTX_SEMANTIC_0, 0, 0, 0}, 547 {R_028384_SQ_VTX_SEMANTIC_1, 0, 0, 0}, 548 {R_028388_SQ_VTX_SEMANTIC_2, 0, 0, 0}, 549 {R_02838C_SQ_VTX_SEMANTIC_3, 0, 0, 0}, 550 {R_028390_SQ_VTX_SEMANTIC_4, 0, 0, 0}, 551 {R_028394_SQ_VTX_SEMANTIC_5, 0, 0, 0}, 552 {R_028398_SQ_VTX_SEMANTIC_6, 0, 0, 0}, 553 {R_02839C_SQ_VTX_SEMANTIC_7, 0, 0, 0}, 554 {R_0283A0_SQ_VTX_SEMANTIC_8, 0, 0, 0}, 555 {R_0283A4_SQ_VTX_SEMANTIC_9, 0, 0, 0}, 556 {R_0283A8_SQ_VTX_SEMANTIC_10, 0, 0, 0}, 557 {R_0283AC_SQ_VTX_SEMANTIC_11, 0, 0, 0}, 558 {R_0283B0_SQ_VTX_SEMANTIC_12, 0, 0, 0}, 559 {R_0283B4_SQ_VTX_SEMANTIC_13, 0, 0, 0}, 560 {R_0283B8_SQ_VTX_SEMANTIC_14, 0, 0, 0}, 561 {R_0283BC_SQ_VTX_SEMANTIC_15, 0, 0, 0}, 562 {R_0283C0_SQ_VTX_SEMANTIC_16, 0, 0, 0}, 563 {R_0283C4_SQ_VTX_SEMANTIC_17, 0, 0, 0}, 564 {R_0283C8_SQ_VTX_SEMANTIC_18, 0, 0, 0}, 565 {R_0283CC_SQ_VTX_SEMANTIC_19, 0, 0, 0}, 566 {R_0283D0_SQ_VTX_SEMANTIC_20, 0, 0, 0}, 567 {R_0283D4_SQ_VTX_SEMANTIC_21, 0, 0, 0}, 568 {R_0283D8_SQ_VTX_SEMANTIC_22, 0, 0, 0}, 569 {R_0283DC_SQ_VTX_SEMANTIC_23, 0, 0, 0}, 570 {R_0283E0_SQ_VTX_SEMANTIC_24, 0, 0, 0}, 571 {R_0283E4_SQ_VTX_SEMANTIC_25, 0, 0, 0}, 572 {R_0283E8_SQ_VTX_SEMANTIC_26, 0, 0, 0}, 573 {R_0283EC_SQ_VTX_SEMANTIC_27, 0, 0, 0}, 574 {R_0283F0_SQ_VTX_SEMANTIC_28, 0, 0, 0}, 575 
{R_0283F4_SQ_VTX_SEMANTIC_29, 0, 0, 0}, 576 {R_0283F8_SQ_VTX_SEMANTIC_30, 0, 0, 0}, 577 {R_0283FC_SQ_VTX_SEMANTIC_31, 0, 0, 0}, 578 {R_028614_SPI_VS_OUT_ID_0, 0, 0, 0}, 579 {R_028618_SPI_VS_OUT_ID_1, 0, 0, 0}, 580 {R_02861C_SPI_VS_OUT_ID_2, 0, 0, 0}, 581 {R_028620_SPI_VS_OUT_ID_3, 0, 0, 0}, 582 {R_028624_SPI_VS_OUT_ID_4, 0, 0, 0}, 583 {R_028628_SPI_VS_OUT_ID_5, 0, 0, 0}, 584 {R_02862C_SPI_VS_OUT_ID_6, 0, 0, 0}, 585 {R_028630_SPI_VS_OUT_ID_7, 0, 0, 0}, 586 {R_028634_SPI_VS_OUT_ID_8, 0, 0, 0}, 587 {R_028638_SPI_VS_OUT_ID_9, 0, 0, 0}, 588 {R_0286C4_SPI_VS_OUT_CONFIG, 0, 0, 0}, 589 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 590 {R_028858_SQ_PGM_START_VS, REG_FLAG_NEED_BO, 0, 0}, 591 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 592 {R_028868_SQ_PGM_RESOURCES_VS, 0, 0, 0}, 593 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 594 {R_028894_SQ_PGM_START_FS, REG_FLAG_NEED_BO, 0, 0}, 595 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 596 {R_0288A4_SQ_PGM_RESOURCES_FS, 0, 0, 0}, 597 {R_0288D0_SQ_PGM_CF_OFFSET_VS, 0, 0, 0}, 598 {R_0288DC_SQ_PGM_CF_OFFSET_FS, 0, 0, 0}, 599 {R_028644_SPI_PS_INPUT_CNTL_0, 0, 0, 0}, 600 {R_028648_SPI_PS_INPUT_CNTL_1, 0, 0, 0}, 601 {R_02864C_SPI_PS_INPUT_CNTL_2, 0, 0, 0}, 602 {R_028650_SPI_PS_INPUT_CNTL_3, 0, 0, 0}, 603 {R_028654_SPI_PS_INPUT_CNTL_4, 0, 0, 0}, 604 {R_028658_SPI_PS_INPUT_CNTL_5, 0, 0, 0}, 605 {R_02865C_SPI_PS_INPUT_CNTL_6, 0, 0, 0}, 606 {R_028660_SPI_PS_INPUT_CNTL_7, 0, 0, 0}, 607 {R_028664_SPI_PS_INPUT_CNTL_8, 0, 0, 0}, 608 {R_028668_SPI_PS_INPUT_CNTL_9, 0, 0, 0}, 609 {R_02866C_SPI_PS_INPUT_CNTL_10, 0, 0, 0}, 610 {R_028670_SPI_PS_INPUT_CNTL_11, 0, 0, 0}, 611 {R_028674_SPI_PS_INPUT_CNTL_12, 0, 0, 0}, 612 {R_028678_SPI_PS_INPUT_CNTL_13, 0, 0, 0}, 613 {R_02867C_SPI_PS_INPUT_CNTL_14, 0, 0, 0}, 614 {R_028680_SPI_PS_INPUT_CNTL_15, 0, 0, 0}, 615 {R_028684_SPI_PS_INPUT_CNTL_16, 0, 0, 0}, 616 {R_028688_SPI_PS_INPUT_CNTL_17, 0, 0, 0}, 617 {R_02868C_SPI_PS_INPUT_CNTL_18, 0, 0, 0}, 618 {R_028690_SPI_PS_INPUT_CNTL_19, 0, 0, 0}, 619 {R_028694_SPI_PS_INPUT_CNTL_20, 0, 0, 0}, 620 
{R_028698_SPI_PS_INPUT_CNTL_21, 0, 0, 0}, 621 {R_02869C_SPI_PS_INPUT_CNTL_22, 0, 0, 0}, 622 {R_0286A0_SPI_PS_INPUT_CNTL_23, 0, 0, 0}, 623 {R_0286A4_SPI_PS_INPUT_CNTL_24, 0, 0, 0}, 624 {R_0286A8_SPI_PS_INPUT_CNTL_25, 0, 0, 0}, 625 {R_0286AC_SPI_PS_INPUT_CNTL_26, 0, 0, 0}, 626 {R_0286B0_SPI_PS_INPUT_CNTL_27, 0, 0, 0}, 627 {R_0286B4_SPI_PS_INPUT_CNTL_28, 0, 0, 0}, 628 {R_0286B8_SPI_PS_INPUT_CNTL_29, 0, 0, 0}, 629 {R_0286BC_SPI_PS_INPUT_CNTL_30, 0, 0, 0}, 630 {R_0286C0_SPI_PS_INPUT_CNTL_31, 0, 0, 0}, 631 {R_0286CC_SPI_PS_IN_CONTROL_0, 0, 0, 0}, 632 {R_0286D0_SPI_PS_IN_CONTROL_1, 0, 0, 0}, 633 {R_0286D8_SPI_INPUT_Z, 0, 0, 0}, 634 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 635 {R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, 0, 0}, 636 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 637 {R_028850_SQ_PGM_RESOURCES_PS, 0, 0, 0}, 638 {R_028854_SQ_PGM_EXPORTS_PS, 0, 0, 0}, 639 {R_0288CC_SQ_PGM_CF_OFFSET_PS, 0, 0, 0}, 640 {R_028400_VGT_MAX_VTX_INDX, 0, 0, 0}, 641 {R_028404_VGT_MIN_VTX_INDX, 0, 0, 0}, 642 {R_028408_VGT_INDX_OFFSET, 0, 0, 0}, 643 {R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0, 0, 0}, 644 {R_028A84_VGT_PRIMITIVEID_EN, 0, 0, 0}, 645 {R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0, 0, 0}, 646 {R_028AA0_VGT_INSTANCE_STEP_RATE_0, 0, 0, 0}, 647 {R_028AA4_VGT_INSTANCE_STEP_RATE_1, 0, 0, 0}, 648}; 649 650/* SHADER RESOURCE R600/R700 */ 651int r600_resource_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride, struct r600_reg *reg, int nreg, unsigned offset_base) 652{ 653 int i; 654 struct r600_block *block; 655 range->blocks = calloc(nblocks, sizeof(struct r600_block *)); 656 if (range->blocks == NULL) 657 return -ENOMEM; 658 659 reg[0].offset += offset; 660 for (i = 0; i < nblocks; i++) { 661 block = calloc(1, sizeof(struct r600_block)); 662 if (block == NULL) { 663 return -ENOMEM; 664 } 665 ctx->nblocks++; 666 range->blocks[i] = block; 667 r600_init_block(ctx, block, reg, 0, nreg, PKT3_SET_RESOURCE, offset_base); 668 669 reg[0].offset += stride; 670 } 
671 return 0; 672} 673 674 675static int r600_resource_range_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride) 676{ 677 struct r600_reg r600_shader_resource[] = { 678 {R_038000_RESOURCE0_WORD0, REG_FLAG_NEED_BO, 0, 0}, 679 {R_038004_RESOURCE0_WORD1, REG_FLAG_NEED_BO, 0, 0}, 680 {R_038008_RESOURCE0_WORD2, 0, 0, 0}, 681 {R_03800C_RESOURCE0_WORD3, 0, 0, 0}, 682 {R_038010_RESOURCE0_WORD4, 0, 0, 0}, 683 {R_038014_RESOURCE0_WORD5, 0, 0, 0}, 684 {R_038018_RESOURCE0_WORD6, 0, 0, 0}, 685 }; 686 unsigned nreg = Elements(r600_shader_resource); 687 688 return r600_resource_init(ctx, range, offset, nblocks, stride, r600_shader_resource, nreg, R600_RESOURCE_OFFSET); 689} 690 691/* SHADER SAMPLER R600/R700 */ 692static int r600_state_sampler_init(struct r600_context *ctx, uint32_t offset) 693{ 694 struct r600_reg r600_shader_sampler[] = { 695 {R_03C000_SQ_TEX_SAMPLER_WORD0_0, 0, 0, 0}, 696 {R_03C004_SQ_TEX_SAMPLER_WORD1_0, 0, 0, 0}, 697 {R_03C008_SQ_TEX_SAMPLER_WORD2_0, 0, 0, 0}, 698 }; 699 unsigned nreg = Elements(r600_shader_sampler); 700 701 for (int i = 0; i < nreg; i++) { 702 r600_shader_sampler[i].offset += offset; 703 } 704 return r600_context_add_block(ctx, r600_shader_sampler, nreg, PKT3_SET_SAMPLER, R600_SAMPLER_OFFSET); 705} 706 707/* SHADER SAMPLER BORDER R600/R700 */ 708static int r600_state_sampler_border_init(struct r600_context *ctx, uint32_t offset) 709{ 710 struct r600_reg r600_shader_sampler_border[] = { 711 {R_00A400_TD_PS_SAMPLER0_BORDER_RED, 0, 0, 0}, 712 {R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, 0, 0, 0}, 713 {R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, 0, 0, 0}, 714 {R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, 0, 0, 0}, 715 }; 716 unsigned nreg = Elements(r600_shader_sampler_border); 717 718 for (int i = 0; i < nreg; i++) { 719 r600_shader_sampler_border[i].offset += offset; 720 } 721 return r600_context_add_block(ctx, r600_shader_sampler_border, nreg, PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET); 722} 723 
/* Register both banks of 32 loop-const registers starting at @offset. */
static int r600_loop_const_init(struct r600_context *ctx, uint32_t offset)
{
	unsigned nreg = 32;
	struct r600_reg r600_loop_consts[32];
	int i;

	for (i = 0; i < nreg; i++) {
		r600_loop_consts[i].offset = R600_LOOP_CONST_OFFSET + ((offset + i) * 4);
		r600_loop_consts[i].flags = REG_FLAG_DIRTY_ALWAYS;
		r600_loop_consts[i].flush_flags = 0;
		r600_loop_consts[i].flush_mask = 0;
	}
	return r600_context_add_block(ctx, r600_loop_consts, nreg, PKT3_SET_LOOP_CONST, R600_LOOP_CONST_OFFSET);
}

/* Release all blocks of a shader-resource range, dropping the BO references
 * held in reloc slots 1..nbo, then free the block table itself. */
static void r600_free_resource_range(struct r600_context *ctx, struct r600_range *range, int nblocks)
{
	struct r600_block *block;
	int i;
	for (i = 0; i < nblocks; i++) {
		block = range->blocks[i];
		if (block) {
			/* reloc slots are 1-based; slot 0 is unused */
			for (int k = 1; k <= block->nbo; k++)
				pipe_resource_reference((struct pipe_resource**)&block->reloc[k].bo, NULL);
			free(block);
		}
	}
	free(range->blocks);

}

/* Tear down everything r600_context_init() built: every register block in
 * every range (a block may be hashed at several offsets, so its slots are
 * cleared before it is freed), the resource ranges, and the CS. */
void r600_context_fini(struct r600_context *ctx)
{
	struct r600_block *block;
	struct r600_range *range;

	for (int i = 0; i < NUM_RANGES; i++) {
		if (!ctx->range[i].blocks)
			continue;
		for (int j = 0; j < (1 << HASH_SHIFT); j++) {
			block = ctx->range[i].blocks[j];
			if (block) {
				/* null out every hash slot this block occupies so it
				 * is not visited (and freed) again */
				for (int k = 0, offset = block->start_offset; k < block->nreg; k++, offset += 4) {
					range = &ctx->range[CTX_RANGE_ID(offset)];
					range->blocks[CTX_BLOCK_ID(offset)] = NULL;
				}
				for (int k = 1; k <= block->nbo; k++) {
					pipe_resource_reference((struct pipe_resource**)&block->reloc[k].bo, NULL);
				}
				free(block);
			}
		}
		free(ctx->range[i].blocks);
	}
	r600_free_resource_range(ctx, &ctx->ps_resources, ctx->num_ps_resources);
	r600_free_resource_range(ctx, &ctx->vs_resources, ctx->num_vs_resources);
	r600_free_resource_range(ctx, &ctx->fs_resources, ctx->num_fs_resources);
	free(ctx->range);
	free(ctx->blocks);
	free(ctx->bo);
	ctx->ws->cs_destroy(ctx->cs);
}

/* Append every non-NULL block of @range to ctx->blocks, advancing *index. */
static void r600_add_resource_block(struct r600_context *ctx, struct r600_range *range, int num_blocks, int *index)
{
	int c = *index;
	for (int j = 0; j < num_blocks; j++) {
		if (!range->blocks[j])
			continue;

		ctx->blocks[c++] = range->blocks[j];
	}
	*index = c;
}

/* Build the flat ctx->blocks array: each distinct block from the hashed
 * ranges once (a block spanning nreg offsets occupies nreg consecutive
 * slots, hence the j skip), then all resource blocks.
 * Returns 0 on success or -ENOMEM. */
int r600_setup_block_table(struct r600_context *ctx)
{
	/* setup block table */
	int c = 0;
	ctx->blocks = calloc(ctx->nblocks, sizeof(void*));
	if (!ctx->blocks)
		return -ENOMEM;
	for (int i = 0; i < NUM_RANGES; i++) {
		if (!ctx->range[i].blocks)
			continue;
		for (int j = 0, add; j < (1 << HASH_SHIFT); j++) {
			if (!ctx->range[i].blocks[j])
				continue;

			/* skip blocks already collected (multi-offset blocks) */
			add = 1;
			for (int k = 0; k < c; k++) {
				if (ctx->blocks[k] == ctx->range[i].blocks[j]) {
					add = 0;
					break;
				}
			}
			if (add) {
				assert(c < ctx->nblocks);
				ctx->blocks[c++] = ctx->range[i].blocks[j];
				j += (ctx->range[i].blocks[j]->nreg) - 1;
			}
		}
	}

	r600_add_resource_block(ctx, &ctx->ps_resources, ctx->num_ps_resources, &c);
	r600_add_resource_block(ctx, &ctx->vs_resources, ctx->num_vs_resources, &c);
	r600_add_resource_block(ctx, &ctx->fs_resources, ctx->num_fs_resources, &c);
	return 0;
}

/* Build all register blocks, sampler/resource/loop-const state, the block
 * table and the command stream for a new context.
 * Returns 0 on success or a negative errno; cleans up after itself on error. */
int r600_context_init(struct r600_context *ctx)
{
	int r;

	LIST_INITHEAD(&ctx->active_query_list);

	/* init dirty list */
	LIST_INITHEAD(&ctx->dirty);
	LIST_INITHEAD(&ctx->resource_dirty);
	LIST_INITHEAD(&ctx->enable_list);

	ctx->range = calloc(NUM_RANGES, sizeof(struct r600_range));
	if (!ctx->range) {
		r = -ENOMEM;
		goto out_err;
	}

	/* add blocks */
	r = r600_context_add_block(ctx, r600_config_reg_list,
				   Elements(r600_config_reg_list), PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET);
	if (r)
		goto out_err;
	r = r600_context_add_block(ctx, r600_context_reg_list,
				   Elements(r600_context_reg_list), PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET);
	if (r)
		goto out_err;
	r = r600_context_add_block(ctx, r600_ctl_const_list,
				   Elements(r600_ctl_const_list), PKT3_SET_CTL_CONST, R600_CTL_CONST_OFFSET);
	if (r)
		goto out_err;

	/* PS SAMPLER BORDER */
	for (int j = 0, offset = 0; j < 18; j++, offset += 0x10) {
		r = r600_state_sampler_border_init(ctx, offset);
		if (r)
			goto out_err;
	}

	/* VS SAMPLER BORDER */
	for (int j = 0, offset = 0x200; j < 18; j++, offset += 0x10) {
		r = r600_state_sampler_border_init(ctx, offset);
		if (r)
			goto out_err;
	}
	/* PS SAMPLER */
	for (int j = 0, offset = 0; j < 18; j++, offset += 0xC) {
		r = r600_state_sampler_init(ctx, offset);
		if (r)
			goto out_err;
	}
	/* VS SAMPLER */
	for (int j = 0, offset = 0xD8; j < 18; j++, offset += 0xC) {
		r = r600_state_sampler_init(ctx, offset);
		if (r)
			goto out_err;
	}

	ctx->num_ps_resources = 160;
	ctx->num_vs_resources = 160;
	ctx->num_fs_resources = 16;
	r = r600_resource_range_init(ctx, &ctx->ps_resources, 0, 160, 0x1c);
	if (r)
		goto out_err;
	r = r600_resource_range_init(ctx, &ctx->vs_resources, 0x1180, 160, 0x1c);
	if (r)
		goto out_err;
	r = r600_resource_range_init(ctx, &ctx->fs_resources, 0x2300, 16, 0x1c);
	if (r)
		goto out_err;

	/* NOTE(review): the two calls below and cs_create() ignore failure,
	 * unlike every other init step here — consider propagating errors. */
	/* PS loop const */
	r600_loop_const_init(ctx, 0);
	/* VS loop const */
	r600_loop_const_init(ctx, 32);

	r = r600_setup_block_table(ctx);
	if (r)
		goto out_err;

	ctx->cs = ctx->ws->cs_create(ctx->ws);

	/* allocate cs variables */
	ctx->bo = calloc(RADEON_MAX_CMDBUF_DWORDS, sizeof(void *));
	if (ctx->bo == NULL) {
		r = -ENOMEM;
		goto out_err;
	}

	r600_init_cs(ctx);
	ctx->max_db = 4;
	return 0;
out_err:
	r600_context_fini(ctx);
	return r;
}

/* Make sure at least @num_dw more dwords fit in the current CS, counting all
 * end-of-CS obligations (queries, streamout, fences…); flush asynchronously
 * if they would not fit. */
void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
			boolean count_draw_in)
{
	struct r600_atom *state;

	/* The number of dwords we already used in the CS so far. */
	num_dw += ctx->cs->cdw;

	if (count_draw_in) {
		/* The number of dwords all the dirty states would take. */
		LIST_FOR_EACH_ENTRY(state, &ctx->dirty_states, head) {
			num_dw += state->num_dw;
		}

		num_dw += ctx->pm4_dirty_cdwords;

		/* The upper-bound of how much a draw command would take. */
		num_dw += R600_MAX_DRAW_CS_DWORDS;
	}

	/* Count in queries_suspend. */
	num_dw += ctx->num_cs_dw_queries_suspend;

	/* Count in streamout_end at the end of CS. */
	num_dw += ctx->num_cs_dw_streamout_end;

	/* Count in render_condition(NULL) at the end of CS. */
	if (ctx->predicate_drawing) {
		num_dw += 3;
	}

	/* Count in framebuffer cache flushes at the end of CS. */
	num_dw += ctx->num_dest_buffers * 7;

	/* Save 16 dwords for the fence mechanism. */
	num_dw += 16;

	/* Flush if there's not enough space. */
	if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
		r600_flush(&ctx->context, NULL, RADEON_FLUSH_ASYNC);
	}
}

/* Emit a SURFACE_SYNC for @bo with @flush_flags unless the same flush has
 * already been emitted for it; last_flush tracking dedupes repeat flushes. */
void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
			   unsigned flush_mask, struct r600_resource *bo)
{
	struct radeon_winsys_cs *cs = ctx->cs;
	uint64_t va = 0;

	/* if bo has already been flushed */
	if (!(~bo->cs_buf->last_flush & flush_flags)) {
		bo->cs_buf->last_flush &= flush_mask;
		return;
	}

	va = r600_resource_va(&ctx->screen->screen, (void *)bo);
	cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
	cs->buf[cs->cdw++] = flush_flags;
	cs->buf[cs->cdw++] = (bo->buf->size + 255) >> 8; /* size in 256B units */
	cs->buf[cs->cdw++] = va >> 8;                    /* base in 256B units */
	cs->buf[cs->cdw++] = 0x0000000A;                 /* poll interval */
	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, bo, RADEON_USAGE_WRITE);
	bo->cs_buf->last_flush = (bo->cs_buf->last_flush | flush_flags) & flush_mask;
}

/* Mark @block dirty up to register @index, enqueue it on the dirty/enable
 * lists on the first transition, and account its dwords for the next CS.
 * Triggers a PS partial flush first if the block requires it. */
void r600_context_dirty_block(struct r600_context *ctx,
			      struct r600_block *block,
			      int dirty, int index)
{
	if ((index + 1) > block->nreg_dirty)
		block->nreg_dirty = index + 1;

	if ((dirty != (block->status & R600_BLOCK_STATUS_DIRTY)) || !(block->status & R600_BLOCK_STATUS_ENABLED)) {
		block->status |= R600_BLOCK_STATUS_DIRTY;
		ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords;
		if (!(block->status & R600_BLOCK_STATUS_ENABLED)) {
			block->status |= R600_BLOCK_STATUS_ENABLED;
			LIST_ADDTAIL(&block->enable_list, &ctx->enable_list);
		}
		LIST_ADDTAIL(&block->list,&ctx->dirty);

		if (block->flags & REG_FLAG_FLUSH_CHANGE) {
			r600_context_ps_partial_flush(ctx);
		}
	}
}

/* Apply a CSO's register values to their blocks: store changed values, bind
 * relocations for BO-carrying registers, and dirty any block that changed. */
void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state)
{
	struct r600_block *block;
	int dirty;
	for (int i = 0; i < state->nregs; i++) {
		unsigned id, reloc_id;
		struct r600_pipe_reg *reg = &state->regs[i];

		block = reg->block;
		id = reg->id;

		dirty = block->status & R600_BLOCK_STATUS_DIRTY;

		if (reg->value != block->reg[id]) {
			block->reg[id] = reg->value;
			dirty |= R600_BLOCK_STATUS_DIRTY;
		}
		if (block->flags & REG_FLAG_DIRTY_ALWAYS)
			dirty |= R600_BLOCK_STATUS_DIRTY;
		if (block->pm4_bo_index[id]) {
			/* find relocation */
			reloc_id = block->pm4_bo_index[id];
			pipe_resource_reference((struct pipe_resource**)&block->reloc[reloc_id].bo, &reg->bo->b.b.b);
			block->reloc[reloc_id].bo_usage = reg->bo_usage;
			/* always force dirty for relocs for now */
			dirty |= R600_BLOCK_STATUS_DIRTY;
		}

		if (dirty)
			r600_context_dirty_block(ctx, block, dirty, id);
	}
}

static void r600_context_dirty_resource_block(struct r600_context *ctx,
					      struct r600_block *block,
					      int dirty, int index)
{
	block->nreg_dirty = index + 1;

	if
((dirty != (block->status & R600_BLOCK_STATUS_RESOURCE_DIRTY)) || !(block->status & R600_BLOCK_STATUS_ENABLED)) { 1059 block->status |= R600_BLOCK_STATUS_RESOURCE_DIRTY; 1060 ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords; 1061 if (!(block->status & R600_BLOCK_STATUS_ENABLED)) { 1062 block->status |= R600_BLOCK_STATUS_ENABLED; 1063 LIST_ADDTAIL(&block->enable_list, &ctx->enable_list); 1064 } 1065 LIST_ADDTAIL(&block->list,&ctx->resource_dirty); 1066 } 1067} 1068 1069void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, struct r600_block *block) 1070{ 1071 int dirty; 1072 int num_regs = ctx->screen->chip_class >= EVERGREEN ? 8 : 7; 1073 boolean is_vertex; 1074 1075 if (state == NULL) { 1076 block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_RESOURCE_DIRTY); 1077 if (block->reloc[1].bo) 1078 block->reloc[1].bo->cs_buf->binding &= ~BO_BOUND_TEXTURE; 1079 1080 pipe_resource_reference((struct pipe_resource**)&block->reloc[1].bo, NULL); 1081 pipe_resource_reference((struct pipe_resource**)&block->reloc[2].bo, NULL); 1082 LIST_DELINIT(&block->list); 1083 LIST_DELINIT(&block->enable_list); 1084 return; 1085 } 1086 1087 is_vertex = ((state->val[num_regs-1] & 0xc0000000) == 0xc0000000); 1088 dirty = block->status & R600_BLOCK_STATUS_RESOURCE_DIRTY; 1089 1090 if (memcmp(block->reg, state->val, num_regs*4)) { 1091 memcpy(block->reg, state->val, num_regs * 4); 1092 dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; 1093 } 1094 1095 /* if no BOs on block, force dirty */ 1096 if (!block->reloc[1].bo || !block->reloc[2].bo) 1097 dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; 1098 1099 if (!dirty) { 1100 if (is_vertex) { 1101 if (block->reloc[1].bo->buf != state->bo[0]->buf) 1102 dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; 1103 } else { 1104 if ((block->reloc[1].bo->buf != state->bo[0]->buf) || 1105 (block->reloc[2].bo->buf != state->bo[1]->buf)) 1106 dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; 1107 } 
1108 } 1109 1110 if (dirty) { 1111 if (is_vertex) { 1112 /* VERTEX RESOURCE, we preted there is 2 bo to relocate so 1113 * we have single case btw VERTEX & TEXTURE resource 1114 */ 1115 pipe_resource_reference((struct pipe_resource**)&block->reloc[1].bo, &state->bo[0]->b.b.b); 1116 block->reloc[1].bo_usage = state->bo_usage[0]; 1117 pipe_resource_reference((struct pipe_resource**)&block->reloc[2].bo, NULL); 1118 } else { 1119 /* TEXTURE RESOURCE */ 1120 pipe_resource_reference((struct pipe_resource**)&block->reloc[1].bo, &state->bo[0]->b.b.b); 1121 block->reloc[1].bo_usage = state->bo_usage[0]; 1122 pipe_resource_reference((struct pipe_resource**)&block->reloc[2].bo, &state->bo[1]->b.b.b); 1123 block->reloc[2].bo_usage = state->bo_usage[1]; 1124 state->bo[0]->cs_buf->binding |= BO_BOUND_TEXTURE; 1125 } 1126 1127 if (is_vertex) 1128 block->status |= R600_BLOCK_STATUS_RESOURCE_VERTEX; 1129 else 1130 block->status &= ~R600_BLOCK_STATUS_RESOURCE_VERTEX; 1131 1132 r600_context_dirty_resource_block(ctx, block, dirty, num_regs - 1); 1133 } 1134} 1135 1136void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid) 1137{ 1138 struct r600_block *block = ctx->ps_resources.blocks[rid]; 1139 1140 r600_context_pipe_state_set_resource(ctx, state, block); 1141} 1142 1143void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid) 1144{ 1145 struct r600_block *block = ctx->vs_resources.blocks[rid]; 1146 1147 r600_context_pipe_state_set_resource(ctx, state, block); 1148} 1149 1150void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid) 1151{ 1152 struct r600_block *block = ctx->fs_resources.blocks[rid]; 1153 1154 r600_context_pipe_state_set_resource(ctx, state, block); 1155} 1156 1157static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx, struct r600_pipe_state 
*state, unsigned offset) 1158{ 1159 struct r600_range *range; 1160 struct r600_block *block; 1161 int i; 1162 int dirty; 1163 1164 range = &ctx->range[CTX_RANGE_ID(offset)]; 1165 block = range->blocks[CTX_BLOCK_ID(offset)]; 1166 if (state == NULL) { 1167 block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); 1168 LIST_DELINIT(&block->list); 1169 LIST_DELINIT(&block->enable_list); 1170 return; 1171 } 1172 dirty = block->status & R600_BLOCK_STATUS_DIRTY; 1173 for (i = 0; i < 3; i++) { 1174 if (block->reg[i] != state->regs[i].value) { 1175 block->reg[i] = state->regs[i].value; 1176 dirty |= R600_BLOCK_STATUS_DIRTY; 1177 } 1178 } 1179 1180 if (dirty) 1181 r600_context_dirty_block(ctx, block, dirty, 2); 1182} 1183 1184 1185static inline void r600_context_pipe_state_set_sampler_border(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) 1186{ 1187 struct r600_range *range; 1188 struct r600_block *block; 1189 int i; 1190 int dirty; 1191 1192 range = &ctx->range[CTX_RANGE_ID(offset)]; 1193 block = range->blocks[CTX_BLOCK_ID(offset)]; 1194 if (state == NULL) { 1195 block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); 1196 LIST_DELINIT(&block->list); 1197 LIST_DELINIT(&block->enable_list); 1198 return; 1199 } 1200 if (state->nregs <= 3) { 1201 return; 1202 } 1203 dirty = block->status & R600_BLOCK_STATUS_DIRTY; 1204 for (i = 0; i < 4; i++) { 1205 if (block->reg[i] != state->regs[i + 3].value) { 1206 block->reg[i] = state->regs[i + 3].value; 1207 dirty |= R600_BLOCK_STATUS_DIRTY; 1208 } 1209 } 1210 1211 /* We have to flush the shaders before we change the border color 1212 * registers, or previous draw commands that haven't completed yet 1213 * will end up using the new border color. 
*/ 1214 if (dirty & R600_BLOCK_STATUS_DIRTY) 1215 r600_context_ps_partial_flush(ctx); 1216 if (dirty) 1217 r600_context_dirty_block(ctx, block, dirty, 3); 1218} 1219 1220void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id) 1221{ 1222 unsigned offset; 1223 1224 offset = 0x0003C000 + id * 0xc; 1225 r600_context_pipe_state_set_sampler(ctx, state, offset); 1226 offset = 0x0000A400 + id * 0x10; 1227 r600_context_pipe_state_set_sampler_border(ctx, state, offset); 1228} 1229 1230void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id) 1231{ 1232 unsigned offset; 1233 1234 offset = 0x0003C0D8 + id * 0xc; 1235 r600_context_pipe_state_set_sampler(ctx, state, offset); 1236 offset = 0x0000A600 + id * 0x10; 1237 r600_context_pipe_state_set_sampler_border(ctx, state, offset); 1238} 1239 1240struct r600_resource *r600_context_reg_bo(struct r600_context *ctx, unsigned offset) 1241{ 1242 struct r600_range *range; 1243 struct r600_block *block; 1244 unsigned id; 1245 1246 range = &ctx->range[CTX_RANGE_ID(offset)]; 1247 block = range->blocks[CTX_BLOCK_ID(offset)]; 1248 offset -= block->start_offset; 1249 id = block->pm4_bo_index[offset >> 2]; 1250 if (block->reloc[id].bo) { 1251 return block->reloc[id].bo; 1252 } 1253 return NULL; 1254} 1255 1256void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *block) 1257{ 1258 struct radeon_winsys_cs *cs = ctx->cs; 1259 int optional = block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS); 1260 int cp_dwords = block->pm4_ndwords, start_dword = 0; 1261 int new_dwords = 0; 1262 int nbo = block->nbo; 1263 1264 if (block->nreg_dirty == 0 && optional) { 1265 goto out; 1266 } 1267 1268 if (nbo) { 1269 ctx->flags |= R600_CONTEXT_CHECK_EVENT_FLUSH; 1270 1271 for (int j = 0; j < block->nreg; j++) { 1272 if (block->pm4_bo_index[j]) { 1273 /* find relocation */ 1274 struct r600_block_reloc *reloc = 
&block->reloc[block->pm4_bo_index[j]]; 1275 if (reloc->bo) { 1276 block->pm4[reloc->bo_pm4_index] = 1277 r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage); 1278 r600_context_bo_flush(ctx, 1279 reloc->flush_flags, 1280 reloc->flush_mask, 1281 reloc->bo); 1282 } else { 1283 block->pm4[reloc->bo_pm4_index] = 0; 1284 } 1285 nbo--; 1286 if (nbo == 0) 1287 break; 1288 1289 } 1290 } 1291 ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH; 1292 } 1293 1294 optional &= (block->nreg_dirty != block->nreg); 1295 if (optional) { 1296 new_dwords = block->nreg_dirty; 1297 start_dword = cs->cdw; 1298 cp_dwords = new_dwords + 2; 1299 } 1300 memcpy(&cs->buf[cs->cdw], block->pm4, cp_dwords * 4); 1301 cs->cdw += cp_dwords; 1302 1303 if (optional) { 1304 uint32_t newword; 1305 1306 newword = cs->buf[start_dword]; 1307 newword &= PKT_COUNT_C; 1308 newword |= PKT_COUNT_S(new_dwords); 1309 cs->buf[start_dword] = newword; 1310 } 1311out: 1312 block->status ^= R600_BLOCK_STATUS_DIRTY; 1313 block->nreg_dirty = 0; 1314 LIST_DELINIT(&block->list); 1315} 1316 1317void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r600_block *block) 1318{ 1319 struct radeon_winsys_cs *cs = ctx->cs; 1320 int cp_dwords = block->pm4_ndwords; 1321 int nbo = block->nbo; 1322 1323 ctx->flags |= R600_CONTEXT_CHECK_EVENT_FLUSH; 1324 1325 if (block->status & R600_BLOCK_STATUS_RESOURCE_VERTEX) { 1326 nbo = 1; 1327 cp_dwords -= 2; /* don't copy the second NOP */ 1328 } 1329 1330 for (int j = 0; j < nbo; j++) { 1331 if (block->pm4_bo_index[j]) { 1332 /* find relocation */ 1333 struct r600_block_reloc *reloc = &block->reloc[block->pm4_bo_index[j]]; 1334 block->pm4[reloc->bo_pm4_index] = 1335 r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage); 1336 r600_context_bo_flush(ctx, 1337 reloc->flush_flags, 1338 reloc->flush_mask, 1339 reloc->bo); 1340 } 1341 } 1342 ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH; 1343 1344 memcpy(&cs->buf[cs->cdw], block->pm4, cp_dwords * 4); 1345 cs->cdw += cp_dwords; 
1346 1347 block->status ^= R600_BLOCK_STATUS_RESOURCE_DIRTY; 1348 block->nreg_dirty = 0; 1349 LIST_DELINIT(&block->list); 1350} 1351 1352void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) 1353{ 1354 struct radeon_winsys_cs *cs = ctx->cs; 1355 unsigned ndwords = 7; 1356 uint32_t *pm4; 1357 1358 if (draw->indices) { 1359 ndwords = 11; 1360 } 1361 if (ctx->num_cs_dw_queries_suspend) { 1362 if (ctx->screen->family >= CHIP_RV770) 1363 ndwords += 3; 1364 ndwords += 3; 1365 } 1366 1367 /* when increasing ndwords, bump the max limit too */ 1368 assert(ndwords <= R600_MAX_DRAW_CS_DWORDS); 1369 1370 /* queries need some special values 1371 * (this is non-zero if any query is active) */ 1372 if (ctx->num_cs_dw_queries_suspend) { 1373 if (ctx->screen->family >= CHIP_RV770) { 1374 pm4 = &cs->buf[cs->cdw]; 1375 pm4[0] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); 1376 pm4[1] = (R_028D0C_DB_RENDER_CONTROL - R600_CONTEXT_REG_OFFSET) >> 2; 1377 pm4[2] = draw->db_render_control | S_028D0C_R700_PERFECT_ZPASS_COUNTS(1); 1378 cs->cdw += 3; 1379 ndwords -= 3; 1380 } 1381 pm4 = &cs->buf[cs->cdw]; 1382 pm4[0] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); 1383 pm4[1] = (R_028D10_DB_RENDER_OVERRIDE - R600_CONTEXT_REG_OFFSET) >> 2; 1384 pm4[2] = draw->db_render_override | S_028D10_NOOP_CULL_DISABLE(1); 1385 cs->cdw += 3; 1386 ndwords -= 3; 1387 } 1388 1389 /* draw packet */ 1390 pm4 = &cs->buf[cs->cdw]; 1391 pm4[0] = PKT3(PKT3_INDEX_TYPE, 0, ctx->predicate_drawing); 1392 pm4[1] = draw->vgt_index_type; 1393 pm4[2] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing); 1394 pm4[3] = draw->vgt_num_instances; 1395 if (draw->indices) { 1396 pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); 1397 pm4[5] = draw->indices_bo_offset; 1398 pm4[6] = 0; 1399 pm4[7] = draw->vgt_num_indices; 1400 pm4[8] = draw->vgt_draw_initiator; 1401 pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); 1402 pm4[10] = r600_context_bo_reloc(ctx, draw->indices, RADEON_USAGE_READ); 1403 } else { 1404 pm4[4] = 
PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing); 1405 pm4[5] = draw->vgt_num_indices; 1406 pm4[6] = draw->vgt_draw_initiator; 1407 } 1408 cs->cdw += ndwords; 1409} 1410 1411void r600_inval_shader_cache(struct r600_context *ctx) 1412{ 1413 ctx->atom_surface_sync.flush_flags |= S_0085F0_SH_ACTION_ENA(1); 1414 r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom); 1415} 1416 1417void r600_inval_texture_cache(struct r600_context *ctx) 1418{ 1419 ctx->atom_surface_sync.flush_flags |= S_0085F0_TC_ACTION_ENA(1); 1420 r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom); 1421} 1422 1423void r600_inval_vertex_cache(struct r600_context *ctx) 1424{ 1425 if (ctx->family == CHIP_RV610 || 1426 ctx->family == CHIP_RV620 || 1427 ctx->family == CHIP_RS780 || 1428 ctx->family == CHIP_RS880 || 1429 ctx->family == CHIP_RV710 || 1430 ctx->family == CHIP_CEDAR || 1431 ctx->family == CHIP_PALM || 1432 ctx->family == CHIP_SUMO || 1433 ctx->family == CHIP_SUMO2 || 1434 ctx->family == CHIP_CAICOS || 1435 ctx->family == CHIP_CAYMAN) { 1436 /* Some GPUs don't have the vertex cache and must use the texture cache instead. */ 1437 ctx->atom_surface_sync.flush_flags |= S_0085F0_TC_ACTION_ENA(1); 1438 } else { 1439 ctx->atom_surface_sync.flush_flags |= S_0085F0_VC_ACTION_ENA(1); 1440 } 1441 r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom); 1442} 1443 1444void r600_flush_framebuffer(struct r600_context *ctx, bool flush_now) 1445{ 1446 if (!(ctx->flags & R600_CONTEXT_DST_CACHES_DIRTY)) 1447 return; 1448 1449 ctx->atom_surface_sync.flush_flags |= 1450 r600_get_cb_flush_flags(ctx) | 1451 (ctx->framebuffer.zsbuf ? S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1) : 0); 1452 1453 if (flush_now) { 1454 r600_emit_atom(ctx, &ctx->atom_surface_sync.atom); 1455 } else { 1456 r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom); 1457 } 1458 1459 /* Also add a complete cache flush to work around broken flushing on R6xx. 
*/ 1460 if (ctx->chip_class == R600) { 1461 if (flush_now) { 1462 r600_emit_atom(ctx, &ctx->atom_r6xx_flush_and_inv); 1463 } else { 1464 r600_atom_dirty(ctx, &ctx->atom_r6xx_flush_and_inv); 1465 } 1466 } 1467 1468 ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY; 1469} 1470 1471void r600_context_flush(struct r600_context *ctx, unsigned flags) 1472{ 1473 struct radeon_winsys_cs *cs = ctx->cs; 1474 struct r600_block *enable_block = NULL; 1475 bool queries_suspended = false; 1476 bool streamout_suspended = false; 1477 1478 if (cs->cdw == ctx->init_dwords) 1479 return; 1480 1481 /* suspend queries */ 1482 if (ctx->num_cs_dw_queries_suspend) { 1483 r600_context_queries_suspend(ctx); 1484 queries_suspended = true; 1485 } 1486 1487 if (ctx->num_cs_dw_streamout_end) { 1488 r600_context_streamout_end(ctx); 1489 streamout_suspended = true; 1490 } 1491 1492 r600_flush_framebuffer(ctx, true); 1493 1494 /* partial flush is needed to avoid lockups on some chips with user fences */ 1495 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); 1496 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); 1497 1498 /* Flush the CS. 
*/ 1499 ctx->ws->cs_flush(ctx->cs, flags); 1500 1501 /* restart */ 1502 for (int i = 0; i < ctx->creloc; i++) { 1503 ctx->bo[i]->cs_buf->last_flush = 0; 1504 pipe_resource_reference((struct pipe_resource**)&ctx->bo[i], NULL); 1505 } 1506 ctx->creloc = 0; 1507 ctx->pm4_dirty_cdwords = 0; 1508 ctx->flags = 0; 1509 1510 r600_init_cs(ctx); 1511 1512 if (streamout_suspended) { 1513 ctx->streamout_start = TRUE; 1514 ctx->streamout_append_bitmask = ~0; 1515 } 1516 1517 /* resume queries */ 1518 if (queries_suspended) { 1519 r600_context_queries_resume(ctx); 1520 } 1521 1522 /* set all valid group as dirty so they get reemited on 1523 * next draw command 1524 */ 1525 LIST_FOR_EACH_ENTRY(enable_block, &ctx->enable_list, enable_list) { 1526 if (!(enable_block->flags & BLOCK_FLAG_RESOURCE)) { 1527 if(!(enable_block->status & R600_BLOCK_STATUS_DIRTY)) { 1528 LIST_ADDTAIL(&enable_block->list,&ctx->dirty); 1529 enable_block->status |= R600_BLOCK_STATUS_DIRTY; 1530 } 1531 } else { 1532 if(!(enable_block->status & R600_BLOCK_STATUS_RESOURCE_DIRTY)) { 1533 LIST_ADDTAIL(&enable_block->list,&ctx->resource_dirty); 1534 enable_block->status |= R600_BLOCK_STATUS_RESOURCE_DIRTY; 1535 } 1536 } 1537 ctx->pm4_dirty_cdwords += enable_block->pm4_ndwords + 1538 enable_block->pm4_flush_ndwords; 1539 enable_block->nreg_dirty = enable_block->nreg; 1540 } 1541} 1542 1543void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fence_bo, unsigned offset, unsigned value) 1544{ 1545 struct radeon_winsys_cs *cs = ctx->cs; 1546 uint64_t va; 1547 1548 r600_need_cs_space(ctx, 10, FALSE); 1549 1550 va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo); 1551 va = va + (offset << 2); 1552 1553 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); 1554 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); 1555 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); 1556 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); 
1557 cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL; /* ADDRESS_LO */ 1558 /* DATA_SEL | INT_EN | ADDRESS_HI */ 1559 cs->buf[cs->cdw++] = (1 << 29) | (0 << 24) | ((va >> 32UL) & 0xFF); 1560 cs->buf[cs->cdw++] = value; /* DATA_LO */ 1561 cs->buf[cs->cdw++] = 0; /* DATA_HI */ 1562 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 1563 cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, fence_bo, RADEON_USAGE_WRITE); 1564} 1565 1566static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index, 1567 bool test_status_bit) 1568{ 1569 uint32_t *current_result = (uint32_t*)map; 1570 uint64_t start, end; 1571 1572 start = (uint64_t)current_result[start_index] | 1573 (uint64_t)current_result[start_index+1] << 32; 1574 end = (uint64_t)current_result[end_index] | 1575 (uint64_t)current_result[end_index+1] << 32; 1576 1577 if (!test_status_bit || 1578 ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))) { 1579 return end - start; 1580 } 1581 return 0; 1582} 1583 1584static boolean r600_query_result(struct r600_context *ctx, struct r600_query *query, boolean wait) 1585{ 1586 unsigned results_base = query->results_start; 1587 char *map; 1588 1589 map = ctx->ws->buffer_map(query->buffer->buf, ctx->cs, 1590 PIPE_TRANSFER_READ | 1591 (wait ? 
0 : PIPE_TRANSFER_DONTBLOCK)); 1592 if (!map) 1593 return FALSE; 1594 1595 /* count all results across all data blocks */ 1596 switch (query->type) { 1597 case PIPE_QUERY_OCCLUSION_COUNTER: 1598 while (results_base != query->results_end) { 1599 query->result.u64 += 1600 r600_query_read_result(map + results_base, 0, 2, true); 1601 results_base = (results_base + 16) % query->buffer->b.b.b.width0; 1602 } 1603 break; 1604 case PIPE_QUERY_OCCLUSION_PREDICATE: 1605 while (results_base != query->results_end) { 1606 query->result.b = query->result.b || 1607 r600_query_read_result(map + results_base, 0, 2, true) != 0; 1608 results_base = (results_base + 16) % query->buffer->b.b.b.width0; 1609 } 1610 break; 1611 case PIPE_QUERY_TIME_ELAPSED: 1612 while (results_base != query->results_end) { 1613 query->result.u64 += 1614 r600_query_read_result(map + results_base, 0, 2, false); 1615 results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0; 1616 } 1617 break; 1618 case PIPE_QUERY_PRIMITIVES_EMITTED: 1619 /* SAMPLE_STREAMOUTSTATS stores this structure: 1620 * { 1621 * u64 NumPrimitivesWritten; 1622 * u64 PrimitiveStorageNeeded; 1623 * } 1624 * We only need NumPrimitivesWritten here. */ 1625 while (results_base != query->results_end) { 1626 query->result.u64 += 1627 r600_query_read_result(map + results_base, 2, 6, true); 1628 results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0; 1629 } 1630 break; 1631 case PIPE_QUERY_PRIMITIVES_GENERATED: 1632 /* Here we read PrimitiveStorageNeeded. 
*/ 1633 while (results_base != query->results_end) { 1634 query->result.u64 += 1635 r600_query_read_result(map + results_base, 0, 4, true); 1636 results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0; 1637 } 1638 break; 1639 case PIPE_QUERY_SO_STATISTICS: 1640 while (results_base != query->results_end) { 1641 query->result.so.num_primitives_written += 1642 r600_query_read_result(map + results_base, 2, 6, true); 1643 query->result.so.primitives_storage_needed += 1644 r600_query_read_result(map + results_base, 0, 4, true); 1645 results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0; 1646 } 1647 break; 1648 case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 1649 while (results_base != query->results_end) { 1650 query->result.b = query->result.b || 1651 r600_query_read_result(map + results_base, 2, 6, true) != 1652 r600_query_read_result(map + results_base, 0, 4, true); 1653 results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0; 1654 } 1655 break; 1656 default: 1657 assert(0); 1658 } 1659 1660 query->results_start = query->results_end; 1661 ctx->ws->buffer_unmap(query->buffer->buf); 1662 return TRUE; 1663} 1664 1665void r600_query_begin(struct r600_context *ctx, struct r600_query *query) 1666{ 1667 struct radeon_winsys_cs *cs = ctx->cs; 1668 unsigned new_results_end, i; 1669 uint32_t *results; 1670 uint64_t va; 1671 1672 r600_need_cs_space(ctx, query->num_cs_dw * 2, TRUE); 1673 1674 new_results_end = (query->results_end + query->result_size) % query->buffer->b.b.b.width0; 1675 1676 /* collect current results if query buffer is full */ 1677 if (new_results_end == query->results_start) { 1678 r600_query_result(ctx, query, TRUE); 1679 } 1680 1681 switch (query->type) { 1682 case PIPE_QUERY_OCCLUSION_COUNTER: 1683 case PIPE_QUERY_OCCLUSION_PREDICATE: 1684 results = ctx->ws->buffer_map(query->buffer->buf, ctx->cs, PIPE_TRANSFER_WRITE); 1685 if (results) { 1686 results = (uint32_t*)((char*)results + 
query->results_end); 1687 memset(results, 0, query->result_size); 1688 1689 /* Set top bits for unused backends */ 1690 for (i = 0; i < ctx->max_db; i++) { 1691 if (!(ctx->backend_mask & (1<<i))) { 1692 results[(i * 4)+1] = 0x80000000; 1693 results[(i * 4)+3] = 0x80000000; 1694 } 1695 } 1696 ctx->ws->buffer_unmap(query->buffer->buf); 1697 } 1698 break; 1699 case PIPE_QUERY_TIME_ELAPSED: 1700 break; 1701 case PIPE_QUERY_PRIMITIVES_EMITTED: 1702 case PIPE_QUERY_PRIMITIVES_GENERATED: 1703 case PIPE_QUERY_SO_STATISTICS: 1704 case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 1705 results = ctx->ws->buffer_map(query->buffer->buf, ctx->cs, PIPE_TRANSFER_WRITE); 1706 results = (uint32_t*)((char*)results + query->results_end); 1707 memset(results, 0, query->result_size); 1708 ctx->ws->buffer_unmap(query->buffer->buf); 1709 break; 1710 default: 1711 assert(0); 1712 } 1713 1714 /* emit begin query */ 1715 va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer); 1716 va += query->results_end; 1717 1718 switch (query->type) { 1719 case PIPE_QUERY_OCCLUSION_COUNTER: 1720 case PIPE_QUERY_OCCLUSION_PREDICATE: 1721 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); 1722 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); 1723 cs->buf[cs->cdw++] = va; 1724 cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; 1725 break; 1726 case PIPE_QUERY_PRIMITIVES_EMITTED: 1727 case PIPE_QUERY_PRIMITIVES_GENERATED: 1728 case PIPE_QUERY_SO_STATISTICS: 1729 case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 1730 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); 1731 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3); 1732 cs->buf[cs->cdw++] = query->results_end; 1733 cs->buf[cs->cdw++] = 0; 1734 break; 1735 case PIPE_QUERY_TIME_ELAPSED: 1736 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); 1737 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); 1738 cs->buf[cs->cdw++] = va; 1739 cs->buf[cs->cdw++] = (3 << 29) | ((va >> 
32UL) & 0xFF); 1740 cs->buf[cs->cdw++] = 0; 1741 cs->buf[cs->cdw++] = 0; 1742 break; 1743 default: 1744 assert(0); 1745 } 1746 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 1747 cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE); 1748 1749 ctx->num_cs_dw_queries_suspend += query->num_cs_dw; 1750} 1751 1752void r600_query_end(struct r600_context *ctx, struct r600_query *query) 1753{ 1754 struct radeon_winsys_cs *cs = ctx->cs; 1755 uint64_t va; 1756 1757 va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer); 1758 /* emit end query */ 1759 switch (query->type) { 1760 case PIPE_QUERY_OCCLUSION_COUNTER: 1761 case PIPE_QUERY_OCCLUSION_PREDICATE: 1762 va += query->results_end + 8; 1763 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); 1764 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); 1765 cs->buf[cs->cdw++] = va; 1766 cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; 1767 break; 1768 case PIPE_QUERY_PRIMITIVES_EMITTED: 1769 case PIPE_QUERY_PRIMITIVES_GENERATED: 1770 case PIPE_QUERY_SO_STATISTICS: 1771 case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 1772 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); 1773 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3); 1774 cs->buf[cs->cdw++] = query->results_end + query->result_size/2; 1775 cs->buf[cs->cdw++] = 0; 1776 break; 1777 case PIPE_QUERY_TIME_ELAPSED: 1778 va += query->results_end + query->result_size/2; 1779 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); 1780 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); 1781 cs->buf[cs->cdw++] = va; 1782 cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF); 1783 cs->buf[cs->cdw++] = 0; 1784 cs->buf[cs->cdw++] = 0; 1785 break; 1786 default: 1787 assert(0); 1788 } 1789 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 1790 cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE); 1791 1792 query->results_end = (query->results_end + 
query->result_size) % query->buffer->b.b.b.width0; 1793 ctx->num_cs_dw_queries_suspend -= query->num_cs_dw; 1794} 1795 1796void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation, 1797 int flag_wait) 1798{ 1799 struct radeon_winsys_cs *cs = ctx->cs; 1800 uint64_t va; 1801 1802 if (operation == PREDICATION_OP_CLEAR) { 1803 r600_need_cs_space(ctx, 3, FALSE); 1804 1805 cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0); 1806 cs->buf[cs->cdw++] = 0; 1807 cs->buf[cs->cdw++] = PRED_OP(PREDICATION_OP_CLEAR); 1808 } else { 1809 unsigned results_base = query->results_start; 1810 unsigned count; 1811 uint32_t op; 1812 1813 /* find count of the query data blocks */ 1814 count = (query->buffer->b.b.b.width0 + query->results_end - query->results_start) % query->buffer->b.b.b.width0; 1815 count /= query->result_size; 1816 1817 r600_need_cs_space(ctx, 5 * count, TRUE); 1818 1819 op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE | 1820 (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW); 1821 va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer); 1822 1823 /* emit predicate packets for all data blocks */ 1824 while (results_base != query->results_end) { 1825 cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0); 1826 cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL; 1827 cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF); 1828 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 1829 cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer, 1830 RADEON_USAGE_READ); 1831 results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0; 1832 1833 /* set CONTINUE bit for all packets except the first */ 1834 op |= PREDICATION_CONTINUE; 1835 } 1836 } 1837} 1838 1839struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type) 1840{ 1841 struct r600_query *query; 1842 unsigned buffer_size = 4096; 1843 1844 query = CALLOC_STRUCT(r600_query); 1845 if (query == 
NULL) 1846 return NULL; 1847 1848 query->type = query_type; 1849 1850 switch (query_type) { 1851 case PIPE_QUERY_OCCLUSION_COUNTER: 1852 case PIPE_QUERY_OCCLUSION_PREDICATE: 1853 query->result_size = 16 * ctx->max_db; 1854 query->num_cs_dw = 6; 1855 break; 1856 case PIPE_QUERY_TIME_ELAPSED: 1857 query->result_size = 16; 1858 query->num_cs_dw = 8; 1859 break; 1860 case PIPE_QUERY_PRIMITIVES_EMITTED: 1861 case PIPE_QUERY_PRIMITIVES_GENERATED: 1862 case PIPE_QUERY_SO_STATISTICS: 1863 case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 1864 /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ 1865 query->result_size = 32; 1866 query->num_cs_dw = 6; 1867 break; 1868 default: 1869 assert(0); 1870 FREE(query); 1871 return NULL; 1872 } 1873 1874 /* adjust buffer size to simplify offsets wrapping math */ 1875 buffer_size -= buffer_size % query->result_size; 1876 1877 /* Queries are normally read by the CPU after 1878 * being written by the gpu, hence staging is probably a good 1879 * usage pattern. 1880 */ 1881 query->buffer = (struct r600_resource*) 1882 pipe_buffer_create(&ctx->screen->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_STAGING, buffer_size); 1883 if (!query->buffer) { 1884 FREE(query); 1885 return NULL; 1886 } 1887 return query; 1888} 1889 1890void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query) 1891{ 1892 pipe_resource_reference((struct pipe_resource**)&query->buffer, NULL); 1893 free(query); 1894} 1895 1896boolean r600_context_query_result(struct r600_context *ctx, 1897 struct r600_query *query, 1898 boolean wait, void *vresult) 1899{ 1900 boolean *result_b = (boolean*)vresult; 1901 uint64_t *result_u64 = (uint64_t*)vresult; 1902 struct pipe_query_data_so_statistics *result_so = 1903 (struct pipe_query_data_so_statistics*)vresult; 1904 1905 if (!r600_query_result(ctx, query, wait)) 1906 return FALSE; 1907 1908 switch (query->type) { 1909 case PIPE_QUERY_OCCLUSION_COUNTER: 1910 case PIPE_QUERY_PRIMITIVES_EMITTED: 1911 case 
PIPE_QUERY_PRIMITIVES_GENERATED: 1912 *result_u64 = query->result.u64; 1913 break; 1914 case PIPE_QUERY_OCCLUSION_PREDICATE: 1915 case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 1916 *result_b = query->result.b; 1917 break; 1918 case PIPE_QUERY_TIME_ELAPSED: 1919 *result_u64 = (1000000 * query->result.u64) / ctx->screen->info.r600_clock_crystal_freq; 1920 break; 1921 case PIPE_QUERY_SO_STATISTICS: 1922 *result_so = query->result.so; 1923 break; 1924 default: 1925 assert(0); 1926 } 1927 return TRUE; 1928} 1929 1930void r600_context_queries_suspend(struct r600_context *ctx) 1931{ 1932 struct r600_query *query; 1933 1934 LIST_FOR_EACH_ENTRY(query, &ctx->active_query_list, list) { 1935 r600_query_end(ctx, query); 1936 } 1937 assert(ctx->num_cs_dw_queries_suspend == 0); 1938} 1939 1940void r600_context_queries_resume(struct r600_context *ctx) 1941{ 1942 struct r600_query *query; 1943 1944 assert(ctx->num_cs_dw_queries_suspend == 0); 1945 1946 LIST_FOR_EACH_ENTRY(query, &ctx->active_query_list, list) { 1947 r600_query_begin(ctx, query); 1948 } 1949} 1950 1951static void r600_flush_vgt_streamout(struct r600_context *ctx) 1952{ 1953 struct radeon_winsys_cs *cs = ctx->cs; 1954 1955 cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0); 1956 cs->buf[cs->cdw++] = (R_008490_CP_STRMOUT_CNTL - R600_CONFIG_REG_OFFSET) >> 2; 1957 cs->buf[cs->cdw++] = 0; 1958 1959 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); 1960 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0); 1961 1962 cs->buf[cs->cdw++] = PKT3(PKT3_WAIT_REG_MEM, 5, 0); 1963 cs->buf[cs->cdw++] = WAIT_REG_MEM_EQUAL; /* wait until the register is equal to the reference value */ 1964 cs->buf[cs->cdw++] = R_008490_CP_STRMOUT_CNTL >> 2; /* register */ 1965 cs->buf[cs->cdw++] = 0; 1966 cs->buf[cs->cdw++] = S_008490_OFFSET_UPDATE_DONE(1); /* reference value */ 1967 cs->buf[cs->cdw++] = S_008490_OFFSET_UPDATE_DONE(1); /* mask */ 1968 cs->buf[cs->cdw++] = 4; /* poll interval */ 1969} 1970 1971static void 
r600_set_streamout_enable(struct r600_context *ctx, unsigned buffer_enable_bit) 1972{ 1973 struct radeon_winsys_cs *cs = ctx->cs; 1974 1975 if (buffer_enable_bit) { 1976 cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); 1977 cs->buf[cs->cdw++] = (R_028AB0_VGT_STRMOUT_EN - R600_CONTEXT_REG_OFFSET) >> 2; 1978 cs->buf[cs->cdw++] = S_028AB0_STREAMOUT(1); 1979 1980 cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); 1981 cs->buf[cs->cdw++] = (R_028B20_VGT_STRMOUT_BUFFER_EN - R600_CONTEXT_REG_OFFSET) >> 2; 1982 cs->buf[cs->cdw++] = buffer_enable_bit; 1983 } else { 1984 cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); 1985 cs->buf[cs->cdw++] = (R_028AB0_VGT_STRMOUT_EN - R600_CONTEXT_REG_OFFSET) >> 2; 1986 cs->buf[cs->cdw++] = S_028AB0_STREAMOUT(0); 1987 } 1988} 1989 1990void r600_context_streamout_begin(struct r600_context *ctx) 1991{ 1992 struct radeon_winsys_cs *cs = ctx->cs; 1993 struct r600_so_target **t = ctx->so_targets; 1994 unsigned *stride_in_dw = ctx->vs_so_stride_in_dw; 1995 unsigned buffer_en, i, update_flags = 0; 1996 uint64_t va; 1997 1998 buffer_en = (ctx->num_so_targets >= 1 && t[0] ? 1 : 0) | 1999 (ctx->num_so_targets >= 2 && t[1] ? 2 : 0) | 2000 (ctx->num_so_targets >= 3 && t[2] ? 4 : 0) | 2001 (ctx->num_so_targets >= 4 && t[3] ? 8 : 0); 2002 2003 ctx->num_cs_dw_streamout_end = 2004 12 + /* flush_vgt_streamout */ 2005 util_bitcount(buffer_en) * 8 + 2006 8; 2007 2008 r600_need_cs_space(ctx, 2009 12 + /* flush_vgt_streamout */ 2010 6 + /* enables */ 2011 util_bitcount(buffer_en & ctx->streamout_append_bitmask) * 8 + 2012 util_bitcount(buffer_en & ~ctx->streamout_append_bitmask) * 6 + 2013 (ctx->screen->family > CHIP_R600 && ctx->screen->family < CHIP_RV770 ? 
2 : 0) + 2014 ctx->num_cs_dw_streamout_end, TRUE); 2015 2016 if (ctx->screen->chip_class >= EVERGREEN) { 2017 evergreen_flush_vgt_streamout(ctx); 2018 evergreen_set_streamout_enable(ctx, buffer_en); 2019 } else { 2020 r600_flush_vgt_streamout(ctx); 2021 r600_set_streamout_enable(ctx, buffer_en); 2022 } 2023 2024 for (i = 0; i < ctx->num_so_targets; i++) { 2025 if (t[i]) { 2026 t[i]->stride_in_dw = stride_in_dw[i]; 2027 t[i]->so_index = i; 2028 va = r600_resource_va(&ctx->screen->screen, 2029 (void*)t[i]->b.buffer); 2030 2031 update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i); 2032 2033 cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 3, 0); 2034 cs->buf[cs->cdw++] = (R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 2035 16*i - R600_CONTEXT_REG_OFFSET) >> 2; 2036 cs->buf[cs->cdw++] = (t[i]->b.buffer_offset + 2037 t[i]->b.buffer_size) >> 2; /* BUFFER_SIZE (in DW) */ 2038 cs->buf[cs->cdw++] = stride_in_dw[i]; /* VTX_STRIDE (in DW) */ 2039 cs->buf[cs->cdw++] = va >> 8; /* BUFFER_BASE */ 2040 2041 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 2042 cs->buf[cs->cdw++] = 2043 r600_context_bo_reloc(ctx, r600_resource(t[i]->b.buffer), 2044 RADEON_USAGE_WRITE); 2045 2046 if (ctx->streamout_append_bitmask & (1 << i)) { 2047 va = r600_resource_va(&ctx->screen->screen, 2048 (void*)t[i]->filled_size); 2049 /* Append. */ 2050 cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0); 2051 cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) | 2052 STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM); /* control */ 2053 cs->buf[cs->cdw++] = 0; /* unused */ 2054 cs->buf[cs->cdw++] = 0; /* unused */ 2055 cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL; /* src address lo */ 2056 cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFUL; /* src address hi */ 2057 2058 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 2059 cs->buf[cs->cdw++] = 2060 r600_context_bo_reloc(ctx, t[i]->filled_size, 2061 RADEON_USAGE_READ); 2062 } else { 2063 /* Start from the beginning. 
*/ 2064 cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0); 2065 cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) | 2066 STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET); /* control */ 2067 cs->buf[cs->cdw++] = 0; /* unused */ 2068 cs->buf[cs->cdw++] = 0; /* unused */ 2069 cs->buf[cs->cdw++] = t[i]->b.buffer_offset >> 2; /* buffer offset in DW */ 2070 cs->buf[cs->cdw++] = 0; /* unused */ 2071 } 2072 } 2073 } 2074 2075 if (ctx->screen->family > CHIP_R600 && ctx->screen->family < CHIP_RV770) { 2076 cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0); 2077 cs->buf[cs->cdw++] = update_flags; 2078 } 2079} 2080 2081void r600_context_streamout_end(struct r600_context *ctx) 2082{ 2083 struct radeon_winsys_cs *cs = ctx->cs; 2084 struct r600_so_target **t = ctx->so_targets; 2085 unsigned i, flush_flags = 0; 2086 uint64_t va; 2087 2088 if (ctx->screen->chip_class >= EVERGREEN) { 2089 evergreen_flush_vgt_streamout(ctx); 2090 } else { 2091 r600_flush_vgt_streamout(ctx); 2092 } 2093 2094 for (i = 0; i < ctx->num_so_targets; i++) { 2095 if (t[i]) { 2096 va = r600_resource_va(&ctx->screen->screen, 2097 (void*)t[i]->filled_size); 2098 cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0); 2099 cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) | 2100 STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) | 2101 STRMOUT_STORE_BUFFER_FILLED_SIZE; /* control */ 2102 cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL; /* dst address lo */ 2103 cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFUL; /* dst address hi */ 2104 cs->buf[cs->cdw++] = 0; /* unused */ 2105 cs->buf[cs->cdw++] = 0; /* unused */ 2106 2107 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 2108 cs->buf[cs->cdw++] = 2109 r600_context_bo_reloc(ctx, t[i]->filled_size, 2110 RADEON_USAGE_WRITE); 2111 2112 flush_flags |= S_0085F0_SO0_DEST_BASE_ENA(1) << i; 2113 } 2114 } 2115 2116 if (ctx->screen->chip_class >= EVERGREEN) { 2117 evergreen_set_streamout_enable(ctx, 0); 2118 } else { 2119 r600_set_streamout_enable(ctx, 0); 2120 } 2121 2122 if 
(ctx->screen->family < CHIP_RV770) { 2123 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); 2124 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0); 2125 } else { 2126 cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0); 2127 cs->buf[cs->cdw++] = flush_flags; /* CP_COHER_CNTL */ 2128 cs->buf[cs->cdw++] = 0xffffffff; /* CP_COHER_SIZE */ 2129 cs->buf[cs->cdw++] = 0; /* CP_COHER_BASE */ 2130 cs->buf[cs->cdw++] = 0x0000000A; /* POLL_INTERVAL */ 2131 } 2132 2133 ctx->num_cs_dw_streamout_end = 0; 2134 2135#if 0 2136 for (i = 0; i < ctx->num_so_targets; i++) { 2137 if (!t[i]) 2138 continue; 2139 2140 uint32_t *ptr = ctx->ws->buffer_map(t[i]->filled_size->buf, ctx->cs, RADEON_USAGE_READ); 2141 printf("FILLED_SIZE%i: %u\n", i, *ptr); 2142 ctx->ws->buffer_unmap(t[i]->filled_size->buf); 2143 } 2144#endif 2145} 2146 2147void r600_context_draw_opaque_count(struct r600_context *ctx, struct r600_so_target *t) 2148{ 2149 struct radeon_winsys_cs *cs = ctx->cs; 2150 uint64_t va = r600_resource_va(&ctx->screen->screen, 2151 (void*)t->filled_size); 2152 2153 r600_need_cs_space(ctx, 14 + 21, TRUE); 2154 2155 cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); 2156 cs->buf[cs->cdw++] = (R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET - R600_CONTEXT_REG_OFFSET) >> 2; 2157 cs->buf[cs->cdw++] = 0; 2158 2159 cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); 2160 cs->buf[cs->cdw++] = (R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE - R600_CONTEXT_REG_OFFSET) >> 2; 2161 cs->buf[cs->cdw++] = t->stride_in_dw; 2162 2163 cs->buf[cs->cdw++] = PKT3(PKT3_COPY_DW, 4, 0); 2164 cs->buf[cs->cdw++] = COPY_DW_SRC_IS_MEM | COPY_DW_DST_IS_REG; 2165 cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL; /* src address lo */ 2166 cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFUL; /* src address hi */ 2167 cs->buf[cs->cdw++] = R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2; /* dst register */ 2168 cs->buf[cs->cdw++] = 0; /* unused */ 2169 2170 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 
2171 cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, t->filled_size, 2172 RADEON_USAGE_READ); 2173} 2174