r600_hw_context.c revision 621e0db71c5ddcb379171064a4f720c9cf01e888
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Jerome Glisse 25 */ 26#include "r600_hw_context_priv.h" 27#include "r600_pipe.h" 28#include "r600d.h" 29#include "util/u_memory.h" 30#include <errno.h> 31 32#define GROUP_FORCE_NEW_BLOCK 0 33 34/* Get backends mask */ 35void r600_get_backend_mask(struct r600_context *ctx) 36{ 37 struct radeon_winsys_cs *cs = ctx->cs; 38 struct r600_resource *buffer; 39 uint32_t *results; 40 unsigned num_backends = ctx->screen->info.r600_num_backends; 41 unsigned i, mask = 0; 42 43 /* if backend_map query is supported by the kernel */ 44 if (ctx->screen->info.r600_backend_map_valid) { 45 unsigned num_tile_pipes = ctx->screen->info.r600_num_tile_pipes; 46 unsigned backend_map = ctx->screen->info.r600_backend_map; 47 unsigned item_width, item_mask; 48 49 if (ctx->screen->chip_class >= EVERGREEN) { 50 item_width = 4; 51 item_mask = 0x7; 52 } else { 53 item_width = 2; 54 item_mask = 0x3; 55 } 56 57 while(num_tile_pipes--) { 58 i = backend_map & item_mask; 59 mask |= (1<<i); 60 backend_map >>= item_width; 61 } 62 if (mask != 0) { 63 ctx->backend_mask = mask; 64 return; 65 } 66 } 67 68 /* otherwise backup path for older kernels */ 69 70 /* create buffer for event data */ 71 buffer = (struct r600_resource*) 72 pipe_buffer_create(&ctx->screen->screen, PIPE_BIND_CUSTOM, 73 PIPE_USAGE_STAGING, ctx->max_db*16); 74 if (!buffer) 75 goto err; 76 77 /* initialize buffer with zeroes */ 78 results = ctx->ws->buffer_map(buffer->buf, ctx->cs, PIPE_TRANSFER_WRITE); 79 if (results) { 80 memset(results, 0, ctx->max_db * 4 * 4); 81 ctx->ws->buffer_unmap(buffer->buf); 82 83 /* emit EVENT_WRITE for ZPASS_DONE */ 84 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); 85 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); 86 cs->buf[cs->cdw++] = 0; 87 cs->buf[cs->cdw++] = 0; 88 89 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 90 cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, buffer, RADEON_USAGE_WRITE); 91 92 /* analyze results */ 93 results = ctx->ws->buffer_map(buffer->buf, ctx->cs, PIPE_TRANSFER_READ); 94 if (results) { 95 for(i = 0; i < ctx->max_db; i++) { 96 /* at least highest bit will be set if backend is used */ 97 if (results[i*4 + 1]) 98 mask |= (1<<i); 99 } 100 ctx->ws->buffer_unmap(buffer->buf); 101 } 102 } 103 104 pipe_resource_reference((struct pipe_resource**)&buffer, NULL); 105 106 if (mask != 0) { 107 ctx->backend_mask = mask; 108 return; 109 } 110 111err: 112 /* fallback to old method - set num_backends lower bits to 1 */ 113 ctx->backend_mask = (~((uint32_t)0))>>(32-num_backends); 114 return; 115} 116 117static inline void r600_context_ps_partial_flush(struct r600_context *ctx) 118{ 119 struct radeon_winsys_cs *cs = ctx->cs; 120 121 if (!(ctx->flags & R600_CONTEXT_DRAW_PENDING)) 122 return; 123 124 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); 125 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); 126 127 ctx->flags &= ~R600_CONTEXT_DRAW_PENDING; 128} 129 130void r600_init_cs(struct r600_context *ctx) 131{ 132 struct radeon_winsys_cs *cs = ctx->cs; 133 134 /* R6xx requires this packet at the start of each command buffer */ 135 if (ctx->screen->family < CHIP_RV770) { 136 cs->buf[cs->cdw++] = PKT3(PKT3_START_3D_CMDBUF, 0, 0); 137 cs->buf[cs->cdw++] = 0x00000000; 138 } 139 /* All asics require this one */ 140 cs->buf[cs->cdw++] = PKT3(PKT3_CONTEXT_CONTROL, 1, 0); 141 cs->buf[cs->cdw++] = 0x80000000; 142 cs->buf[cs->cdw++] = 0x80000000; 143 144 ctx->init_dwords = cs->cdw; 145} 146 147static void r600_init_block(struct r600_context *ctx, 148 struct r600_block *block, 149 const struct r600_reg *reg, int index, int nreg, 150 unsigned opcode, unsigned offset_base) 151{ 152 int i = index; 153 int j, n = nreg; 154 155 /* initialize block */ 156 if (opcode == PKT3_SET_RESOURCE) { 157 block->flags = BLOCK_FLAG_RESOURCE; 158 block->status |= R600_BLOCK_STATUS_RESOURCE_DIRTY; /* dirty all blocks at start */ 159 } else { 160 block->flags = 0; 161 block->status |= R600_BLOCK_STATUS_DIRTY; /* dirty all blocks at start */ 162 } 163 block->start_offset = reg[i].offset; 164 block->pm4[block->pm4_ndwords++] = PKT3(opcode, n, 0); 165 block->pm4[block->pm4_ndwords++] = (block->start_offset - offset_base) >> 2; 166 block->reg = &block->pm4[block->pm4_ndwords]; 167 block->pm4_ndwords += n; 168 block->nreg = n; 169 block->nreg_dirty = n; 170 LIST_INITHEAD(&block->list); 171 LIST_INITHEAD(&block->enable_list); 172 173 for (j = 0; j < n; j++) { 174 if (reg[i+j].flags & REG_FLAG_DIRTY_ALWAYS) { 175 block->flags |= REG_FLAG_DIRTY_ALWAYS; 176 } 177 if (reg[i+j].flags & REG_FLAG_ENABLE_ALWAYS) { 178 if (!(block->status & R600_BLOCK_STATUS_ENABLED)) { 179 block->status |= R600_BLOCK_STATUS_ENABLED; 180 LIST_ADDTAIL(&block->enable_list, &ctx->enable_list); 181 LIST_ADDTAIL(&block->list,&ctx->dirty); 182 } 183 } 184 if (reg[i+j].flags & REG_FLAG_FLUSH_CHANGE) { 185 block->flags |= REG_FLAG_FLUSH_CHANGE; 186 } 187 188 if (reg[i+j].flags & REG_FLAG_NEED_BO) { 189 block->nbo++; 190 assert(block->nbo < R600_BLOCK_MAX_BO); 191 block->pm4_bo_index[j] = block->nbo; 192 block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0, 0); 193 block->pm4[block->pm4_ndwords++] = 0x00000000; 194 if (reg[i+j].flags & REG_FLAG_RV6XX_SBU) { 195 block->reloc[block->nbo].flush_flags = 0; 196 block->reloc[block->nbo].flush_mask = 0; 197 } else { 198 block->reloc[block->nbo].flush_flags = reg[i+j].flush_flags; 199 block->reloc[block->nbo].flush_mask = reg[i+j].flush_mask; 200 } 201 block->reloc[block->nbo].bo_pm4_index = block->pm4_ndwords - 1; 202 } 203 if ((ctx->screen->family > CHIP_R600) && 204 (ctx->screen->family < CHIP_RV770) && reg[i+j].flags & REG_FLAG_RV6XX_SBU) { 205 block->pm4[block->pm4_ndwords++] = PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0); 206 block->pm4[block->pm4_ndwords++] = reg[i+j].flush_flags; 207 } 208 } 209 for (j = 0; j < n; j++) { 210 if (reg[i+j].flush_flags) { 211 block->pm4_flush_ndwords += 7; 212 } 213 } 214 /* check that we stay in limit */ 215 assert(block->pm4_ndwords < R600_BLOCK_MAX_REG); 216} 217 218int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg, 219 unsigned opcode, unsigned offset_base) 220{ 221 struct r600_block *block; 222 struct r600_range *range; 223 int offset; 224 225 for (unsigned i = 0, n = 0; i < nreg; i += n) { 226 /* ignore new block balise */ 227 if (reg[i].offset == GROUP_FORCE_NEW_BLOCK) { 228 n = 1; 229 continue; 230 } 231 232 /* ignore regs not on R600 on R600 */ 233 if ((reg[i].flags & REG_FLAG_NOT_R600) && ctx->screen->family == CHIP_R600) { 234 n = 1; 235 continue; 236 } 237 238 /* register that need relocation are in their own group */ 239 /* find number of consecutive registers */ 240 n = 0; 241 offset = reg[i].offset; 242 while (reg[i + n].offset == offset) { 243 n++; 244 offset += 4; 245 if ((n + i) >= nreg) 246 break; 247 if (n >= (R600_BLOCK_MAX_REG - 2)) 248 break; 249 } 250 251 /* allocate new block */ 252 block = calloc(1, sizeof(struct r600_block)); 253 if (block == NULL) { 254 return -ENOMEM; 255 } 256 ctx->nblocks++; 257 for (int j = 0; j < n; j++) { 258 range = &ctx->range[CTX_RANGE_ID(reg[i + j].offset)]; 259 /* create block table if it doesn't exist */ 260 if (!range->blocks) 261 range->blocks = calloc(1 << HASH_SHIFT, sizeof(void *)); 262 if (!range->blocks) 263 return -1; 264 265 range->blocks[CTX_BLOCK_ID(reg[i + j].offset)] = block; 266 } 267 268 r600_init_block(ctx, block, reg, i, n, opcode, offset_base); 269 270 } 271 return 0; 272} 273 274/* R600/R700 configuration */ 275static const struct r600_reg r600_config_reg_list[] = { 276 {R_008958_VGT_PRIMITIVE_TYPE, 0, 0, 0}, 277 {R_008C00_SQ_CONFIG, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, 278 {R_008C04_SQ_GPR_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, 279 {R_008C08_SQ_GPR_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, 280 {R_008C0C_SQ_THREAD_RESOURCE_MGMT, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, 281 {R_008C10_SQ_STACK_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, 282 {R_008C14_SQ_STACK_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, 283 {R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, 284 {R_009508_TA_CNTL_AUX, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, 285 {R_009714_VC_ENHANCE, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, 286 {R_009830_DB_DEBUG, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, 287 {R_009838_DB_WATERMARKS, REG_FLAG_ENABLE_ALWAYS | REG_FLAG_FLUSH_CHANGE, 0, 0}, 288}; 289 290static const struct r600_reg r600_ctl_const_list[] = { 291 {R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0, 0}, 292 {R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0, 0}, 293}; 294 295static const struct r600_reg r600_context_reg_list[] = { 296 {R_028350_SX_MISC, 0, 0, 0}, 297 {R_0286C8_SPI_THREAD_GROUPING, 0, 0, 0}, 298 {R_0288A8_SQ_ESGS_RING_ITEMSIZE, 0, 0, 0}, 299 {R_0288AC_SQ_GSVS_RING_ITEMSIZE, 0, 0, 0}, 300 {R_0288B0_SQ_ESTMP_RING_ITEMSIZE, 0, 0, 0}, 301 {R_0288B4_SQ_GSTMP_RING_ITEMSIZE, 0, 0, 0}, 302 {R_0288B8_SQ_VSTMP_RING_ITEMSIZE, 0, 0, 0}, 303 {R_0288BC_SQ_PSTMP_RING_ITEMSIZE, 0, 0, 0}, 304 {R_0288C0_SQ_FBUF_RING_ITEMSIZE, 0, 0, 0}, 305 {R_0288C4_SQ_REDUC_RING_ITEMSIZE, 0, 0, 0}, 306 {R_0288C8_SQ_GS_VERT_ITEMSIZE, 0, 0, 0}, 307 {R_028A10_VGT_OUTPUT_PATH_CNTL, 0, 0, 0}, 308 {R_028A14_VGT_HOS_CNTL, 0, 0, 0}, 309 {R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0, 0, 0}, 310 {R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0, 0, 0}, 311 {R_028A20_VGT_HOS_REUSE_DEPTH, 0, 0, 0}, 312 {R_028A24_VGT_GROUP_PRIM_TYPE, 0, 0, 0}, 313 {R_028A28_VGT_GROUP_FIRST_DECR, 0, 0, 0}, 314 {R_028A2C_VGT_GROUP_DECR, 0, 0, 0}, 315 {R_028A30_VGT_GROUP_VECT_0_CNTL, 0, 0, 0}, 316 {R_028A34_VGT_GROUP_VECT_1_CNTL, 0, 0, 0}, 317 {R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0, 0, 0}, 318 {R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0, 0, 0}, 319 {R_028A40_VGT_GS_MODE, 0, 0, 0}, 320 {R_028A4C_PA_SC_MODE_CNTL, 0, 0, 0}, 321 {R_028AB0_VGT_STRMOUT_EN, 0, 0, 0}, 322 {R_028AB4_VGT_REUSE_OFF, 0, 0, 0}, 323 {R_028AB8_VGT_VTX_CNT_EN, 0, 0, 0}, 324 {R_028B20_VGT_STRMOUT_BUFFER_EN, 0, 0, 0}, 325 {R_028028_DB_STENCIL_CLEAR, 0, 0, 0}, 326 {R_02802C_DB_DEPTH_CLEAR, 0, 0, 0}, 327 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 328 {R_028040_CB_COLOR0_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_COLOR(0), 0}, 329 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 330 {R_0280A0_CB_COLOR0_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, 331 {R_028060_CB_COLOR0_SIZE, 0, 0, 0}, 332 {R_028080_CB_COLOR0_VIEW, 0, 0, 0}, 333 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 334 {R_0280E0_CB_COLOR0_FRAG, REG_FLAG_NEED_BO, 0, 0}, 335 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 336 {R_0280C0_CB_COLOR0_TILE, REG_FLAG_NEED_BO, 0, 0}, 337 {R_028100_CB_COLOR0_MASK, 0, 0, 0}, 338 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 339 {R_028044_CB_COLOR1_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_COLOR(1), 0}, 340 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 341 {R_0280A4_CB_COLOR1_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, 342 {R_028064_CB_COLOR1_SIZE, 0, 0, 0}, 343 {R_028084_CB_COLOR1_VIEW, 0, 0, 0}, 344 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 345 {R_0280E4_CB_COLOR1_FRAG, REG_FLAG_NEED_BO, 0, 0}, 346 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 347 {R_0280C4_CB_COLOR1_TILE, REG_FLAG_NEED_BO, 0, 0}, 348 {R_028104_CB_COLOR1_MASK, 0, 0, 0}, 349 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 350 {R_028048_CB_COLOR2_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_COLOR(2), 0}, 351 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 352 {R_0280A8_CB_COLOR2_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, 353 {R_028068_CB_COLOR2_SIZE, 0, 0, 0}, 354 {R_028088_CB_COLOR2_VIEW, 0, 0, 0}, 355 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 356 {R_0280E8_CB_COLOR2_FRAG, REG_FLAG_NEED_BO, 0, 0}, 357 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 358 {R_0280C8_CB_COLOR2_TILE, REG_FLAG_NEED_BO, 0, 0}, 359 {R_028108_CB_COLOR2_MASK, 0, 0, 0}, 360 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 361 {R_02804C_CB_COLOR3_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_COLOR(3), 0}, 362 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 363 {R_0280AC_CB_COLOR3_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, 364 {R_02806C_CB_COLOR3_SIZE, 0, 0, 0}, 365 {R_02808C_CB_COLOR3_VIEW, 0, 0, 0}, 366 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 367 {R_0280EC_CB_COLOR3_FRAG, REG_FLAG_NEED_BO, 0, 0}, 368 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 369 {R_0280CC_CB_COLOR3_TILE, REG_FLAG_NEED_BO, 0, 0}, 370 {R_02810C_CB_COLOR3_MASK, 0, 0, 0}, 371 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 372 {R_028050_CB_COLOR4_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_COLOR(4), 0}, 373 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 374 {R_0280B0_CB_COLOR4_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, 375 {R_028070_CB_COLOR4_SIZE, 0, 0, 0}, 376 {R_028090_CB_COLOR4_VIEW, 0, 0, 0}, 377 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 378 {R_0280F0_CB_COLOR4_FRAG, REG_FLAG_NEED_BO, 0, 0}, 379 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 380 {R_0280D0_CB_COLOR4_TILE, REG_FLAG_NEED_BO, 0, 0}, 381 {R_028110_CB_COLOR4_MASK, 0, 0, 0}, 382 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 383 {R_028054_CB_COLOR5_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_COLOR(5), 0}, 384 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 385 {R_0280B4_CB_COLOR5_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, 386 {R_028074_CB_COLOR5_SIZE, 0, 0, 0}, 387 {R_028094_CB_COLOR5_VIEW, 0, 0, 0}, 388 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 389 {R_0280F4_CB_COLOR5_FRAG, REG_FLAG_NEED_BO, 0, 0}, 390 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 391 {R_0280D4_CB_COLOR5_TILE, REG_FLAG_NEED_BO, 0, 0}, 392 {R_028114_CB_COLOR5_MASK, 0, 0, 0}, 393 {R_028058_CB_COLOR6_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_COLOR(6), 0}, 394 {R_0280B8_CB_COLOR6_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, 395 {R_028078_CB_COLOR6_SIZE, 0, 0, 0}, 396 {R_028098_CB_COLOR6_VIEW, 0, 0, 0}, 397 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 398 {R_0280F8_CB_COLOR6_FRAG, REG_FLAG_NEED_BO, 0, 0}, 399 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 400 {R_0280D8_CB_COLOR6_TILE, REG_FLAG_NEED_BO, 0, 0}, 401 {R_028118_CB_COLOR6_MASK, 0, 0, 0}, 402 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 403 {R_02805C_CB_COLOR7_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_COLOR(7), 0}, 404 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 405 {R_0280BC_CB_COLOR7_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, 406 {R_02807C_CB_COLOR7_SIZE, 0, 0, 0}, 407 {R_02809C_CB_COLOR7_VIEW, 0, 0, 0}, 408 {R_0280FC_CB_COLOR7_FRAG, REG_FLAG_NEED_BO, 0, 0}, 409 {R_0280DC_CB_COLOR7_TILE, REG_FLAG_NEED_BO, 0, 0}, 410 {R_02811C_CB_COLOR7_MASK, 0, 0, 0}, 411 {R_028120_CB_CLEAR_RED, 0, 0, 0}, 412 {R_028124_CB_CLEAR_GREEN, 0, 0, 0}, 413 {R_028128_CB_CLEAR_BLUE, 0, 0, 0}, 414 {R_02812C_CB_CLEAR_ALPHA, 0, 0, 0}, 415 {R_028140_ALU_CONST_BUFFER_SIZE_PS_0, REG_FLAG_DIRTY_ALWAYS, 0, 0}, 416 {R_028144_ALU_CONST_BUFFER_SIZE_PS_1, REG_FLAG_DIRTY_ALWAYS, 0, 0}, 417 {R_028180_ALU_CONST_BUFFER_SIZE_VS_0, REG_FLAG_DIRTY_ALWAYS, 0, 0}, 418 {R_028184_ALU_CONST_BUFFER_SIZE_VS_1, REG_FLAG_DIRTY_ALWAYS, 0, 0}, 419 {R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, 420 {R_028944_ALU_CONST_CACHE_PS_1, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, 421 {R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, 422 {R_028984_ALU_CONST_CACHE_VS_1, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, 423 {R_02823C_CB_SHADER_MASK, 0, 0, 0}, 424 {R_028238_CB_TARGET_MASK, 0, 0, 0}, 425 {R_028410_SX_ALPHA_TEST_CONTROL, 0, 0, 0}, 426 {R_028414_CB_BLEND_RED, 0, 0, 0}, 427 {R_028418_CB_BLEND_GREEN, 0, 0, 0}, 428 {R_02841C_CB_BLEND_BLUE, 0, 0, 0}, 429 {R_028420_CB_BLEND_ALPHA, 0, 0, 0}, 430 {R_028424_CB_FOG_RED, 0, 0, 0}, 431 {R_028428_CB_FOG_GREEN, 0, 0, 0}, 432 {R_02842C_CB_FOG_BLUE, 0, 0, 0}, 433 {R_028430_DB_STENCILREFMASK, 0, 0, 0}, 434 {R_028434_DB_STENCILREFMASK_BF, 0, 0, 0}, 435 {R_028438_SX_ALPHA_REF, 0, 0, 0}, 436 {R_0286DC_SPI_FOG_CNTL, 0, 0, 0}, 437 {R_0286E0_SPI_FOG_FUNC_SCALE, 0, 0, 0}, 438 {R_0286E4_SPI_FOG_FUNC_BIAS, 0, 0, 0}, 439 {R_028780_CB_BLEND0_CONTROL, REG_FLAG_NOT_R600, 0, 0}, 440 {R_028784_CB_BLEND1_CONTROL, REG_FLAG_NOT_R600, 0, 0}, 441 {R_028788_CB_BLEND2_CONTROL, REG_FLAG_NOT_R600, 0, 0}, 442 {R_02878C_CB_BLEND3_CONTROL, REG_FLAG_NOT_R600, 0, 0}, 443 {R_028790_CB_BLEND4_CONTROL, REG_FLAG_NOT_R600, 0, 0}, 444 {R_028794_CB_BLEND5_CONTROL, REG_FLAG_NOT_R600, 0, 0}, 445 {R_028798_CB_BLEND6_CONTROL, REG_FLAG_NOT_R600, 0, 0}, 446 {R_02879C_CB_BLEND7_CONTROL, REG_FLAG_NOT_R600, 0, 0}, 447 {R_0287A0_CB_SHADER_CONTROL, 0, 0, 0}, 448 {R_028800_DB_DEPTH_CONTROL, 0, 0, 0}, 449 {R_028804_CB_BLEND_CONTROL, 0, 0, 0}, 450 {R_028808_CB_COLOR_CONTROL, 0, 0, 0}, 451 {R_02880C_DB_SHADER_CONTROL, 0, 0, 0}, 452 {R_028C04_PA_SC_AA_CONFIG, 0, 0, 0}, 453 {R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, 0, 0, 0}, 454 {R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX, 0, 0, 0}, 455 {R_028C30_CB_CLRCMP_CONTROL, 0, 0, 0}, 456 {R_028C34_CB_CLRCMP_SRC, 0, 0, 0}, 457 {R_028C38_CB_CLRCMP_DST, 0, 0, 0}, 458 {R_028C3C_CB_CLRCMP_MSK, 0, 0, 0}, 459 {R_028C48_PA_SC_AA_MASK, 0, 0, 0}, 460 {R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0, 0, 0}, 461 {R_028D44_DB_ALPHA_TO_MASK, 0, 0, 0}, 462 {R_02800C_DB_DEPTH_BASE, REG_FLAG_NEED_BO|REG_FLAG_RV6XX_SBU, SURFACE_BASE_UPDATE_DEPTH, 0}, 463 {R_028000_DB_DEPTH_SIZE, 0, 0, 0}, 464 {R_028004_DB_DEPTH_VIEW, 0, 0, 0}, 465 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 466 {R_028010_DB_DEPTH_INFO, REG_FLAG_NEED_BO, 0, 0}, 467 {R_028D0C_DB_RENDER_CONTROL, 0, 0, 0}, 468 {R_028D10_DB_RENDER_OVERRIDE, 0, 0, 0}, 469 {R_028D24_DB_HTILE_SURFACE, 0, 0, 0}, 470 {R_028D30_DB_PRELOAD_CONTROL, 0, 0, 0}, 471 {R_028D34_DB_PREFETCH_LIMIT, 0, 0, 0}, 472 {R_028030_PA_SC_SCREEN_SCISSOR_TL, 0, 0, 0}, 473 {R_028034_PA_SC_SCREEN_SCISSOR_BR, 0, 0, 0}, 474 {R_028200_PA_SC_WINDOW_OFFSET, 0, 0, 0}, 475 {R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0, 0}, 476 {R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0, 0}, 477 {R_02820C_PA_SC_CLIPRECT_RULE, 0, 0, 0}, 478 {R_028210_PA_SC_CLIPRECT_0_TL, 0, 0, 0}, 479 {R_028214_PA_SC_CLIPRECT_0_BR, 0, 0, 0}, 480 {R_028218_PA_SC_CLIPRECT_1_TL, 0, 0, 0}, 481 {R_02821C_PA_SC_CLIPRECT_1_BR, 0, 0, 0}, 482 {R_028220_PA_SC_CLIPRECT_2_TL, 0, 0, 0}, 483 {R_028224_PA_SC_CLIPRECT_2_BR, 0, 0, 0}, 484 {R_028228_PA_SC_CLIPRECT_3_TL, 0, 0, 0}, 485 {R_02822C_PA_SC_CLIPRECT_3_BR, 0, 0, 0}, 486 {R_028230_PA_SC_EDGERULE, 0, 0, 0}, 487 {R_028240_PA_SC_GENERIC_SCISSOR_TL, 0, 0, 0}, 488 {R_028244_PA_SC_GENERIC_SCISSOR_BR, 0, 0, 0}, 489 {R_028250_PA_SC_VPORT_SCISSOR_0_TL, 0, 0, 0}, 490 {R_028254_PA_SC_VPORT_SCISSOR_0_BR, 0, 0, 0}, 491 {R_0282D0_PA_SC_VPORT_ZMIN_0, 0, 0, 0}, 492 {R_0282D4_PA_SC_VPORT_ZMAX_0, 0, 0, 0}, 493 {R_02843C_PA_CL_VPORT_XSCALE_0, 0, 0, 0}, 494 {R_028440_PA_CL_VPORT_XOFFSET_0, 0, 0, 0}, 495 {R_028444_PA_CL_VPORT_YSCALE_0, 0, 0, 0}, 496 {R_028448_PA_CL_VPORT_YOFFSET_0, 0, 0, 0}, 497 {R_02844C_PA_CL_VPORT_ZSCALE_0, 0, 0, 0}, 498 {R_028450_PA_CL_VPORT_ZOFFSET_0, 0, 0, 0}, 499 {R_0286D4_SPI_INTERP_CONTROL_0, 0, 0, 0}, 500 {R_028810_PA_CL_CLIP_CNTL, 0, 0, 0}, 501 {R_028814_PA_SU_SC_MODE_CNTL, 0, 0, 0}, 502 {R_028818_PA_CL_VTE_CNTL, 0, 0, 0}, 503 {R_02881C_PA_CL_VS_OUT_CNTL, 0, 0, 0}, 504 {R_028820_PA_CL_NANINF_CNTL, 0, 0, 0}, 505 {R_028A00_PA_SU_POINT_SIZE, 0, 0, 0}, 506 {R_028A04_PA_SU_POINT_MINMAX, 0, 0, 0}, 507 {R_028A08_PA_SU_LINE_CNTL, 0, 0, 0}, 508 {R_028A0C_PA_SC_LINE_STIPPLE, 0, 0, 0}, 509 {R_028A48_PA_SC_MPASS_PS_CNTL, 0, 0, 0}, 510 {R_028C00_PA_SC_LINE_CNTL, 0, 0, 0}, 511 {R_028C08_PA_SU_VTX_CNTL, 0, 0, 0}, 512 {R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0, 0, 0}, 513 {R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0, 0, 0}, 514 {R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0, 0, 0}, 515 {R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0, 0, 0}, 516 {R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 0, 0, 0}, 517 {R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0, 0, 0}, 518 {R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, 0, 0, 0}, 519 {R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET, 0, 0, 0}, 520 {R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE, 0, 0, 0}, 521 {R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET, 0, 0, 0}, 522 {R_028E20_PA_CL_UCP0_X, 0, 0, 0}, 523 {R_028E24_PA_CL_UCP0_Y, 0, 0, 0}, 524 {R_028E28_PA_CL_UCP0_Z, 0, 0, 0}, 525 {R_028E2C_PA_CL_UCP0_W, 0, 0, 0}, 526 {R_028E30_PA_CL_UCP1_X, 0, 0, 0}, 527 {R_028E34_PA_CL_UCP1_Y, 0, 0, 0}, 528 {R_028E38_PA_CL_UCP1_Z, 0, 0, 0}, 529 {R_028E3C_PA_CL_UCP1_W, 0, 0, 0}, 530 {R_028E40_PA_CL_UCP2_X, 0, 0, 0}, 531 {R_028E44_PA_CL_UCP2_Y, 0, 0, 0}, 532 {R_028E48_PA_CL_UCP2_Z, 0, 0, 0}, 533 {R_028E4C_PA_CL_UCP2_W, 0, 0, 0}, 534 {R_028E50_PA_CL_UCP3_X, 0, 0, 0}, 535 {R_028E54_PA_CL_UCP3_Y, 0, 0, 0}, 536 {R_028E58_PA_CL_UCP3_Z, 0, 0, 0}, 537 {R_028E5C_PA_CL_UCP3_W, 0, 0, 0}, 538 {R_028E60_PA_CL_UCP4_X, 0, 0, 0}, 539 {R_028E64_PA_CL_UCP4_Y, 0, 0, 0}, 540 {R_028E68_PA_CL_UCP4_Z, 0, 0, 0}, 541 {R_028E6C_PA_CL_UCP4_W, 0, 0, 0}, 542 {R_028E70_PA_CL_UCP5_X, 0, 0, 0}, 543 {R_028E74_PA_CL_UCP5_Y, 0, 0, 0}, 544 {R_028E78_PA_CL_UCP5_Z, 0, 0, 0}, 545 {R_028E7C_PA_CL_UCP5_W, 0, 0, 0}, 546 {R_028380_SQ_VTX_SEMANTIC_0, 0, 0, 0}, 547 {R_028384_SQ_VTX_SEMANTIC_1, 0, 0, 0}, 548 {R_028388_SQ_VTX_SEMANTIC_2, 0, 0, 0}, 549 {R_02838C_SQ_VTX_SEMANTIC_3, 0, 0, 0}, 550 {R_028390_SQ_VTX_SEMANTIC_4, 0, 0, 0}, 551 {R_028394_SQ_VTX_SEMANTIC_5, 0, 0, 0}, 552 {R_028398_SQ_VTX_SEMANTIC_6, 0, 0, 0}, 553 {R_02839C_SQ_VTX_SEMANTIC_7, 0, 0, 0}, 554 {R_0283A0_SQ_VTX_SEMANTIC_8, 0, 0, 0}, 555 {R_0283A4_SQ_VTX_SEMANTIC_9, 0, 0, 0}, 556 {R_0283A8_SQ_VTX_SEMANTIC_10, 0, 0, 0}, 557 {R_0283AC_SQ_VTX_SEMANTIC_11, 0, 0, 0}, 558 {R_0283B0_SQ_VTX_SEMANTIC_12, 0, 0, 0}, 559 {R_0283B4_SQ_VTX_SEMANTIC_13, 0, 0, 0}, 560 {R_0283B8_SQ_VTX_SEMANTIC_14, 0, 0, 0}, 561 {R_0283BC_SQ_VTX_SEMANTIC_15, 0, 0, 0}, 562 {R_0283C0_SQ_VTX_SEMANTIC_16, 0, 0, 0}, 563 {R_0283C4_SQ_VTX_SEMANTIC_17, 0, 0, 0}, 564 {R_0283C8_SQ_VTX_SEMANTIC_18, 0, 0, 0}, 565 {R_0283CC_SQ_VTX_SEMANTIC_19, 0, 0, 0}, 566 {R_0283D0_SQ_VTX_SEMANTIC_20, 0, 0, 0}, 567 {R_0283D4_SQ_VTX_SEMANTIC_21, 0, 0, 0}, 568 {R_0283D8_SQ_VTX_SEMANTIC_22, 0, 0, 0}, 569 {R_0283DC_SQ_VTX_SEMANTIC_23, 0, 0, 0}, 570 {R_0283E0_SQ_VTX_SEMANTIC_24, 0, 0, 0}, 571 {R_0283E4_SQ_VTX_SEMANTIC_25, 0, 0, 0}, 572 {R_0283E8_SQ_VTX_SEMANTIC_26, 0, 0, 0}, 573 {R_0283EC_SQ_VTX_SEMANTIC_27, 0, 0, 0}, 574 {R_0283F0_SQ_VTX_SEMANTIC_28, 0, 0, 0}, 575 {R_0283F4_SQ_VTX_SEMANTIC_29, 0, 0, 0}, 576 {R_0283F8_SQ_VTX_SEMANTIC_30, 0, 0, 0}, 577 {R_0283FC_SQ_VTX_SEMANTIC_31, 0, 0, 0}, 578 {R_028614_SPI_VS_OUT_ID_0, 0, 0, 0}, 579 {R_028618_SPI_VS_OUT_ID_1, 0, 0, 0}, 580 {R_02861C_SPI_VS_OUT_ID_2, 0, 0, 0}, 581 {R_028620_SPI_VS_OUT_ID_3, 0, 0, 0}, 582 {R_028624_SPI_VS_OUT_ID_4, 0, 0, 0}, 583 {R_028628_SPI_VS_OUT_ID_5, 0, 0, 0}, 584 {R_02862C_SPI_VS_OUT_ID_6, 0, 0, 0}, 585 {R_028630_SPI_VS_OUT_ID_7, 0, 0, 0}, 586 {R_028634_SPI_VS_OUT_ID_8, 0, 0, 0}, 587 {R_028638_SPI_VS_OUT_ID_9, 0, 0, 0}, 588 {R_0286C4_SPI_VS_OUT_CONFIG, 0, 0, 0}, 589 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 590 {R_028858_SQ_PGM_START_VS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, 591 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 592 {R_028868_SQ_PGM_RESOURCES_VS, 0, 0, 0}, 593 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 594 {R_028894_SQ_PGM_START_FS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, 595 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 596 {R_0288A4_SQ_PGM_RESOURCES_FS, 0, 0, 0}, 597 {R_0288D0_SQ_PGM_CF_OFFSET_VS, 0, 0, 0}, 598 {R_0288DC_SQ_PGM_CF_OFFSET_FS, 0, 0, 0}, 599 {R_028644_SPI_PS_INPUT_CNTL_0, 0, 0, 0}, 600 {R_028648_SPI_PS_INPUT_CNTL_1, 0, 0, 0}, 601 {R_02864C_SPI_PS_INPUT_CNTL_2, 0, 0, 0}, 602 {R_028650_SPI_PS_INPUT_CNTL_3, 0, 0, 0}, 603 {R_028654_SPI_PS_INPUT_CNTL_4, 0, 0, 0}, 604 {R_028658_SPI_PS_INPUT_CNTL_5, 0, 0, 0}, 605 {R_02865C_SPI_PS_INPUT_CNTL_6, 0, 0, 0}, 606 {R_028660_SPI_PS_INPUT_CNTL_7, 0, 0, 0}, 607 {R_028664_SPI_PS_INPUT_CNTL_8, 0, 0, 0}, 608 {R_028668_SPI_PS_INPUT_CNTL_9, 0, 0, 0}, 609 {R_02866C_SPI_PS_INPUT_CNTL_10, 0, 0, 0}, 610 {R_028670_SPI_PS_INPUT_CNTL_11, 0, 0, 0}, 611 {R_028674_SPI_PS_INPUT_CNTL_12, 0, 0, 0}, 612 {R_028678_SPI_PS_INPUT_CNTL_13, 0, 0, 0}, 613 {R_02867C_SPI_PS_INPUT_CNTL_14, 0, 0, 0}, 614 {R_028680_SPI_PS_INPUT_CNTL_15, 0, 0, 0}, 615 {R_028684_SPI_PS_INPUT_CNTL_16, 0, 0, 0}, 616 {R_028688_SPI_PS_INPUT_CNTL_17, 0, 0, 0}, 617 {R_02868C_SPI_PS_INPUT_CNTL_18, 0, 0, 0}, 618 {R_028690_SPI_PS_INPUT_CNTL_19, 0, 0, 0}, 619 {R_028694_SPI_PS_INPUT_CNTL_20, 0, 0, 0}, 620 {R_028698_SPI_PS_INPUT_CNTL_21, 0, 0, 0}, 621 {R_02869C_SPI_PS_INPUT_CNTL_22, 0, 0, 0}, 622 {R_0286A0_SPI_PS_INPUT_CNTL_23, 0, 0, 0}, 623 {R_0286A4_SPI_PS_INPUT_CNTL_24, 0, 0, 0}, 624 {R_0286A8_SPI_PS_INPUT_CNTL_25, 0, 0, 0}, 625 {R_0286AC_SPI_PS_INPUT_CNTL_26, 0, 0, 0}, 626 {R_0286B0_SPI_PS_INPUT_CNTL_27, 0, 0, 0}, 627 {R_0286B4_SPI_PS_INPUT_CNTL_28, 0, 0, 0}, 628 {R_0286B8_SPI_PS_INPUT_CNTL_29, 0, 0, 0}, 629 {R_0286BC_SPI_PS_INPUT_CNTL_30, 0, 0, 0}, 630 {R_0286C0_SPI_PS_INPUT_CNTL_31, 0, 0, 0}, 631 {R_0286CC_SPI_PS_IN_CONTROL_0, 0, 0, 0}, 632 {R_0286D0_SPI_PS_IN_CONTROL_1, 0, 0, 0}, 633 {R_0286D8_SPI_INPUT_Z, 0, 0, 0}, 634 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 635 {R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, 636 {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, 637 {R_028850_SQ_PGM_RESOURCES_PS, 0, 0, 0}, 638 {R_028854_SQ_PGM_EXPORTS_PS, 0, 0, 0}, 639 {R_0288CC_SQ_PGM_CF_OFFSET_PS, 0, 0, 0}, 640 {R_028400_VGT_MAX_VTX_INDX, 0, 0, 0}, 641 {R_028404_VGT_MIN_VTX_INDX, 0, 0, 0}, 642 {R_028408_VGT_INDX_OFFSET, 0, 0, 0}, 643 {R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0, 0, 0}, 644 {R_028A84_VGT_PRIMITIVEID_EN, 0, 0, 0}, 645 {R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0, 0, 0}, 646 {R_028AA0_VGT_INSTANCE_STEP_RATE_0, 0, 0, 0}, 647 {R_028AA4_VGT_INSTANCE_STEP_RATE_1, 0, 0, 0}, 648}; 649 650/* SHADER RESOURCE R600/R700 */ 651int r600_resource_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride, struct r600_reg *reg, int nreg, unsigned offset_base) 652{ 653 int i; 654 struct r600_block *block; 655 range->blocks = calloc(nblocks, sizeof(struct r600_block *)); 656 if (range->blocks == NULL) 657 return -ENOMEM; 658 659 reg[0].offset += offset; 660 for (i = 0; i < nblocks; i++) { 661 block = calloc(1, sizeof(struct r600_block)); 662 if (block == NULL) { 663 return -ENOMEM; 664 } 665 ctx->nblocks++; 666 range->blocks[i] = block; 667 r600_init_block(ctx, block, reg, 0, nreg, PKT3_SET_RESOURCE, offset_base); 668 669 reg[0].offset += stride; 670 } 671 return 0; 672} 673 674 675static int r600_resource_range_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride) 676{ 677 struct r600_reg r600_shader_resource[] = { 678 {R_038000_RESOURCE0_WORD0, REG_FLAG_NEED_BO, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF}, 679 {R_038004_RESOURCE0_WORD1, REG_FLAG_NEED_BO, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF}, 680 {R_038008_RESOURCE0_WORD2, 0, 0, 0}, 681 {R_03800C_RESOURCE0_WORD3, 0, 0, 0}, 682 {R_038010_RESOURCE0_WORD4, 0, 0, 0}, 683 {R_038014_RESOURCE0_WORD5, 0, 0, 0}, 684 {R_038018_RESOURCE0_WORD6, 0, 0, 0}, 685 }; 686 unsigned nreg = Elements(r600_shader_resource); 687 688 return r600_resource_init(ctx, range, offset, nblocks, stride, r600_shader_resource, nreg, R600_RESOURCE_OFFSET); 689} 690 691/* SHADER SAMPLER R600/R700 */ 692static int r600_state_sampler_init(struct r600_context *ctx, uint32_t offset) 693{ 694 struct r600_reg r600_shader_sampler[] = { 695 {R_03C000_SQ_TEX_SAMPLER_WORD0_0, 0, 0, 0}, 696 {R_03C004_SQ_TEX_SAMPLER_WORD1_0, 0, 0, 0}, 697 {R_03C008_SQ_TEX_SAMPLER_WORD2_0, 0, 0, 0}, 698 }; 699 unsigned nreg = Elements(r600_shader_sampler); 700 701 for (int i = 0; i < nreg; i++) { 702 r600_shader_sampler[i].offset += offset; 703 } 704 return r600_context_add_block(ctx, r600_shader_sampler, nreg, PKT3_SET_SAMPLER, R600_SAMPLER_OFFSET); 705} 706 707/* SHADER SAMPLER BORDER R600/R700 */ 708static int r600_state_sampler_border_init(struct r600_context *ctx, uint32_t offset) 709{ 710 struct r600_reg r600_shader_sampler_border[] = { 711 {R_00A400_TD_PS_SAMPLER0_BORDER_RED, 0, 0, 0}, 712 {R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, 0, 0, 0}, 713 {R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, 0, 0, 0}, 714 {R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, 0, 0, 0}, 715 }; 716 unsigned nreg = Elements(r600_shader_sampler_border); 717 718 for (int i = 0; i < nreg; i++) { 719 r600_shader_sampler_border[i].offset += offset; 720 } 721 return r600_context_add_block(ctx, r600_shader_sampler_border, nreg, PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET); 722} 723 724static int r600_loop_const_init(struct r600_context *ctx, uint32_t offset) 725{ 726 unsigned nreg = 32; 727 struct r600_reg r600_loop_consts[32]; 728 int i; 729 730 for (i = 0; i < nreg; i++) { 731 r600_loop_consts[i].offset = R600_LOOP_CONST_OFFSET + ((offset + i) * 4); 732 r600_loop_consts[i].flags = REG_FLAG_DIRTY_ALWAYS; 733 r600_loop_consts[i].flush_flags = 0; 734 r600_loop_consts[i].flush_mask = 0; 735 } 736 return r600_context_add_block(ctx, r600_loop_consts, nreg, PKT3_SET_LOOP_CONST, R600_LOOP_CONST_OFFSET); 737} 738 739static void r600_free_resource_range(struct r600_context *ctx, struct r600_range *range, int nblocks) 740{ 741 struct r600_block *block; 742 int i; 743 for (i = 0; i < nblocks; i++) { 744 block = range->blocks[i]; 745 if (block) { 746 for (int k = 1; k <= block->nbo; k++) 747 pipe_resource_reference((struct pipe_resource**)&block->reloc[k].bo, NULL); 748 free(block); 749 } 750 } 751 free(range->blocks); 752 753} 754 755/* initialize */ 756void r600_context_fini(struct r600_context *ctx) 757{ 758 struct r600_block *block; 759 struct r600_range *range; 760 761 for (int i = 0; i < NUM_RANGES; i++) { 762 if (!ctx->range[i].blocks) 763 continue; 764 for (int j = 0; j < (1 << HASH_SHIFT); j++) { 765 block = ctx->range[i].blocks[j]; 766 if (block) { 767 for (int k = 0, offset = block->start_offset; k < block->nreg; k++, offset += 4) { 768 range = &ctx->range[CTX_RANGE_ID(offset)]; 769 range->blocks[CTX_BLOCK_ID(offset)] = NULL; 770 } 771 for (int k = 1; k <= block->nbo; k++) { 772 pipe_resource_reference((struct pipe_resource**)&block->reloc[k].bo, NULL); 773 } 774 free(block); 775 } 776 } 777 free(ctx->range[i].blocks); 778 } 779 r600_free_resource_range(ctx, &ctx->ps_resources, ctx->num_ps_resources); 780 r600_free_resource_range(ctx, &ctx->vs_resources, ctx->num_vs_resources); 781 r600_free_resource_range(ctx, &ctx->fs_resources, ctx->num_fs_resources); 782 free(ctx->range); 783 free(ctx->blocks); 784 free(ctx->bo); 785 ctx->ws->cs_destroy(ctx->cs); 786} 787 788static void r600_add_resource_block(struct r600_context *ctx, struct r600_range *range, int num_blocks, int *index) 789{ 790 int c = *index; 791 for (int j = 0; j < num_blocks; j++) { 792 if (!range->blocks[j]) 793 continue; 794 795 ctx->blocks[c++] = range->blocks[j]; 796 } 797 *index = c; 798} 799 800int r600_setup_block_table(struct r600_context *ctx) 801{ 802 /* setup block table */ 803 int c = 0; 804 ctx->blocks = calloc(ctx->nblocks, sizeof(void*)); 805 if (!ctx->blocks) 806 return -ENOMEM; 807 for (int i = 0; i < NUM_RANGES; i++) { 808 if (!ctx->range[i].blocks) 809 continue; 810 for (int j = 0, add; j < (1 << HASH_SHIFT); j++) { 811 if (!ctx->range[i].blocks[j]) 812 continue; 813 814 add = 1; 815 for (int k = 0; k < c; k++) { 816 if (ctx->blocks[k] == ctx->range[i].blocks[j]) { 817 add = 0; 818 break; 819 } 820 } 821 if (add) { 822 assert(c < ctx->nblocks); 823 ctx->blocks[c++] = ctx->range[i].blocks[j]; 824 j += (ctx->range[i].blocks[j]->nreg) - 1; 825 } 826 } 827 } 828 829 r600_add_resource_block(ctx, &ctx->ps_resources, ctx->num_ps_resources, &c); 830 r600_add_resource_block(ctx, &ctx->vs_resources, ctx->num_vs_resources, &c); 831 r600_add_resource_block(ctx, &ctx->fs_resources, ctx->num_fs_resources, &c); 832 return 0; 833} 834 835int r600_context_init(struct r600_context *ctx) 836{ 837 int r; 838 839 LIST_INITHEAD(&ctx->active_query_list); 840 841 /* init dirty list */ 842 LIST_INITHEAD(&ctx->dirty); 843 LIST_INITHEAD(&ctx->resource_dirty); 844 LIST_INITHEAD(&ctx->enable_list); 845 846 ctx->range = calloc(NUM_RANGES, sizeof(struct r600_range)); 847 if (!ctx->range) { 848 r = -ENOMEM; 849 goto out_err; 850 } 851 852 /* add blocks */ 853 r = r600_context_add_block(ctx, r600_config_reg_list, 854 Elements(r600_config_reg_list), PKT3_SET_CONFIG_REG, R600_CONFIG_REG_OFFSET); 855 if (r) 856 goto out_err; 857 r = r600_context_add_block(ctx, r600_context_reg_list, 858 Elements(r600_context_reg_list), PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET); 859 if (r) 860 goto out_err; 861 r = r600_context_add_block(ctx, r600_ctl_const_list, 862 Elements(r600_ctl_const_list), PKT3_SET_CTL_CONST, R600_CTL_CONST_OFFSET); 863 if (r) 864 goto out_err; 865 866 /* PS SAMPLER BORDER */ 867 for (int j = 0, offset = 0; j < 18; j++, offset += 0x10) { 868 r = r600_state_sampler_border_init(ctx, offset); 869 if (r) 870 goto out_err; 871 } 872 873 /* VS SAMPLER BORDER */ 874 for (int j = 0, offset = 0x200; j < 18; j++, offset += 0x10) { 875 r = r600_state_sampler_border_init(ctx, offset); 876 if (r) 877 goto out_err; 878 } 879 /* PS SAMPLER */ 880 for (int j = 0, offset = 0; j < 18; j++, offset += 0xC) { 881 r = r600_state_sampler_init(ctx, offset); 882 if (r) 883 goto out_err; 884 } 885 /* VS SAMPLER */ 886 for (int j = 0, offset = 0xD8; j < 18; j++, offset += 0xC) { 887 r = r600_state_sampler_init(ctx, offset); 888 if (r) 889 goto out_err; 890 } 891 892 ctx->num_ps_resources = 160; 893 ctx->num_vs_resources = 160; 894 ctx->num_fs_resources = 16; 895 r = r600_resource_range_init(ctx, &ctx->ps_resources, 0, 160, 0x1c); 896 if (r) 897 goto out_err; 898 r = r600_resource_range_init(ctx, &ctx->vs_resources, 0x1180, 160, 0x1c); 899 if (r) 900 goto out_err; 901 r = r600_resource_range_init(ctx, &ctx->fs_resources, 0x2300, 16, 0x1c); 902 if (r) 903 goto out_err; 904 905 /* PS loop const */ 906 r600_loop_const_init(ctx, 0); 907 /* VS loop const */ 908 r600_loop_const_init(ctx, 32); 909 910 r = r600_setup_block_table(ctx); 911 if (r) 912 goto out_err; 913 914 ctx->cs = ctx->ws->cs_create(ctx->ws); 915 916 /* allocate cs variables */ 917 ctx->bo = calloc(RADEON_MAX_CMDBUF_DWORDS, sizeof(void *)); 918 if (ctx->bo == NULL) { 919 r = -ENOMEM; 920 goto out_err; 921 } 922 923 r600_init_cs(ctx); 924 ctx->max_db = 4; 925 return 0; 926out_err: 927 r600_context_fini(ctx); 928 return r; 929} 930 931void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, 932 boolean count_draw_in) 933{ 934 /* The number of dwords we already used in the CS so far. */ 935 num_dw += ctx->cs->cdw; 936 937 if (count_draw_in) { 938 /* The number of dwords all the dirty states would take. */ 939 num_dw += ctx->pm4_dirty_cdwords; 940 941 /* The upper-bound of how much a draw command would take. */ 942 num_dw += R600_MAX_DRAW_CS_DWORDS; 943 } 944 945 /* Count in queries_suspend. */ 946 num_dw += ctx->num_cs_dw_queries_suspend; 947 948 /* Count in streamout_end at the end of CS. */ 949 num_dw += ctx->num_cs_dw_streamout_end; 950 951 /* Count in render_condition(NULL) at the end of CS. */ 952 if (ctx->predicate_drawing) { 953 num_dw += 3; 954 } 955 956 /* Count in framebuffer cache flushes at the end of CS. */ 957 num_dw += ctx->num_dest_buffers * 7; 958 959 /* Save 16 dwords for the fence mechanism. */ 960 num_dw += 16; 961 962 /* Flush if there's not enough space. */ 963 if (num_dw > RADEON_MAX_CMDBUF_DWORDS) { 964 r600_flush(&ctx->context, NULL, RADEON_FLUSH_ASYNC); 965 } 966} 967 968/* Flushes all surfaces */ 969void r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags) 970{ 971 struct radeon_winsys_cs *cs = ctx->cs; 972 973 r600_need_cs_space(ctx, 5, FALSE); 974 975 cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0); 976 cs->buf[cs->cdw++] = flush_flags; /* CP_COHER_CNTL */ 977 cs->buf[cs->cdw++] = 0xffffffff; /* CP_COHER_SIZE */ 978 cs->buf[cs->cdw++] = 0; /* CP_COHER_BASE */ 979 cs->buf[cs->cdw++] = 0x0000000A; /* POLL_INTERVAL */ 980} 981 982void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, 983 unsigned flush_mask, struct r600_resource *bo) 984{ 985 struct radeon_winsys_cs *cs = ctx->cs; 986 uint64_t va = 0; 987 988 /* if bo has already been flushed */ 989 if (!(~bo->cs_buf->last_flush & flush_flags)) { 990 bo->cs_buf->last_flush &= flush_mask; 991 return; 992 } 993 994 if ((ctx->screen->family < CHIP_RV770) && 995 (G_0085F0_CB_ACTION_ENA(flush_flags) || 996 G_0085F0_DB_ACTION_ENA(flush_flags))) { 997 if (ctx->flags & R600_CONTEXT_CHECK_EVENT_FLUSH) { 998 /* the rv670 seems to fail fbo-generatemipmap unless we flush the CB1 dest base ena */ 999 if ((bo->cs_buf->binding & BO_BOUND_TEXTURE) && 1000 (flush_flags & S_0085F0_CB_ACTION_ENA(1))) { 1001 if ((ctx->screen->family == CHIP_RV670) || 1002 (ctx->screen->family == CHIP_RS780) || 1003 (ctx->screen->family == CHIP_RS880)) { 1004 cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0); 1005 cs->buf[cs->cdw++] = S_0085F0_CB1_DEST_BASE_ENA(1); /* CP_COHER_CNTL */ 1006 cs->buf[cs->cdw++] = 0xffffffff; /* CP_COHER_SIZE */ 1007 cs->buf[cs->cdw++] = 0; /* CP_COHER_BASE */ 1008 cs->buf[cs->cdw++] = 0x0000000A; /* POLL_INTERVAL */ 1009 } 1010 } 1011 1012 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); 1013 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0); 1014 ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH; 1015 } 1016 } else { 1017 va = r600_resource_va(&ctx->screen->screen, (void *)bo); 1018 cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0); 1019 cs->buf[cs->cdw++] = flush_flags; 1020 cs->buf[cs->cdw++] = (bo->buf->size + 255) >> 8; 1021 cs->buf[cs->cdw++] = va >> 8; 1022 cs->buf[cs->cdw++] = 0x0000000A; 1023 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 1024 cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, bo, RADEON_USAGE_WRITE); 1025 } 1026 bo->cs_buf->last_flush = (bo->cs_buf->last_flush | flush_flags) & flush_mask; 1027} 1028 1029void r600_context_dirty_block(struct r600_context *ctx, 1030 struct r600_block *block, 1031 int dirty, int index) 1032{ 1033 if ((index + 1) > block->nreg_dirty) 1034 block->nreg_dirty = index + 1; 1035 1036 if ((dirty != (block->status & R600_BLOCK_STATUS_DIRTY)) || !(block->status & R600_BLOCK_STATUS_ENABLED)) { 1037 block->status |= R600_BLOCK_STATUS_DIRTY; 1038 ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords; 1039 if (!(block->status & R600_BLOCK_STATUS_ENABLED)) { 1040 block->status |= R600_BLOCK_STATUS_ENABLED; 1041 LIST_ADDTAIL(&block->enable_list, &ctx->enable_list); 1042 } 1043 LIST_ADDTAIL(&block->list,&ctx->dirty); 1044 1045 if (block->flags & REG_FLAG_FLUSH_CHANGE) { 1046 r600_context_ps_partial_flush(ctx); 1047 } 1048 } 1049} 1050 1051void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state) 1052{ 1053 struct r600_block *block; 1054 int dirty; 1055 for (int i = 0; i < state->nregs; i++) { 1056 unsigned id, reloc_id; 1057 struct r600_pipe_reg *reg = &state->regs[i]; 1058 1059 block = reg->block; 1060 id = reg->id; 1061 1062 dirty = block->status & R600_BLOCK_STATUS_DIRTY; 1063 1064 if (reg->value != block->reg[id]) { 1065 block->reg[id] = reg->value; 1066 dirty |= R600_BLOCK_STATUS_DIRTY; 1067 } 1068 if (block->flags & REG_FLAG_DIRTY_ALWAYS) 1069 dirty |= R600_BLOCK_STATUS_DIRTY; 1070 if (block->pm4_bo_index[id]) { 1071 /* find relocation */ 1072 reloc_id = block->pm4_bo_index[id]; 1073 pipe_resource_reference((struct pipe_resource**)&block->reloc[reloc_id].bo, ®->bo->b.b.b); 1074 block->reloc[reloc_id].bo_usage = reg->bo_usage; 1075 /* always force dirty for relocs for now */ 1076 dirty |= R600_BLOCK_STATUS_DIRTY; 1077 } 1078 1079 if (dirty) 1080 r600_context_dirty_block(ctx, block, dirty, id); 1081 } 1082} 1083 1084static void r600_context_dirty_resource_block(struct r600_context *ctx, 1085 struct r600_block *block, 1086 int dirty, int index) 1087{ 1088 block->nreg_dirty = index + 1; 1089 1090 if ((dirty != (block->status & R600_BLOCK_STATUS_RESOURCE_DIRTY)) || !(block->status & R600_BLOCK_STATUS_ENABLED)) { 1091 block->status |= R600_BLOCK_STATUS_RESOURCE_DIRTY; 1092 ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords; 1093 if (!(block->status & R600_BLOCK_STATUS_ENABLED)) { 1094 block->status |= R600_BLOCK_STATUS_ENABLED; 1095 LIST_ADDTAIL(&block->enable_list, &ctx->enable_list); 1096 } 1097 LIST_ADDTAIL(&block->list,&ctx->resource_dirty); 1098 } 1099} 1100 1101void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, struct r600_block *block) 1102{ 1103 int dirty; 1104 int num_regs = ctx->screen->chip_class >= EVERGREEN ? 8 : 7; 1105 boolean is_vertex; 1106 1107 if (state == NULL) { 1108 block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_RESOURCE_DIRTY); 1109 if (block->reloc[1].bo) 1110 block->reloc[1].bo->cs_buf->binding &= ~BO_BOUND_TEXTURE; 1111 1112 pipe_resource_reference((struct pipe_resource**)&block->reloc[1].bo, NULL); 1113 pipe_resource_reference((struct pipe_resource**)&block->reloc[2].bo, NULL); 1114 LIST_DELINIT(&block->list); 1115 LIST_DELINIT(&block->enable_list); 1116 return; 1117 } 1118 1119 is_vertex = ((state->val[num_regs-1] & 0xc0000000) == 0xc0000000); 1120 dirty = block->status & R600_BLOCK_STATUS_RESOURCE_DIRTY; 1121 1122 if (memcmp(block->reg, state->val, num_regs*4)) { 1123 memcpy(block->reg, state->val, num_regs * 4); 1124 dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; 1125 } 1126 1127 /* if no BOs on block, force dirty */ 1128 if (!block->reloc[1].bo || !block->reloc[2].bo) 1129 dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; 1130 1131 if (!dirty) { 1132 if (is_vertex) { 1133 if (block->reloc[1].bo->buf != state->bo[0]->buf) 1134 dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; 1135 } else { 1136 if ((block->reloc[1].bo->buf != state->bo[0]->buf) || 1137 (block->reloc[2].bo->buf != state->bo[1]->buf)) 1138 dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; 1139 } 1140 } 1141 1142 if (dirty) { 1143 if (is_vertex) { 1144 /* VERTEX RESOURCE, we preted there is 2 bo to relocate so 1145 * we have single case btw VERTEX & TEXTURE resource 1146 */ 1147 pipe_resource_reference((struct pipe_resource**)&block->reloc[1].bo, &state->bo[0]->b.b.b); 1148 block->reloc[1].bo_usage = state->bo_usage[0]; 1149 pipe_resource_reference((struct pipe_resource**)&block->reloc[2].bo, NULL); 1150 } else { 1151 /* TEXTURE RESOURCE */ 1152 pipe_resource_reference((struct pipe_resource**)&block->reloc[1].bo, &state->bo[0]->b.b.b); 1153 block->reloc[1].bo_usage = state->bo_usage[0]; 1154 pipe_resource_reference((struct pipe_resource**)&block->reloc[2].bo, &state->bo[1]->b.b.b); 1155 block->reloc[2].bo_usage = state->bo_usage[1]; 1156 state->bo[0]->cs_buf->binding |= BO_BOUND_TEXTURE; 1157 } 1158 1159 if (is_vertex) 1160 block->status |= R600_BLOCK_STATUS_RESOURCE_VERTEX; 1161 else 1162 block->status &= ~R600_BLOCK_STATUS_RESOURCE_VERTEX; 1163 1164 r600_context_dirty_resource_block(ctx, block, dirty, num_regs - 1); 1165 } 1166} 1167 1168void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid) 1169{ 1170 struct r600_block *block = ctx->ps_resources.blocks[rid]; 1171 1172 r600_context_pipe_state_set_resource(ctx, state, block); 1173} 1174 1175void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid) 1176{ 1177 struct r600_block *block = ctx->vs_resources.blocks[rid]; 1178 1179 r600_context_pipe_state_set_resource(ctx, state, block); 1180} 1181 1182void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid) 1183{ 1184 struct r600_block *block = ctx->fs_resources.blocks[rid]; 1185 1186 r600_context_pipe_state_set_resource(ctx, state, block); 1187} 1188 1189static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) 1190{ 1191 struct r600_range *range; 1192 struct r600_block *block; 1193 int i; 1194 int dirty; 1195 1196 range = &ctx->range[CTX_RANGE_ID(offset)]; 1197 block = range->blocks[CTX_BLOCK_ID(offset)]; 1198 if (state == NULL) { 1199 block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); 1200 LIST_DELINIT(&block->list); 1201 LIST_DELINIT(&block->enable_list); 1202 return; 1203 } 1204 dirty = block->status & R600_BLOCK_STATUS_DIRTY; 1205 for (i = 0; i < 3; i++) { 1206 if (block->reg[i] != state->regs[i].value) { 1207 block->reg[i] = state->regs[i].value; 1208 dirty |= R600_BLOCK_STATUS_DIRTY; 1209 } 1210 } 1211 1212 if (dirty) 1213 r600_context_dirty_block(ctx, block, dirty, 2); 1214} 1215 1216 1217static inline void r600_context_pipe_state_set_sampler_border(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) 1218{ 1219 struct r600_range *range; 1220 struct r600_block *block; 1221 int i; 1222 int dirty; 1223 1224 range = &ctx->range[CTX_RANGE_ID(offset)]; 1225 block = range->blocks[CTX_BLOCK_ID(offset)]; 1226 if (state == NULL) { 1227 block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); 1228 LIST_DELINIT(&block->list); 1229 LIST_DELINIT(&block->enable_list); 1230 return; 1231 } 1232 if (state->nregs <= 3) { 1233 return; 1234 } 1235 dirty = block->status & R600_BLOCK_STATUS_DIRTY; 1236 for (i = 0; i < 4; i++) { 1237 if (block->reg[i] != state->regs[i + 3].value) { 1238 block->reg[i] = state->regs[i + 3].value; 1239 dirty |= R600_BLOCK_STATUS_DIRTY; 1240 } 1241 } 1242 1243 /* We have to flush the shaders before we change the border color 1244 * registers, or previous draw commands that haven't completed yet 1245 * will end up using the new border color. */ 1246 if (dirty & R600_BLOCK_STATUS_DIRTY) 1247 r600_context_ps_partial_flush(ctx); 1248 if (dirty) 1249 r600_context_dirty_block(ctx, block, dirty, 3); 1250} 1251 1252void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id) 1253{ 1254 unsigned offset; 1255 1256 offset = 0x0003C000 + id * 0xc; 1257 r600_context_pipe_state_set_sampler(ctx, state, offset); 1258 offset = 0x0000A400 + id * 0x10; 1259 r600_context_pipe_state_set_sampler_border(ctx, state, offset); 1260} 1261 1262void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id) 1263{ 1264 unsigned offset; 1265 1266 offset = 0x0003C0D8 + id * 0xc; 1267 r600_context_pipe_state_set_sampler(ctx, state, offset); 1268 offset = 0x0000A600 + id * 0x10; 1269 r600_context_pipe_state_set_sampler_border(ctx, state, offset); 1270} 1271 1272struct r600_resource *r600_context_reg_bo(struct r600_context *ctx, unsigned offset) 1273{ 1274 struct r600_range *range; 1275 struct r600_block *block; 1276 unsigned id; 1277 1278 range = &ctx->range[CTX_RANGE_ID(offset)]; 1279 block = range->blocks[CTX_BLOCK_ID(offset)]; 1280 offset -= block->start_offset; 1281 id = block->pm4_bo_index[offset >> 2]; 1282 if (block->reloc[id].bo) { 1283 return block->reloc[id].bo; 1284 } 1285 return NULL; 1286} 1287 1288void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *block) 1289{ 1290 struct radeon_winsys_cs *cs = ctx->cs; 1291 int optional = block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS); 1292 int cp_dwords = block->pm4_ndwords, start_dword = 0; 1293 int new_dwords = 0; 1294 int nbo = block->nbo; 1295 1296 if (block->nreg_dirty == 0 && optional) { 1297 goto out; 1298 } 1299 1300 if (nbo) { 1301 ctx->flags |= R600_CONTEXT_CHECK_EVENT_FLUSH; 1302 1303 for (int j = 0; j < block->nreg; j++) { 1304 if (block->pm4_bo_index[j]) { 1305 /* find relocation */ 1306 struct r600_block_reloc *reloc = &block->reloc[block->pm4_bo_index[j]]; 1307 if (reloc->bo) { 1308 block->pm4[reloc->bo_pm4_index] = 1309 r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage); 1310 r600_context_bo_flush(ctx, 1311 reloc->flush_flags, 1312 reloc->flush_mask, 1313 reloc->bo); 1314 } else { 1315 block->pm4[reloc->bo_pm4_index] = 0; 1316 } 1317 nbo--; 1318 if (nbo == 0) 1319 break; 1320 1321 } 1322 } 1323 ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH; 1324 } 1325 1326 optional &= (block->nreg_dirty != block->nreg); 1327 if (optional) { 1328 new_dwords = block->nreg_dirty; 1329 start_dword = cs->cdw; 1330 cp_dwords = new_dwords + 2; 1331 } 1332 memcpy(&cs->buf[cs->cdw], block->pm4, cp_dwords * 4); 1333 cs->cdw += cp_dwords; 1334 1335 if (optional) { 1336 uint32_t newword; 1337 1338 newword = cs->buf[start_dword]; 1339 newword &= PKT_COUNT_C; 1340 newword |= PKT_COUNT_S(new_dwords); 1341 cs->buf[start_dword] = newword; 1342 } 1343out: 1344 block->status ^= R600_BLOCK_STATUS_DIRTY; 1345 block->nreg_dirty = 0; 1346 LIST_DELINIT(&block->list); 1347} 1348 1349void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r600_block *block) 1350{ 1351 struct radeon_winsys_cs *cs = ctx->cs; 1352 int cp_dwords = block->pm4_ndwords; 1353 int nbo = block->nbo; 1354 1355 ctx->flags |= R600_CONTEXT_CHECK_EVENT_FLUSH; 1356 1357 if (block->status & R600_BLOCK_STATUS_RESOURCE_VERTEX) { 1358 nbo = 1; 1359 cp_dwords -= 2; /* don't copy the second NOP */ 1360 } 1361 1362 for (int j = 0; j < nbo; j++) { 1363 if (block->pm4_bo_index[j]) { 1364 /* find relocation */ 1365 struct r600_block_reloc *reloc = &block->reloc[block->pm4_bo_index[j]]; 1366 block->pm4[reloc->bo_pm4_index] = 1367 r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage); 1368 r600_context_bo_flush(ctx, 1369 reloc->flush_flags, 1370 reloc->flush_mask, 1371 reloc->bo); 1372 } 1373 } 1374 ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH; 1375 1376 memcpy(&cs->buf[cs->cdw], block->pm4, cp_dwords * 4); 1377 cs->cdw += cp_dwords; 1378 1379 block->status ^= R600_BLOCK_STATUS_RESOURCE_DIRTY; 1380 block->nreg_dirty = 0; 1381 LIST_DELINIT(&block->list); 1382} 1383 1384void r600_context_flush_dest_caches(struct r600_context *ctx) 1385{ 1386 struct r600_resource *cb[8]; 1387 struct r600_resource *db; 1388 int i; 1389 1390 if (!(ctx->flags & R600_CONTEXT_DST_CACHES_DIRTY)) 1391 return; 1392 1393 db = r600_context_reg_bo(ctx, R_02800C_DB_DEPTH_BASE); 1394 cb[0] = r600_context_reg_bo(ctx, R_028040_CB_COLOR0_BASE); 1395 cb[1] = r600_context_reg_bo(ctx, R_028044_CB_COLOR1_BASE); 1396 cb[2] = r600_context_reg_bo(ctx, R_028048_CB_COLOR2_BASE); 1397 cb[3] = r600_context_reg_bo(ctx, R_02804C_CB_COLOR3_BASE); 1398 cb[4] = r600_context_reg_bo(ctx, R_028050_CB_COLOR4_BASE); 1399 cb[5] = r600_context_reg_bo(ctx, R_028054_CB_COLOR5_BASE); 1400 cb[6] = r600_context_reg_bo(ctx, R_028058_CB_COLOR6_BASE); 1401 cb[7] = r600_context_reg_bo(ctx, R_02805C_CB_COLOR7_BASE); 1402 1403 ctx->flags |= R600_CONTEXT_CHECK_EVENT_FLUSH; 1404 /* flush the color buffers */ 1405 for (i = 0; i < 8; i++) { 1406 if (!cb[i]) 1407 continue; 1408 1409 r600_context_bo_flush(ctx, 1410 (S_0085F0_CB0_DEST_BASE_ENA(1) << i) | 1411 S_0085F0_CB_ACTION_ENA(1), 1412 0, cb[i]); 1413 } 1414 if (db) { 1415 r600_context_bo_flush(ctx, S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1), 0, db); 1416 } 1417 ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH; 1418 ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY; 1419} 1420 1421void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) 1422{ 1423 struct radeon_winsys_cs *cs = ctx->cs; 1424 unsigned ndwords = 7; 1425 uint32_t *pm4; 1426 1427 if (draw->indices) { 1428 ndwords = 11; 1429 } 1430 if (ctx->num_cs_dw_queries_suspend) { 1431 if (ctx->screen->family >= CHIP_RV770) 1432 ndwords += 3; 1433 ndwords += 3; 1434 } 1435 1436 /* when increasing ndwords, bump the max limit too */ 1437 assert(ndwords <= R600_MAX_DRAW_CS_DWORDS); 1438 1439 /* queries need some special values 1440 * (this is non-zero if any query is active) */ 1441 if (ctx->num_cs_dw_queries_suspend) { 1442 if (ctx->screen->family >= CHIP_RV770) { 1443 pm4 = &cs->buf[cs->cdw]; 1444 pm4[0] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); 1445 pm4[1] = (R_028D0C_DB_RENDER_CONTROL - R600_CONTEXT_REG_OFFSET) >> 2; 1446 pm4[2] = draw->db_render_control | S_028D0C_R700_PERFECT_ZPASS_COUNTS(1); 1447 cs->cdw += 3; 1448 ndwords -= 3; 1449 } 1450 pm4 = &cs->buf[cs->cdw]; 1451 pm4[0] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); 1452 pm4[1] = (R_028D10_DB_RENDER_OVERRIDE - R600_CONTEXT_REG_OFFSET) >> 2; 1453 pm4[2] = draw->db_render_override | S_028D10_NOOP_CULL_DISABLE(1); 1454 cs->cdw += 3; 1455 ndwords -= 3; 1456 } 1457 1458 /* draw packet */ 1459 pm4 = &cs->buf[cs->cdw]; 1460 pm4[0] = PKT3(PKT3_INDEX_TYPE, 0, ctx->predicate_drawing); 1461 pm4[1] = draw->vgt_index_type; 1462 pm4[2] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing); 1463 pm4[3] = draw->vgt_num_instances; 1464 if (draw->indices) { 1465 pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); 1466 pm4[5] = draw->indices_bo_offset; 1467 pm4[6] = 0; 1468 pm4[7] = draw->vgt_num_indices; 1469 pm4[8] = draw->vgt_draw_initiator; 1470 pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); 1471 pm4[10] = r600_context_bo_reloc(ctx, draw->indices, RADEON_USAGE_READ); 1472 } else { 1473 pm4[4] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing); 1474 pm4[5] = draw->vgt_num_indices; 1475 pm4[6] = draw->vgt_draw_initiator; 1476 } 1477 cs->cdw += ndwords; 1478} 1479 1480void r600_context_flush(struct r600_context *ctx, unsigned flags) 1481{ 1482 struct radeon_winsys_cs *cs = ctx->cs; 1483 struct r600_block *enable_block = NULL; 1484 bool queries_suspended = false; 1485 bool streamout_suspended = false; 1486 1487 if (cs->cdw == ctx->init_dwords) 1488 return; 1489 1490 /* suspend queries */ 1491 if (ctx->num_cs_dw_queries_suspend) { 1492 r600_context_queries_suspend(ctx); 1493 queries_suspended = true; 1494 } 1495 1496 if (ctx->num_cs_dw_streamout_end) { 1497 r600_context_streamout_end(ctx); 1498 streamout_suspended = true; 1499 } 1500 1501 if (ctx->screen->chip_class >= EVERGREEN) 1502 evergreen_context_flush_dest_caches(ctx); 1503 else 1504 r600_context_flush_dest_caches(ctx); 1505 1506 /* partial flush is needed to avoid lockups on some chips with user fences */ 1507 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); 1508 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); 1509 1510 /* Flush the CS. */ 1511 ctx->ws->cs_flush(ctx->cs, flags); 1512 1513 /* restart */ 1514 for (int i = 0; i < ctx->creloc; i++) { 1515 ctx->bo[i]->cs_buf->last_flush = 0; 1516 pipe_resource_reference((struct pipe_resource**)&ctx->bo[i], NULL); 1517 } 1518 ctx->creloc = 0; 1519 ctx->pm4_dirty_cdwords = 0; 1520 ctx->flags = 0; 1521 1522 r600_init_cs(ctx); 1523 1524 if (streamout_suspended) { 1525 ctx->streamout_start = TRUE; 1526 ctx->streamout_append_bitmask = ~0; 1527 } 1528 1529 /* resume queries */ 1530 if (queries_suspended) { 1531 r600_context_queries_resume(ctx); 1532 } 1533 1534 /* set all valid group as dirty so they get reemited on 1535 * next draw command 1536 */ 1537 LIST_FOR_EACH_ENTRY(enable_block, &ctx->enable_list, enable_list) { 1538 if (!(enable_block->flags & BLOCK_FLAG_RESOURCE)) { 1539 if(!(enable_block->status & R600_BLOCK_STATUS_DIRTY)) { 1540 LIST_ADDTAIL(&enable_block->list,&ctx->dirty); 1541 enable_block->status |= R600_BLOCK_STATUS_DIRTY; 1542 } 1543 } else { 1544 if(!(enable_block->status & R600_BLOCK_STATUS_RESOURCE_DIRTY)) { 1545 LIST_ADDTAIL(&enable_block->list,&ctx->resource_dirty); 1546 enable_block->status |= R600_BLOCK_STATUS_RESOURCE_DIRTY; 1547 } 1548 } 1549 ctx->pm4_dirty_cdwords += enable_block->pm4_ndwords + 1550 enable_block->pm4_flush_ndwords; 1551 enable_block->nreg_dirty = enable_block->nreg; 1552 } 1553} 1554 1555void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fence_bo, unsigned offset, unsigned value) 1556{ 1557 struct radeon_winsys_cs *cs = ctx->cs; 1558 uint64_t va; 1559 1560 r600_need_cs_space(ctx, 10, FALSE); 1561 1562 va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo); 1563 va = va + (offset << 2); 1564 1565 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); 1566 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); 1567 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); 1568 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); 1569 cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL; /* ADDRESS_LO */ 1570 /* DATA_SEL | INT_EN | ADDRESS_HI */ 1571 cs->buf[cs->cdw++] = (1 << 29) | (0 << 24) | ((va >> 32UL) & 0xFF); 1572 cs->buf[cs->cdw++] = value; /* DATA_LO */ 1573 cs->buf[cs->cdw++] = 0; /* DATA_HI */ 1574 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 1575 cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, fence_bo, RADEON_USAGE_WRITE); 1576} 1577 1578static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index, 1579 bool test_status_bit) 1580{ 1581 uint32_t *current_result = (uint32_t*)map; 1582 uint64_t start, end; 1583 1584 start = (uint64_t)current_result[start_index] | 1585 (uint64_t)current_result[start_index+1] << 32; 1586 end = (uint64_t)current_result[end_index] | 1587 (uint64_t)current_result[end_index+1] << 32; 1588 1589 if (!test_status_bit || 1590 ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))) { 1591 return end - start; 1592 } 1593 return 0; 1594} 1595 1596static boolean r600_query_result(struct r600_context *ctx, struct r600_query *query, boolean wait) 1597{ 1598 unsigned results_base = query->results_start; 1599 char *map; 1600 1601 map = ctx->ws->buffer_map(query->buffer->buf, ctx->cs, 1602 PIPE_TRANSFER_READ | 1603 (wait ? 0 : PIPE_TRANSFER_DONTBLOCK)); 1604 if (!map) 1605 return FALSE; 1606 1607 /* count all results across all data blocks */ 1608 switch (query->type) { 1609 case PIPE_QUERY_OCCLUSION_COUNTER: 1610 while (results_base != query->results_end) { 1611 query->result.u64 += 1612 r600_query_read_result(map + results_base, 0, 2, true); 1613 results_base = (results_base + 16) % query->buffer->b.b.b.width0; 1614 } 1615 break; 1616 case PIPE_QUERY_OCCLUSION_PREDICATE: 1617 while (results_base != query->results_end) { 1618 query->result.b = query->result.b || 1619 r600_query_read_result(map + results_base, 0, 2, true) != 0; 1620 results_base = (results_base + 16) % query->buffer->b.b.b.width0; 1621 } 1622 break; 1623 case PIPE_QUERY_TIME_ELAPSED: 1624 while (results_base != query->results_end) { 1625 query->result.u64 += 1626 r600_query_read_result(map + results_base, 0, 2, false); 1627 results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0; 1628 } 1629 break; 1630 case PIPE_QUERY_PRIMITIVES_EMITTED: 1631 /* SAMPLE_STREAMOUTSTATS stores this structure: 1632 * { 1633 * u64 NumPrimitivesWritten; 1634 * u64 PrimitiveStorageNeeded; 1635 * } 1636 * We only need NumPrimitivesWritten here. */ 1637 while (results_base != query->results_end) { 1638 query->result.u64 += 1639 r600_query_read_result(map + results_base, 2, 6, true); 1640 results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0; 1641 } 1642 break; 1643 case PIPE_QUERY_PRIMITIVES_GENERATED: 1644 /* Here we read PrimitiveStorageNeeded. */ 1645 while (results_base != query->results_end) { 1646 query->result.u64 += 1647 r600_query_read_result(map + results_base, 0, 4, true); 1648 results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0; 1649 } 1650 break; 1651 case PIPE_QUERY_SO_STATISTICS: 1652 while (results_base != query->results_end) { 1653 query->result.so.num_primitives_written += 1654 r600_query_read_result(map + results_base, 2, 6, true); 1655 query->result.so.primitives_storage_needed += 1656 r600_query_read_result(map + results_base, 0, 4, true); 1657 results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0; 1658 } 1659 break; 1660 case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 1661 while (results_base != query->results_end) { 1662 query->result.b = query->result.b || 1663 r600_query_read_result(map + results_base, 2, 6, true) != 1664 r600_query_read_result(map + results_base, 0, 4, true); 1665 results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0; 1666 } 1667 break; 1668 default: 1669 assert(0); 1670 } 1671 1672 query->results_start = query->results_end; 1673 ctx->ws->buffer_unmap(query->buffer->buf); 1674 return TRUE; 1675} 1676 1677void r600_query_begin(struct r600_context *ctx, struct r600_query *query) 1678{ 1679 struct radeon_winsys_cs *cs = ctx->cs; 1680 unsigned new_results_end, i; 1681 uint32_t *results; 1682 uint64_t va; 1683 1684 r600_need_cs_space(ctx, query->num_cs_dw * 2, TRUE); 1685 1686 new_results_end = (query->results_end + query->result_size) % query->buffer->b.b.b.width0; 1687 1688 /* collect current results if query buffer is full */ 1689 if (new_results_end == query->results_start) { 1690 r600_query_result(ctx, query, TRUE); 1691 } 1692 1693 switch (query->type) { 1694 case PIPE_QUERY_OCCLUSION_COUNTER: 1695 case PIPE_QUERY_OCCLUSION_PREDICATE: 1696 results = ctx->ws->buffer_map(query->buffer->buf, ctx->cs, PIPE_TRANSFER_WRITE); 1697 if (results) { 1698 results = (uint32_t*)((char*)results + query->results_end); 1699 memset(results, 0, query->result_size); 1700 1701 /* Set top bits for unused backends */ 1702 for (i = 0; i < ctx->max_db; i++) { 1703 if (!(ctx->backend_mask & (1<<i))) { 1704 results[(i * 4)+1] = 0x80000000; 1705 results[(i * 4)+3] = 0x80000000; 1706 } 1707 } 1708 ctx->ws->buffer_unmap(query->buffer->buf); 1709 } 1710 break; 1711 case PIPE_QUERY_TIME_ELAPSED: 1712 break; 1713 case PIPE_QUERY_PRIMITIVES_EMITTED: 1714 case PIPE_QUERY_PRIMITIVES_GENERATED: 1715 case PIPE_QUERY_SO_STATISTICS: 1716 case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 1717 results = ctx->ws->buffer_map(query->buffer->buf, ctx->cs, PIPE_TRANSFER_WRITE); 1718 results = (uint32_t*)((char*)results + query->results_end); 1719 memset(results, 0, query->result_size); 1720 ctx->ws->buffer_unmap(query->buffer->buf); 1721 break; 1722 default: 1723 assert(0); 1724 } 1725 1726 /* emit begin query */ 1727 va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer); 1728 va += query->results_end; 1729 1730 switch (query->type) { 1731 case PIPE_QUERY_OCCLUSION_COUNTER: 1732 case PIPE_QUERY_OCCLUSION_PREDICATE: 1733 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); 1734 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); 1735 cs->buf[cs->cdw++] = va; 1736 cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; 1737 break; 1738 case PIPE_QUERY_PRIMITIVES_EMITTED: 1739 case PIPE_QUERY_PRIMITIVES_GENERATED: 1740 case PIPE_QUERY_SO_STATISTICS: 1741 case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 1742 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); 1743 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3); 1744 cs->buf[cs->cdw++] = query->results_end; 1745 cs->buf[cs->cdw++] = 0; 1746 break; 1747 case PIPE_QUERY_TIME_ELAPSED: 1748 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); 1749 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); 1750 cs->buf[cs->cdw++] = va; 1751 cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF); 1752 cs->buf[cs->cdw++] = 0; 1753 cs->buf[cs->cdw++] = 0; 1754 break; 1755 default: 1756 assert(0); 1757 } 1758 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 1759 cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE); 1760 1761 ctx->num_cs_dw_queries_suspend += query->num_cs_dw; 1762} 1763 1764void r600_query_end(struct r600_context *ctx, struct r600_query *query) 1765{ 1766 struct radeon_winsys_cs *cs = ctx->cs; 1767 uint64_t va; 1768 1769 va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer); 1770 /* emit end query */ 1771 switch (query->type) { 1772 case PIPE_QUERY_OCCLUSION_COUNTER: 1773 case PIPE_QUERY_OCCLUSION_PREDICATE: 1774 va += query->results_end + 8; 1775 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); 1776 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); 1777 cs->buf[cs->cdw++] = va; 1778 cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; 1779 break; 1780 case PIPE_QUERY_PRIMITIVES_EMITTED: 1781 case PIPE_QUERY_PRIMITIVES_GENERATED: 1782 case PIPE_QUERY_SO_STATISTICS: 1783 case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 1784 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); 1785 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3); 1786 cs->buf[cs->cdw++] = query->results_end + query->result_size/2; 1787 cs->buf[cs->cdw++] = 0; 1788 break; 1789 case PIPE_QUERY_TIME_ELAPSED: 1790 va += query->results_end + query->result_size/2; 1791 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); 1792 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); 1793 cs->buf[cs->cdw++] = va; 1794 cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF); 1795 cs->buf[cs->cdw++] = 0; 1796 cs->buf[cs->cdw++] = 0; 1797 break; 1798 default: 1799 assert(0); 1800 } 1801 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 1802 cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE); 1803 1804 query->results_end = (query->results_end + query->result_size) % query->buffer->b.b.b.width0; 1805 ctx->num_cs_dw_queries_suspend -= query->num_cs_dw; 1806} 1807 1808void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation, 1809 int flag_wait) 1810{ 1811 struct radeon_winsys_cs *cs = ctx->cs; 1812 uint64_t va; 1813 1814 if (operation == PREDICATION_OP_CLEAR) { 1815 r600_need_cs_space(ctx, 3, FALSE); 1816 1817 cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0); 1818 cs->buf[cs->cdw++] = 0; 1819 cs->buf[cs->cdw++] = PRED_OP(PREDICATION_OP_CLEAR); 1820 } else { 1821 unsigned results_base = query->results_start; 1822 unsigned count; 1823 uint32_t op; 1824 1825 /* find count of the query data blocks */ 1826 count = (query->buffer->b.b.b.width0 + query->results_end - query->results_start) % query->buffer->b.b.b.width0; 1827 count /= query->result_size; 1828 1829 r600_need_cs_space(ctx, 5 * count, TRUE); 1830 1831 op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE | 1832 (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW); 1833 va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer); 1834 1835 /* emit predicate packets for all data blocks */ 1836 while (results_base != query->results_end) { 1837 cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0); 1838 cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL; 1839 cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF); 1840 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 1841 cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer, 1842 RADEON_USAGE_READ); 1843 results_base = (results_base + query->result_size) % query->buffer->b.b.b.width0; 1844 1845 /* set CONTINUE bit for all packets except the first */ 1846 op |= PREDICATION_CONTINUE; 1847 } 1848 } 1849} 1850 1851struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type) 1852{ 1853 struct r600_query *query; 1854 unsigned buffer_size = 4096; 1855 1856 query = CALLOC_STRUCT(r600_query); 1857 if (query == NULL) 1858 return NULL; 1859 1860 query->type = query_type; 1861 1862 switch (query_type) { 1863 case PIPE_QUERY_OCCLUSION_COUNTER: 1864 case PIPE_QUERY_OCCLUSION_PREDICATE: 1865 query->result_size = 16 * ctx->max_db; 1866 query->num_cs_dw = 6; 1867 break; 1868 case PIPE_QUERY_TIME_ELAPSED: 1869 query->result_size = 16; 1870 query->num_cs_dw = 8; 1871 break; 1872 case PIPE_QUERY_PRIMITIVES_EMITTED: 1873 case PIPE_QUERY_PRIMITIVES_GENERATED: 1874 case PIPE_QUERY_SO_STATISTICS: 1875 case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 1876 /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ 1877 query->result_size = 32; 1878 query->num_cs_dw = 6; 1879 break; 1880 default: 1881 assert(0); 1882 FREE(query); 1883 return NULL; 1884 } 1885 1886 /* adjust buffer size to simplify offsets wrapping math */ 1887 buffer_size -= buffer_size % query->result_size; 1888 1889 /* Queries are normally read by the CPU after 1890 * being written by the gpu, hence staging is probably a good 1891 * usage pattern. 1892 */ 1893 query->buffer = (struct r600_resource*) 1894 pipe_buffer_create(&ctx->screen->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_STAGING, buffer_size); 1895 if (!query->buffer) { 1896 FREE(query); 1897 return NULL; 1898 } 1899 return query; 1900} 1901 1902void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query) 1903{ 1904 pipe_resource_reference((struct pipe_resource**)&query->buffer, NULL); 1905 free(query); 1906} 1907 1908boolean r600_context_query_result(struct r600_context *ctx, 1909 struct r600_query *query, 1910 boolean wait, void *vresult) 1911{ 1912 boolean *result_b = (boolean*)vresult; 1913 uint64_t *result_u64 = (uint64_t*)vresult; 1914 struct pipe_query_data_so_statistics *result_so = 1915 (struct pipe_query_data_so_statistics*)vresult; 1916 1917 if (!r600_query_result(ctx, query, wait)) 1918 return FALSE; 1919 1920 switch (query->type) { 1921 case PIPE_QUERY_OCCLUSION_COUNTER: 1922 case PIPE_QUERY_PRIMITIVES_EMITTED: 1923 case PIPE_QUERY_PRIMITIVES_GENERATED: 1924 *result_u64 = query->result.u64; 1925 break; 1926 case PIPE_QUERY_OCCLUSION_PREDICATE: 1927 case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 1928 *result_b = query->result.b; 1929 break; 1930 case PIPE_QUERY_TIME_ELAPSED: 1931 *result_u64 = (1000000 * query->result.u64) / ctx->screen->info.r600_clock_crystal_freq; 1932 break; 1933 case PIPE_QUERY_SO_STATISTICS: 1934 *result_so = query->result.so; 1935 break; 1936 default: 1937 assert(0); 1938 } 1939 return TRUE; 1940} 1941 1942void r600_context_queries_suspend(struct r600_context *ctx) 1943{ 1944 struct r600_query *query; 1945 1946 LIST_FOR_EACH_ENTRY(query, &ctx->active_query_list, list) { 1947 r600_query_end(ctx, query); 1948 } 1949 assert(ctx->num_cs_dw_queries_suspend == 0); 1950} 1951 1952void r600_context_queries_resume(struct r600_context *ctx) 1953{ 1954 struct r600_query *query; 1955 1956 assert(ctx->num_cs_dw_queries_suspend == 0); 1957 1958 LIST_FOR_EACH_ENTRY(query, &ctx->active_query_list, list) { 1959 r600_query_begin(ctx, query); 1960 } 1961} 1962 1963static void r600_flush_vgt_streamout(struct r600_context *ctx) 1964{ 1965 struct radeon_winsys_cs *cs = ctx->cs; 1966 1967 cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0); 1968 cs->buf[cs->cdw++] = (R_008490_CP_STRMOUT_CNTL - R600_CONFIG_REG_OFFSET) >> 2; 1969 cs->buf[cs->cdw++] = 0; 1970 1971 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); 1972 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0); 1973 1974 cs->buf[cs->cdw++] = PKT3(PKT3_WAIT_REG_MEM, 5, 0); 1975 cs->buf[cs->cdw++] = WAIT_REG_MEM_EQUAL; /* wait until the register is equal to the reference value */ 1976 cs->buf[cs->cdw++] = R_008490_CP_STRMOUT_CNTL >> 2; /* register */ 1977 cs->buf[cs->cdw++] = 0; 1978 cs->buf[cs->cdw++] = S_008490_OFFSET_UPDATE_DONE(1); /* reference value */ 1979 cs->buf[cs->cdw++] = S_008490_OFFSET_UPDATE_DONE(1); /* mask */ 1980 cs->buf[cs->cdw++] = 4; /* poll interval */ 1981} 1982 1983static void r600_set_streamout_enable(struct r600_context *ctx, unsigned buffer_enable_bit) 1984{ 1985 struct radeon_winsys_cs *cs = ctx->cs; 1986 1987 if (buffer_enable_bit) { 1988 cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); 1989 cs->buf[cs->cdw++] = (R_028AB0_VGT_STRMOUT_EN - R600_CONTEXT_REG_OFFSET) >> 2; 1990 cs->buf[cs->cdw++] = S_028AB0_STREAMOUT(1); 1991 1992 cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); 1993 cs->buf[cs->cdw++] = (R_028B20_VGT_STRMOUT_BUFFER_EN - R600_CONTEXT_REG_OFFSET) >> 2; 1994 cs->buf[cs->cdw++] = buffer_enable_bit; 1995 } else { 1996 cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); 1997 cs->buf[cs->cdw++] = (R_028AB0_VGT_STRMOUT_EN - R600_CONTEXT_REG_OFFSET) >> 2; 1998 cs->buf[cs->cdw++] = S_028AB0_STREAMOUT(0); 1999 } 2000} 2001 2002void r600_context_streamout_begin(struct r600_context *ctx) 2003{ 2004 struct radeon_winsys_cs *cs = ctx->cs; 2005 struct r600_so_target **t = ctx->so_targets; 2006 unsigned *stride_in_dw = ctx->vs_so_stride_in_dw; 2007 unsigned buffer_en, i, update_flags = 0; 2008 uint64_t va; 2009 2010 buffer_en = (ctx->num_so_targets >= 1 && t[0] ? 1 : 0) | 2011 (ctx->num_so_targets >= 2 && t[1] ? 2 : 0) | 2012 (ctx->num_so_targets >= 3 && t[2] ? 4 : 0) | 2013 (ctx->num_so_targets >= 4 && t[3] ? 8 : 0); 2014 2015 ctx->num_cs_dw_streamout_end = 2016 12 + /* flush_vgt_streamout */ 2017 util_bitcount(buffer_en) * 8 + 2018 8; 2019 2020 r600_need_cs_space(ctx, 2021 12 + /* flush_vgt_streamout */ 2022 6 + /* enables */ 2023 util_bitcount(buffer_en & ctx->streamout_append_bitmask) * 8 + 2024 util_bitcount(buffer_en & ~ctx->streamout_append_bitmask) * 6 + 2025 (ctx->screen->family > CHIP_R600 && ctx->screen->family < CHIP_RV770 ? 2 : 0) + 2026 ctx->num_cs_dw_streamout_end, TRUE); 2027 2028 if (ctx->screen->chip_class >= EVERGREEN) { 2029 evergreen_flush_vgt_streamout(ctx); 2030 evergreen_set_streamout_enable(ctx, buffer_en); 2031 } else { 2032 r600_flush_vgt_streamout(ctx); 2033 r600_set_streamout_enable(ctx, buffer_en); 2034 } 2035 2036 for (i = 0; i < ctx->num_so_targets; i++) { 2037 if (t[i]) { 2038 t[i]->stride_in_dw = stride_in_dw[i]; 2039 t[i]->so_index = i; 2040 va = r600_resource_va(&ctx->screen->screen, 2041 (void*)t[i]->b.buffer); 2042 2043 update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i); 2044 2045 cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 3, 0); 2046 cs->buf[cs->cdw++] = (R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 2047 16*i - R600_CONTEXT_REG_OFFSET) >> 2; 2048 cs->buf[cs->cdw++] = (t[i]->b.buffer_offset + 2049 t[i]->b.buffer_size) >> 2; /* BUFFER_SIZE (in DW) */ 2050 cs->buf[cs->cdw++] = stride_in_dw[i]; /* VTX_STRIDE (in DW) */ 2051 cs->buf[cs->cdw++] = va >> 8; /* BUFFER_BASE */ 2052 2053 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 2054 cs->buf[cs->cdw++] = 2055 r600_context_bo_reloc(ctx, r600_resource(t[i]->b.buffer), 2056 RADEON_USAGE_WRITE); 2057 2058 if (ctx->streamout_append_bitmask & (1 << i)) { 2059 va = r600_resource_va(&ctx->screen->screen, 2060 (void*)t[i]->filled_size); 2061 /* Append. */ 2062 cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0); 2063 cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) | 2064 STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM); /* control */ 2065 cs->buf[cs->cdw++] = 0; /* unused */ 2066 cs->buf[cs->cdw++] = 0; /* unused */ 2067 cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL; /* src address lo */ 2068 cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFUL; /* src address hi */ 2069 2070 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 2071 cs->buf[cs->cdw++] = 2072 r600_context_bo_reloc(ctx, t[i]->filled_size, 2073 RADEON_USAGE_READ); 2074 } else { 2075 /* Start from the beginning. */ 2076 cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0); 2077 cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) | 2078 STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET); /* control */ 2079 cs->buf[cs->cdw++] = 0; /* unused */ 2080 cs->buf[cs->cdw++] = 0; /* unused */ 2081 cs->buf[cs->cdw++] = t[i]->b.buffer_offset >> 2; /* buffer offset in DW */ 2082 cs->buf[cs->cdw++] = 0; /* unused */ 2083 } 2084 } 2085 } 2086 2087 if (ctx->screen->family > CHIP_R600 && ctx->screen->family < CHIP_RV770) { 2088 cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0); 2089 cs->buf[cs->cdw++] = update_flags; 2090 } 2091} 2092 2093void r600_context_streamout_end(struct r600_context *ctx) 2094{ 2095 struct radeon_winsys_cs *cs = ctx->cs; 2096 struct r600_so_target **t = ctx->so_targets; 2097 unsigned i, flush_flags = 0; 2098 uint64_t va; 2099 2100 if (ctx->screen->chip_class >= EVERGREEN) { 2101 evergreen_flush_vgt_streamout(ctx); 2102 } else { 2103 r600_flush_vgt_streamout(ctx); 2104 } 2105 2106 for (i = 0; i < ctx->num_so_targets; i++) { 2107 if (t[i]) { 2108 va = r600_resource_va(&ctx->screen->screen, 2109 (void*)t[i]->filled_size); 2110 cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0); 2111 cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) | 2112 STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) | 2113 STRMOUT_STORE_BUFFER_FILLED_SIZE; /* control */ 2114 cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL; /* dst address lo */ 2115 cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFUL; /* dst address hi */ 2116 cs->buf[cs->cdw++] = 0; /* unused */ 2117 cs->buf[cs->cdw++] = 0; /* unused */ 2118 2119 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 2120 cs->buf[cs->cdw++] = 2121 r600_context_bo_reloc(ctx, t[i]->filled_size, 2122 RADEON_USAGE_WRITE); 2123 2124 flush_flags |= S_0085F0_SO0_DEST_BASE_ENA(1) << i; 2125 } 2126 } 2127 2128 if (ctx->screen->chip_class >= EVERGREEN) { 2129 evergreen_set_streamout_enable(ctx, 0); 2130 } else { 2131 r600_set_streamout_enable(ctx, 0); 2132 } 2133 2134 if (ctx->screen->family < CHIP_RV770) { 2135 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); 2136 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0); 2137 } else { 2138 cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0); 2139 cs->buf[cs->cdw++] = flush_flags; /* CP_COHER_CNTL */ 2140 cs->buf[cs->cdw++] = 0xffffffff; /* CP_COHER_SIZE */ 2141 cs->buf[cs->cdw++] = 0; /* CP_COHER_BASE */ 2142 cs->buf[cs->cdw++] = 0x0000000A; /* POLL_INTERVAL */ 2143 } 2144 2145 ctx->num_cs_dw_streamout_end = 0; 2146 2147#if 0 2148 for (i = 0; i < ctx->num_so_targets; i++) { 2149 if (!t[i]) 2150 continue; 2151 2152 uint32_t *ptr = ctx->ws->buffer_map(t[i]->filled_size->buf, ctx->cs, RADEON_USAGE_READ); 2153 printf("FILLED_SIZE%i: %u\n", i, *ptr); 2154 ctx->ws->buffer_unmap(t[i]->filled_size->buf); 2155 } 2156#endif 2157} 2158 2159void r600_context_draw_opaque_count(struct r600_context *ctx, struct r600_so_target *t) 2160{ 2161 struct radeon_winsys_cs *cs = ctx->cs; 2162 uint64_t va = r600_resource_va(&ctx->screen->screen, 2163 (void*)t->filled_size); 2164 2165 r600_need_cs_space(ctx, 14 + 21, TRUE); 2166 2167 cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); 2168 cs->buf[cs->cdw++] = (R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET - R600_CONTEXT_REG_OFFSET) >> 2; 2169 cs->buf[cs->cdw++] = 0; 2170 2171 cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0); 2172 cs->buf[cs->cdw++] = (R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE - R600_CONTEXT_REG_OFFSET) >> 2; 2173 cs->buf[cs->cdw++] = t->stride_in_dw; 2174 2175 cs->buf[cs->cdw++] = PKT3(PKT3_COPY_DW, 4, 0); 2176 cs->buf[cs->cdw++] = COPY_DW_SRC_IS_MEM | COPY_DW_DST_IS_REG; 2177 cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL; /* src address lo */ 2178 cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFUL; /* src address hi */ 2179 cs->buf[cs->cdw++] = R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2; /* dst register */ 2180 cs->buf[cs->cdw++] = 0; /* unused */ 2181 2182 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 2183 cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, t->filled_size, 2184 RADEON_USAGE_READ); 2185} 2186