/* r600_query.c — Mesa Gallium r600 driver, revision 83667acfd9feed932f6864092382e752466975ed */
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "r600_pipe.h" 24#include "r600d.h" 25#include "util/u_memory.h" 26#include "r600_hw_context_priv.h" 27 28static struct r600_resource *r600_new_query_buffer(struct r600_context *ctx, unsigned type) 29{ 30 unsigned j, i, num_results, buf_size = 4096; 31 uint32_t *results; 32 /* Queries are normally read by the CPU after 33 * being written by the gpu, hence staging is probably a good 34 * usage pattern. 
35 */ 36 struct r600_resource *buf = (struct r600_resource*) 37 pipe_buffer_create(&ctx->screen->screen, PIPE_BIND_CUSTOM, 38 PIPE_USAGE_STAGING, buf_size); 39 40 switch (type) { 41 case PIPE_QUERY_OCCLUSION_COUNTER: 42 case PIPE_QUERY_OCCLUSION_PREDICATE: 43 results = ctx->ws->buffer_map(buf->buf, ctx->cs, PIPE_TRANSFER_WRITE); 44 memset(results, 0, buf_size); 45 46 /* Set top bits for unused backends. */ 47 num_results = buf_size / (16 * ctx->max_db); 48 for (j = 0; j < num_results; j++) { 49 for (i = 0; i < ctx->max_db; i++) { 50 if (!(ctx->backend_mask & (1<<i))) { 51 results[(i * 4)+1] = 0x80000000; 52 results[(i * 4)+3] = 0x80000000; 53 } 54 } 55 results += 4 * ctx->max_db; 56 } 57 ctx->ws->buffer_unmap(buf->buf); 58 break; 59 case PIPE_QUERY_TIME_ELAPSED: 60 break; 61 case PIPE_QUERY_PRIMITIVES_EMITTED: 62 case PIPE_QUERY_PRIMITIVES_GENERATED: 63 case PIPE_QUERY_SO_STATISTICS: 64 case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 65 results = ctx->ws->buffer_map(buf->buf, ctx->cs, PIPE_TRANSFER_WRITE); 66 memset(results, 0, buf_size); 67 ctx->ws->buffer_unmap(buf->buf); 68 break; 69 default: 70 assert(0); 71 } 72 return buf; 73} 74 75static void r600_emit_query_begin(struct r600_context *ctx, struct r600_query *query) 76{ 77 struct radeon_winsys_cs *cs = ctx->cs; 78 uint64_t va; 79 80 r600_need_cs_space(ctx, query->num_cs_dw * 2, TRUE); 81 82 /* Get a new query buffer if needed. 
*/ 83 if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.b.width0) { 84 struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer); 85 *qbuf = query->buffer; 86 query->buffer.buf = r600_new_query_buffer(ctx, query->type); 87 query->buffer.results_end = 0; 88 query->buffer.previous = qbuf; 89 } 90 91 /* emit begin query */ 92 va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer.buf); 93 va += query->buffer.results_end; 94 95 switch (query->type) { 96 case PIPE_QUERY_OCCLUSION_COUNTER: 97 case PIPE_QUERY_OCCLUSION_PREDICATE: 98 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); 99 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); 100 cs->buf[cs->cdw++] = va; 101 cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; 102 break; 103 case PIPE_QUERY_PRIMITIVES_EMITTED: 104 case PIPE_QUERY_PRIMITIVES_GENERATED: 105 case PIPE_QUERY_SO_STATISTICS: 106 case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 107 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); 108 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3); 109 cs->buf[cs->cdw++] = va; 110 cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; 111 break; 112 case PIPE_QUERY_TIME_ELAPSED: 113 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); 114 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); 115 cs->buf[cs->cdw++] = va; 116 cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF); 117 cs->buf[cs->cdw++] = 0; 118 cs->buf[cs->cdw++] = 0; 119 break; 120 default: 121 assert(0); 122 } 123 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 124 cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer.buf, RADEON_USAGE_WRITE); 125 126 ctx->num_cs_dw_queries_suspend += query->num_cs_dw; 127} 128 129static void r600_emit_query_end(struct r600_context *ctx, struct r600_query *query) 130{ 131 struct radeon_winsys_cs *cs = ctx->cs; 132 uint64_t va; 133 134 va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer.buf); 135 
/* emit end query */ 136 switch (query->type) { 137 case PIPE_QUERY_OCCLUSION_COUNTER: 138 case PIPE_QUERY_OCCLUSION_PREDICATE: 139 va += query->buffer.results_end + 8; 140 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); 141 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); 142 cs->buf[cs->cdw++] = va; 143 cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; 144 break; 145 case PIPE_QUERY_PRIMITIVES_EMITTED: 146 case PIPE_QUERY_PRIMITIVES_GENERATED: 147 case PIPE_QUERY_SO_STATISTICS: 148 case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 149 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); 150 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3); 151 cs->buf[cs->cdw++] = query->buffer.results_end + query->result_size/2; 152 cs->buf[cs->cdw++] = 0; 153 break; 154 case PIPE_QUERY_TIME_ELAPSED: 155 va += query->buffer.results_end + query->result_size/2; 156 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); 157 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); 158 cs->buf[cs->cdw++] = va; 159 cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF); 160 cs->buf[cs->cdw++] = 0; 161 cs->buf[cs->cdw++] = 0; 162 break; 163 default: 164 assert(0); 165 } 166 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 167 cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer.buf, RADEON_USAGE_WRITE); 168 169 query->buffer.results_end += query->result_size; 170 ctx->num_cs_dw_queries_suspend -= query->num_cs_dw; 171} 172 173static void r600_emit_query_predication(struct r600_context *ctx, struct r600_query *query, 174 int operation, bool flag_wait) 175{ 176 struct radeon_winsys_cs *cs = ctx->cs; 177 178 if (operation == PREDICATION_OP_CLEAR) { 179 r600_need_cs_space(ctx, 3, FALSE); 180 181 cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0); 182 cs->buf[cs->cdw++] = 0; 183 cs->buf[cs->cdw++] = PRED_OP(PREDICATION_OP_CLEAR); 184 } else { 185 struct r600_query_buffer *qbuf; 186 unsigned count; 187 uint32_t 
op; 188 189 /* Find how many results there are. */ 190 count = 0; 191 for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) { 192 count += qbuf->results_end / query->result_size; 193 } 194 195 r600_need_cs_space(ctx, 5 * count, TRUE); 196 197 op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE | 198 (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW); 199 200 /* emit predicate packets for all data blocks */ 201 for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) { 202 unsigned results_base = 0; 203 uint64_t va = r600_resource_va(&ctx->screen->screen, &qbuf->buf->b.b.b); 204 205 while (results_base < qbuf->results_end) { 206 cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0); 207 cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL; 208 cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF); 209 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 210 cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, qbuf->buf, RADEON_USAGE_READ); 211 results_base += query->result_size; 212 213 /* set CONTINUE bit for all packets except the first */ 214 op |= PREDICATION_CONTINUE; 215 } 216 } while (qbuf); 217 } 218} 219 220static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type) 221{ 222 struct r600_context *rctx = (struct r600_context *)ctx; 223 224 struct r600_query *query; 225 226 query = CALLOC_STRUCT(r600_query); 227 if (query == NULL) 228 return NULL; 229 230 query->type = query_type; 231 232 switch (query_type) { 233 case PIPE_QUERY_OCCLUSION_COUNTER: 234 case PIPE_QUERY_OCCLUSION_PREDICATE: 235 query->result_size = 16 * rctx->max_db; 236 query->num_cs_dw = 6; 237 break; 238 case PIPE_QUERY_TIME_ELAPSED: 239 query->result_size = 16; 240 query->num_cs_dw = 8; 241 break; 242 case PIPE_QUERY_PRIMITIVES_EMITTED: 243 case PIPE_QUERY_PRIMITIVES_GENERATED: 244 case PIPE_QUERY_SO_STATISTICS: 245 case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 246 /* NumPrimitivesWritten, PrimitiveStorageNeeded. 
*/ 247 query->result_size = 32; 248 query->num_cs_dw = 6; 249 break; 250 default: 251 assert(0); 252 FREE(query); 253 return NULL; 254 } 255 256 query->buffer.buf = r600_new_query_buffer(rctx, query_type); 257 if (!query->buffer.buf) { 258 FREE(query); 259 return NULL; 260 } 261 return (struct pipe_query*)query; 262} 263 264static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query) 265{ 266 struct r600_query *rquery = (struct r600_query*)query; 267 268 pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL); 269 FREE(query); 270} 271 272static void r600_update_occlusion_query_state(struct r600_context *rctx, 273 unsigned type, int diff) 274{ 275 if (type == PIPE_QUERY_OCCLUSION_COUNTER || 276 type == PIPE_QUERY_OCCLUSION_PREDICATE) { 277 bool enable; 278 279 rctx->num_occlusion_queries += diff; 280 assert(rctx->num_occlusion_queries >= 0); 281 282 enable = rctx->num_occlusion_queries != 0; 283 284 if (rctx->atom_db_misc_state.occlusion_query_enabled != enable) { 285 rctx->atom_db_misc_state.occlusion_query_enabled = enable; 286 r600_atom_dirty(rctx, &rctx->atom_db_misc_state.atom); 287 } 288 } 289} 290 291static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query) 292{ 293 struct r600_context *rctx = (struct r600_context *)ctx; 294 struct r600_query *rquery = (struct r600_query *)query; 295 /* Discard the old query buffers. */ 296 struct r600_query_buffer *prev = rquery->buffer.previous; 297 298 while (prev) { 299 struct r600_query_buffer *qbuf = prev; 300 prev = prev->previous; 301 pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL); 302 FREE(qbuf); 303 } 304 305 /* Obtain a new buffer if the current one can't be mapped without a stall. 
*/ 306 if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rquery->buffer.buf->cs_buf) || 307 rctx->ws->buffer_is_busy(rquery->buffer.buf->buf, RADEON_USAGE_READWRITE)) { 308 pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL); 309 rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type); 310 } 311 312 rquery->buffer.results_end = 0; 313 rquery->buffer.previous = NULL; 314 315 r600_update_occlusion_query_state(rctx, rquery->type, 1); 316 317 r600_emit_query_begin(rctx, rquery); 318 LIST_ADDTAIL(&rquery->list, &rctx->active_query_list); 319} 320 321static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query) 322{ 323 struct r600_context *rctx = (struct r600_context *)ctx; 324 struct r600_query *rquery = (struct r600_query *)query; 325 326 r600_emit_query_end(rctx, rquery); 327 LIST_DELINIT(&rquery->list); 328 329 r600_update_occlusion_query_state(rctx, rquery->type, -1); 330} 331 332static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index, 333 bool test_status_bit) 334{ 335 uint32_t *current_result = (uint32_t*)map; 336 uint64_t start, end; 337 338 start = (uint64_t)current_result[start_index] | 339 (uint64_t)current_result[start_index+1] << 32; 340 end = (uint64_t)current_result[end_index] | 341 (uint64_t)current_result[end_index+1] << 32; 342 343 if (!test_status_bit || 344 ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))) { 345 return end - start; 346 } 347 return 0; 348} 349 350static boolean r600_get_query_buffer_result(struct r600_context *ctx, 351 struct r600_query *query, 352 struct r600_query_buffer *qbuf, 353 boolean wait, 354 union r600_query_result *result) 355{ 356 unsigned results_base = 0; 357 char *map; 358 359 map = ctx->ws->buffer_map(qbuf->buf->buf, ctx->cs, 360 PIPE_TRANSFER_READ | 361 (wait ? 
0 : PIPE_TRANSFER_DONTBLOCK)); 362 if (!map) 363 return FALSE; 364 365 /* count all results across all data blocks */ 366 switch (query->type) { 367 case PIPE_QUERY_OCCLUSION_COUNTER: 368 while (results_base != qbuf->results_end) { 369 result->u64 += 370 r600_query_read_result(map + results_base, 0, 2, true); 371 results_base += 16; 372 } 373 break; 374 case PIPE_QUERY_OCCLUSION_PREDICATE: 375 while (results_base != qbuf->results_end) { 376 result->b = result->b || 377 r600_query_read_result(map + results_base, 0, 2, true) != 0; 378 results_base += 16; 379 } 380 break; 381 case PIPE_QUERY_TIME_ELAPSED: 382 while (results_base != qbuf->results_end) { 383 result->u64 += 384 r600_query_read_result(map + results_base, 0, 2, false); 385 results_base += query->result_size; 386 } 387 break; 388 case PIPE_QUERY_PRIMITIVES_EMITTED: 389 /* SAMPLE_STREAMOUTSTATS stores this structure: 390 * { 391 * u64 NumPrimitivesWritten; 392 * u64 PrimitiveStorageNeeded; 393 * } 394 * We only need NumPrimitivesWritten here. */ 395 while (results_base != qbuf->results_end) { 396 result->u64 += 397 r600_query_read_result(map + results_base, 2, 6, true); 398 results_base += query->result_size; 399 } 400 break; 401 case PIPE_QUERY_PRIMITIVES_GENERATED: 402 /* Here we read PrimitiveStorageNeeded. 
*/ 403 while (results_base != qbuf->results_end) { 404 result->u64 += 405 r600_query_read_result(map + results_base, 0, 4, true); 406 results_base += query->result_size; 407 } 408 break; 409 case PIPE_QUERY_SO_STATISTICS: 410 while (results_base != qbuf->results_end) { 411 result->so.num_primitives_written += 412 r600_query_read_result(map + results_base, 2, 6, true); 413 result->so.primitives_storage_needed += 414 r600_query_read_result(map + results_base, 0, 4, true); 415 results_base += query->result_size; 416 } 417 break; 418 case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 419 while (results_base != qbuf->results_end) { 420 result->b = result->b || 421 r600_query_read_result(map + results_base, 2, 6, true) != 422 r600_query_read_result(map + results_base, 0, 4, true); 423 results_base += query->result_size; 424 } 425 break; 426 default: 427 assert(0); 428 } 429 430 ctx->ws->buffer_unmap(qbuf->buf->buf); 431 return TRUE; 432} 433 434static boolean r600_get_query_result(struct pipe_context *ctx, 435 struct pipe_query *query, 436 boolean wait, void *vresult) 437{ 438 struct r600_context *rctx = (struct r600_context *)ctx; 439 struct r600_query *rquery = (struct r600_query *)query; 440 boolean *result_b = (boolean*)vresult; 441 uint64_t *result_u64 = (uint64_t*)vresult; 442 union r600_query_result result; 443 struct pipe_query_data_so_statistics *result_so = 444 (struct pipe_query_data_so_statistics*)vresult; 445 struct r600_query_buffer *qbuf; 446 447 memset(&result, 0, sizeof(result)); 448 449 for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) { 450 if (!r600_get_query_buffer_result(rctx, rquery, qbuf, wait, &result)) { 451 return FALSE; 452 } 453 } 454 455 switch (rquery->type) { 456 case PIPE_QUERY_OCCLUSION_COUNTER: 457 case PIPE_QUERY_PRIMITIVES_EMITTED: 458 case PIPE_QUERY_PRIMITIVES_GENERATED: 459 *result_u64 = result.u64; 460 break; 461 case PIPE_QUERY_OCCLUSION_PREDICATE: 462 case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 463 *result_b = result.b; 464 break; 465 
case PIPE_QUERY_TIME_ELAPSED: 466 *result_u64 = (1000000 * result.u64) / rctx->screen->info.r600_clock_crystal_freq; 467 break; 468 case PIPE_QUERY_SO_STATISTICS: 469 *result_so = result.so; 470 break; 471 default: 472 assert(0); 473 } 474 return TRUE; 475} 476 477static void r600_render_condition(struct pipe_context *ctx, 478 struct pipe_query *query, 479 uint mode) 480{ 481 struct r600_context *rctx = (struct r600_context *)ctx; 482 struct r600_query *rquery = (struct r600_query *)query; 483 bool wait_flag = false; 484 485 rctx->current_render_cond = query; 486 rctx->current_render_cond_mode = mode; 487 488 if (query == NULL) { 489 if (rctx->predicate_drawing) { 490 rctx->predicate_drawing = false; 491 r600_emit_query_predication(rctx, NULL, PREDICATION_OP_CLEAR, false); 492 } 493 return; 494 } 495 496 if (mode == PIPE_RENDER_COND_WAIT || 497 mode == PIPE_RENDER_COND_BY_REGION_WAIT) { 498 wait_flag = true; 499 } 500 501 rctx->predicate_drawing = true; 502 503 switch (rquery->type) { 504 case PIPE_QUERY_OCCLUSION_COUNTER: 505 case PIPE_QUERY_OCCLUSION_PREDICATE: 506 r600_emit_query_predication(rctx, rquery, PREDICATION_OP_ZPASS, wait_flag); 507 break; 508 case PIPE_QUERY_PRIMITIVES_EMITTED: 509 case PIPE_QUERY_PRIMITIVES_GENERATED: 510 case PIPE_QUERY_SO_STATISTICS: 511 case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 512 r600_emit_query_predication(rctx, rquery, PREDICATION_OP_PRIMCOUNT, wait_flag); 513 break; 514 default: 515 assert(0); 516 } 517} 518 519void r600_suspend_queries(struct r600_context *ctx) 520{ 521 struct r600_query *query; 522 523 LIST_FOR_EACH_ENTRY(query, &ctx->active_query_list, list) { 524 r600_emit_query_end(ctx, query); 525 } 526 assert(ctx->num_cs_dw_queries_suspend == 0); 527} 528 529void r600_resume_queries(struct r600_context *ctx) 530{ 531 struct r600_query *query; 532 533 assert(ctx->num_cs_dw_queries_suspend == 0); 534 535 LIST_FOR_EACH_ENTRY(query, &ctx->active_query_list, list) { 536 r600_emit_query_begin(ctx, query); 537 } 538} 539 540void 
r600_init_query_functions(struct r600_context *rctx) 541{ 542 rctx->context.create_query = r600_create_query; 543 rctx->context.destroy_query = r600_destroy_query; 544 rctx->context.begin_query = r600_begin_query; 545 rctx->context.end_query = r600_end_query; 546 rctx->context.get_query_result = r600_get_query_result; 547 548 if (rctx->screen->info.r600_num_backends > 0) 549 rctx->context.render_condition = r600_render_condition; 550} 551