/* brw_queryobj.c — revision e45a9ce474c3562f16c8a773260752d77a4fed5c */
1/* 2 * Copyright © 2008 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28/** @file support for ARB_query_object 29 * 30 * ARB_query_object is implemented by using the PIPE_CONTROL command to stall 31 * execution on the completion of previous depth tests, and write the 32 * current PS_DEPTH_COUNT to a buffer object. 33 * 34 * We use before and after counts when drawing during a query so that 35 * we don't pick up other clients' query data in ours. To reduce overhead, 36 * a single BO is used to record the query data for all active queries at 37 * once. This also gives us a simple bound on how much batchbuffer space is 38 * required for handling queries, so that we can be sure that we won't 39 * have to emit a batchbuffer without getting the ending PS_DEPTH_COUNT. 
40 */ 41#include "main/imports.h" 42 43#include "brw_context.h" 44#include "brw_state.h" 45#include "intel_batchbuffer.h" 46#include "intel_reg.h" 47 48static void 49write_timestamp(struct intel_context *intel, drm_intel_bo *query_bo, int idx) 50{ 51 if (intel->gen >= 6) { 52 /* Emit workaround flushes: */ 53 if (intel->gen == 6) { 54 /* The timestamp write below is a non-zero post-sync op, which on 55 * Gen6 necessitates a CS stall. CS stalls need stall at scoreboard 56 * set. See the comments for intel_emit_post_sync_nonzero_flush(). 57 */ 58 BEGIN_BATCH(4); 59 OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2)); 60 OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD); 61 OUT_BATCH(0); 62 OUT_BATCH(0); 63 ADVANCE_BATCH(); 64 } 65 66 BEGIN_BATCH(5); 67 OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2)); 68 OUT_BATCH(PIPE_CONTROL_WRITE_TIMESTAMP); 69 OUT_RELOC(query_bo, 70 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 71 PIPE_CONTROL_GLOBAL_GTT_WRITE | 72 idx * sizeof(uint64_t)); 73 OUT_BATCH(0); 74 OUT_BATCH(0); 75 ADVANCE_BATCH(); 76 } else { 77 BEGIN_BATCH(4); 78 OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) | 79 PIPE_CONTROL_WRITE_TIMESTAMP); 80 OUT_RELOC(query_bo, 81 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 82 PIPE_CONTROL_GLOBAL_GTT_WRITE | 83 idx * sizeof(uint64_t)); 84 OUT_BATCH(0); 85 OUT_BATCH(0); 86 ADVANCE_BATCH(); 87 } 88} 89 90static void 91write_depth_count(struct intel_context *intel, drm_intel_bo *query_bo, int idx) 92{ 93 if (intel->gen >= 6) { 94 BEGIN_BATCH(9); 95 96 /* workaround: CS stall required before depth stall. 
*/ 97 OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2)); 98 OUT_BATCH(PIPE_CONTROL_CS_STALL); 99 OUT_BATCH(0); /* write address */ 100 OUT_BATCH(0); /* write data */ 101 102 OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2)); 103 OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | 104 PIPE_CONTROL_WRITE_DEPTH_COUNT); 105 OUT_RELOC(query_bo, 106 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 107 PIPE_CONTROL_GLOBAL_GTT_WRITE | 108 (idx * sizeof(uint64_t))); 109 OUT_BATCH(0); 110 OUT_BATCH(0); 111 ADVANCE_BATCH(); 112 } else { 113 BEGIN_BATCH(4); 114 OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) | 115 PIPE_CONTROL_DEPTH_STALL | 116 PIPE_CONTROL_WRITE_DEPTH_COUNT); 117 /* This object could be mapped cacheable, but we don't have an exposed 118 * mechanism to support that. Since it's going uncached, tell GEM that 119 * we're writing to it. The usual clflush should be all that's required 120 * to pick up the results. 121 */ 122 OUT_RELOC(query_bo, 123 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 124 PIPE_CONTROL_GLOBAL_GTT_WRITE | 125 (idx * sizeof(uint64_t))); 126 OUT_BATCH(0); 127 OUT_BATCH(0); 128 ADVANCE_BATCH(); 129 } 130} 131 132/** Waits on the query object's BO and totals the results for this query */ 133static void 134brw_queryobj_get_results(struct gl_context *ctx, 135 struct brw_query_object *query) 136{ 137 struct intel_context *intel = intel_context(ctx); 138 139 int i; 140 uint64_t *results; 141 142 if (query->bo == NULL) 143 return; 144 145 drm_intel_bo_map(query->bo, false); 146 results = query->bo->virtual; 147 switch (query->Base.Target) { 148 case GL_TIME_ELAPSED_EXT: 149 if (intel->gen >= 6) 150 query->Base.Result += 80 * (results[1] - results[0]); 151 else 152 query->Base.Result += 1000 * ((results[1] >> 32) - (results[0] >> 32)); 153 break; 154 155 case GL_SAMPLES_PASSED_ARB: 156 /* Map and count the pixels from the current query BO */ 157 for (i = query->first_index; i <= query->last_index; i++) { 158 query->Base.Result += results[i * 2 + 1] - results[i * 
2]; 159 } 160 break; 161 162 case GL_PRIMITIVES_GENERATED: 163 case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: 164 /* We don't actually query the hardware for this value, so query->bo 165 * should always be NULL and execution should never reach here. 166 */ 167 assert(!"Unreachable"); 168 break; 169 170 default: 171 assert(!"Unrecognized query target in brw_queryobj_get_results()"); 172 break; 173 } 174 drm_intel_bo_unmap(query->bo); 175 176 drm_intel_bo_unreference(query->bo); 177 query->bo = NULL; 178} 179 180static struct gl_query_object * 181brw_new_query_object(struct gl_context *ctx, GLuint id) 182{ 183 struct brw_query_object *query; 184 185 query = calloc(1, sizeof(struct brw_query_object)); 186 187 query->Base.Id = id; 188 query->Base.Result = 0; 189 query->Base.Active = false; 190 query->Base.Ready = true; 191 192 return &query->Base; 193} 194 195static void 196brw_delete_query(struct gl_context *ctx, struct gl_query_object *q) 197{ 198 struct brw_query_object *query = (struct brw_query_object *)q; 199 200 drm_intel_bo_unreference(query->bo); 201 free(query); 202} 203 204static void 205brw_begin_query(struct gl_context *ctx, struct gl_query_object *q) 206{ 207 struct brw_context *brw = brw_context(ctx); 208 struct intel_context *intel = intel_context(ctx); 209 struct brw_query_object *query = (struct brw_query_object *)q; 210 211 switch (query->Base.Target) { 212 case GL_TIME_ELAPSED_EXT: 213 drm_intel_bo_unreference(query->bo); 214 query->bo = drm_intel_bo_alloc(intel->bufmgr, "timer query", 4096, 4096); 215 write_timestamp(intel, query->bo, 0); 216 break; 217 218 case GL_SAMPLES_PASSED_ARB: 219 /* Reset our driver's tracking of query state. 
*/ 220 drm_intel_bo_unreference(query->bo); 221 query->bo = NULL; 222 query->first_index = -1; 223 query->last_index = -1; 224 225 brw->query.obj = query; 226 intel->stats_wm++; 227 break; 228 229 case GL_PRIMITIVES_GENERATED: 230 /* We don't actually query the hardware for this value; we keep track of 231 * it a software counter. So just reset the counter. 232 */ 233 brw->sol.primitives_generated = 0; 234 brw->sol.counting_primitives_generated = true; 235 break; 236 237 case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: 238 /* We don't actually query the hardware for this value; we keep track of 239 * it a software counter. So just reset the counter. 240 */ 241 brw->sol.primitives_written = 0; 242 brw->sol.counting_primitives_written = true; 243 break; 244 245 default: 246 assert(!"Unrecognized query target in brw_begin_query()"); 247 break; 248 } 249} 250 251/** 252 * Begin the ARB_occlusion_query query on a query object. 253 */ 254static void 255brw_end_query(struct gl_context *ctx, struct gl_query_object *q) 256{ 257 struct brw_context *brw = brw_context(ctx); 258 struct intel_context *intel = intel_context(ctx); 259 struct brw_query_object *query = (struct brw_query_object *)q; 260 261 switch (query->Base.Target) { 262 case GL_TIME_ELAPSED_EXT: 263 write_timestamp(intel, query->bo, 1); 264 intel_batchbuffer_flush(intel); 265 break; 266 267 case GL_SAMPLES_PASSED_ARB: 268 /* Flush the batchbuffer in case it has writes to our query BO. 269 * Have later queries write to a new query BO so that further rendering 270 * doesn't delay the collection of our results. 271 */ 272 if (query->bo) { 273 brw_emit_query_end(brw); 274 intel_batchbuffer_flush(intel); 275 276 drm_intel_bo_unreference(brw->query.bo); 277 brw->query.bo = NULL; 278 } 279 280 brw->query.obj = NULL; 281 282 intel->stats_wm--; 283 break; 284 285 case GL_PRIMITIVES_GENERATED: 286 /* We don't actually query the hardware for this value; we keep track of 287 * it in a software counter. 
So just read the counter and store it in 288 * the query object. 289 */ 290 query->Base.Result = brw->sol.primitives_generated; 291 brw->sol.counting_primitives_generated = false; 292 293 /* And set brw->query.obj to NULL so that this query won't try to wait 294 * for any rendering to complete. 295 */ 296 query->bo = NULL; 297 break; 298 299 case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: 300 /* We don't actually query the hardware for this value; we keep track of 301 * it in a software counter. So just read the counter and store it in 302 * the query object. 303 */ 304 query->Base.Result = brw->sol.primitives_written; 305 brw->sol.counting_primitives_written = false; 306 307 /* And set brw->query.obj to NULL so that this query won't try to wait 308 * for any rendering to complete. 309 */ 310 query->bo = NULL; 311 break; 312 313 default: 314 assert(!"Unrecognized query target in brw_end_query()"); 315 break; 316 } 317} 318 319static void brw_wait_query(struct gl_context *ctx, struct gl_query_object *q) 320{ 321 struct brw_query_object *query = (struct brw_query_object *)q; 322 323 brw_queryobj_get_results(ctx, query); 324 query->Base.Ready = true; 325} 326 327static void brw_check_query(struct gl_context *ctx, struct gl_query_object *q) 328{ 329 struct brw_query_object *query = (struct brw_query_object *)q; 330 331 if (query->bo == NULL || !drm_intel_bo_busy(query->bo)) { 332 brw_queryobj_get_results(ctx, query); 333 query->Base.Ready = true; 334 } 335} 336 337/** Called to set up the query BO and account for its aperture space */ 338void 339brw_prepare_query_begin(struct brw_context *brw) 340{ 341 struct intel_context *intel = &brw->intel; 342 343 /* Skip if we're not doing any queries. */ 344 if (!brw->query.obj) 345 return; 346 347 /* Get a new query BO if we're going to need it. 
*/ 348 if (brw->query.bo == NULL || 349 brw->query.index * 2 + 1 >= 4096 / sizeof(uint64_t)) { 350 drm_intel_bo_unreference(brw->query.bo); 351 brw->query.bo = NULL; 352 353 brw->query.bo = drm_intel_bo_alloc(intel->bufmgr, "query", 4096, 1); 354 355 /* clear target buffer */ 356 drm_intel_bo_map(brw->query.bo, true); 357 memset((char *)brw->query.bo->virtual, 0, 4096); 358 drm_intel_bo_unmap(brw->query.bo); 359 360 brw->query.index = 0; 361 } 362} 363 364/** Called just before primitive drawing to get a beginning PS_DEPTH_COUNT. */ 365void 366brw_emit_query_begin(struct brw_context *brw) 367{ 368 struct intel_context *intel = &brw->intel; 369 struct gl_context *ctx = &intel->ctx; 370 struct brw_query_object *query = brw->query.obj; 371 372 /* Skip if we're not doing any queries, or we've emitted the start. */ 373 if (!query || brw->query.active) 374 return; 375 376 write_depth_count(intel, brw->query.bo, brw->query.index * 2); 377 378 if (query->bo != brw->query.bo) { 379 if (query->bo != NULL) 380 brw_queryobj_get_results(ctx, query); 381 drm_intel_bo_reference(brw->query.bo); 382 query->bo = brw->query.bo; 383 query->first_index = brw->query.index; 384 } 385 query->last_index = brw->query.index; 386 brw->query.active = true; 387} 388 389/** Called at batchbuffer flush to get an ending PS_DEPTH_COUNT */ 390void 391brw_emit_query_end(struct brw_context *brw) 392{ 393 struct intel_context *intel = &brw->intel; 394 395 if (!brw->query.active) 396 return; 397 398 write_depth_count(intel, brw->query.bo, brw->query.index * 2 + 1); 399 400 brw->query.active = false; 401 brw->query.index++; 402} 403 404void brw_init_queryobj_functions(struct dd_function_table *functions) 405{ 406 functions->NewQueryObject = brw_new_query_object; 407 functions->DeleteQuery = brw_delete_query; 408 functions->BeginQuery = brw_begin_query; 409 functions->EndQuery = brw_end_query; 410 functions->CheckQuery = brw_check_query; 411 functions->WaitQuery = brw_wait_query; 412} 413