brw_queryobj.c revision c4c78c275abffe8d1014b1355f02239859d6aa2b
1/*
2 * Copyright © 2008 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 *    Eric Anholt <eric@anholt.net>
25 *
26 */
27
28/** @file support for ARB_query_object
29 *
30 * ARB_query_object is implemented by using the PIPE_CONTROL command to stall
31 * execution on the completion of previous depth tests, and write the
32 * current PS_DEPTH_COUNT to a buffer object.
33 *
34 * We use before and after counts when drawing during a query so that
35 * we don't pick up other clients' query data in ours.  To reduce overhead,
36 * a single BO is used to record the query data for all active queries at
37 * once.  This also gives us a simple bound on how much batchbuffer space is
38 * required for handling queries, so that we can be sure that we won't
39 * have to emit a batchbuffer without getting the ending PS_DEPTH_COUNT.
40 */
41#include "main/imports.h"
42
43#include "brw_context.h"
44#include "brw_state.h"
45#include "intel_batchbuffer.h"
46#include "intel_reg.h"
47
48static void
49write_timestamp(struct intel_context *intel, drm_intel_bo *query_bo, int idx)
50{
51   if (intel->gen >= 6) {
52      BEGIN_BATCH(5);
53      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
54      OUT_BATCH(PIPE_CONTROL_WRITE_TIMESTAMP);
55      OUT_RELOC(query_bo,
56                I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
57                PIPE_CONTROL_GLOBAL_GTT_WRITE |
58                idx * sizeof(uint64_t));
59      OUT_BATCH(0);
60      OUT_BATCH(0);
61      ADVANCE_BATCH();
62   } else {
63      BEGIN_BATCH(4);
64      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) |
65                PIPE_CONTROL_WRITE_TIMESTAMP);
66      OUT_RELOC(query_bo,
67                I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
68                PIPE_CONTROL_GLOBAL_GTT_WRITE |
69                idx * sizeof(uint64_t));
70      OUT_BATCH(0);
71      OUT_BATCH(0);
72      ADVANCE_BATCH();
73   }
74}
75
76/** Waits on the query object's BO and totals the results for this query */
77static void
78brw_queryobj_get_results(struct gl_context *ctx,
79			 struct brw_query_object *query)
80{
81   struct intel_context *intel = intel_context(ctx);
82
83   int i;
84   uint64_t *results;
85
86   if (query->bo == NULL)
87      return;
88
89   drm_intel_bo_map(query->bo, false);
90   results = query->bo->virtual;
91   switch (query->Base.Target) {
92   case GL_TIME_ELAPSED_EXT:
93      if (intel->gen >= 6)
94	 query->Base.Result += 80 * (results[1] - results[0]);
95      else
96	 query->Base.Result += 1000 * ((results[1] >> 32) - (results[0] >> 32));
97      break;
98
99   case GL_SAMPLES_PASSED_ARB:
100      /* Map and count the pixels from the current query BO */
101      for (i = query->first_index; i <= query->last_index; i++) {
102	 query->Base.Result += results[i * 2 + 1] - results[i * 2];
103      }
104      break;
105
106   case GL_PRIMITIVES_GENERATED:
107   case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
108      /* We don't actually query the hardware for this value, so query->bo
109       * should always be NULL and execution should never reach here.
110       */
111      assert(!"Unreachable");
112      break;
113
114   default:
115      assert(!"Unrecognized query target in brw_queryobj_get_results()");
116      break;
117   }
118   drm_intel_bo_unmap(query->bo);
119
120   drm_intel_bo_unreference(query->bo);
121   query->bo = NULL;
122}
123
124static struct gl_query_object *
125brw_new_query_object(struct gl_context *ctx, GLuint id)
126{
127   struct brw_query_object *query;
128
129   query = calloc(1, sizeof(struct brw_query_object));
130
131   query->Base.Id = id;
132   query->Base.Result = 0;
133   query->Base.Active = false;
134   query->Base.Ready = true;
135
136   return &query->Base;
137}
138
139static void
140brw_delete_query(struct gl_context *ctx, struct gl_query_object *q)
141{
142   struct brw_query_object *query = (struct brw_query_object *)q;
143
144   drm_intel_bo_unreference(query->bo);
145   free(query);
146}
147
148static void
149brw_begin_query(struct gl_context *ctx, struct gl_query_object *q)
150{
151   struct brw_context *brw = brw_context(ctx);
152   struct intel_context *intel = intel_context(ctx);
153   struct brw_query_object *query = (struct brw_query_object *)q;
154
155   switch (query->Base.Target) {
156   case GL_TIME_ELAPSED_EXT:
157      drm_intel_bo_unreference(query->bo);
158      query->bo = drm_intel_bo_alloc(intel->bufmgr, "timer query", 4096, 4096);
159      write_timestamp(intel, query->bo, 0);
160      break;
161
162   case GL_SAMPLES_PASSED_ARB:
163      /* Reset our driver's tracking of query state. */
164      drm_intel_bo_unreference(query->bo);
165      query->bo = NULL;
166      query->first_index = -1;
167      query->last_index = -1;
168
169      brw->query.obj = query;
170      intel->stats_wm++;
171      break;
172
173   case GL_PRIMITIVES_GENERATED:
174      /* We don't actually query the hardware for this value; we keep track of
175       * it a software counter.  So just reset the counter.
176       */
177      brw->sol.primitives_generated = 0;
178      brw->sol.counting_primitives_generated = true;
179      break;
180
181   case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
182      /* We don't actually query the hardware for this value; we keep track of
183       * it a software counter.  So just reset the counter.
184       */
185      brw->sol.primitives_written = 0;
186      brw->sol.counting_primitives_written = true;
187      break;
188
189   default:
190      assert(!"Unrecognized query target in brw_begin_query()");
191      break;
192   }
193}
194
195/**
196 * Begin the ARB_occlusion_query query on a query object.
197 */
198static void
199brw_end_query(struct gl_context *ctx, struct gl_query_object *q)
200{
201   struct brw_context *brw = brw_context(ctx);
202   struct intel_context *intel = intel_context(ctx);
203   struct brw_query_object *query = (struct brw_query_object *)q;
204
205   switch (query->Base.Target) {
206   case GL_TIME_ELAPSED_EXT:
207      write_timestamp(intel, query->bo, 1);
208      intel_batchbuffer_flush(intel);
209      break;
210
211   case GL_SAMPLES_PASSED_ARB:
212      /* Flush the batchbuffer in case it has writes to our query BO.
213       * Have later queries write to a new query BO so that further rendering
214       * doesn't delay the collection of our results.
215       */
216      if (query->bo) {
217	 brw_emit_query_end(brw);
218	 intel_batchbuffer_flush(intel);
219
220	 drm_intel_bo_unreference(brw->query.bo);
221	 brw->query.bo = NULL;
222      }
223
224      brw->query.obj = NULL;
225
226      intel->stats_wm--;
227      break;
228
229   case GL_PRIMITIVES_GENERATED:
230      /* We don't actually query the hardware for this value; we keep track of
231       * it in a software counter.  So just read the counter and store it in
232       * the query object.
233       */
234      query->Base.Result = brw->sol.primitives_generated;
235      brw->sol.counting_primitives_generated = false;
236
237      /* And set brw->query.obj to NULL so that this query won't try to wait
238       * for any rendering to complete.
239       */
240      query->bo = NULL;
241      break;
242
243   case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
244      /* We don't actually query the hardware for this value; we keep track of
245       * it in a software counter.  So just read the counter and store it in
246       * the query object.
247       */
248      query->Base.Result = brw->sol.primitives_written;
249      brw->sol.counting_primitives_written = false;
250
251      /* And set brw->query.obj to NULL so that this query won't try to wait
252       * for any rendering to complete.
253       */
254      query->bo = NULL;
255      break;
256
257   default:
258      assert(!"Unrecognized query target in brw_end_query()");
259      break;
260   }
261}
262
263static void brw_wait_query(struct gl_context *ctx, struct gl_query_object *q)
264{
265   struct brw_query_object *query = (struct brw_query_object *)q;
266
267   brw_queryobj_get_results(ctx, query);
268   query->Base.Ready = true;
269}
270
271static void brw_check_query(struct gl_context *ctx, struct gl_query_object *q)
272{
273   struct brw_query_object *query = (struct brw_query_object *)q;
274
275   if (query->bo == NULL || !drm_intel_bo_busy(query->bo)) {
276      brw_queryobj_get_results(ctx, query);
277      query->Base.Ready = true;
278   }
279}
280
281/** Called to set up the query BO and account for its aperture space */
282void
283brw_prepare_query_begin(struct brw_context *brw)
284{
285   struct intel_context *intel = &brw->intel;
286
287   /* Skip if we're not doing any queries. */
288   if (!brw->query.obj)
289      return;
290
291   /* Get a new query BO if we're going to need it. */
292   if (brw->query.bo == NULL ||
293       brw->query.index * 2 + 1 >= 4096 / sizeof(uint64_t)) {
294      drm_intel_bo_unreference(brw->query.bo);
295      brw->query.bo = NULL;
296
297      brw->query.bo = drm_intel_bo_alloc(intel->bufmgr, "query", 4096, 1);
298
299      /* clear target buffer */
300      drm_intel_bo_map(brw->query.bo, true);
301      memset((char *)brw->query.bo->virtual, 0, 4096);
302      drm_intel_bo_unmap(brw->query.bo);
303
304      brw->query.index = 0;
305   }
306}
307
308/** Called just before primitive drawing to get a beginning PS_DEPTH_COUNT. */
309void
310brw_emit_query_begin(struct brw_context *brw)
311{
312   struct intel_context *intel = &brw->intel;
313   struct gl_context *ctx = &intel->ctx;
314   struct brw_query_object *query = brw->query.obj;
315
316   /* Skip if we're not doing any queries, or we've emitted the start. */
317   if (!query || brw->query.active)
318      return;
319
320   if (intel->gen >= 6) {
321       BEGIN_BATCH(8);
322
323       /* workaround: CS stall required before depth stall. */
324       OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
325       OUT_BATCH(PIPE_CONTROL_CS_STALL);
326       OUT_BATCH(0); /* write address */
327       OUT_BATCH(0); /* write data */
328
329       OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
330       OUT_BATCH(PIPE_CONTROL_DEPTH_STALL |
331	         PIPE_CONTROL_WRITE_DEPTH_COUNT);
332       OUT_RELOC(brw->query.bo,
333	         I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
334		 PIPE_CONTROL_GLOBAL_GTT_WRITE |
335		 ((brw->query.index * 2) * sizeof(uint64_t)));
336       OUT_BATCH(0);
337       ADVANCE_BATCH();
338
339   } else {
340       BEGIN_BATCH(4);
341       OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) |
342	       PIPE_CONTROL_DEPTH_STALL |
343	       PIPE_CONTROL_WRITE_DEPTH_COUNT);
344       /* This object could be mapped cacheable, but we don't have an exposed
345	* mechanism to support that.  Since it's going uncached, tell GEM that
346	* we're writing to it.  The usual clflush should be all that's required
347	* to pick up the results.
348	*/
349       OUT_RELOC(brw->query.bo,
350	       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
351	       PIPE_CONTROL_GLOBAL_GTT_WRITE |
352	       ((brw->query.index * 2) * sizeof(uint64_t)));
353       OUT_BATCH(0);
354       OUT_BATCH(0);
355       ADVANCE_BATCH();
356   }
357
358   if (query->bo != brw->query.bo) {
359      if (query->bo != NULL)
360	 brw_queryobj_get_results(ctx, query);
361      drm_intel_bo_reference(brw->query.bo);
362      query->bo = brw->query.bo;
363      query->first_index = brw->query.index;
364   }
365   query->last_index = brw->query.index;
366   brw->query.active = true;
367}
368
369/** Called at batchbuffer flush to get an ending PS_DEPTH_COUNT */
370void
371brw_emit_query_end(struct brw_context *brw)
372{
373   struct intel_context *intel = &brw->intel;
374
375   if (!brw->query.active)
376      return;
377
378   if (intel->gen >= 6) {
379       BEGIN_BATCH(8);
380       /* workaround: CS stall required before depth stall. */
381       OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
382       OUT_BATCH(PIPE_CONTROL_CS_STALL);
383       OUT_BATCH(0); /* write address */
384       OUT_BATCH(0); /* write data */
385
386       OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
387       OUT_BATCH(PIPE_CONTROL_DEPTH_STALL |
388	         PIPE_CONTROL_WRITE_DEPTH_COUNT);
389       OUT_RELOC(brw->query.bo,
390	         I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
391		 PIPE_CONTROL_GLOBAL_GTT_WRITE |
392		 ((brw->query.index * 2 + 1) * sizeof(uint64_t)));
393       OUT_BATCH(0);
394       ADVANCE_BATCH();
395
396   } else {
397       BEGIN_BATCH(4);
398       OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) |
399	       PIPE_CONTROL_DEPTH_STALL |
400	       PIPE_CONTROL_WRITE_DEPTH_COUNT);
401       OUT_RELOC(brw->query.bo,
402	       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
403	       PIPE_CONTROL_GLOBAL_GTT_WRITE |
404	       ((brw->query.index * 2 + 1) * sizeof(uint64_t)));
405       OUT_BATCH(0);
406       OUT_BATCH(0);
407       ADVANCE_BATCH();
408   }
409
410   brw->query.active = false;
411   brw->query.index++;
412}
413
414void brw_init_queryobj_functions(struct dd_function_table *functions)
415{
416   functions->NewQueryObject = brw_new_query_object;
417   functions->DeleteQuery = brw_delete_query;
418   functions->BeginQuery = brw_begin_query;
419   functions->EndQuery = brw_end_query;
420   functions->CheckQuery = brw_check_query;
421   functions->WaitQuery = brw_wait_query;
422}
423