/*
 * Copyright 2011 Nouveau Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors: Christoph Bumiller
 */

#define NVC0_PUSH_EXPLICIT_SPACE_CHECKING

#include "nvc0_context.h"
#include "nouveau/nv_object.xml.h"

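/* Query lifecycle: storage is READY after allocation, ACTIVE between begin
 * and end, ENDED once the final report has been requested, and FLUSHED after
 * the pushbuf has been kicked because an application polled for the result
 * without waiting.
 */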
#define NVC0_QUERY_STATE_READY   0
#define NVC0_QUERY_STATE_ACTIVE  1
#define NVC0_QUERY_STATE_ENDED   2
#define NVC0_QUERY_STATE_FLUSHED 3

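/* Query storage is suballocated from a GART buffer object; 'data' points
 * into the CPU mapping of 'bo' at 'offset' = 'base' + i * 'rotate'.
 * Occlusion queries advance through their buffer in 'rotate'-byte slots.
 */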
struct nvc0_query {
   uint32_t *data;
   uint16_t type;
   uint16_t index;
   uint32_t sequence;
   struct nouveau_bo *bo;
   uint32_t base;
   uint32_t offset; /* base + i * rotate */
   uint8_t state;
   boolean is64bit;
   uint8_t rotate;
   int nesting; /* only used for occlusion queries */
   struct nouveau_mm_allocation *mm;
};

#define NVC0_QUERY_ALLOC_SPACE 256

static INLINE struct nvc0_query *
nvc0_query(struct pipe_query *pipe)
{
   return (struct nvc0_query *)pipe;
}

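/* (Re)allocate query storage of the given size and map it. Old storage is
 * freed immediately if the query is idle, otherwise the free is deferred
 * until the current fence signals so the GPU cannot still be writing to it.
 * Calling with size == 0 only releases the old storage.
 */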
static boolean
nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
{
   struct nvc0_screen *screen = nvc0->screen;
   int ret;

   if (q->bo) {
      nouveau_bo_ref(NULL, &q->bo);
      if (q->mm) {
         if (q->state == NVC0_QUERY_STATE_READY)
            nouveau_mm_free(q->mm);
         else
            nouveau_fence_work(screen->base.fence.current,
                               nouveau_mm_free_work, q->mm);
      }
   }
   if (size) {
      q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
      if (!q->bo)
         return FALSE;
      q->offset = q->base;

      ret = nouveau_bo_map(q->bo, 0, screen->base.client);
      if (ret) {
         nvc0_query_allocate(nvc0, q, 0);
         return FALSE;
      }
      q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
   }
   return TRUE;
}

static void
nvc0_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
{
   nvc0_query_allocate(nvc0_context(pipe), nvc0_query(pq), 0);
   FREE(nvc0_query(pq));
}

static struct pipe_query *
nvc0_query_create(struct pipe_context *pipe, unsigned type)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nvc0_query *q;
   unsigned space = NVC0_QUERY_ALLOC_SPACE;

   q = CALLOC_STRUCT(nvc0_query);
   if (!q)
      return NULL;

   switch (type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      q->rotate = 32;
      space = NVC0_QUERY_ALLOC_SPACE;
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      q->is64bit = TRUE;
      space = 512;
      break;
   case PIPE_QUERY_SO_STATISTICS:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      q->is64bit = TRUE;
      space = 64;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_GPU_FINISHED:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      space = 32;
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSET:
      space = 16;
      break;
   default:
      FREE(q);
      return NULL;
   }
   if (!nvc0_query_allocate(nvc0, q, space)) {
      FREE(q);
      return NULL;
   }

   q->type = type;

   if (q->rotate) {
      /* we advance before query_begin ! */
      q->offset -= q->rotate;
      q->data -= q->rotate / sizeof(*q->data);
   } else
   if (!q->is64bit)
      q->data[0] = 0; /* initialize sequence */

   return (struct pipe_query *)q;
}

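/* Emit a QUERY_ADDRESS_HIGH..QUERY_GET method sequence asking the 3D engine
 * to write the report selected by 'get', together with the query's current
 * sequence number, into the query buffer at the given offset.
 */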
static void
nvc0_query_get(struct nouveau_pushbuf *push, struct nvc0_query *q,
               unsigned offset, uint32_t get)
{
   offset += q->offset;

   PUSH_SPACE(push, 5);
   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
   BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4);
   PUSH_DATAh(push, q->bo->offset + offset);
   PUSH_DATA (push, q->bo->offset + offset);
   PUSH_DATA (push, q->sequence);
   PUSH_DATA (push, get);
}

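/* Advance an occlusion query to the next slot of its storage, allocating a
 * fresh buffer once the current one is used up.
 */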
static void
nvc0_query_rotate(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   q->offset += q->rotate;
   q->data += q->rotate / sizeof(*q->data);
   if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
      nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
}

static void
nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_query *q = nvc0_query(pq);

   /* For occlusion queries we have to change the storage, because a previous
    * query might set the initial render condition to FALSE even *after* we
    * re-initialized it to TRUE.
    */
   if (q->rotate) {
      nvc0_query_rotate(nvc0, q);

      /* XXX: can we do this with the GPU, and sync with respect to a previous
       *  query ?
       */
      q->data[0] = q->sequence; /* initialize sequence */
      q->data[1] = 1; /* initial render condition = TRUE */
      q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
      q->data[5] = 0;
   }
   q->sequence++;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      q->nesting = nvc0->screen->num_occlusion_queries_active++;
      if (q->nesting) {
         nvc0_query_get(push, q, 0x10, 0x0100f002);
      } else {
         PUSH_SPACE(push, 3);
         BEGIN_NVC0(push, NVC0_3D(COUNTER_RESET), 1);
         PUSH_DATA (push, NVC0_3D_COUNTER_RESET_SAMPLECNT);
         IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nvc0_query_get(push, q, 0x10, 0x06805002 | (q->index << 5));
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nvc0_query_get(push, q, 0x10, 0x05805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nvc0_query_get(push, q, 0x20, 0x05805002 | (q->index << 5));
      nvc0_query_get(push, q, 0x30, 0x06805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      nvc0_query_get(push, q, 0x10, 0x03005002 | (q->index << 5));
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_TIME_ELAPSED:
      nvc0_query_get(push, q, 0x10, 0x00005002);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      nvc0_query_get(push, q, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */
      nvc0_query_get(push, q, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */
      nvc0_query_get(push, q, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */
      nvc0_query_get(push, q, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */
      nvc0_query_get(push, q, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */
      nvc0_query_get(push, q, 0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */
      nvc0_query_get(push, q, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */
      nvc0_query_get(push, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */
      nvc0_query_get(push, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */
      nvc0_query_get(push, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */
      break;
   default:
      break;
   }
   q->state = NVC0_QUERY_STATE_ACTIVE;
}

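/* End a query. Queries that do not require a begin (e.g. TIMESTAMP or
 * GPU_FINISHED) still get a new sequence number (and storage slot) here so
 * their report lands in a fresh location.
 */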
static void
nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_query *q = nvc0_query(pq);

   if (q->state != NVC0_QUERY_STATE_ACTIVE) {
      /* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */
      if (q->rotate)
         nvc0_query_rotate(nvc0, q);
      q->sequence++;
   }
   q->state = NVC0_QUERY_STATE_ENDED;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      nvc0_query_get(push, q, 0, 0x0100f002);
      if (--nvc0->screen->num_occlusion_queries_active == 0) {
         PUSH_SPACE(push, 1);
         IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 0);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nvc0_query_get(push, q, 0, 0x06805002 | (q->index << 5));
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nvc0_query_get(push, q, 0, 0x05805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nvc0_query_get(push, q, 0x00, 0x05805002 | (q->index << 5));
      nvc0_query_get(push, q, 0x10, 0x06805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      /* TODO: How do we sum over all streams for render condition ? */
      /* PRIMS_DROPPED doesn't write sequence, use a ZERO query to sync on */
      nvc0_query_get(push, q, 0x00, 0x03005002 | (q->index << 5));
      nvc0_query_get(push, q, 0x20, 0x00005002);
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_TIME_ELAPSED:
      nvc0_query_get(push, q, 0, 0x00005002);
      break;
   case PIPE_QUERY_GPU_FINISHED:
      nvc0_query_get(push, q, 0, 0x1000f010);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      nvc0_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
      nvc0_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
      nvc0_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
      nvc0_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
      nvc0_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
      nvc0_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
      nvc0_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
      nvc0_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
      nvc0_query_get(push, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */
      nvc0_query_get(push, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSET:
      /* indexed by TFB buffer instead of by vertex stream */
      nvc0_query_get(push, q, 0x00, 0x0d005002 | (q->index << 5));
      break;
   default:
      assert(0);
      break;
   }
}

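/* Check, without blocking, whether the query's report has arrived: 64-bit
 * queries are ready when a non-blocking map of the buffer succeeds, the
 * others when the sequence number written by the GPU matches the query's.
 */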
static INLINE void
nvc0_query_update(struct nouveau_client *cli, struct nvc0_query *q)
{
   if (q->is64bit) {
      if (!nouveau_bo_map(q->bo, NOUVEAU_BO_RD | NOUVEAU_BO_NOBLOCK, cli))
         q->state = NVC0_QUERY_STATE_READY;
   } else {
      if (q->data[0] == q->sequence)
         q->state = NVC0_QUERY_STATE_READY;
   }
}

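/* Fetch the query result. For most query types the result is the difference
 * between the report written at end time and the one written at begin time.
 */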
static boolean
nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
                  boolean wait, union pipe_query_result *result)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nvc0_query *q = nvc0_query(pq);
   uint64_t *res64 = (uint64_t*)result;
   uint32_t *res32 = (uint32_t*)result;
   boolean *res8 = (boolean*)result;
   uint64_t *data64 = (uint64_t *)q->data;
   unsigned i;

   if (q->state != NVC0_QUERY_STATE_READY)
      nvc0_query_update(nvc0->screen->base.client, q);

   if (q->state != NVC0_QUERY_STATE_READY) {
      if (!wait) {
         if (q->state != NVC0_QUERY_STATE_FLUSHED) {
            q->state = NVC0_QUERY_STATE_FLUSHED;
            /* flush for silly apps that spin on GL_QUERY_RESULT_AVAILABLE */
            PUSH_KICK(nvc0->base.pushbuf);
         }
         return FALSE;
      }
      if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->screen->base.client))
         return FALSE;
   }
   q->state = NVC0_QUERY_STATE_READY;

   switch (q->type) {
   case PIPE_QUERY_GPU_FINISHED:
      res8[0] = TRUE;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
      res64[0] = q->data[1] - q->data[5];
      break;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      res8[0] = q->data[1] != q->data[5];
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
   case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
      res64[0] = data64[0] - data64[2];
      break;
   case PIPE_QUERY_SO_STATISTICS:
      res64[0] = data64[0] - data64[4];
      res64[1] = data64[2] - data64[6];
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      res8[0] = data64[0] != data64[2];
      break;
   case PIPE_QUERY_TIMESTAMP:
      res64[0] = data64[1];
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */
      res64[0] = 1000000000;
      res8[8] = (data64[1] == data64[3]) ? FALSE : TRUE;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      res64[0] = data64[1] - data64[3];
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      for (i = 0; i < 10; ++i)
         res64[i] = data64[i * 2] - data64[24 + i * 2];
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSET:
      res32[0] = q->data[1];
      break;
   default:
      return FALSE;
   }

   return TRUE;
}

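/* Make the FIFO wait (semaphore ACQUIRE_EQUAL) until the query's sequence
 * number has been written, i.e. until its result is available to the GPU.
 */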
void
nvc0_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq)
{
   struct nvc0_query *q = nvc0_query(pq);
   unsigned offset = q->offset;

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) offset += 0x20;

   PUSH_SPACE(push, 5);
   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
   BEGIN_NVC0(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
   PUSH_DATAh(push, q->bo->offset + offset);
   PUSH_DATA (push, q->bo->offset + offset);
   PUSH_DATA (push, q->sequence);
   PUSH_DATA (push, (1 << 12) |
              NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
}

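/* Set the hardware render condition (COND_MODE) from a predicate query.
 * Non-predicate query types fall back to COND_MODE_ALWAYS.
 */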
static void
nvc0_render_condition(struct pipe_context *pipe,
                      struct pipe_query *pq, uint mode)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_query *q;
   uint32_t cond;
   boolean negated = FALSE;
   boolean wait =
      mode != PIPE_RENDER_COND_NO_WAIT &&
      mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;

   if (!pq) {
      PUSH_SPACE(push, 1);
      IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
      return;
   }
   q = nvc0_query(pq);

   /* NOTE: comparison of 2 queries only works if both have completed */
   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      cond = negated ? NVC0_3D_COND_MODE_EQUAL :
                       NVC0_3D_COND_MODE_NOT_EQUAL;
      wait = TRUE;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      if (likely(!negated)) {
         if (unlikely(q->nesting))
            cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL :
                          NVC0_3D_COND_MODE_ALWAYS;
         else
            cond = NVC0_3D_COND_MODE_RES_NON_ZERO;
      } else {
         cond = wait ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS;
      }
      break;
   default:
      assert(!"render condition query not a predicate");
      cond = NVC0_3D_COND_MODE_ALWAYS;
      break;
   }

   if (wait)
      nvc0_query_fifo_wait(push, pq);

   PUSH_SPACE(push, 4);
   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
   BEGIN_NVC0(push, NVC0_3D(COND_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, q->bo->offset + q->offset);
   PUSH_DATA (push, q->bo->offset + q->offset);
   PUSH_DATA (push, cond);
}

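/* Feed data from the query buffer into the command stream as an IB entry,
 * with prefetching disabled so the FIFO reads the value only when it
 * actually reaches the entry.
 */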
void
nvc0_query_pushbuf_submit(struct nouveau_pushbuf *push,
                          struct pipe_query *pq, unsigned result_offset)
{
   struct nvc0_query *q = nvc0_query(pq);

#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8))

   nouveau_pushbuf_space(push, 0, 0, 1);
   nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
                        NVC0_IB_ENTRY_1_NO_PREFETCH);
}

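/* Save the current offset of a transform feedback buffer into the target's
 * query, emitting a SERIALIZE first if the caller requests it.
 */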
void
nvc0_so_target_save_offset(struct pipe_context *pipe,
                           struct pipe_stream_output_target *ptarg,
                           unsigned index, boolean *serialize)
{
   struct nvc0_so_target *targ = nvc0_so_target(ptarg);

   if (*serialize) {
      *serialize = FALSE;
      PUSH_SPACE(nvc0_context(pipe)->base.pushbuf, 1);
      IMMED_NVC0(nvc0_context(pipe)->base.pushbuf, NVC0_3D(SERIALIZE), 0);
   }

   nvc0_query(targ->pq)->index = index;

   nvc0_query_end(pipe, targ->pq);
}

void
nvc0_init_query_functions(struct nvc0_context *nvc0)
{
   struct pipe_context *pipe = &nvc0->base.pipe;

   pipe->create_query = nvc0_query_create;
   pipe->destroy_query = nvc0_query_destroy;
   pipe->begin_query = nvc0_query_begin;
   pipe->end_query = nvc0_query_end;
   pipe->get_query_result = nvc0_query_result;
   pipe->render_condition = nvc0_render_condition;
}