/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "pipe/p_state.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"

#include "freedreno_query_hw.h"
#include "freedreno_context.h"
#include "freedreno_resource.h"
#include "freedreno_util.h"

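/* A sample period is bounded by a start and end sample, covering a span
 * within a batch where the query was active.  Query results are
 * accumulated across all of a query's periods at read-back time.
 */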
struct fd_hw_sample_period {
	struct fd_hw_sample *start, *end;
	struct list_head list;
};

/* maps query_type to sample provider idx: */
static int pidx(unsigned query_type)
{
	switch (query_type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
		return 0;
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		return 1;
	/* TODO: currently queries are only emitted in the main pass (not the
	 * binning pass), which is fine for occlusion queries but not for much
	 * else.
	 */
	case PIPE_QUERY_TIME_ELAPSED:
		return 2;
	case PIPE_QUERY_TIMESTAMP:
		return 3;
	default:
		return -1;
	}
}

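/* Get a sample for the given query type, allocating a new one if needed.
 * Samples taken within the same stage of a batch are shared via
 * batch->sample_cache, so multiple queries of the same type end up
 * referencing a single sample.  The cache is cleared on stage transitions
 * (see fd_hw_query_set_stage()).
 */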
static struct fd_hw_sample *
get_sample(struct fd_batch *batch, struct fd_ringbuffer *ring,
		unsigned query_type)
{
	struct fd_context *ctx = batch->ctx;
	struct fd_hw_sample *samp = NULL;
	int idx = pidx(query_type);

	assume(idx >= 0);   /* query never would have been created otherwise */

	if (!batch->sample_cache[idx]) {
		struct fd_hw_sample *new_samp =
			ctx->sample_providers[idx]->get_sample(batch, ring);
		fd_hw_sample_reference(ctx, &batch->sample_cache[idx], new_samp);
		util_dynarray_append(&batch->samples, struct fd_hw_sample *, new_samp);
		batch->needs_flush = true;
	}

	fd_hw_sample_reference(ctx, &samp, batch->sample_cache[idx]);

	return samp;
}

static void
clear_sample_cache(struct fd_batch *batch)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(batch->sample_cache); i++)
		fd_hw_sample_reference(batch->ctx, &batch->sample_cache[i], NULL);
}

static bool
is_active(struct fd_hw_query *hq, enum fd_render_stage stage)
{
	return !!(hq->provider->active & stage);
}

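/* Open a new sample period for a query: grab a start sample from the
 * provider and note that the provider was used in this batch, so that
 * fd_hw_query_enable() can program its counters.  The matching
 * pause_query() grabs the end sample and appends the completed period
 * to hq->periods.
 */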
static void
resume_query(struct fd_batch *batch, struct fd_hw_query *hq,
		struct fd_ringbuffer *ring)
{
	int idx = pidx(hq->provider->query_type);
	DBG("%p", hq);
	assert(idx >= 0);   /* query never would have been created otherwise */
	assert(!hq->period);
	batch->active_providers |= (1 << idx);
	hq->period = slab_alloc_st(&batch->ctx->sample_period_pool);
	list_inithead(&hq->period->list);
	hq->period->start = get_sample(batch, ring, hq->base.type);
	/* NOTE: slab_alloc_st() does not zero out the buffer: */
	hq->period->end = NULL;
}

static void
pause_query(struct fd_batch *batch, struct fd_hw_query *hq,
		struct fd_ringbuffer *ring)
{
	int idx = pidx(hq->provider->query_type);
	DBG("%p", hq);
	assert(idx >= 0);   /* query never would have been created otherwise */
	assert(hq->period && !hq->period->end);
	assert(batch->active_providers & (1 << idx));
	hq->period->end = get_sample(batch, ring, hq->base.type);
	list_addtail(&hq->period->list, &hq->periods);
	hq->period = NULL;
}

static void
destroy_periods(struct fd_context *ctx, struct fd_hw_query *hq)
{
	struct fd_hw_sample_period *period, *s;
	LIST_FOR_EACH_ENTRY_SAFE(period, s, &hq->periods, list) {
		fd_hw_sample_reference(ctx, &period->start, NULL);
		fd_hw_sample_reference(ctx, &period->end, NULL);
		list_del(&period->list);
		slab_free_st(&ctx->sample_period_pool, period);
	}
}

static void
fd_hw_destroy_query(struct fd_context *ctx, struct fd_query *q)
{
	struct fd_hw_query *hq = fd_hw_query(q);

	DBG("%p: active=%d", q, q->active);

	destroy_periods(ctx, hq);
	list_del(&hq->list);

	free(hq);
}

static boolean
fd_hw_begin_query(struct fd_context *ctx, struct fd_query *q)
{
	struct fd_batch *batch = ctx->batch;
	struct fd_hw_query *hq = fd_hw_query(q);

	DBG("%p: active=%d", q, q->active);

	if (q->active)
		return false;

	/* begin_query() should clear previous results: */
	destroy_periods(ctx, hq);

	if (batch && is_active(hq, batch->stage))
		resume_query(batch, hq, batch->draw);

	q->active = true;

	/* add to active list: */
	assert(list_empty(&hq->list));
	list_addtail(&hq->list, &ctx->active_queries);

	return true;
}

static void
fd_hw_end_query(struct fd_context *ctx, struct fd_query *q)
{
	struct fd_batch *batch = ctx->batch;
	struct fd_hw_query *hq = fd_hw_query(q);

	/* there are a couple of special cases which don't have
	 * a matching ->begin_query():
	 */
	if (skip_begin_query(q->type) && !q->active) {
		fd_hw_begin_query(ctx, q);
	}

	DBG("%p: active=%d", q, q->active);

	if (!q->active)
		return;

	if (batch && is_active(hq, batch->stage))
		pause_query(batch, hq, batch->draw);

	q->active = false;
	/* remove from active list: */
	list_delinit(&hq->list);
}

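/* The query buffer is logically divided into one region per tile, each
 * tile_stride bytes in size.  A sample occupies `size' bytes at the same
 * offset within every tile's region, so the copy of a sample for tile `n'
 * lives at (tile_stride * n) + offset.
 */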
/* helper to get ptr to specified sample: */
static void * sampptr(struct fd_hw_sample *samp, uint32_t n, void *ptr)
{
	return ((char *)ptr) + (samp->tile_stride * n) + samp->offset;
}

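/* Read back a query result: flush any batch still writing the query
 * buffer, map its bo, and let the provider accumulate the start/end
 * deltas across every sample period and every tile.  With wait==false
 * we only peek at the most recently written sample and bail out if it
 * is not ready yet.
 */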
static boolean
fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
		boolean wait, union pipe_query_result *result)
{
	struct fd_hw_query *hq = fd_hw_query(q);
	const struct fd_hw_sample_provider *p = hq->provider;
	struct fd_hw_sample_period *period;

	DBG("%p: wait=%d, active=%d", q, wait, q->active);

	if (q->active)
		return false;

	util_query_clear_result(result, q->type);

	if (LIST_IS_EMPTY(&hq->periods))
		return true;

	assert(LIST_IS_EMPTY(&hq->list));
	assert(!hq->period);

	/* if !wait, then check the last sample (the one most likely to
	 * not be ready yet) and bail if it is not ready:
	 */
	if (!wait) {
		int ret;

		period = LIST_ENTRY(struct fd_hw_sample_period,
				hq->periods.prev, list);

		struct fd_resource *rsc = fd_resource(period->end->prsc);

		if (pending(rsc, false)) {
			/* The piglit spec@arb_occlusion_query@occlusion_query_conform
			 * test (and perhaps some silly apps) gets stuck in a loop
			 * trying to get the query result forever with wait==false..
			 * we don't want to flush unnecessarily, but we also don't
			 * want to spin forever:
			 */
			if (hq->no_wait_cnt++ > 5)
				fd_batch_flush(rsc->write_batch, false);
			return false;
		}

		if (!rsc->bo)
			return false;

		ret = fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe,
				DRM_FREEDRENO_PREP_READ | DRM_FREEDRENO_PREP_NOSYNC);
		if (ret)
			return false;

		fd_bo_cpu_fini(rsc->bo);
	}

	/* sum the result across all sample periods: */
	LIST_FOR_EACH_ENTRY(period, &hq->periods, list) {
		struct fd_hw_sample *start = period->start;
		struct fd_hw_sample *end = period->end;
		unsigned i;

		/* start and end samples should be from the same batch: */
		assert(start->prsc == end->prsc);
		assert(start->num_tiles == end->num_tiles);

		struct fd_resource *rsc = fd_resource(start->prsc);

		if (rsc->write_batch)
			fd_batch_flush(rsc->write_batch, true);

		/* some piglit tests at least do query with no draws, I guess: */
		if (!rsc->bo)
			continue;

		fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, DRM_FREEDRENO_PREP_READ);

		void *ptr = fd_bo_map(rsc->bo);

		for (i = 0; i < start->num_tiles; i++) {
			p->accumulate_result(ctx, sampptr(period->start, i, ptr),
					sampptr(period->end, i, ptr), result);
		}

		fd_bo_cpu_fini(rsc->bo);
	}

	return true;
}

static const struct fd_query_funcs hw_query_funcs = {
		.destroy_query    = fd_hw_destroy_query,
		.begin_query      = fd_hw_begin_query,
		.end_query        = fd_hw_end_query,
		.get_query_result = fd_hw_get_query_result,
};

struct fd_query *
fd_hw_create_query(struct fd_context *ctx, unsigned query_type)
{
	struct fd_hw_query *hq;
	struct fd_query *q;
	int idx = pidx(query_type);

	if ((idx < 0) || !ctx->sample_providers[idx])
		return NULL;

	hq = CALLOC_STRUCT(fd_hw_query);
	if (!hq)
		return NULL;

	DBG("%p: query_type=%u", hq, query_type);

	hq->provider = ctx->sample_providers[idx];

	list_inithead(&hq->periods);
	list_inithead(&hq->list);

	q = &hq->base;
	q->funcs = &hw_query_funcs;
	q->type = query_type;

	return q;
}

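/* Allocate space for one sample in the batch's query buffer.  The running
 * next_sample_offset is aligned to the sample size and advanced; the
 * backing buffer itself is created lazily with zero size and only resized
 * to its final size once the tile count is known, in fd_hw_query_prepare().
 */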
struct fd_hw_sample *
fd_hw_sample_init(struct fd_batch *batch, uint32_t size)
{
	struct fd_hw_sample *samp = slab_alloc_st(&batch->ctx->sample_pool);
	pipe_reference_init(&samp->reference, 1);
	samp->size = size;
	debug_assert(util_is_power_of_two(size));
	batch->next_sample_offset = align(batch->next_sample_offset, size);
	samp->offset = batch->next_sample_offset;
	/* NOTE: slab_alloc_st() does not zero out the buffer: */
	samp->prsc = NULL;
	samp->num_tiles = 0;
	samp->tile_stride = 0;
	batch->next_sample_offset += size;

	if (!batch->query_buf) {
		struct pipe_screen *pscreen = &batch->ctx->screen->base;
		struct pipe_resource templ = {
			.target  = PIPE_BUFFER,
			.format  = PIPE_FORMAT_R8_UNORM,
			.bind    = PIPE_BIND_QUERY_BUFFER,
			.width0  = 0,    /* create initially zero size buffer */
			.height0 = 1,
			.depth0  = 1,
			.array_size = 1,
			.last_level = 0,
			.nr_samples = 1,
		};
		batch->query_buf = pscreen->resource_create(pscreen, &templ);
	}

	pipe_resource_reference(&samp->prsc, batch->query_buf);

	return samp;
}

void
__fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp)
{
	pipe_resource_reference(&samp->prsc, NULL);
	slab_free_st(&ctx->sample_pool, samp);
}

/* called from gmem code once total storage requirements are known (ie.
 * number of samples times number of tiles)
 */
void
fd_hw_query_prepare(struct fd_batch *batch, uint32_t num_tiles)
{
	uint32_t tile_stride = batch->next_sample_offset;

	if (tile_stride > 0)
		fd_resource_resize(batch->query_buf, tile_stride * num_tiles);

	batch->query_tile_stride = tile_stride;

	while (batch->samples.size > 0) {
		struct fd_hw_sample *samp =
			util_dynarray_pop(&batch->samples, struct fd_hw_sample *);
		samp->num_tiles = num_tiles;
		samp->tile_stride = tile_stride;
		fd_hw_sample_reference(batch->ctx, &samp, NULL);
	}

	/* reset things for next batch: */
	batch->next_sample_offset = 0;
}

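/* Called per tile from the gmem code: point the hardware's query base
 * register at this tile's region of the query buffer, so that sample
 * writes land at the proper per-tile offset.
 */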
void
fd_hw_query_prepare_tile(struct fd_batch *batch, uint32_t n,
		struct fd_ringbuffer *ring)
{
	uint32_t tile_stride = batch->query_tile_stride;
	uint32_t offset = tile_stride * n;

	/* bail if no queries: */
	if (tile_stride == 0)
		return;

	fd_wfi(batch, ring);
	OUT_PKT0(ring, HW_QUERY_BASE_REG, 1);
	OUT_RELOCW(ring, fd_resource(batch->query_buf)->bo, offset, 0, 0);
}

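/* Track render stage transitions within a batch.  A query is only active
 * during the stages its provider declares, so on a stage change we pause
 * queries that no longer apply, resume the ones that now do, and clear
 * the sample cache so the new stage gets fresh samples.
 */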
void
fd_hw_query_set_stage(struct fd_batch *batch, struct fd_ringbuffer *ring,
		enum fd_render_stage stage)
{
	/* special case: internal blits (like mipmap level generation)
	 * go through normal draw path (via util_blitter_blit()).. but
	 * we need to ignore the FD_STAGE_DRAW which will be set, so we
	 * don't enable queries which should be paused during internal
	 * blits:
	 */
	if ((batch->stage == FD_STAGE_BLIT) &&
			(stage != FD_STAGE_NULL))
		return;

	if (stage != batch->stage) {
		struct fd_hw_query *hq;
		LIST_FOR_EACH_ENTRY(hq, &batch->ctx->active_queries, list) {
			bool was_active = is_active(hq, batch->stage);
			bool now_active = is_active(hq, stage);

			if (now_active && !was_active)
				resume_query(batch, hq, ring);
			else if (was_active && !now_active)
				pause_query(batch, hq, ring);
		}
	}
	clear_sample_cache(batch);
	batch->stage = stage;
}

/* call the provider->enable() for all the hw queries that were active
 * in the current batch.  This sets up perfctr selector regs statically
 * for the duration of the batch.
 */
void
fd_hw_query_enable(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
	struct fd_context *ctx = batch->ctx;
	for (int idx = 0; idx < MAX_HW_SAMPLE_PROVIDERS; idx++) {
		if (batch->active_providers & (1 << idx)) {
			assert(ctx->sample_providers[idx]);
			if (ctx->sample_providers[idx]->enable)
				ctx->sample_providers[idx]->enable(ctx, ring);
		}
	}
	batch->active_providers = 0;  /* clear it for next frame */
}

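/* Illustrative example (the occlusion_* callback names are hypothetical;
 * the real providers live in the per-generation backends): a backend
 * registers its sample providers at context creation time, e.g.:
 *
 *    static const struct fd_hw_sample_provider occlusion_counter = {
 *            .query_type        = PIPE_QUERY_OCCLUSION_COUNTER,
 *            .active            = FD_STAGE_DRAW,
 *            .get_sample        = occlusion_get_sample,
 *            .accumulate_result = occlusion_accumulate_result,
 *    };
 *
 *    fd_hw_query_register_provider(pctx, &occlusion_counter);
 *
 * pidx() maps each query_type to a fixed slot, so at most one provider
 * can be registered per supported query type.
 */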
void
fd_hw_query_register_provider(struct pipe_context *pctx,
		const struct fd_hw_sample_provider *provider)
{
	struct fd_context *ctx = fd_context(pctx);
	int idx = pidx(provider->query_type);

	assert((0 <= idx) && (idx < MAX_HW_SAMPLE_PROVIDERS));
	assert(!ctx->sample_providers[idx]);

	ctx->sample_providers[idx] = provider;
}

void
fd_hw_query_init(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);

	slab_create(&ctx->sample_pool, sizeof(struct fd_hw_sample),
			16);
	slab_create(&ctx->sample_period_pool, sizeof(struct fd_hw_sample_period),
			16);
	list_inithead(&ctx->active_queries);
}

void
fd_hw_query_fini(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);

	slab_destroy(&ctx->sample_pool);
	slab_destroy(&ctx->sample_period_pool);
}