r600_query.c revision 83667acfd9feed932f6864092382e752466975ed
1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#include "r600_pipe.h"
24#include "r600d.h"
25#include "util/u_memory.h"
26#include "r600_hw_context_priv.h"
27
28static struct r600_resource *r600_new_query_buffer(struct r600_context *ctx, unsigned type)
29{
30	unsigned j, i, num_results, buf_size = 4096;
31	uint32_t *results;
32	/* Queries are normally read by the CPU after
33	 * being written by the gpu, hence staging is probably a good
34	 * usage pattern.
35	 */
36	struct r600_resource *buf = (struct r600_resource*)
37		pipe_buffer_create(&ctx->screen->screen, PIPE_BIND_CUSTOM,
38				   PIPE_USAGE_STAGING, buf_size);
39
40	switch (type) {
41	case PIPE_QUERY_OCCLUSION_COUNTER:
42	case PIPE_QUERY_OCCLUSION_PREDICATE:
43		results = ctx->ws->buffer_map(buf->buf, ctx->cs, PIPE_TRANSFER_WRITE);
44		memset(results, 0, buf_size);
45
46		/* Set top bits for unused backends. */
47		num_results = buf_size / (16 * ctx->max_db);
48		for (j = 0; j < num_results; j++) {
49			for (i = 0; i < ctx->max_db; i++) {
50				if (!(ctx->backend_mask & (1<<i))) {
51					results[(i * 4)+1] = 0x80000000;
52					results[(i * 4)+3] = 0x80000000;
53				}
54			}
55			results += 4 * ctx->max_db;
56		}
57		ctx->ws->buffer_unmap(buf->buf);
58		break;
59	case PIPE_QUERY_TIME_ELAPSED:
60		break;
61	case PIPE_QUERY_PRIMITIVES_EMITTED:
62	case PIPE_QUERY_PRIMITIVES_GENERATED:
63	case PIPE_QUERY_SO_STATISTICS:
64	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
65		results = ctx->ws->buffer_map(buf->buf, ctx->cs, PIPE_TRANSFER_WRITE);
66		memset(results, 0, buf_size);
67		ctx->ws->buffer_unmap(buf->buf);
68		break;
69	default:
70		assert(0);
71	}
72	return buf;
73}
74
75static void r600_emit_query_begin(struct r600_context *ctx, struct r600_query *query)
76{
77	struct radeon_winsys_cs *cs = ctx->cs;
78	uint64_t va;
79
80	r600_need_cs_space(ctx, query->num_cs_dw * 2, TRUE);
81
82	/* Get a new query buffer if needed. */
83	if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.b.width0) {
84		struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
85		*qbuf = query->buffer;
86		query->buffer.buf = r600_new_query_buffer(ctx, query->type);
87		query->buffer.results_end = 0;
88		query->buffer.previous = qbuf;
89	}
90
91	/* emit begin query */
92	va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer.buf);
93	va += query->buffer.results_end;
94
95	switch (query->type) {
96	case PIPE_QUERY_OCCLUSION_COUNTER:
97	case PIPE_QUERY_OCCLUSION_PREDICATE:
98		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
99		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
100		cs->buf[cs->cdw++] = va;
101		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
102		break;
103	case PIPE_QUERY_PRIMITIVES_EMITTED:
104	case PIPE_QUERY_PRIMITIVES_GENERATED:
105	case PIPE_QUERY_SO_STATISTICS:
106	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
107		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
108		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
109		cs->buf[cs->cdw++] = va;
110		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
111		break;
112	case PIPE_QUERY_TIME_ELAPSED:
113		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
114		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
115		cs->buf[cs->cdw++] = va;
116		cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF);
117		cs->buf[cs->cdw++] = 0;
118		cs->buf[cs->cdw++] = 0;
119		break;
120	default:
121		assert(0);
122	}
123	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
124	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer.buf, RADEON_USAGE_WRITE);
125
126	ctx->num_cs_dw_queries_suspend += query->num_cs_dw;
127}
128
129static void r600_emit_query_end(struct r600_context *ctx, struct r600_query *query)
130{
131	struct radeon_winsys_cs *cs = ctx->cs;
132	uint64_t va;
133
134	va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer.buf);
135	/* emit end query */
136	switch (query->type) {
137	case PIPE_QUERY_OCCLUSION_COUNTER:
138	case PIPE_QUERY_OCCLUSION_PREDICATE:
139		va += query->buffer.results_end + 8;
140		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
141		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
142		cs->buf[cs->cdw++] = va;
143		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
144		break;
145	case PIPE_QUERY_PRIMITIVES_EMITTED:
146	case PIPE_QUERY_PRIMITIVES_GENERATED:
147	case PIPE_QUERY_SO_STATISTICS:
148	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
149		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
150		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
151		cs->buf[cs->cdw++] = query->buffer.results_end + query->result_size/2;
152		cs->buf[cs->cdw++] = 0;
153		break;
154	case PIPE_QUERY_TIME_ELAPSED:
155		va += query->buffer.results_end + query->result_size/2;
156		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
157		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
158		cs->buf[cs->cdw++] = va;
159		cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF);
160		cs->buf[cs->cdw++] = 0;
161		cs->buf[cs->cdw++] = 0;
162		break;
163	default:
164		assert(0);
165	}
166	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
167	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer.buf, RADEON_USAGE_WRITE);
168
169	query->buffer.results_end += query->result_size;
170	ctx->num_cs_dw_queries_suspend -= query->num_cs_dw;
171}
172
173static void r600_emit_query_predication(struct r600_context *ctx, struct r600_query *query,
174					int operation, bool flag_wait)
175{
176	struct radeon_winsys_cs *cs = ctx->cs;
177
178	if (operation == PREDICATION_OP_CLEAR) {
179		r600_need_cs_space(ctx, 3, FALSE);
180
181		cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
182		cs->buf[cs->cdw++] = 0;
183		cs->buf[cs->cdw++] = PRED_OP(PREDICATION_OP_CLEAR);
184	} else {
185		struct r600_query_buffer *qbuf;
186		unsigned count;
187		uint32_t op;
188
189		/* Find how many results there are. */
190		count = 0;
191		for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
192			count += qbuf->results_end / query->result_size;
193		}
194
195		r600_need_cs_space(ctx, 5 * count, TRUE);
196
197		op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
198				(flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);
199
200		/* emit predicate packets for all data blocks */
201		for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
202			unsigned results_base = 0;
203			uint64_t va = r600_resource_va(&ctx->screen->screen, &qbuf->buf->b.b.b);
204
205			while (results_base < qbuf->results_end) {
206				cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
207				cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL;
208				cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF);
209				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
210				cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, qbuf->buf, RADEON_USAGE_READ);
211				results_base += query->result_size;
212
213				/* set CONTINUE bit for all packets except the first */
214				op |= PREDICATION_CONTINUE;
215			}
216		} while (qbuf);
217	}
218}
219
220static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type)
221{
222	struct r600_context *rctx = (struct r600_context *)ctx;
223
224	struct r600_query *query;
225
226	query = CALLOC_STRUCT(r600_query);
227	if (query == NULL)
228		return NULL;
229
230	query->type = query_type;
231
232	switch (query_type) {
233	case PIPE_QUERY_OCCLUSION_COUNTER:
234	case PIPE_QUERY_OCCLUSION_PREDICATE:
235		query->result_size = 16 * rctx->max_db;
236		query->num_cs_dw = 6;
237		break;
238	case PIPE_QUERY_TIME_ELAPSED:
239		query->result_size = 16;
240		query->num_cs_dw = 8;
241		break;
242	case PIPE_QUERY_PRIMITIVES_EMITTED:
243	case PIPE_QUERY_PRIMITIVES_GENERATED:
244	case PIPE_QUERY_SO_STATISTICS:
245	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
246		/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
247		query->result_size = 32;
248		query->num_cs_dw = 6;
249		break;
250	default:
251		assert(0);
252		FREE(query);
253		return NULL;
254	}
255
256	query->buffer.buf = r600_new_query_buffer(rctx, query_type);
257	if (!query->buffer.buf) {
258		FREE(query);
259		return NULL;
260	}
261	return (struct pipe_query*)query;
262}
263
264static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
265{
266	struct r600_query *rquery = (struct r600_query*)query;
267
268	pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
269	FREE(query);
270}
271
272static void r600_update_occlusion_query_state(struct r600_context *rctx,
273					      unsigned type, int diff)
274{
275	if (type == PIPE_QUERY_OCCLUSION_COUNTER ||
276	    type == PIPE_QUERY_OCCLUSION_PREDICATE) {
277		bool enable;
278
279		rctx->num_occlusion_queries += diff;
280		assert(rctx->num_occlusion_queries >= 0);
281
282		enable = rctx->num_occlusion_queries != 0;
283
284		if (rctx->atom_db_misc_state.occlusion_query_enabled != enable) {
285			rctx->atom_db_misc_state.occlusion_query_enabled = enable;
286			r600_atom_dirty(rctx, &rctx->atom_db_misc_state.atom);
287		}
288	}
289}
290
291static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query)
292{
293	struct r600_context *rctx = (struct r600_context *)ctx;
294	struct r600_query *rquery = (struct r600_query *)query;
295	/* Discard the old query buffers. */
296	struct r600_query_buffer *prev = rquery->buffer.previous;
297
298	while (prev) {
299		struct r600_query_buffer *qbuf = prev;
300		prev = prev->previous;
301		pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
302		FREE(qbuf);
303	}
304
305	/* Obtain a new buffer if the current one can't be mapped without a stall. */
306	if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rquery->buffer.buf->cs_buf) ||
307	    rctx->ws->buffer_is_busy(rquery->buffer.buf->buf, RADEON_USAGE_READWRITE)) {
308		pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
309		rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type);
310	}
311
312	rquery->buffer.results_end = 0;
313	rquery->buffer.previous = NULL;
314
315	r600_update_occlusion_query_state(rctx, rquery->type, 1);
316
317	r600_emit_query_begin(rctx, rquery);
318	LIST_ADDTAIL(&rquery->list, &rctx->active_query_list);
319}
320
321static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
322{
323	struct r600_context *rctx = (struct r600_context *)ctx;
324	struct r600_query *rquery = (struct r600_query *)query;
325
326	r600_emit_query_end(rctx, rquery);
327	LIST_DELINIT(&rquery->list);
328
329	r600_update_occlusion_query_state(rctx, rquery->type, -1);
330}
331
/*
 * Read one begin/end pair of 64-bit values from a mapped result buffer and
 * return their difference.
 *
 * map              points at the result slot; it is accessed as an array of
 *                  32-bit words.
 * start_index      word index of the low half of the begin value.
 * end_index        word index of the low half of the end value.
 * test_status_bit  when true, the difference is only returned if bit 63 is
 *                  set in both values; otherwise 0 is returned.
 *
 * The full 64-bit difference is returned so that large counter or timestamp
 * deltas are not truncated to 32 bits.
 */
static uint64_t r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
				       bool test_status_bit)
{
	const uint64_t status_bit = 0x8000000000000000ULL;
	uint32_t *current_result = (uint32_t*)map;
	uint64_t start, end;

	/* Assemble each 64-bit value from its low and high words. */
	start = (uint64_t)current_result[start_index] |
		(uint64_t)current_result[start_index+1] << 32;
	end = (uint64_t)current_result[end_index] |
	      (uint64_t)current_result[end_index+1] << 32;

	if (!test_status_bit ||
	    ((start & status_bit) && (end & status_bit))) {
		return end - start;
	}
	return 0;
}
349
350static boolean r600_get_query_buffer_result(struct r600_context *ctx,
351					    struct r600_query *query,
352					    struct r600_query_buffer *qbuf,
353					    boolean wait,
354					    union r600_query_result *result)
355{
356	unsigned results_base = 0;
357	char *map;
358
359	map = ctx->ws->buffer_map(qbuf->buf->buf, ctx->cs,
360				  PIPE_TRANSFER_READ |
361				  (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
362	if (!map)
363		return FALSE;
364
365	/* count all results across all data blocks */
366	switch (query->type) {
367	case PIPE_QUERY_OCCLUSION_COUNTER:
368		while (results_base != qbuf->results_end) {
369			result->u64 +=
370				r600_query_read_result(map + results_base, 0, 2, true);
371			results_base += 16;
372		}
373		break;
374	case PIPE_QUERY_OCCLUSION_PREDICATE:
375		while (results_base != qbuf->results_end) {
376			result->b = result->b ||
377				r600_query_read_result(map + results_base, 0, 2, true) != 0;
378			results_base += 16;
379		}
380		break;
381	case PIPE_QUERY_TIME_ELAPSED:
382		while (results_base != qbuf->results_end) {
383			result->u64 +=
384				r600_query_read_result(map + results_base, 0, 2, false);
385			results_base += query->result_size;
386		}
387		break;
388	case PIPE_QUERY_PRIMITIVES_EMITTED:
389		/* SAMPLE_STREAMOUTSTATS stores this structure:
390		 * {
391		 *    u64 NumPrimitivesWritten;
392		 *    u64 PrimitiveStorageNeeded;
393		 * }
394		 * We only need NumPrimitivesWritten here. */
395		while (results_base != qbuf->results_end) {
396			result->u64 +=
397				r600_query_read_result(map + results_base, 2, 6, true);
398			results_base += query->result_size;
399		}
400		break;
401	case PIPE_QUERY_PRIMITIVES_GENERATED:
402		/* Here we read PrimitiveStorageNeeded. */
403		while (results_base != qbuf->results_end) {
404			result->u64 +=
405				r600_query_read_result(map + results_base, 0, 4, true);
406			results_base += query->result_size;
407		}
408		break;
409	case PIPE_QUERY_SO_STATISTICS:
410		while (results_base != qbuf->results_end) {
411			result->so.num_primitives_written +=
412				r600_query_read_result(map + results_base, 2, 6, true);
413			result->so.primitives_storage_needed +=
414				r600_query_read_result(map + results_base, 0, 4, true);
415			results_base += query->result_size;
416		}
417		break;
418	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
419		while (results_base != qbuf->results_end) {
420			result->b = result->b ||
421				r600_query_read_result(map + results_base, 2, 6, true) !=
422				r600_query_read_result(map + results_base, 0, 4, true);
423			results_base += query->result_size;
424		}
425		break;
426	default:
427		assert(0);
428	}
429
430	ctx->ws->buffer_unmap(qbuf->buf->buf);
431	return TRUE;
432}
433
434static boolean r600_get_query_result(struct pipe_context *ctx,
435					struct pipe_query *query,
436					boolean wait, void *vresult)
437{
438	struct r600_context *rctx = (struct r600_context *)ctx;
439	struct r600_query *rquery = (struct r600_query *)query;
440	boolean *result_b = (boolean*)vresult;
441	uint64_t *result_u64 = (uint64_t*)vresult;
442	union r600_query_result result;
443	struct pipe_query_data_so_statistics *result_so =
444		(struct pipe_query_data_so_statistics*)vresult;
445	struct r600_query_buffer *qbuf;
446
447	memset(&result, 0, sizeof(result));
448
449	for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) {
450		if (!r600_get_query_buffer_result(rctx, rquery, qbuf, wait, &result)) {
451			return FALSE;
452		}
453	}
454
455	switch (rquery->type) {
456	case PIPE_QUERY_OCCLUSION_COUNTER:
457	case PIPE_QUERY_PRIMITIVES_EMITTED:
458	case PIPE_QUERY_PRIMITIVES_GENERATED:
459		*result_u64 = result.u64;
460		break;
461	case PIPE_QUERY_OCCLUSION_PREDICATE:
462	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
463		*result_b = result.b;
464		break;
465	case PIPE_QUERY_TIME_ELAPSED:
466		*result_u64 = (1000000 * result.u64) / rctx->screen->info.r600_clock_crystal_freq;
467		break;
468	case PIPE_QUERY_SO_STATISTICS:
469		*result_so = result.so;
470		break;
471	default:
472		assert(0);
473	}
474	return TRUE;
475}
476
477static void r600_render_condition(struct pipe_context *ctx,
478				  struct pipe_query *query,
479				  uint mode)
480{
481	struct r600_context *rctx = (struct r600_context *)ctx;
482	struct r600_query *rquery = (struct r600_query *)query;
483	bool wait_flag = false;
484
485	rctx->current_render_cond = query;
486	rctx->current_render_cond_mode = mode;
487
488	if (query == NULL) {
489		if (rctx->predicate_drawing) {
490			rctx->predicate_drawing = false;
491			r600_emit_query_predication(rctx, NULL, PREDICATION_OP_CLEAR, false);
492		}
493		return;
494	}
495
496	if (mode == PIPE_RENDER_COND_WAIT ||
497	    mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
498		wait_flag = true;
499	}
500
501	rctx->predicate_drawing = true;
502
503	switch (rquery->type) {
504	case PIPE_QUERY_OCCLUSION_COUNTER:
505	case PIPE_QUERY_OCCLUSION_PREDICATE:
506		r600_emit_query_predication(rctx, rquery, PREDICATION_OP_ZPASS, wait_flag);
507		break;
508	case PIPE_QUERY_PRIMITIVES_EMITTED:
509	case PIPE_QUERY_PRIMITIVES_GENERATED:
510	case PIPE_QUERY_SO_STATISTICS:
511	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
512		r600_emit_query_predication(rctx, rquery, PREDICATION_OP_PRIMCOUNT, wait_flag);
513		break;
514	default:
515		assert(0);
516	}
517}
518
519void r600_suspend_queries(struct r600_context *ctx)
520{
521	struct r600_query *query;
522
523	LIST_FOR_EACH_ENTRY(query, &ctx->active_query_list, list) {
524		r600_emit_query_end(ctx, query);
525	}
526	assert(ctx->num_cs_dw_queries_suspend == 0);
527}
528
529void r600_resume_queries(struct r600_context *ctx)
530{
531	struct r600_query *query;
532
533	assert(ctx->num_cs_dw_queries_suspend == 0);
534
535	LIST_FOR_EACH_ENTRY(query, &ctx->active_query_list, list) {
536		r600_emit_query_begin(ctx, query);
537	}
538}
539
540void r600_init_query_functions(struct r600_context *rctx)
541{
542	rctx->context.create_query = r600_create_query;
543	rctx->context.destroy_query = r600_destroy_query;
544	rctx->context.begin_query = r600_begin_query;
545	rctx->context.end_query = r600_end_query;
546	rctx->context.get_query_result = r600_get_query_result;
547
548	if (rctx->screen->info.r600_num_backends > 0)
549	    rctx->context.render_condition = r600_render_condition;
550}
551