freedreno_resource.c revision 8cb965b112ad8a8465c4c5063b821a0210008d7d
/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */

/*
 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "util/u_format.h"
#include "util/u_format_rgtc.h"
#include "util/u_format_zs.h"
#include "util/u_inlines.h"
#include "util/u_transfer.h"
#include "util/u_string.h"
#include "util/u_surface.h"
#include "util/set.h"

#include "freedreno_resource.h"
#include "freedreno_batch_cache.h"
#include "freedreno_screen.h"
#include "freedreno_surface.h"
#include "freedreno_context.h"
#include "freedreno_query_hw.h"
#include "freedreno_util.h"

#include <errno.h>

/* XXX this should go away, needed for 'struct winsys_handle' */
#include "state_tracker/drm_driver.h"

static void
fd_invalidate_resource(struct fd_context *ctx, struct pipe_resource *prsc)
{
	int i;

	/* Go through the entire state and see if the resource is bound
	 * anywhere. If it is, mark the relevant state as dirty. This is called on
	 * realloc_bo.
	 */

	/* Constbufs */
	for (i = 1; i < PIPE_MAX_CONSTANT_BUFFERS && !(ctx->dirty & FD_DIRTY_CONSTBUF); i++) {
		if (ctx->constbuf[PIPE_SHADER_VERTEX].cb[i].buffer == prsc)
			ctx->dirty |= FD_DIRTY_CONSTBUF;
		if (ctx->constbuf[PIPE_SHADER_FRAGMENT].cb[i].buffer == prsc)
			ctx->dirty |= FD_DIRTY_CONSTBUF;
	}

	/* VBOs */
	for (i = 0; i < ctx->vtx.vertexbuf.count && !(ctx->dirty & FD_DIRTY_VTXBUF); i++) {
		if (ctx->vtx.vertexbuf.vb[i].buffer == prsc)
			ctx->dirty |= FD_DIRTY_VTXBUF;
	}

	/* Index buffer */
	if (ctx->indexbuf.buffer == prsc)
		ctx->dirty |= FD_DIRTY_INDEXBUF;

	/* Textures */
	for (i = 0; i < ctx->verttex.num_textures && !(ctx->dirty & FD_DIRTY_VERTTEX); i++) {
		if (ctx->verttex.textures[i] && (ctx->verttex.textures[i]->texture == prsc))
			ctx->dirty |= FD_DIRTY_VERTTEX;
	}
	for (i = 0; i < ctx->fragtex.num_textures && !(ctx->dirty & FD_DIRTY_FRAGTEX); i++) {
		if (ctx->fragtex.textures[i] && (ctx->fragtex.textures[i]->texture == prsc))
			ctx->dirty |= FD_DIRTY_FRAGTEX;
	}
}

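/* (Re)allocate the backing bo for a resource.  The old contents and any
 * pending rendering against the old bo are simply abandoned: the
 * timestamp and valid range are reset and the batch cache drops its
 * references.  Callers that care about the old contents (like
 * fd_try_shadow_resource()) must copy them out first.
 */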
static void
realloc_bo(struct fd_resource *rsc, uint32_t size)
{
	struct fd_screen *screen = fd_screen(rsc->base.b.screen);
	uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
			DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */

	/* if we start using things other than write-combine,
	 * be sure to check for PIPE_RESOURCE_FLAG_MAP_COHERENT
	 */

	if (rsc->bo)
		fd_bo_del(rsc->bo);

	rsc->bo = fd_bo_new(screen->dev, size, flags);
	rsc->timestamp = 0;
	util_range_set_empty(&rsc->valid_buffer_range);
	fd_bc_invalidate_resource(rsc, true);
}

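/* Perform a blit either on the gpu (via util_blitter) or, for buffers
 * and unsupported render target formats, as a sw copy on the cpu.  Note
 * that the cpu path recurses into transfer_map(), which is why the
 * shadow logic below is careful about the order in which it swaps
 * resource state.
 */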
static void
do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit, bool fallback)
{
	/* TODO size threshold too?? */
	if ((blit->src.resource->target != PIPE_BUFFER) && !fallback) {
		/* do blit on gpu: */
		fd_blitter_pipe_begin(ctx, false, true, FD_STAGE_BLIT);
		util_blitter_blit(ctx->blitter, blit);
		fd_blitter_pipe_end(ctx);
	} else {
		/* do blit on cpu: */
		util_resource_copy_region(&ctx->base,
				blit->dst.resource, blit->dst.level, blit->dst.box.x,
				blit->dst.box.y, blit->dst.box.z,
				blit->src.resource, blit->src.level, &blit->src.box);
	}
}

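/* Try to avoid stalling the cpu on a busy resource by "shadowing" it:
 * allocate a fresh backing bo and swap it with the current one, so the
 * shadow resource ends up owning the old storage (along with all the
 * existing batch references), and then blit the still-valid contents
 * from the shadow into the new bo.  The caller's to-be-discarded range
 * never needs to be copied, which is what makes this cheaper than
 * waiting.
 */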
static bool
fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc,
		unsigned level, unsigned usage, const struct pipe_box *box)
{
	struct pipe_context *pctx = &ctx->base;
	struct pipe_resource *prsc = &rsc->base.b;
	bool fallback = false;

	if (prsc->next)
		return false;

	/* TODO: somehow munge dimensions and format to copy unsupported
	 * render target format to something that is supported?
	 */
	if (!pctx->screen->is_format_supported(pctx->screen,
			prsc->format, prsc->target, prsc->nr_samples,
			PIPE_BIND_RENDER_TARGET))
		fallback = true;

	/* these cases should be handled elsewhere.. just for future
	 * reference in case this gets split into a more generic(ish)
	 * helper.
	 */
	debug_assert(!(usage & PIPE_TRANSFER_READ));
	debug_assert(!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE));

	/* if we do a gpu blit to clone the whole resource, we'll just
	 * end up stalling on that.. so only allow if we can discard
	 * current range (and blit, possibly cpu or gpu, the rest)
	 */
	if (!(usage & PIPE_TRANSFER_DISCARD_RANGE))
		return false;

	bool whole_level = util_texrange_covers_whole_level(prsc, level,
		box->x, box->y, box->z, box->width, box->height, box->depth);

	/* TODO need to be more clever about current level */
	if ((prsc->target >= PIPE_TEXTURE_2D) && !whole_level)
		return false;

	struct pipe_resource *pshadow =
		pctx->screen->resource_create(pctx->screen, prsc);

	if (!pshadow)
		return false;

	assert(!ctx->in_shadow);
	ctx->in_shadow = true;

	/* get rid of any references that batch-cache might have to us (which
	 * should empty/destroy rsc->batches hashset)
	 */
	fd_bc_invalidate_resource(rsc, false);

	pipe_mutex_lock(ctx->screen->lock);

	/* Swap the backing bo's, so shadow becomes the old buffer,
	 * blit from shadow to new buffer.  From here on out, we
	 * cannot fail.
	 *
	 * Note that we need to do it in this order, otherwise if
	 * we go down cpu blit path, the recursive transfer_map()
	 * sees the wrong status..
	 */
	struct fd_resource *shadow = fd_resource(pshadow);

	DBG("shadow: %p (%d) -> %p (%d)\n", rsc, rsc->base.b.reference.count,
			shadow, shadow->base.b.reference.count);

	/* TODO valid_buffer_range?? */
	swap(rsc->bo,        shadow->bo);
	swap(rsc->timestamp, shadow->timestamp);
	swap(rsc->write_batch,   shadow->write_batch);

	/* at this point, the newly created shadow buffer is not referenced
	 * by any batches, but the existing rsc (probably) is.  We need to
	 * transfer those references over:
	 */
	debug_assert(shadow->batch_mask == 0);
	struct fd_batch *batch;
	foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
		struct set_entry *entry = _mesa_set_search(batch->resources, rsc);
		_mesa_set_remove(batch->resources, entry);
		_mesa_set_add(batch->resources, shadow);
	}
	swap(rsc->batch_mask, shadow->batch_mask);

	pipe_mutex_unlock(ctx->screen->lock);

	struct pipe_blit_info blit = {0};
	blit.dst.resource = prsc;
	blit.dst.format   = prsc->format;
	blit.src.resource = pshadow;
	blit.src.format   = pshadow->format;
	blit.mask = util_format_get_mask(prsc->format);
	blit.filter = PIPE_TEX_FILTER_NEAREST;

#define set_box(field, val) do {     \
		blit.dst.field = (val);      \
		blit.src.field = (val);      \
	} while (0)

	/* blit the other levels in their entirety: */
	for (unsigned l = 0; l <= prsc->last_level; l++) {
		if (l == level)
			continue;

		/* just blit whole level: */
		set_box(level, l);
		set_box(box.width,  u_minify(prsc->width0, l));
		set_box(box.height, u_minify(prsc->height0, l));
		set_box(box.depth,  u_minify(prsc->depth0, l));

		do_blit(ctx, &blit, fallback);
	}

	/* deal w/ current level specially, since we might need to split
	 * it up into a couple blits:
	 */
	if (!whole_level) {
		set_box(level, level);

		switch (prsc->target) {
		case PIPE_BUFFER:
		case PIPE_TEXTURE_1D:
			set_box(box.y, 0);
			set_box(box.z, 0);
			set_box(box.height, 1);
			set_box(box.depth, 1);

			if (box->x > 0) {
				set_box(box.x, 0);
				set_box(box.width, box->x);

				do_blit(ctx, &blit, fallback);
			}
			if ((box->x + box->width) < u_minify(prsc->width0, level)) {
				set_box(box.x, box->x + box->width);
				set_box(box.width, u_minify(prsc->width0, level) - (box->x + box->width));

				do_blit(ctx, &blit, fallback);
			}
			break;
		case PIPE_TEXTURE_2D:
			/* TODO */
		default:
			unreachable("TODO");
		}
	}

	ctx->in_shadow = false;

	pipe_resource_reference(&pshadow, NULL);

	return true;
}

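/* Compute the offset of an array layer within a slice (miplevel).  In
 * the "layer_first" layout (a4xx, non-3d) all miplevels of a layer are
 * stored contiguously, so layers step by rsc->layer_size; otherwise
 * (a3xx) each miplevel stores all of its layers contiguously, so layers
 * step by the slice's size0.
 */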
static unsigned
fd_resource_layer_offset(struct fd_resource *rsc,
						 struct fd_resource_slice *slice,
						 unsigned layer)
{
	if (rsc->layer_first)
		return layer * rsc->layer_size;
	else
		return layer * slice->size0;
}

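/* There is no native z32f_s8 format (see fd_resource_create()), so it
 * is emulated with a separate z32f depth bo plus an s8 stencil bo.  On
 * flush, unpack the interleaved 8-byte-per-pixel staging data back into
 * the two planes.
 */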
static void
fd_resource_flush_z32s8(struct fd_transfer *trans, const struct pipe_box *box)
{
	struct fd_resource *rsc = fd_resource(trans->base.resource);
	struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
	struct fd_resource_slice *sslice = fd_resource_slice(rsc->stencil, trans->base.level);
	enum pipe_format format = trans->base.resource->format;

	float *depth = fd_bo_map(rsc->bo) + slice->offset +
		fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
		(trans->base.box.y + box->y) * slice->pitch * 4 + (trans->base.box.x + box->x) * 4;
	uint8_t *stencil = fd_bo_map(rsc->stencil->bo) + sslice->offset +
		fd_resource_layer_offset(rsc->stencil, sslice, trans->base.box.z) +
		(trans->base.box.y + box->y) * sslice->pitch + trans->base.box.x + box->x;

	if (format != PIPE_FORMAT_X32_S8X24_UINT)
		util_format_z32_float_s8x24_uint_unpack_z_float(
				depth, slice->pitch * 4,
				trans->staging, trans->base.stride,
				box->width, box->height);

	util_format_z32_float_s8x24_uint_unpack_s_8uint(
			stencil, sslice->pitch,
			trans->staging, trans->base.stride,
			box->width, box->height);
}

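/* RGTC/LATC formats are not supported natively before a4xx (see
 * fd_resource_create()), so those resources are backed by an rgba8
 * texture.  On flush, decompress the RGTC blocks the app wrote into the
 * staging buffer out to the rgba8 backing store.
 */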
static void
fd_resource_flush_rgtc(struct fd_transfer *trans, const struct pipe_box *box)
{
	struct fd_resource *rsc = fd_resource(trans->base.resource);
	struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
	enum pipe_format format = trans->base.resource->format;

	uint8_t *data = fd_bo_map(rsc->bo) + slice->offset +
		fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
		((trans->base.box.y + box->y) * slice->pitch +
		 trans->base.box.x + box->x) * rsc->cpp;

	uint8_t *source = trans->staging +
		util_format_get_nblocksy(format, box->y) * trans->base.stride +
		util_format_get_stride(format, box->x);

	switch (format) {
	case PIPE_FORMAT_RGTC1_UNORM:
	case PIPE_FORMAT_RGTC1_SNORM:
	case PIPE_FORMAT_LATC1_UNORM:
	case PIPE_FORMAT_LATC1_SNORM:
		util_format_rgtc1_unorm_unpack_rgba_8unorm(
				data, slice->pitch * rsc->cpp,
				source, trans->base.stride,
				box->width, box->height);
		break;
	case PIPE_FORMAT_RGTC2_UNORM:
	case PIPE_FORMAT_RGTC2_SNORM:
	case PIPE_FORMAT_LATC2_UNORM:
	case PIPE_FORMAT_LATC2_SNORM:
		util_format_rgtc2_unorm_unpack_rgba_8unorm(
				data, slice->pitch * rsc->cpp,
				source, trans->base.stride,
				box->width, box->height);
		break;
	default:
		assert(!"Unexpected format");
		break;
	}
}

static void
fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
{
	enum pipe_format format = trans->base.resource->format;

	switch (format) {
	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
	case PIPE_FORMAT_X32_S8X24_UINT:
		fd_resource_flush_z32s8(trans, box);
		break;
	case PIPE_FORMAT_RGTC1_UNORM:
	case PIPE_FORMAT_RGTC1_SNORM:
	case PIPE_FORMAT_RGTC2_UNORM:
	case PIPE_FORMAT_RGTC2_SNORM:
	case PIPE_FORMAT_LATC1_UNORM:
	case PIPE_FORMAT_LATC1_SNORM:
	case PIPE_FORMAT_LATC2_UNORM:
	case PIPE_FORMAT_LATC2_SNORM:
		fd_resource_flush_rgtc(trans, box);
		break;
	default:
		assert(!"Unexpected staging transfer type");
		break;
	}
}

static void fd_resource_transfer_flush_region(struct pipe_context *pctx,
		struct pipe_transfer *ptrans,
		const struct pipe_box *box)
{
	struct fd_resource *rsc = fd_resource(ptrans->resource);
	struct fd_transfer *trans = fd_transfer(ptrans);

	if (ptrans->resource->target == PIPE_BUFFER)
		util_range_add(&rsc->valid_buffer_range,
					   ptrans->box.x + box->x,
					   ptrans->box.x + box->x + box->width);

	if (trans->staging)
		fd_resource_flush(trans, box);
}

static void
fd_resource_transfer_unmap(struct pipe_context *pctx,
		struct pipe_transfer *ptrans)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd_resource *rsc = fd_resource(ptrans->resource);
	struct fd_transfer *trans = fd_transfer(ptrans);

	if (trans->staging && !(ptrans->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
		struct pipe_box box;
		u_box_2d(0, 0, ptrans->box.width, ptrans->box.height, &box);
		fd_resource_flush(trans, &box);
	}

	if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		fd_bo_cpu_fini(rsc->bo);
		if (rsc->stencil)
			fd_bo_cpu_fini(rsc->stencil->bo);
	}

	util_range_add(&rsc->valid_buffer_range,
				   ptrans->box.x,
				   ptrans->box.x + ptrans->box.width);

	/* free the staging buffer *before* returning ptrans to the slab,
	 * since trans aliases ptrans and must not be touched afterwards:
	 */
	free(trans->staging);

	pipe_resource_reference(&ptrans->resource, NULL);
	slab_free(&ctx->transfer_pool, ptrans);
}

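/* Map a range of a resource for cpu access.  In rough outline:
 *
 *  1) if the whole resource can be discarded, just realloc the bo;
 *  2) otherwise, unless the access is unsynchronized (or hits a still
 *     uninitialized buffer range), flush and/or stall against pending
 *     gpu work, first trying to shadow the resource to avoid the stall
 *     entirely;
 *  3) emulated formats (z32f_s8, RGTC on a3xx) get a malloc'd staging
 *     buffer, packed/unpacked here and in fd_resource_flush().
 */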
static void *
fd_resource_transfer_map(struct pipe_context *pctx,
		struct pipe_resource *prsc,
		unsigned level, unsigned usage,
		const struct pipe_box *box,
		struct pipe_transfer **pptrans)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd_resource *rsc = fd_resource(prsc);
	struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
	struct fd_transfer *trans;
	struct pipe_transfer *ptrans;
	enum pipe_format format = prsc->format;
	uint32_t op = 0;
	uint32_t offset;
	char *buf;
	int ret = 0;

	DBG("prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d", prsc, level, usage,
		box->width, box->height, box->x, box->y);

	ptrans = slab_alloc(&ctx->transfer_pool);
	if (!ptrans)
		return NULL;

	/* slab_alloc() doesn't zero: */
	trans = fd_transfer(ptrans);
	memset(trans, 0, sizeof(*trans));

	pipe_resource_reference(&ptrans->resource, prsc);
	ptrans->level = level;
	ptrans->usage = usage;
	ptrans->box = *box;
	ptrans->stride = util_format_get_nblocksx(format, slice->pitch) * rsc->cpp;
	ptrans->layer_stride = rsc->layer_first ? rsc->layer_size : slice->size0;

	if (ctx->in_shadow && !(usage & PIPE_TRANSFER_READ))
		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;

	if (usage & PIPE_TRANSFER_READ)
		op |= DRM_FREEDRENO_PREP_READ;

	if (usage & PIPE_TRANSFER_WRITE)
		op |= DRM_FREEDRENO_PREP_WRITE;

	if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
		realloc_bo(rsc, fd_bo_size(rsc->bo));
		if (rsc->stencil)
			realloc_bo(rsc->stencil, fd_bo_size(rsc->stencil->bo));
		fd_invalidate_resource(ctx, prsc);
	} else if ((usage & PIPE_TRANSFER_WRITE) &&
			   prsc->target == PIPE_BUFFER &&
			   !util_ranges_intersect(&rsc->valid_buffer_range,
									  box->x, box->x + box->width)) {
		/* We are trying to write to a previously uninitialized range. No need
		 * to wait.
		 */
	} else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		struct fd_batch *write_batch = NULL;

		/* hold a reference, so it doesn't disappear under us: */
		fd_batch_reference(&write_batch, rsc->write_batch);

		if ((usage & PIPE_TRANSFER_WRITE) && write_batch &&
				write_batch->back_blit) {
			/* if only thing pending is a back-blit, we can discard it: */
			fd_batch_reset(write_batch);
		}

		/* If the GPU is writing to the resource, or if it is reading from the
		 * resource and we're trying to write to it, flush the renders.
		 */
		bool needs_flush = pending(rsc, !!(usage & PIPE_TRANSFER_WRITE));
		bool busy = needs_flush || (0 != fd_bo_cpu_prep(rsc->bo,
				ctx->screen->pipe, op | DRM_FREEDRENO_PREP_NOSYNC));

		/* if we need to flush/stall, see if we can make a shadow buffer
		 * to avoid this:
		 *
		 * TODO we could go down this path !reorder && !busy_for_read
		 * ie. we only *don't* want to go down this path if the blit
		 * will trigger a flush!
		 */
		if (ctx->screen->reorder && busy && !(usage & PIPE_TRANSFER_READ)) {
			if (fd_try_shadow_resource(ctx, rsc, level, usage, box)) {
				needs_flush = busy = false;
				fd_invalidate_resource(ctx, prsc);
			}
		}

		if (needs_flush) {
			if (usage & PIPE_TRANSFER_WRITE) {
				struct fd_batch *batch, *last_batch = NULL;
				foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
					fd_batch_reference(&last_batch, batch);
					fd_batch_flush(batch, false);
				}
				if (last_batch) {
					fd_batch_sync(last_batch);
					fd_batch_reference(&last_batch, NULL);
				}
				assert(rsc->batch_mask == 0);
			} else {
				fd_batch_flush(write_batch, true);
			}
			assert(!rsc->write_batch);
		}

		fd_batch_reference(&write_batch, NULL);

		/* The kernel keeps track of how the various bo's are being used, and
		 * will wait if necessary for the proper operation to have
		 * completed.
		 */
		if (busy) {
			ret = fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, op);
			if (ret)
				goto fail;
		}
	}

	buf = fd_bo_map(rsc->bo);
	if (!buf)
		goto fail;

	offset = slice->offset +
		box->y / util_format_get_blockheight(format) * ptrans->stride +
		box->x / util_format_get_blockwidth(format) * rsc->cpp +
		fd_resource_layer_offset(rsc, slice, box->z);

	if (prsc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
		prsc->format == PIPE_FORMAT_X32_S8X24_UINT) {
		assert(trans->base.box.depth == 1);

		trans->base.stride = trans->base.box.width * rsc->cpp * 2;
		trans->staging = malloc(trans->base.stride * trans->base.box.height);
		if (!trans->staging)
			goto fail;

		/* if we're not discarding the whole range (or resource), we must copy
		 * the real data in.
		 */
		if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
					   PIPE_TRANSFER_DISCARD_RANGE))) {
			struct fd_resource_slice *sslice =
				fd_resource_slice(rsc->stencil, level);
			void *sbuf = fd_bo_map(rsc->stencil->bo);
			if (!sbuf)
				goto fail;

			float *depth = (float *)(buf + slice->offset +
				fd_resource_layer_offset(rsc, slice, box->z) +
				box->y * slice->pitch * 4 + box->x * 4);
			uint8_t *stencil = sbuf + sslice->offset +
				fd_resource_layer_offset(rsc->stencil, sslice, box->z) +
				box->y * sslice->pitch + box->x;

			if (format != PIPE_FORMAT_X32_S8X24_UINT)
				util_format_z32_float_s8x24_uint_pack_z_float(
						trans->staging, trans->base.stride,
						depth, slice->pitch * 4,
						box->width, box->height);

			util_format_z32_float_s8x24_uint_pack_s_8uint(
					trans->staging, trans->base.stride,
					stencil, sslice->pitch,
					box->width, box->height);
		}

		buf = trans->staging;
		offset = 0;
	} else if (rsc->internal_format != format &&
			   util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) {
		assert(trans->base.box.depth == 1);

		trans->base.stride = util_format_get_stride(
				format, trans->base.box.width);
		trans->staging = malloc(
				util_format_get_2d_size(format, trans->base.stride,
										trans->base.box.height));
		if (!trans->staging)
			goto fail;

		/* if we're not discarding the whole range (or resource), we must copy
		 * the real data in.
		 */
		if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
					   PIPE_TRANSFER_DISCARD_RANGE))) {
			uint8_t *rgba8 = (uint8_t *)buf + slice->offset +
				fd_resource_layer_offset(rsc, slice, box->z) +
				box->y * slice->pitch * rsc->cpp + box->x * rsc->cpp;

			switch (format) {
			case PIPE_FORMAT_RGTC1_UNORM:
			case PIPE_FORMAT_RGTC1_SNORM:
			case PIPE_FORMAT_LATC1_UNORM:
			case PIPE_FORMAT_LATC1_SNORM:
				util_format_rgtc1_unorm_pack_rgba_8unorm(
					trans->staging, trans->base.stride,
					rgba8, slice->pitch * rsc->cpp,
					box->width, box->height);
				break;
			case PIPE_FORMAT_RGTC2_UNORM:
			case PIPE_FORMAT_RGTC2_SNORM:
			case PIPE_FORMAT_LATC2_UNORM:
			case PIPE_FORMAT_LATC2_SNORM:
				util_format_rgtc2_unorm_pack_rgba_8unorm(
					trans->staging, trans->base.stride,
					rgba8, slice->pitch * rsc->cpp,
					box->width, box->height);
				break;
			default:
				assert(!"Unexpected format");
				break;
			}
		}

		buf = trans->staging;
		offset = 0;
	}

	*pptrans = ptrans;

	return buf + offset;

fail:
	fd_resource_transfer_unmap(pctx, ptrans);
	return NULL;
}

static void
fd_resource_destroy(struct pipe_screen *pscreen,
		struct pipe_resource *prsc)
{
	struct fd_resource *rsc = fd_resource(prsc);
	fd_bc_invalidate_resource(rsc, true);
	if (rsc->bo)
		fd_bo_del(rsc->bo);
	util_range_destroy(&rsc->valid_buffer_range);
	FREE(rsc);
}

static boolean
fd_resource_get_handle(struct pipe_screen *pscreen,
		struct pipe_resource *prsc,
		struct winsys_handle *handle)
{
	struct fd_resource *rsc = fd_resource(prsc);

	return fd_screen_bo_get_handle(pscreen, rsc->bo,
			rsc->slices[0].pitch * rsc->cpp, handle);
}


static const struct u_resource_vtbl fd_resource_vtbl = {
		.resource_get_handle      = fd_resource_get_handle,
		.resource_destroy         = fd_resource_destroy,
		.transfer_map             = fd_resource_transfer_map,
		.transfer_flush_region    = fd_resource_transfer_flush_region,
		.transfer_unmap           = fd_resource_transfer_unmap,
};

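/* Compute the layout of all miplevel slices.  Pitches are aligned to 32
 * pixels, and each successive width/height is minified from the
 * *aligned* size.  A worked example, for a hypothetical 67x65 2d rgba8
 * texture with three miplevels and alignment==1 (cpp=4):
 *
 *   level 0: pitch = align(67, 32) = 96,  size0 = 96*65*4 = 24960
 *   level 1: pitch = align(48, 32) = 64,  size0 = 64*32*4 = 8192
 *   level 2: pitch = align(32, 32) = 32,  size0 = 32*16*4 = 2048
 */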
static uint32_t
setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
{
	struct pipe_resource *prsc = &rsc->base.b;
	enum util_format_layout layout = util_format_description(format)->layout;
	uint32_t level, size = 0;
	uint32_t width = prsc->width0;
	uint32_t height = prsc->height0;
	uint32_t depth = prsc->depth0;
	/* in layer_first layout, the level (slice) contains just one
	 * layer (since in fact the layer contains the slices)
	 */
	uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size;

	for (level = 0; level <= prsc->last_level; level++) {
		struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
		uint32_t blocks;

		if (layout == UTIL_FORMAT_LAYOUT_ASTC)
			slice->pitch = width =
				util_align_npot(width, 32 * util_format_get_blockwidth(format));
		else
			slice->pitch = width = align(width, 32);
		slice->offset = size;
		blocks = util_format_get_nblocks(format, width, height);
		/* 1d array and 2d array textures must all have the same layer size
		 * for each miplevel on a3xx. 3d textures can have different layer
		 * sizes for high levels, but the hw auto-sizer is buggy (or at least
		 * different than what this code does), so once the layer size drops
		 * into range (<= 0xf000 bytes) we stop reducing it.
		 */
		if (prsc->target == PIPE_TEXTURE_3D && (
					level == 1 ||
					(level > 1 && rsc->slices[level - 1].size0 > 0xf000)))
			slice->size0 = align(blocks * rsc->cpp, alignment);
		else if (level == 0 || rsc->layer_first || alignment == 1)
			slice->size0 = align(blocks * rsc->cpp, alignment);
		else
			slice->size0 = rsc->slices[level - 1].size0;

		size += slice->size0 * depth * layers_in_level;

		width = u_minify(width, 1);
		height = u_minify(height, 1);
		depth = u_minify(depth, 1);
	}

	return size;
}

static uint32_t
slice_alignment(struct pipe_screen *pscreen, const struct pipe_resource *tmpl)
{
	/* on a3xx, 2d array and 3d textures seem to want their
	 * layers aligned to page boundaries:
	 */
	switch (tmpl->target) {
	case PIPE_TEXTURE_3D:
	case PIPE_TEXTURE_1D_ARRAY:
	case PIPE_TEXTURE_2D_ARRAY:
		return 4096;
	default:
		return 1;
	}
}

/* special case to resize query buf after allocated.. */
void
fd_resource_resize(struct pipe_resource *prsc, uint32_t sz)
{
	struct fd_resource *rsc = fd_resource(prsc);

	debug_assert(prsc->width0 == 0);
	debug_assert(prsc->target == PIPE_BUFFER);
	debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);

	prsc->width0 = sz;
	realloc_bo(rsc, setup_slices(rsc, 1, prsc->format));
}

/**
 * Create a new texture object, using the given template info.
 */
static struct pipe_resource *
fd_resource_create(struct pipe_screen *pscreen,
		const struct pipe_resource *tmpl)
{
	struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
	struct pipe_resource *prsc = &rsc->base.b;
	enum pipe_format format = tmpl->format;
	uint32_t size, alignment;

	DBG("%p: target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
			"nr_samples=%u, usage=%u, bind=%x, flags=%x", prsc,
			tmpl->target, util_format_name(format),
			tmpl->width0, tmpl->height0, tmpl->depth0,
			tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
			tmpl->usage, tmpl->bind, tmpl->flags);

	if (!rsc)
		return NULL;

	*prsc = *tmpl;

	pipe_reference_init(&prsc->reference, 1);

	prsc->screen = pscreen;

	util_range_init(&rsc->valid_buffer_range);

	rsc->base.vtbl = &fd_resource_vtbl;

	if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
		format = PIPE_FORMAT_Z32_FLOAT;
	else if (fd_screen(pscreen)->gpu_id < 400 &&
			 util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
		format = PIPE_FORMAT_R8G8B8A8_UNORM;
	rsc->internal_format = format;
	rsc->cpp = util_format_get_blocksize(format);

	assert(rsc->cpp);

	alignment = slice_alignment(pscreen, tmpl);
	if (is_a4xx(fd_screen(pscreen))) {
		switch (tmpl->target) {
		case PIPE_TEXTURE_3D:
			rsc->layer_first = false;
			break;
		default:
			rsc->layer_first = true;
			alignment = 1;
			break;
		}
	}

	size = setup_slices(rsc, alignment, format);

	/* special case for hw-query buffer, which we need to allocate before we
	 * know the size:
	 */
	if (size == 0) {
		/* note: semi-intentional use of == instead of & */
		debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
		return prsc;
	}

	if (rsc->layer_first) {
		rsc->layer_size = align(size, 4096);
		size = rsc->layer_size * prsc->array_size;
	}

	realloc_bo(rsc, size);
	if (!rsc->bo)
		goto fail;

	/* There is no native Z32F_S8 sampling or rendering format, so this must
	 * be emulated via two separate textures. The depth texture still keeps
	 * its Z32F_S8 format though, and we also keep a reference to a separate
	 * S8 texture.
	 */
	if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
		struct pipe_resource stencil = *tmpl;
		stencil.format = PIPE_FORMAT_S8_UINT;
		rsc->stencil = fd_resource(fd_resource_create(pscreen, &stencil));
		if (!rsc->stencil)
			goto fail;
	}

	return prsc;
fail:
	fd_resource_destroy(pscreen, prsc);
	return NULL;
}

/**
 * Create a texture from a winsys_handle. The handle is often created in
 * another process by first creating a pipe texture and then calling
 * resource_get_handle.
 */
static struct pipe_resource *
fd_resource_from_handle(struct pipe_screen *pscreen,
		const struct pipe_resource *tmpl,
		struct winsys_handle *handle, unsigned usage)
{
	struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
	struct fd_resource_slice *slice = &rsc->slices[0];
	struct pipe_resource *prsc = &rsc->base.b;

	DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
			"nr_samples=%u, usage=%u, bind=%x, flags=%x",
			tmpl->target, util_format_name(tmpl->format),
			tmpl->width0, tmpl->height0, tmpl->depth0,
			tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
			tmpl->usage, tmpl->bind, tmpl->flags);

	if (!rsc)
		return NULL;

	*prsc = *tmpl;

	pipe_reference_init(&prsc->reference, 1);

	prsc->screen = pscreen;

	util_range_init(&rsc->valid_buffer_range);

	rsc->bo = fd_screen_bo_from_handle(pscreen, handle);
	if (!rsc->bo)
		goto fail;

	rsc->base.vtbl = &fd_resource_vtbl;
	rsc->cpp = util_format_get_blocksize(tmpl->format);
	slice->pitch = handle->stride / rsc->cpp;
	slice->offset = handle->offset;
	slice->size0 = handle->stride * prsc->height0;

	if ((slice->pitch < align(prsc->width0, 32)) || (slice->pitch % 32))
		goto fail;

	assert(rsc->cpp);

	return prsc;

fail:
	fd_resource_destroy(pscreen, prsc);
	return NULL;
}

/**
 * _copy_region using pipe (3d engine)
 */
static bool
fd_blitter_pipe_copy_region(struct fd_context *ctx,
		struct pipe_resource *dst,
		unsigned dst_level,
		unsigned dstx, unsigned dsty, unsigned dstz,
		struct pipe_resource *src,
		unsigned src_level,
		const struct pipe_box *src_box)
{
	/* not until we allow rendertargets to be buffers */
	if (dst->target == PIPE_BUFFER || src->target == PIPE_BUFFER)
		return false;

	if (!util_blitter_is_copy_supported(ctx->blitter, dst, src))
		return false;

	/* TODO we could discard if dst box covers dst level fully.. */
	fd_blitter_pipe_begin(ctx, false, false, FD_STAGE_BLIT);
	util_blitter_copy_texture(ctx->blitter,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box);
	fd_blitter_pipe_end(ctx);

	return true;
}

/**
 * Copy a block of pixels from one resource to another.
 * Both resources must be of the same format.
 * Resources with nr_samples > 1 are not allowed.
 */
static void
fd_resource_copy_region(struct pipe_context *pctx,
		struct pipe_resource *dst,
		unsigned dst_level,
		unsigned dstx, unsigned dsty, unsigned dstz,
		struct pipe_resource *src,
		unsigned src_level,
		const struct pipe_box *src_box)
{
	struct fd_context *ctx = fd_context(pctx);

	/* TODO if we have 2d core, or other DMA engine that could be used
	 * for simple copies and reasonably easily synchronized with the 3d
	 * core, this is where we'd plug it in..
	 */

	/* try blit on 3d pipe: */
	if (fd_blitter_pipe_copy_region(ctx,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box))
		return;

	/* else fallback to pure sw: */
	util_resource_copy_region(pctx,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box);
}

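/* Check the active conditional-render query, if any: returns true if
 * rendering should proceed.  In the NO_WAIT modes we don't block on the
 * query result; if it isn't available yet we just render anyway, which
 * is what the PIPE_RENDER_COND_*NO_WAIT semantics permit.
 */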
bool
fd_render_condition_check(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);

	if (!ctx->cond_query)
		return true;

	union pipe_query_result res = { 0 };
	bool wait =
		ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT &&
		ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;

	if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res))
		return (bool)res.u64 != ctx->cond_cond;

	return true;
}

/**
 * Optimal hardware path for blitting pixels.
 * Scaling, format conversion, up- and downsampling (resolve) are allowed.
 */
static void
fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
{
	struct fd_context *ctx = fd_context(pctx);
	struct pipe_blit_info info = *blit_info;
	bool discard = false;

	if (info.src.resource->nr_samples > 1 &&
			info.dst.resource->nr_samples <= 1 &&
			!util_format_is_depth_or_stencil(info.src.resource->format) &&
			!util_format_is_pure_integer(info.src.resource->format)) {
		DBG("color resolve unimplemented");
		return;
	}

	if (info.render_condition_enable && !fd_render_condition_check(pctx))
		return;

	if (!info.scissor_enable && !info.alpha_blend) {
		discard = util_texrange_covers_whole_level(info.dst.resource,
				info.dst.level, info.dst.box.x, info.dst.box.y,
				info.dst.box.z, info.dst.box.width,
				info.dst.box.height, info.dst.box.depth);
	}

	if (util_try_blit_via_copy_region(pctx, &info)) {
		return; /* done */
	}

	if (info.mask & PIPE_MASK_S) {
		DBG("cannot blit stencil, skipping");
		info.mask &= ~PIPE_MASK_S;
	}

	if (!util_blitter_is_blit_supported(ctx->blitter, &info)) {
		DBG("blit unsupported %s -> %s",
				util_format_short_name(info.src.resource->format),
				util_format_short_name(info.dst.resource->format));
		return;
	}

	fd_blitter_pipe_begin(ctx, info.render_condition_enable, discard, FD_STAGE_BLIT);
	util_blitter_blit(ctx->blitter, &info);
	fd_blitter_pipe_end(ctx);
}

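/* Save off all context state that util_blitter will clobber, and mark
 * the hw-query stage, before running an internal blit.  Any state the
 * blitter touches but that is not saved here would leak into subsequent
 * application rendering.
 */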
void
fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard,
		enum fd_render_stage stage)
{
	util_blitter_save_fragment_constant_buffer_slot(ctx->blitter,
			ctx->constbuf[PIPE_SHADER_FRAGMENT].cb);
	util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb);
	util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx);
	util_blitter_save_vertex_shader(ctx->blitter, ctx->prog.vp);
	util_blitter_save_so_targets(ctx->blitter, ctx->streamout.num_targets,
			ctx->streamout.targets);
	util_blitter_save_rasterizer(ctx->blitter, ctx->rasterizer);
	util_blitter_save_viewport(ctx->blitter, &ctx->viewport);
	util_blitter_save_scissor(ctx->blitter, &ctx->scissor);
	util_blitter_save_fragment_shader(ctx->blitter, ctx->prog.fp);
	util_blitter_save_blend(ctx->blitter, ctx->blend);
	util_blitter_save_depth_stencil_alpha(ctx->blitter, ctx->zsa);
	util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref);
	util_blitter_save_sample_mask(ctx->blitter, ctx->sample_mask);
	util_blitter_save_framebuffer(ctx->blitter,
			ctx->batch ? &ctx->batch->framebuffer : NULL);
	util_blitter_save_fragment_sampler_states(ctx->blitter,
			ctx->fragtex.num_samplers,
			(void **)ctx->fragtex.samplers);
	util_blitter_save_fragment_sampler_views(ctx->blitter,
			ctx->fragtex.num_textures, ctx->fragtex.textures);
	if (!render_cond)
		util_blitter_save_render_condition(ctx->blitter,
			ctx->cond_query, ctx->cond_cond, ctx->cond_mode);

	if (ctx->batch)
		fd_hw_query_set_stage(ctx->batch, ctx->batch->draw, stage);

	ctx->in_blit = discard;
}

void
fd_blitter_pipe_end(struct fd_context *ctx)
{
	if (ctx->batch)
		fd_hw_query_set_stage(ctx->batch, ctx->batch->draw, FD_STAGE_NULL);
	ctx->in_blit = false;
}

static void
fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
{
	struct fd_resource *rsc = fd_resource(prsc);

	if (rsc->write_batch)
		fd_batch_flush(rsc->write_batch, true);

	assert(!rsc->write_batch);
}

void
fd_resource_screen_init(struct pipe_screen *pscreen)
{
	pscreen->resource_create = fd_resource_create;
	pscreen->resource_from_handle = fd_resource_from_handle;
	pscreen->resource_get_handle = u_resource_get_handle_vtbl;
	pscreen->resource_destroy = u_resource_destroy_vtbl;
}

void
fd_resource_context_init(struct pipe_context *pctx)
{
	pctx->transfer_map = u_transfer_map_vtbl;
	pctx->transfer_flush_region = u_transfer_flush_region_vtbl;
	pctx->transfer_unmap = u_transfer_unmap_vtbl;
	pctx->buffer_subdata = u_default_buffer_subdata;
	pctx->texture_subdata = u_default_texture_subdata;
	pctx->create_surface = fd_create_surface;
	pctx->surface_destroy = fd_surface_destroy;
	pctx->resource_copy_region = fd_resource_copy_region;
	pctx->blit = fd_blit;
	pctx->flush_resource = fd_flush_resource;
}