freedreno_resource.c revision 0334ba150f429b7d6b0bdc003c4301e0ad5fa21d
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "util/u_format.h"
#include "util/u_format_rgtc.h"
#include "util/u_format_zs.h"
#include "util/u_inlines.h"
#include "util/u_transfer.h"
#include "util/u_string.h"
#include "util/u_surface.h"
#include "util/set.h"

#include "freedreno_resource.h"
#include "freedreno_batch_cache.h"
#include "freedreno_screen.h"
#include "freedreno_surface.h"
#include "freedreno_context.h"
#include "freedreno_query_hw.h"
#include "freedreno_util.h"

#include <errno.h>

/* XXX this should go away, needed for 'struct winsys_handle' */
#include "state_tracker/drm_driver.h"
static void
fd_invalidate_resource(struct fd_context *ctx, struct pipe_resource *prsc)
{
	int i;

	/* Go through the entire state and see if the resource is bound
	 * anywhere. If it is, mark the relevant state as dirty. This is called on
	 * realloc_bo.
	 */

	/* Constbufs */
	for (i = 1; i < PIPE_MAX_CONSTANT_BUFFERS && !(ctx->dirty & FD_DIRTY_CONSTBUF); i++) {
		if (ctx->constbuf[PIPE_SHADER_VERTEX].cb[i].buffer == prsc)
			ctx->dirty |= FD_DIRTY_CONSTBUF;
		if (ctx->constbuf[PIPE_SHADER_FRAGMENT].cb[i].buffer == prsc)
			ctx->dirty |= FD_DIRTY_CONSTBUF;
	}

	/* VBOs */
	for (i = 0; i < ctx->vtx.vertexbuf.count && !(ctx->dirty & FD_DIRTY_VTXBUF); i++) {
		if (ctx->vtx.vertexbuf.vb[i].buffer == prsc)
			ctx->dirty |= FD_DIRTY_VTXBUF;
	}

	/* Index buffer */
	if (ctx->indexbuf.buffer == prsc)
		ctx->dirty |= FD_DIRTY_INDEXBUF;

	/* Textures */
	for (i = 0; i < ctx->verttex.num_textures && !(ctx->dirty & FD_DIRTY_VERTTEX); i++) {
		if (ctx->verttex.textures[i] && (ctx->verttex.textures[i]->texture == prsc))
			ctx->dirty |= FD_DIRTY_VERTTEX;
	}
	for (i = 0; i < ctx->fragtex.num_textures && !(ctx->dirty & FD_DIRTY_FRAGTEX); i++) {
		if (ctx->fragtex.textures[i] && (ctx->fragtex.textures[i]->texture == prsc))
			ctx->dirty |= FD_DIRTY_FRAGTEX;
	}
}

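/* Replace the resource's backing bo with a freshly allocated one.  Any
 * previous contents are lost: the timestamp and valid range are reset and
 * cached batch references are invalidated.  Callers that reallocate a live
 * resource follow up with fd_invalidate_resource() so bound state that
 * referenced the old bo gets re-emitted.
 */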
static void
realloc_bo(struct fd_resource *rsc, uint32_t size)
{
	struct fd_screen *screen = fd_screen(rsc->base.b.screen);
	uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
			DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */

	/* if we start using things other than write-combine,
	 * be sure to check for PIPE_RESOURCE_FLAG_MAP_COHERENT
	 */

	if (rsc->bo)
		fd_bo_del(rsc->bo);

	rsc->bo = fd_bo_new(screen->dev, size, flags);
	rsc->timestamp = 0;
	util_range_set_empty(&rsc->valid_buffer_range);
	fd_bc_invalidate_resource(rsc, true);
}

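/* Copy between resources, either with a GPU blit via u_blitter or, when the
 * caller requests a fallback (or the source is a PIPE_BUFFER, which the
 * blitter cannot handle), with a CPU copy through transfer_map().
 */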
static void
do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit, bool fallback)
{
	/* TODO size threshold too?? */
	if ((blit->src.resource->target != PIPE_BUFFER) && !fallback) {
		/* do blit on gpu: */
		fd_blitter_pipe_begin(ctx, false, true, FD_STAGE_BLIT);
		util_blitter_blit(ctx->blitter, blit);
		fd_blitter_pipe_end(ctx);
	} else {
		/* do blit on cpu: */
		util_resource_copy_region(&ctx->base,
				blit->dst.resource, blit->dst.level, blit->dst.box.x,
				blit->dst.box.y, blit->dst.box.z,
				blit->src.resource, blit->src.level, &blit->src.box);
	}
}

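/* "Shadowing" is a lighter-weight alternative to stalling or flushing: when
 * the app wants to write a resource the GPU is still using, and the write
 * discards the mapped range, we can allocate a fresh backing bo, point the
 * resource at it, and blit the still-valid contents from the old bo into the
 * new one.  The pending GPU work keeps the old bo alive through the swapped
 * shadow resource, and the CPU can write the new bo without synchronization.
 */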
static bool
fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc,
		unsigned level, unsigned usage, const struct pipe_box *box)
{
	struct pipe_context *pctx = &ctx->base;
	struct pipe_resource *prsc = &rsc->base.b;
	bool fallback = false;

	/* TODO: somehow munge dimensions and format to copy unsupported
	 * render target format to something that is supported?
	 */
	if (!pctx->screen->is_format_supported(pctx->screen,
			prsc->format, prsc->target, prsc->nr_samples,
			PIPE_BIND_RENDER_TARGET))
		fallback = true;

	/* these cases should be handled elsewhere.. just for future
	 * reference in case this gets split into a more generic(ish)
	 * helper.
	 */
	debug_assert(!(usage & PIPE_TRANSFER_READ));
	debug_assert(!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE));

	/* if we do a gpu blit to clone the whole resource, we'll just
	 * end up stalling on that.. so only allow if we can discard
	 * the current range (and blit, possibly cpu or gpu, the rest)
	 */
	if (!(usage & PIPE_TRANSFER_DISCARD_RANGE))
		return false;

	bool whole_level = util_texrange_covers_whole_level(prsc, level,
		box->x, box->y, box->z, box->width, box->height, box->depth);

	/* TODO need to be more clever about current level */
	if ((prsc->target >= PIPE_TEXTURE_2D) && !whole_level)
		return false;

	struct pipe_resource *pshadow =
		pctx->screen->resource_create(pctx->screen, prsc);

	if (!pshadow)
		return false;

	assert(!ctx->in_shadow);
	ctx->in_shadow = true;

	/* get rid of any references that batch-cache might have to us (which
	 * should empty/destroy rsc->batches hashset)
	 */
	fd_bc_invalidate_resource(rsc, false);

	pipe_mutex_lock(ctx->screen->lock);

	/* Swap the backing bo's, so shadow becomes the old buffer,
	 * blit from shadow to new buffer.  From here on out, we
	 * cannot fail.
	 *
	 * Note that we need to do it in this order, otherwise if
	 * we go down the cpu blit path, the recursive transfer_map()
	 * sees the wrong state..
	 */
	struct fd_resource *shadow = fd_resource(pshadow);

	DBG("shadow: %p (%d) -> %p (%d)", rsc, rsc->base.b.reference.count,
			shadow, shadow->base.b.reference.count);

	/* TODO valid_buffer_range?? */
	swap(rsc->bo,          shadow->bo);
	swap(rsc->timestamp,   shadow->timestamp);
	swap(rsc->write_batch, shadow->write_batch);

	/* at this point, the newly created shadow buffer is not referenced
	 * by any batches, but the existing rsc (probably) is.  We need to
	 * transfer those references over:
	 */
	debug_assert(shadow->batch_mask == 0);
	struct fd_batch *batch;
	foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
		struct set_entry *entry = _mesa_set_search(batch->resources, rsc);
		_mesa_set_remove(batch->resources, entry);
		_mesa_set_add(batch->resources, shadow);
	}
	swap(rsc->batch_mask, shadow->batch_mask);

	pipe_mutex_unlock(ctx->screen->lock);

	struct pipe_blit_info blit = {0};
	blit.dst.resource = prsc;
	blit.dst.format   = prsc->format;
	blit.src.resource = pshadow;
	blit.src.format   = pshadow->format;
	blit.mask = util_format_get_mask(prsc->format);
	blit.filter = PIPE_TEX_FILTER_NEAREST;

#define set_box(field, val) do {     \
		blit.dst.field = (val);      \
		blit.src.field = (val);      \
	} while (0)

	/* blit the other levels in their entirety: */
	for (unsigned l = 0; l <= prsc->last_level; l++) {
		if (l == level)
			continue;

		/* just blit whole level: */
		set_box(level, l);
		set_box(box.width,  u_minify(prsc->width0, l));
		set_box(box.height, u_minify(prsc->height0, l));
		set_box(box.depth,  u_minify(prsc->depth0, l));

		do_blit(ctx, &blit, fallback);
	}

	/* deal w/ current level specially, since we might need to split
	 * it up into a couple blits:
	 */
	if (!whole_level) {
		set_box(level, level);

		switch (prsc->target) {
		case PIPE_BUFFER:
		case PIPE_TEXTURE_1D:
			set_box(box.y, 0);
			set_box(box.z, 0);
			set_box(box.height, 1);
			set_box(box.depth, 1);

			if (box->x > 0) {
				set_box(box.x, 0);
				set_box(box.width, box->x);

				do_blit(ctx, &blit, fallback);
			}
			if ((box->x + box->width) < u_minify(prsc->width0, level)) {
				set_box(box.x, box->x + box->width);
				set_box(box.width, u_minify(prsc->width0, level) - (box->x + box->width));

				do_blit(ctx, &blit, fallback);
			}
			break;
		case PIPE_TEXTURE_2D:
			/* TODO */
		default:
			unreachable("TODO");
		}
	}

	ctx->in_shadow = false;

	pipe_resource_reference(&pshadow, NULL);

	return true;
}

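/* Byte offset of a layer within a miplevel slice: in layer-first layouts all
 * levels of a layer are contiguous, otherwise layers are interleaved within
 * each level.
 */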
static unsigned
fd_resource_layer_offset(struct fd_resource *rsc,
						 struct fd_resource_slice *slice,
						 unsigned layer)
{
	if (rsc->layer_first)
		return layer * rsc->layer_size;
	else
		return layer * slice->size0;
}

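/* Write back a Z32F_S8 staging buffer: unpack the interleaved
 * z32_float_s8x24 staging data into the separate depth (z32 float) and
 * stencil (s8) backing bos.
 */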
static void
fd_resource_flush_z32s8(struct fd_transfer *trans, const struct pipe_box *box)
{
	struct fd_resource *rsc = fd_resource(trans->base.resource);
	struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
	struct fd_resource_slice *sslice = fd_resource_slice(rsc->stencil, trans->base.level);
	enum pipe_format format = trans->base.resource->format;

	float *depth = fd_bo_map(rsc->bo) + slice->offset +
		fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
		(trans->base.box.y + box->y) * slice->pitch * 4 + (trans->base.box.x + box->x) * 4;
	uint8_t *stencil = fd_bo_map(rsc->stencil->bo) + sslice->offset +
		fd_resource_layer_offset(rsc->stencil, sslice, trans->base.box.z) +
		(trans->base.box.y + box->y) * sslice->pitch + trans->base.box.x + box->x;

	if (format != PIPE_FORMAT_X32_S8X24_UINT)
		util_format_z32_float_s8x24_uint_unpack_z_float(
				depth, slice->pitch * 4,
				trans->staging, trans->base.stride,
				box->width, box->height);

	util_format_z32_float_s8x24_uint_unpack_s_8uint(
			stencil, sslice->pitch,
			trans->staging, trans->base.stride,
			box->width, box->height);
}

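/* Write back an RGTC/LATC staging buffer: older GPUs (gpu_id < 400) can't
 * sample these formats natively, so the resource's internal format is
 * RGBA8888 and the compressed staging data is decompressed into it here.
 */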
static void
fd_resource_flush_rgtc(struct fd_transfer *trans, const struct pipe_box *box)
{
	struct fd_resource *rsc = fd_resource(trans->base.resource);
	struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
	enum pipe_format format = trans->base.resource->format;

	uint8_t *data = fd_bo_map(rsc->bo) + slice->offset +
		fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
		((trans->base.box.y + box->y) * slice->pitch +
		 trans->base.box.x + box->x) * rsc->cpp;

	uint8_t *source = trans->staging +
		util_format_get_nblocksy(format, box->y) * trans->base.stride +
		util_format_get_stride(format, box->x);

	switch (format) {
	case PIPE_FORMAT_RGTC1_UNORM:
	case PIPE_FORMAT_RGTC1_SNORM:
	case PIPE_FORMAT_LATC1_UNORM:
	case PIPE_FORMAT_LATC1_SNORM:
		util_format_rgtc1_unorm_unpack_rgba_8unorm(
				data, slice->pitch * rsc->cpp,
				source, trans->base.stride,
				box->width, box->height);
		break;
	case PIPE_FORMAT_RGTC2_UNORM:
	case PIPE_FORMAT_RGTC2_SNORM:
	case PIPE_FORMAT_LATC2_UNORM:
	case PIPE_FORMAT_LATC2_SNORM:
		util_format_rgtc2_unorm_unpack_rgba_8unorm(
				data, slice->pitch * rsc->cpp,
				source, trans->base.stride,
				box->width, box->height);
		break;
	default:
		assert(!"Unexpected format");
		break;
	}
}

static void
fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
{
	enum pipe_format format = trans->base.resource->format;

	switch (format) {
	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
	case PIPE_FORMAT_X32_S8X24_UINT:
		fd_resource_flush_z32s8(trans, box);
		break;
	case PIPE_FORMAT_RGTC1_UNORM:
	case PIPE_FORMAT_RGTC1_SNORM:
	case PIPE_FORMAT_RGTC2_UNORM:
	case PIPE_FORMAT_RGTC2_SNORM:
	case PIPE_FORMAT_LATC1_UNORM:
	case PIPE_FORMAT_LATC1_SNORM:
	case PIPE_FORMAT_LATC2_UNORM:
	case PIPE_FORMAT_LATC2_SNORM:
		fd_resource_flush_rgtc(trans, box);
		break;
	default:
		assert(!"Unexpected staging transfer type");
		break;
	}
}

static void fd_resource_transfer_flush_region(struct pipe_context *pctx,
		struct pipe_transfer *ptrans,
		const struct pipe_box *box)
{
	struct fd_resource *rsc = fd_resource(ptrans->resource);
	struct fd_transfer *trans = fd_transfer(ptrans);

	if (ptrans->resource->target == PIPE_BUFFER)
		util_range_add(&rsc->valid_buffer_range,
					   ptrans->box.x + box->x,
					   ptrans->box.x + box->x + box->width);

	if (trans->staging)
		fd_resource_flush(trans, box);
}

static void
fd_resource_transfer_unmap(struct pipe_context *pctx,
		struct pipe_transfer *ptrans)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd_resource *rsc = fd_resource(ptrans->resource);
	struct fd_transfer *trans = fd_transfer(ptrans);

	if (trans->staging && !(ptrans->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
		struct pipe_box box;
		u_box_2d(0, 0, ptrans->box.width, ptrans->box.height, &box);
		fd_resource_flush(trans, &box);
	}

	if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		fd_bo_cpu_fini(rsc->bo);
		if (rsc->stencil)
			fd_bo_cpu_fini(rsc->stencil->bo);
	}

	util_range_add(&rsc->valid_buffer_range,
				   ptrans->box.x,
				   ptrans->box.x + ptrans->box.width);

	/* free the staging buffer before returning the transfer to the slab,
	 * since trans points into the slab allocation:
	 */
	free(trans->staging);

	pipe_resource_reference(&ptrans->resource, NULL);
	slab_free(&ctx->transfer_pool, ptrans);
}

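/* Map a resource for CPU access.  The interesting part is synchronization:
 * depending on the usage flags this can be unsynchronized, can discard and
 * reallocate the whole bo, can shadow the resource to avoid a stall, or can
 * flush pending batches and block until the GPU is done.  Formats the hw
 * can't access directly (Z32F_S8, and RGTC on older GPUs) are staged through
 * a malloc'd buffer that is packed/unpacked at map/unmap time.
 */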
static void *
fd_resource_transfer_map(struct pipe_context *pctx,
		struct pipe_resource *prsc,
		unsigned level, unsigned usage,
		const struct pipe_box *box,
		struct pipe_transfer **pptrans)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd_resource *rsc = fd_resource(prsc);
	struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
	struct fd_transfer *trans;
	struct pipe_transfer *ptrans;
	enum pipe_format format = prsc->format;
	uint32_t op = 0;
	uint32_t offset;
	char *buf;
	int ret = 0;

	DBG("prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d", prsc, level, usage,
		box->width, box->height, box->x, box->y);

	ptrans = slab_alloc(&ctx->transfer_pool);
	if (!ptrans)
		return NULL;

	/* slab_alloc() doesn't zero: */
	trans = fd_transfer(ptrans);
	memset(trans, 0, sizeof(*trans));

	pipe_resource_reference(&ptrans->resource, prsc);
	ptrans->level = level;
	ptrans->usage = usage;
	ptrans->box = *box;
	ptrans->stride = util_format_get_nblocksx(format, slice->pitch) * rsc->cpp;
	ptrans->layer_stride = rsc->layer_first ? rsc->layer_size : slice->size0;

	if (ctx->in_shadow && !(usage & PIPE_TRANSFER_READ))
		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;

	if (usage & PIPE_TRANSFER_READ)
		op |= DRM_FREEDRENO_PREP_READ;

	if (usage & PIPE_TRANSFER_WRITE)
		op |= DRM_FREEDRENO_PREP_WRITE;

	if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
		realloc_bo(rsc, fd_bo_size(rsc->bo));
		if (rsc->stencil)
			realloc_bo(rsc->stencil, fd_bo_size(rsc->stencil->bo));
		fd_invalidate_resource(ctx, prsc);
	} else if ((usage & PIPE_TRANSFER_WRITE) &&
			   prsc->target == PIPE_BUFFER &&
			   !util_ranges_intersect(&rsc->valid_buffer_range,
									  box->x, box->x + box->width)) {
		/* We are trying to write to a previously uninitialized range. No need
		 * to wait.
		 */
	} else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		struct fd_batch *write_batch = NULL;

		/* hold a reference, so it doesn't disappear under us: */
		fd_batch_reference(&write_batch, rsc->write_batch);

		if ((usage & PIPE_TRANSFER_WRITE) && write_batch &&
				write_batch->back_blit) {
			/* if the only thing pending is a back-blit, we can discard it: */
			fd_batch_reset(write_batch);
		}

		/* If the GPU is writing to the resource, or if it is reading from the
		 * resource and we're trying to write to it, flush the renders.
		 */
		bool needs_flush = pending(rsc, !!(usage & PIPE_TRANSFER_WRITE));
		bool busy = needs_flush || (0 != fd_bo_cpu_prep(rsc->bo,
				ctx->screen->pipe, op | DRM_FREEDRENO_PREP_NOSYNC));

		/* if we need to flush/stall, see if we can make a shadow buffer
		 * to avoid this:
		 *
		 * TODO we could also take this path when !reorder && !busy_for_read,
		 * ie. we only *don't* want to take it if the blit would trigger
		 * a flush!
		 */
		if (ctx->screen->reorder && busy && !(usage & PIPE_TRANSFER_READ)) {
			if (fd_try_shadow_resource(ctx, rsc, level, usage, box)) {
				needs_flush = busy = false;
				fd_invalidate_resource(ctx, prsc);
			}
		}

		if (needs_flush) {
			if (usage & PIPE_TRANSFER_WRITE) {
				struct fd_batch *batch, *last_batch = NULL;
				foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
					fd_batch_reference(&last_batch, batch);
					fd_batch_flush(batch, false);
				}
				if (last_batch) {
					fd_batch_sync(last_batch);
					fd_batch_reference(&last_batch, NULL);
				}
				assert(rsc->batch_mask == 0);
			} else {
				fd_batch_flush(write_batch, true);
			}
			assert(!rsc->write_batch);
		}

		fd_batch_reference(&write_batch, NULL);

		/* The GPU keeps track of how the various bo's are being used, and
		 * will wait if necessary for the proper operation to have
		 * completed.
		 */
		if (busy) {
			ret = fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, op);
			if (ret)
				goto fail;
		}
	}

	buf = fd_bo_map(rsc->bo);
	if (!buf)
		goto fail;

	offset = slice->offset +
		box->y / util_format_get_blockheight(format) * ptrans->stride +
		box->x / util_format_get_blockwidth(format) * rsc->cpp +
		fd_resource_layer_offset(rsc, slice, box->z);

	if (prsc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
		prsc->format == PIPE_FORMAT_X32_S8X24_UINT) {
		assert(trans->base.box.depth == 1);

		trans->base.stride = trans->base.box.width * rsc->cpp * 2;
		trans->staging = malloc(trans->base.stride * trans->base.box.height);
		if (!trans->staging)
			goto fail;

		/* if we're not discarding the whole range (or resource), we must copy
		 * the real data in.
		 */
		if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
					   PIPE_TRANSFER_DISCARD_RANGE))) {
			struct fd_resource_slice *sslice =
				fd_resource_slice(rsc->stencil, level);
			void *sbuf = fd_bo_map(rsc->stencil->bo);
			if (!sbuf)
				goto fail;

			float *depth = (float *)(buf + slice->offset +
				fd_resource_layer_offset(rsc, slice, box->z) +
				box->y * slice->pitch * 4 + box->x * 4);
			uint8_t *stencil = sbuf + sslice->offset +
				fd_resource_layer_offset(rsc->stencil, sslice, box->z) +
				box->y * sslice->pitch + box->x;

			if (format != PIPE_FORMAT_X32_S8X24_UINT)
				util_format_z32_float_s8x24_uint_pack_z_float(
						trans->staging, trans->base.stride,
						depth, slice->pitch * 4,
						box->width, box->height);

			util_format_z32_float_s8x24_uint_pack_s_8uint(
					trans->staging, trans->base.stride,
					stencil, sslice->pitch,
					box->width, box->height);
		}

		buf = trans->staging;
		offset = 0;
	} else if (rsc->internal_format != format &&
			   util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) {
		assert(trans->base.box.depth == 1);

		trans->base.stride = util_format_get_stride(
				format, trans->base.box.width);
		trans->staging = malloc(
				util_format_get_2d_size(format, trans->base.stride,
										trans->base.box.height));
		if (!trans->staging)
			goto fail;

		/* if we're not discarding the whole range (or resource), we must copy
		 * the real data in.
		 */
		if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
					   PIPE_TRANSFER_DISCARD_RANGE))) {
			uint8_t *rgba8 = (uint8_t *)buf + slice->offset +
				fd_resource_layer_offset(rsc, slice, box->z) +
				box->y * slice->pitch * rsc->cpp + box->x * rsc->cpp;

			switch (format) {
			case PIPE_FORMAT_RGTC1_UNORM:
			case PIPE_FORMAT_RGTC1_SNORM:
			case PIPE_FORMAT_LATC1_UNORM:
			case PIPE_FORMAT_LATC1_SNORM:
				util_format_rgtc1_unorm_pack_rgba_8unorm(
					trans->staging, trans->base.stride,
					rgba8, slice->pitch * rsc->cpp,
					box->width, box->height);
				break;
			case PIPE_FORMAT_RGTC2_UNORM:
			case PIPE_FORMAT_RGTC2_SNORM:
			case PIPE_FORMAT_LATC2_UNORM:
			case PIPE_FORMAT_LATC2_SNORM:
				util_format_rgtc2_unorm_pack_rgba_8unorm(
					trans->staging, trans->base.stride,
					rgba8, slice->pitch * rsc->cpp,
					box->width, box->height);
				break;
			default:
				assert(!"Unexpected format");
				break;
			}
		}

		buf = trans->staging;
		offset = 0;
	}

	*pptrans = ptrans;

	return buf + offset;

fail:
	fd_resource_transfer_unmap(pctx, ptrans);
	return NULL;
}


static void
fd_resource_destroy(struct pipe_screen *pscreen,
		struct pipe_resource *prsc)
{
	struct fd_resource *rsc = fd_resource(prsc);
	fd_bc_invalidate_resource(rsc, true);
	if (rsc->bo)
		fd_bo_del(rsc->bo);
	util_range_destroy(&rsc->valid_buffer_range);
	FREE(rsc);
}

static boolean
fd_resource_get_handle(struct pipe_screen *pscreen,
		struct pipe_resource *prsc,
		struct winsys_handle *handle)
{
	struct fd_resource *rsc = fd_resource(prsc);

	return fd_screen_bo_get_handle(pscreen, rsc->bo,
			rsc->slices[0].pitch * rsc->cpp, handle);
}


static const struct u_resource_vtbl fd_resource_vtbl = {
		.resource_get_handle      = fd_resource_get_handle,
		.resource_destroy         = fd_resource_destroy,
		.transfer_map             = fd_resource_transfer_map,
		.transfer_flush_region    = fd_resource_transfer_flush_region,
		.transfer_unmap           = fd_resource_transfer_unmap,
};

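/* Lay out the miplevel slices and return the total size in bytes.  Pitches
 * are aligned to 32 pixels (for ASTC, to a multiple of 32 blocks).  A worked
 * example, assuming a plain 2D texture so alignment == 1: a 100x100 RGBA8888
 * level 0 gets pitch = align(100, 32) = 128, so size0 = 128 * 100 * 4 =
 * 51200 bytes, and level 1 (50 rows) gets pitch = 64, size0 = 64 * 50 * 4 =
 * 12800 bytes.
 */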
static uint32_t
setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
{
	struct pipe_resource *prsc = &rsc->base.b;
	enum util_format_layout layout = util_format_description(format)->layout;
	uint32_t level, size = 0;
	uint32_t width = prsc->width0;
	uint32_t height = prsc->height0;
	uint32_t depth = prsc->depth0;
	/* in layer_first layout, the level (slice) contains just one
	 * layer (since in fact the layer contains the slices)
	 */
	uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size;

	for (level = 0; level <= prsc->last_level; level++) {
		struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
		uint32_t blocks;

		if (layout == UTIL_FORMAT_LAYOUT_ASTC)
			slice->pitch = width =
				util_align_npot(width, 32 * util_format_get_blockwidth(format));
		else
			slice->pitch = width = align(width, 32);
		slice->offset = size;
		blocks = util_format_get_nblocks(format, width, height);
		/* 1d array and 2d array textures must all have the same layer size
		 * for each miplevel on a3xx. 3d textures can have different layer
		 * sizes for high levels, but the hw auto-sizer is buggy (or at least
		 * different than what this code does), so once the layer size drops
		 * to 0xf000 bytes or less we stop reducing it.
		 */
		if (prsc->target == PIPE_TEXTURE_3D && (
					level == 1 ||
					(level > 1 && rsc->slices[level - 1].size0 > 0xf000)))
			slice->size0 = align(blocks * rsc->cpp, alignment);
		else if (level == 0 || rsc->layer_first || alignment == 1)
			slice->size0 = align(blocks * rsc->cpp, alignment);
		else
			slice->size0 = rsc->slices[level - 1].size0;

		size += slice->size0 * depth * layers_in_level;

		width = u_minify(width, 1);
		height = u_minify(height, 1);
		depth = u_minify(depth, 1);
	}

	return size;
}

static uint32_t
slice_alignment(struct pipe_screen *pscreen, const struct pipe_resource *tmpl)
{
	/* on a3xx, 2d array and 3d textures seem to want their
	 * layers aligned to page boundaries:
	 */
	switch (tmpl->target) {
	case PIPE_TEXTURE_3D:
	case PIPE_TEXTURE_1D_ARRAY:
	case PIPE_TEXTURE_2D_ARRAY:
		return 4096;
	default:
		return 1;
	}
}

/* special case to resize query buffer after it has been allocated: */
void
fd_resource_resize(struct pipe_resource *prsc, uint32_t sz)
{
	struct fd_resource *rsc = fd_resource(prsc);

	debug_assert(prsc->width0 == 0);
	debug_assert(prsc->target == PIPE_BUFFER);
	debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);

	prsc->width0 = sz;
	realloc_bo(rsc, setup_slices(rsc, 1, prsc->format));
}

/**
 * Create a new texture object, using the given template info.
 */
static struct pipe_resource *
fd_resource_create(struct pipe_screen *pscreen,
		const struct pipe_resource *tmpl)
{
	struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
	struct pipe_resource *prsc = &rsc->base.b;
	enum pipe_format format = tmpl->format;
	uint32_t size, alignment;

	DBG("%p: target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
			"nr_samples=%u, usage=%u, bind=%x, flags=%x", prsc,
			tmpl->target, util_format_name(format),
			tmpl->width0, tmpl->height0, tmpl->depth0,
			tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
			tmpl->usage, tmpl->bind, tmpl->flags);

	if (!rsc)
		return NULL;

	*prsc = *tmpl;

	pipe_reference_init(&prsc->reference, 1);

	prsc->screen = pscreen;

	util_range_init(&rsc->valid_buffer_range);

	rsc->base.vtbl = &fd_resource_vtbl;

	if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
		format = PIPE_FORMAT_Z32_FLOAT;
	else if (fd_screen(pscreen)->gpu_id < 400 &&
			 util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
		format = PIPE_FORMAT_R8G8B8A8_UNORM;
	rsc->internal_format = format;
	rsc->cpp = util_format_get_blocksize(format);

	assert(rsc->cpp);

	alignment = slice_alignment(pscreen, tmpl);
	if (is_a4xx(fd_screen(pscreen))) {
		switch (tmpl->target) {
		case PIPE_TEXTURE_3D:
			rsc->layer_first = false;
			break;
		default:
			rsc->layer_first = true;
			alignment = 1;
			break;
		}
	}

	size = setup_slices(rsc, alignment, format);

	/* special case for hw-query buffer, which we need to allocate before we
	 * know the size:
	 */
	if (size == 0) {
		/* note, semi-intentionally == instead of &, the query buffer
		 * should have no other bind flags set:
		 */
		debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
		return prsc;
	}

	if (rsc->layer_first) {
		rsc->layer_size = align(size, 4096);
		size = rsc->layer_size * prsc->array_size;
	}

	realloc_bo(rsc, size);
	if (!rsc->bo)
		goto fail;

	/* There is no native Z32F_S8 sampling or rendering format, so this must
	 * be emulated via two separate textures. The depth texture still keeps
	 * its Z32F_S8 format though, and we also keep a reference to a separate
	 * S8 texture.
	 */
	if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
		struct pipe_resource stencil = *tmpl;
		stencil.format = PIPE_FORMAT_S8_UINT;
		rsc->stencil = fd_resource(fd_resource_create(pscreen, &stencil));
		if (!rsc->stencil)
			goto fail;
	}

	return prsc;
fail:
	fd_resource_destroy(pscreen, prsc);
	return NULL;
}

/**
 * Create a texture from a winsys_handle. The handle is often created in
 * another process by first creating a pipe texture and then calling
 * resource_get_handle.
 */
static struct pipe_resource *
fd_resource_from_handle(struct pipe_screen *pscreen,
		const struct pipe_resource *tmpl,
		struct winsys_handle *handle, unsigned usage)
{
	struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
	struct fd_resource_slice *slice = &rsc->slices[0];
	struct pipe_resource *prsc = &rsc->base.b;

	DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
			"nr_samples=%u, usage=%u, bind=%x, flags=%x",
			tmpl->target, util_format_name(tmpl->format),
			tmpl->width0, tmpl->height0, tmpl->depth0,
			tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
			tmpl->usage, tmpl->bind, tmpl->flags);

	if (!rsc)
		return NULL;

	*prsc = *tmpl;

	pipe_reference_init(&prsc->reference, 1);

	prsc->screen = pscreen;

	util_range_init(&rsc->valid_buffer_range);

	rsc->bo = fd_screen_bo_from_handle(pscreen, handle);
	if (!rsc->bo)
		goto fail;

	rsc->base.vtbl = &fd_resource_vtbl;
	rsc->cpp = util_format_get_blocksize(tmpl->format);
	slice->pitch = handle->stride / rsc->cpp;
	slice->offset = handle->offset;

	if ((slice->pitch < align(prsc->width0, 32)) || (slice->pitch % 32))
		goto fail;

	assert(rsc->cpp);

	return prsc;

fail:
	fd_resource_destroy(pscreen, prsc);
	return NULL;
}

/**
 * _copy_region using pipe (3d engine)
 */
static bool
fd_blitter_pipe_copy_region(struct fd_context *ctx,
		struct pipe_resource *dst,
		unsigned dst_level,
		unsigned dstx, unsigned dsty, unsigned dstz,
		struct pipe_resource *src,
		unsigned src_level,
		const struct pipe_box *src_box)
{
	/* not until we allow rendertargets to be buffers */
	if (dst->target == PIPE_BUFFER || src->target == PIPE_BUFFER)
		return false;

	if (!util_blitter_is_copy_supported(ctx->blitter, dst, src))
		return false;

	/* TODO we could discard if dst box covers dst level fully.. */
	fd_blitter_pipe_begin(ctx, false, false, FD_STAGE_BLIT);
	util_blitter_copy_texture(ctx->blitter,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box);
	fd_blitter_pipe_end(ctx);

	return true;
}

/**
 * Copy a block of pixels from one resource to another.
 * The resources must be of the same format.
 * Resources with nr_samples > 1 are not allowed.
 */
static void
fd_resource_copy_region(struct pipe_context *pctx,
		struct pipe_resource *dst,
		unsigned dst_level,
		unsigned dstx, unsigned dsty, unsigned dstz,
		struct pipe_resource *src,
		unsigned src_level,
		const struct pipe_box *src_box)
{
	struct fd_context *ctx = fd_context(pctx);

	/* TODO if we have a 2d core, or other DMA engine that could be used
	 * for simple copies and reasonably easily synchronized with the 3d
	 * core, this is where we'd plug it in..
	 */

	/* try blit on 3d pipe: */
	if (fd_blitter_pipe_copy_region(ctx,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box))
		return;

	/* else fallback to pure sw: */
	util_resource_copy_region(pctx,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box);
}

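/* Evaluate the current conditional-rendering predicate: returns true if
 * rendering should proceed, false if it should be skipped.  If the query
 * result isn't ready yet and the cond mode allows it, we render anyway
 * rather than wait.
 */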
bool
fd_render_condition_check(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);

	if (!ctx->cond_query)
		return true;

	union pipe_query_result res = { 0 };
	bool wait =
		ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT &&
		ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;

	if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res))
		return (bool)res.u64 != ctx->cond_cond;

	return true;
}

/**
 * Optimal hardware path for blitting pixels.
 * Scaling, format conversion, up- and downsampling (resolve) are allowed.
 */
static void
fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
{
	struct fd_context *ctx = fd_context(pctx);
	struct pipe_blit_info info = *blit_info;
	bool discard = false;

	if (info.src.resource->nr_samples > 1 &&
			info.dst.resource->nr_samples <= 1 &&
			!util_format_is_depth_or_stencil(info.src.resource->format) &&
			!util_format_is_pure_integer(info.src.resource->format)) {
		DBG("color resolve unimplemented");
		return;
	}

	if (info.render_condition_enable && !fd_render_condition_check(pctx))
		return;

	if (!info.scissor_enable && !info.alpha_blend) {
		discard = util_texrange_covers_whole_level(info.dst.resource,
				info.dst.level, info.dst.box.x, info.dst.box.y,
				info.dst.box.z, info.dst.box.width,
				info.dst.box.height, info.dst.box.depth);
	}

	if (util_try_blit_via_copy_region(pctx, &info)) {
		return; /* done */
	}

	if (info.mask & PIPE_MASK_S) {
		DBG("cannot blit stencil, skipping");
		info.mask &= ~PIPE_MASK_S;
	}

	if (!util_blitter_is_blit_supported(ctx->blitter, &info)) {
		DBG("blit unsupported %s -> %s",
				util_format_short_name(info.src.resource->format),
				util_format_short_name(info.dst.resource->format));
		return;
	}

	fd_blitter_pipe_begin(ctx, info.render_condition_enable, discard, FD_STAGE_BLIT);
	util_blitter_blit(ctx->blitter, &info);
	fd_blitter_pipe_end(ctx);
}

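/* u_blitter clobbers nearly all pipe state as it draws, so save everything
 * it touches here and let util_blitter restore it when the blit finishes;
 * fd_blitter_pipe_end() then resets the query stage and in_blit flag.
 */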
1054fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard,
1055		enum fd_render_stage stage)
1056{
1057	util_blitter_save_fragment_constant_buffer_slot(ctx->blitter,
1058			ctx->constbuf[PIPE_SHADER_FRAGMENT].cb);
1059	util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb);
1060	util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx);
1061	util_blitter_save_vertex_shader(ctx->blitter, ctx->prog.vp);
1062	util_blitter_save_so_targets(ctx->blitter, ctx->streamout.num_targets,
1063			ctx->streamout.targets);
1064	util_blitter_save_rasterizer(ctx->blitter, ctx->rasterizer);
1065	util_blitter_save_viewport(ctx->blitter, &ctx->viewport);
1066	util_blitter_save_scissor(ctx->blitter, &ctx->scissor);
1067	util_blitter_save_fragment_shader(ctx->blitter, ctx->prog.fp);
1068	util_blitter_save_blend(ctx->blitter, ctx->blend);
1069	util_blitter_save_depth_stencil_alpha(ctx->blitter, ctx->zsa);
1070	util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref);
1071	util_blitter_save_sample_mask(ctx->blitter, ctx->sample_mask);
1072	util_blitter_save_framebuffer(ctx->blitter,
1073			ctx->batch ? &ctx->batch->framebuffer : NULL);
1074	util_blitter_save_fragment_sampler_states(ctx->blitter,
1075			ctx->fragtex.num_samplers,
1076			(void **)ctx->fragtex.samplers);
1077	util_blitter_save_fragment_sampler_views(ctx->blitter,
1078			ctx->fragtex.num_textures, ctx->fragtex.textures);
1079	if (!render_cond)
1080		util_blitter_save_render_condition(ctx->blitter,
1081			ctx->cond_query, ctx->cond_cond, ctx->cond_mode);
1082
1083	if (ctx->batch)
1084		fd_hw_query_set_stage(ctx->batch, ctx->batch->draw, stage);
1085
1086	ctx->in_blit = discard;
1087}
1088
1089void
1090fd_blitter_pipe_end(struct fd_context *ctx)
1091{
1092	if (ctx->batch)
1093		fd_hw_query_set_stage(ctx->batch, ctx->batch->draw, FD_STAGE_NULL);
1094	ctx->in_blit = false;
1095}
1096
static void
fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
{
	struct fd_resource *rsc = fd_resource(prsc);

	if (rsc->write_batch)
		fd_batch_flush(rsc->write_batch, true);

	assert(!rsc->write_batch);
}

void
fd_resource_screen_init(struct pipe_screen *pscreen)
{
	pscreen->resource_create = fd_resource_create;
	pscreen->resource_from_handle = fd_resource_from_handle;
	pscreen->resource_get_handle = u_resource_get_handle_vtbl;
	pscreen->resource_destroy = u_resource_destroy_vtbl;
}

void
fd_resource_context_init(struct pipe_context *pctx)
{
	pctx->transfer_map = u_transfer_map_vtbl;
	pctx->transfer_flush_region = u_transfer_flush_region_vtbl;
	pctx->transfer_unmap = u_transfer_unmap_vtbl;
	pctx->buffer_subdata = u_default_buffer_subdata;
	pctx->texture_subdata = u_default_texture_subdata;
	pctx->create_surface = fd_create_surface;
	pctx->surface_destroy = fd_surface_destroy;
	pctx->resource_copy_region = fd_resource_copy_region;
	pctx->blit = fd_blit;
	pctx->flush_resource = fd_flush_resource;
}