freedreno_resource.c revision c44163876a2858aea219a08bd2e048b76953cff9
1/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3/*
4 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 *    Rob Clark <robclark@freedesktop.org>
27 */
28
29#include "util/u_format.h"
30#include "util/u_format_rgtc.h"
31#include "util/u_format_zs.h"
32#include "util/u_inlines.h"
33#include "util/u_transfer.h"
34#include "util/u_string.h"
35#include "util/u_surface.h"
36#include "util/set.h"
37
38#include "freedreno_resource.h"
39#include "freedreno_batch_cache.h"
40#include "freedreno_screen.h"
41#include "freedreno_surface.h"
42#include "freedreno_context.h"
43#include "freedreno_query_hw.h"
44#include "freedreno_util.h"
45
46#include <errno.h>
47
48/* XXX this should go away, needed for 'struct winsys_handle' */
49#include "state_tracker/drm_driver.h"
50
51static void
52fd_invalidate_resource(struct fd_context *ctx, struct pipe_resource *prsc)
53{
54	int i;
55
56	/* Go through the entire state and see if the resource is bound
57	 * anywhere. If it is, mark the relevant state as dirty. This is called on
58	 * realloc_bo.
59	 */
60
61	/* Constbufs */
62	for (i = 1; i < PIPE_MAX_CONSTANT_BUFFERS && !(ctx->dirty & FD_DIRTY_CONSTBUF); i++) {
63		if (ctx->constbuf[PIPE_SHADER_VERTEX].cb[i].buffer == prsc)
64			ctx->dirty |= FD_DIRTY_CONSTBUF;
65		if (ctx->constbuf[PIPE_SHADER_FRAGMENT].cb[i].buffer == prsc)
66			ctx->dirty |= FD_DIRTY_CONSTBUF;
67	}
68
69	/* VBOs */
70	for (i = 0; i < ctx->vtx.vertexbuf.count && !(ctx->dirty & FD_DIRTY_VTXBUF); i++) {
71		if (ctx->vtx.vertexbuf.vb[i].buffer == prsc)
72			ctx->dirty |= FD_DIRTY_VTXBUF;
73	}
74
75	/* Index buffer */
76	if (ctx->indexbuf.buffer == prsc)
77		ctx->dirty |= FD_DIRTY_INDEXBUF;
78
79	/* Textures */
80	for (i = 0; i < ctx->verttex.num_textures && !(ctx->dirty & FD_DIRTY_VERTTEX); i++) {
81		if (ctx->verttex.textures[i] && (ctx->verttex.textures[i]->texture == prsc))
82			ctx->dirty |= FD_DIRTY_VERTTEX;
83	}
84	for (i = 0; i < ctx->fragtex.num_textures && !(ctx->dirty & FD_DIRTY_FRAGTEX); i++) {
85		if (ctx->fragtex.textures[i] && (ctx->fragtex.textures[i]->texture == prsc))
86			ctx->dirty |= FD_DIRTY_FRAGTEX;
87	}
88}
89
90static void
91realloc_bo(struct fd_resource *rsc, uint32_t size)
92{
93	struct fd_screen *screen = fd_screen(rsc->base.b.screen);
94	uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
95			DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */
96
97	/* if we start using things other than write-combine,
98	 * be sure to check for PIPE_RESOURCE_FLAG_MAP_COHERENT
99	 */
100
101	if (rsc->bo)
102		fd_bo_del(rsc->bo);
103
104	rsc->bo = fd_bo_new(screen->dev, size, flags);
105	rsc->timestamp = 0;
106	util_range_set_empty(&rsc->valid_buffer_range);
107	fd_bc_invalidate_resource(rsc, true);
108}
109
110static void fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard);
111static void fd_blitter_pipe_end(struct fd_context *ctx);
112
113static void
114do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit, bool fallback)
115{
116	/* TODO size threshold too?? */
117	if ((blit->src.resource->target != PIPE_BUFFER) && !fallback) {
118		/* do blit on gpu: */
119		fd_blitter_pipe_begin(ctx, false, true);
120		util_blitter_blit(ctx->blitter, blit);
121		fd_blitter_pipe_end(ctx);
122	} else {
123		/* do blit on cpu: */
124		util_resource_copy_region(&ctx->base,
125				blit->dst.resource, blit->dst.level, blit->dst.box.x,
126				blit->dst.box.y, blit->dst.box.z,
127				blit->src.resource, blit->src.level, &blit->src.box);
128	}
129}
130
131static bool
132fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc,
133		unsigned level, unsigned usage, const struct pipe_box *box)
134{
135	struct pipe_context *pctx = &ctx->base;
136	struct pipe_resource *prsc = &rsc->base.b;
137	bool fallback = false;
138
139	/* TODO: somehow munge dimensions and format to copy unsupported
140	 * render target format to something that is supported?
141	 */
142	if (!pctx->screen->is_format_supported(pctx->screen,
143			prsc->format, prsc->target, prsc->nr_samples,
144			PIPE_BIND_RENDER_TARGET))
145		fallback = true;
146
147	/* these cases should be handled elsewhere.. just for future
148	 * reference in case this gets split into a more generic(ish)
149	 * helper.
150	 */
151	debug_assert(!(usage & PIPE_TRANSFER_READ));
152	debug_assert(!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE));
153
154	/* if we do a gpu blit to clone the whole resource, we'll just
155	 * end up stalling on that.. so only allow if we can discard
156	 * current range (and blit, possibly cpu or gpu, the rest)
157	 */
158	if (!(usage & PIPE_TRANSFER_DISCARD_RANGE))
159		return false;
160
161	bool whole_level = util_texrange_covers_whole_level(prsc, level,
162		box->x, box->y, box->z, box->width, box->height, box->depth);
163
164	/* TODO need to be more clever about current level */
165	if ((prsc->target >= PIPE_TEXTURE_2D) && !whole_level)
166		return false;
167
168	struct pipe_resource *pshadow =
169		pctx->screen->resource_create(pctx->screen, prsc);
170
171	if (!pshadow)
172		return false;
173
174	assert(!ctx->in_shadow);
175	ctx->in_shadow = true;
176
177	/* get rid of any references that batch-cache might have to us (which
178	 * should empty/destroy rsc->batches hashset)
179	 */
180	fd_bc_invalidate_resource(rsc, false);
181
182	/* Swap the backing bo's, so shadow becomes the old buffer,
183	 * blit from shadow to new buffer.  From here on out, we
184	 * cannot fail.
185	 *
186	 * Note that we need to do it in this order, otherwise if
187	 * we go down cpu blit path, the recursive transfer_map()
188	 * sees the wrong status..
189	 */
190	struct fd_resource *shadow = fd_resource(pshadow);
191
192	DBG("shadow: %p (%d) -> %p (%d)\n", rsc, rsc->base.b.reference.count,
193			shadow, shadow->base.b.reference.count);
194
195	/* TODO valid_buffer_range?? */
196	swap(rsc->bo,        shadow->bo);
197	swap(rsc->timestamp, shadow->timestamp);
198	swap(rsc->write_batch,   shadow->write_batch);
199
200	/* at this point, the newly created shadow buffer is not referenced
201	 * by any batches, but the existing rsc (probably) is.  We need to
202	 * transfer those references over:
203	 */
204	debug_assert(shadow->batch_mask == 0);
205	struct fd_batch *batch;
206	foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
207		struct set_entry *entry = _mesa_set_search(batch->resources, rsc);
208		_mesa_set_remove(batch->resources, entry);
209		_mesa_set_add(batch->resources, shadow);
210	}
211	swap(rsc->batch_mask, shadow->batch_mask);
212
213	struct pipe_blit_info blit = {0};
214	blit.dst.resource = prsc;
215	blit.dst.format   = prsc->format;
216	blit.src.resource = pshadow;
217	blit.src.format   = pshadow->format;
218	blit.mask = util_format_get_mask(prsc->format);
219	blit.filter = PIPE_TEX_FILTER_NEAREST;
220
221#define set_box(field, val) do {     \
222		blit.dst.field = (val);      \
223		blit.src.field = (val);      \
224	} while (0)
225
226	/* blit the other levels in their entirety: */
227	for (unsigned l = 0; l <= prsc->last_level; l++) {
228		if (l == level)
229			continue;
230
231		/* just blit whole level: */
232		set_box(level, l);
233		set_box(box.width,  u_minify(prsc->width0, l));
234		set_box(box.height, u_minify(prsc->height0, l));
235		set_box(box.depth,  u_minify(prsc->depth0, l));
236
237		do_blit(ctx, &blit, fallback);
238	}
239
240	/* deal w/ current level specially, since we might need to split
241	 * it up into a couple blits:
242	 */
243	if (!whole_level) {
244		set_box(level, level);
245
246		switch (prsc->target) {
247		case PIPE_BUFFER:
248		case PIPE_TEXTURE_1D:
249			set_box(box.y, 0);
250			set_box(box.z, 0);
251			set_box(box.height, 1);
252			set_box(box.depth, 1);
253
254			if (box->x > 0) {
255				set_box(box.x, 0);
256				set_box(box.width, box->x);
257
258				do_blit(ctx, &blit, fallback);
259			}
260			if ((box->x + box->width) < u_minify(prsc->width0, level)) {
261				set_box(box.x, box->x + box->width);
262				set_box(box.width, u_minify(prsc->width0, level) - (box->x + box->width));
263
264				do_blit(ctx, &blit, fallback);
265			}
266			break;
267		case PIPE_TEXTURE_2D:
268			/* TODO */
269		default:
270			unreachable("TODO");
271		}
272	}
273
274	ctx->in_shadow = false;
275
276	pipe_resource_reference(&pshadow, NULL);
277
278	return true;
279}
280
281static unsigned
282fd_resource_layer_offset(struct fd_resource *rsc,
283						 struct fd_resource_slice *slice,
284						 unsigned layer)
285{
286	if (rsc->layer_first)
287		return layer * rsc->layer_size;
288	else
289		return layer * slice->size0;
290}
291
292static void
293fd_resource_flush_z32s8(struct fd_transfer *trans, const struct pipe_box *box)
294{
295	struct fd_resource *rsc = fd_resource(trans->base.resource);
296	struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
297	struct fd_resource_slice *sslice = fd_resource_slice(rsc->stencil, trans->base.level);
298	enum pipe_format format = trans->base.resource->format;
299
300	float *depth = fd_bo_map(rsc->bo) + slice->offset +
301		fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
302		(trans->base.box.y + box->y) * slice->pitch * 4 + (trans->base.box.x + box->x) * 4;
303	uint8_t *stencil = fd_bo_map(rsc->stencil->bo) + sslice->offset +
304		fd_resource_layer_offset(rsc->stencil, sslice, trans->base.box.z) +
305		(trans->base.box.y + box->y) * sslice->pitch + trans->base.box.x + box->x;
306
307	if (format != PIPE_FORMAT_X32_S8X24_UINT)
308		util_format_z32_float_s8x24_uint_unpack_z_float(
309				depth, slice->pitch * 4,
310				trans->staging, trans->base.stride,
311				box->width, box->height);
312
313	util_format_z32_float_s8x24_uint_unpack_s_8uint(
314			stencil, sslice->pitch,
315			trans->staging, trans->base.stride,
316			box->width, box->height);
317}
318
319static void
320fd_resource_flush_rgtc(struct fd_transfer *trans, const struct pipe_box *box)
321{
322	struct fd_resource *rsc = fd_resource(trans->base.resource);
323	struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
324	enum pipe_format format = trans->base.resource->format;
325
326	uint8_t *data = fd_bo_map(rsc->bo) + slice->offset +
327		fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
328		((trans->base.box.y + box->y) * slice->pitch +
329		 trans->base.box.x + box->x) * rsc->cpp;
330
331	uint8_t *source = trans->staging +
332		util_format_get_nblocksy(format, box->y) * trans->base.stride +
333		util_format_get_stride(format, box->x);
334
335	switch (format) {
336	case PIPE_FORMAT_RGTC1_UNORM:
337	case PIPE_FORMAT_RGTC1_SNORM:
338	case PIPE_FORMAT_LATC1_UNORM:
339	case PIPE_FORMAT_LATC1_SNORM:
340		util_format_rgtc1_unorm_unpack_rgba_8unorm(
341				data, slice->pitch * rsc->cpp,
342				source, trans->base.stride,
343				box->width, box->height);
344		break;
345	case PIPE_FORMAT_RGTC2_UNORM:
346	case PIPE_FORMAT_RGTC2_SNORM:
347	case PIPE_FORMAT_LATC2_UNORM:
348	case PIPE_FORMAT_LATC2_SNORM:
349		util_format_rgtc2_unorm_unpack_rgba_8unorm(
350				data, slice->pitch * rsc->cpp,
351				source, trans->base.stride,
352				box->width, box->height);
353		break;
354	default:
355		assert(!"Unexpected format\n");
356		break;
357	}
358}
359
360static void
361fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
362{
363	enum pipe_format format = trans->base.resource->format;
364
365	switch (format) {
366	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
367	case PIPE_FORMAT_X32_S8X24_UINT:
368		fd_resource_flush_z32s8(trans, box);
369		break;
370	case PIPE_FORMAT_RGTC1_UNORM:
371	case PIPE_FORMAT_RGTC1_SNORM:
372	case PIPE_FORMAT_RGTC2_UNORM:
373	case PIPE_FORMAT_RGTC2_SNORM:
374	case PIPE_FORMAT_LATC1_UNORM:
375	case PIPE_FORMAT_LATC1_SNORM:
376	case PIPE_FORMAT_LATC2_UNORM:
377	case PIPE_FORMAT_LATC2_SNORM:
378		fd_resource_flush_rgtc(trans, box);
379		break;
380	default:
381		assert(!"Unexpected staging transfer type");
382		break;
383	}
384}
385
386static void fd_resource_transfer_flush_region(struct pipe_context *pctx,
387		struct pipe_transfer *ptrans,
388		const struct pipe_box *box)
389{
390	struct fd_resource *rsc = fd_resource(ptrans->resource);
391	struct fd_transfer *trans = fd_transfer(ptrans);
392
393	if (ptrans->resource->target == PIPE_BUFFER)
394		util_range_add(&rsc->valid_buffer_range,
395					   ptrans->box.x + box->x,
396					   ptrans->box.x + box->x + box->width);
397
398	if (trans->staging)
399		fd_resource_flush(trans, box);
400}
401
402static void
403fd_resource_transfer_unmap(struct pipe_context *pctx,
404		struct pipe_transfer *ptrans)
405{
406	struct fd_context *ctx = fd_context(pctx);
407	struct fd_resource *rsc = fd_resource(ptrans->resource);
408	struct fd_transfer *trans = fd_transfer(ptrans);
409
410	if (trans->staging && !(ptrans->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
411		struct pipe_box box;
412		u_box_2d(0, 0, ptrans->box.width, ptrans->box.height, &box);
413		fd_resource_flush(trans, &box);
414	}
415
416	if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
417		fd_bo_cpu_fini(rsc->bo);
418		if (rsc->stencil)
419			fd_bo_cpu_fini(rsc->stencil->bo);
420	}
421
422	util_range_add(&rsc->valid_buffer_range,
423				   ptrans->box.x,
424				   ptrans->box.x + ptrans->box.width);
425
426	pipe_resource_reference(&ptrans->resource, NULL);
427	util_slab_free(&ctx->transfer_pool, ptrans);
428
429	free(trans->staging);
430}
431
432static void *
433fd_resource_transfer_map(struct pipe_context *pctx,
434		struct pipe_resource *prsc,
435		unsigned level, unsigned usage,
436		const struct pipe_box *box,
437		struct pipe_transfer **pptrans)
438{
439	struct fd_context *ctx = fd_context(pctx);
440	struct fd_resource *rsc = fd_resource(prsc);
441	struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
442	struct fd_transfer *trans;
443	struct pipe_transfer *ptrans;
444	enum pipe_format format = prsc->format;
445	uint32_t op = 0;
446	uint32_t offset;
447	char *buf;
448	int ret = 0;
449
450	DBG("prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d", prsc, level, usage,
451		box->width, box->height, box->x, box->y);
452
453	ptrans = util_slab_alloc(&ctx->transfer_pool);
454	if (!ptrans)
455		return NULL;
456
457	/* util_slab_alloc() doesn't zero: */
458	trans = fd_transfer(ptrans);
459	memset(trans, 0, sizeof(*trans));
460
461	pipe_resource_reference(&ptrans->resource, prsc);
462	ptrans->level = level;
463	ptrans->usage = usage;
464	ptrans->box = *box;
465	ptrans->stride = util_format_get_nblocksx(format, slice->pitch) * rsc->cpp;
466	ptrans->layer_stride = rsc->layer_first ? rsc->layer_size : slice->size0;
467
468	if (ctx->in_shadow && !(usage & PIPE_TRANSFER_READ))
469		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
470
471	if (usage & PIPE_TRANSFER_READ)
472		op |= DRM_FREEDRENO_PREP_READ;
473
474	if (usage & PIPE_TRANSFER_WRITE)
475		op |= DRM_FREEDRENO_PREP_WRITE;
476
477	if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
478		realloc_bo(rsc, fd_bo_size(rsc->bo));
479		if (rsc->stencil)
480			realloc_bo(rsc->stencil, fd_bo_size(rsc->stencil->bo));
481		fd_invalidate_resource(ctx, prsc);
482	} else if ((usage & PIPE_TRANSFER_WRITE) &&
483			   prsc->target == PIPE_BUFFER &&
484			   !util_ranges_intersect(&rsc->valid_buffer_range,
485									  box->x, box->x + box->width)) {
486		/* We are trying to write to a previously uninitialized range. No need
487		 * to wait.
488		 */
489	} else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
490		if ((usage & PIPE_TRANSFER_WRITE) && rsc->write_batch &&
491				rsc->write_batch->back_blit) {
492			/* if only thing pending is a back-blit, we can discard it: */
493			fd_batch_reset(rsc->write_batch);
494		}
495
496		/* If the GPU is writing to the resource, or if it is reading from the
497		 * resource and we're trying to write to it, flush the renders.
498		 */
499		bool needs_flush = pending(rsc, !!(usage & PIPE_TRANSFER_WRITE));
500		bool busy = needs_flush || (0 != fd_bo_cpu_prep(rsc->bo,
501				ctx->screen->pipe, op | DRM_FREEDRENO_PREP_NOSYNC));
502
503		/* if we need to flush/stall, see if we can make a shadow buffer
504		 * to avoid this:
505		 *
506		 * TODO we could go down this path !reorder && !busy_for_read
507		 * ie. we only *don't* want to go down this path if the blit
508		 * will trigger a flush!
509		 */
510		if (ctx->screen->reorder && busy && !(usage & PIPE_TRANSFER_READ)) {
511			if (fd_try_shadow_resource(ctx, rsc, level, usage, box)) {
512				needs_flush = busy = false;
513				fd_invalidate_resource(ctx, prsc);
514			}
515		}
516
517		if (needs_flush) {
518			if (usage & PIPE_TRANSFER_WRITE) {
519				struct fd_batch *batch;
520				foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask)
521					fd_batch_flush(batch);
522				assert(rsc->batch_mask == 0);
523			} else {
524				fd_batch_flush(rsc->write_batch);
525			}
526			assert(!rsc->write_batch);
527		}
528
529		/* The GPU keeps track of how the various bo's are being used, and
530		 * will wait if necessary for the proper operation to have
531		 * completed.
532		 */
533		if (busy) {
534			ret = fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, op);
535			if (ret)
536				goto fail;
537		}
538	}
539
540	buf = fd_bo_map(rsc->bo);
541	if (!buf)
542		goto fail;
543
544	offset = slice->offset +
545		box->y / util_format_get_blockheight(format) * ptrans->stride +
546		box->x / util_format_get_blockwidth(format) * rsc->cpp +
547		fd_resource_layer_offset(rsc, slice, box->z);
548
549	if (prsc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
550		prsc->format == PIPE_FORMAT_X32_S8X24_UINT) {
551		assert(trans->base.box.depth == 1);
552
553		trans->base.stride = trans->base.box.width * rsc->cpp * 2;
554		trans->staging = malloc(trans->base.stride * trans->base.box.height);
555		if (!trans->staging)
556			goto fail;
557
558		/* if we're not discarding the whole range (or resource), we must copy
559		 * the real data in.
560		 */
561		if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
562					   PIPE_TRANSFER_DISCARD_RANGE))) {
563			struct fd_resource_slice *sslice =
564				fd_resource_slice(rsc->stencil, level);
565			void *sbuf = fd_bo_map(rsc->stencil->bo);
566			if (!sbuf)
567				goto fail;
568
569			float *depth = (float *)(buf + slice->offset +
570				fd_resource_layer_offset(rsc, slice, box->z) +
571				box->y * slice->pitch * 4 + box->x * 4);
572			uint8_t *stencil = sbuf + sslice->offset +
573				fd_resource_layer_offset(rsc->stencil, sslice, box->z) +
574				box->y * sslice->pitch + box->x;
575
576			if (format != PIPE_FORMAT_X32_S8X24_UINT)
577				util_format_z32_float_s8x24_uint_pack_z_float(
578						trans->staging, trans->base.stride,
579						depth, slice->pitch * 4,
580						box->width, box->height);
581
582			util_format_z32_float_s8x24_uint_pack_s_8uint(
583					trans->staging, trans->base.stride,
584					stencil, sslice->pitch,
585					box->width, box->height);
586		}
587
588		buf = trans->staging;
589		offset = 0;
590	} else if (rsc->internal_format != format &&
591			   util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) {
592		assert(trans->base.box.depth == 1);
593
594		trans->base.stride = util_format_get_stride(
595				format, trans->base.box.width);
596		trans->staging = malloc(
597				util_format_get_2d_size(format, trans->base.stride,
598										trans->base.box.height));
599		if (!trans->staging)
600			goto fail;
601
602		/* if we're not discarding the whole range (or resource), we must copy
603		 * the real data in.
604		 */
605		if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
606					   PIPE_TRANSFER_DISCARD_RANGE))) {
607			uint8_t *rgba8 = (uint8_t *)buf + slice->offset +
608				fd_resource_layer_offset(rsc, slice, box->z) +
609				box->y * slice->pitch * rsc->cpp + box->x * rsc->cpp;
610
611			switch (format) {
612			case PIPE_FORMAT_RGTC1_UNORM:
613			case PIPE_FORMAT_RGTC1_SNORM:
614			case PIPE_FORMAT_LATC1_UNORM:
615			case PIPE_FORMAT_LATC1_SNORM:
616				util_format_rgtc1_unorm_pack_rgba_8unorm(
617					trans->staging, trans->base.stride,
618					rgba8, slice->pitch * rsc->cpp,
619					box->width, box->height);
620				break;
621			case PIPE_FORMAT_RGTC2_UNORM:
622			case PIPE_FORMAT_RGTC2_SNORM:
623			case PIPE_FORMAT_LATC2_UNORM:
624			case PIPE_FORMAT_LATC2_SNORM:
625				util_format_rgtc2_unorm_pack_rgba_8unorm(
626					trans->staging, trans->base.stride,
627					rgba8, slice->pitch * rsc->cpp,
628					box->width, box->height);
629				break;
630			default:
631				assert(!"Unexpected format");
632				break;
633			}
634		}
635
636		buf = trans->staging;
637		offset = 0;
638	}
639
640	*pptrans = ptrans;
641
642	return buf + offset;
643
644fail:
645	fd_resource_transfer_unmap(pctx, ptrans);
646	return NULL;
647}
648
649static void
650fd_resource_destroy(struct pipe_screen *pscreen,
651		struct pipe_resource *prsc)
652{
653	struct fd_resource *rsc = fd_resource(prsc);
654	fd_bc_invalidate_resource(rsc, true);
655	if (rsc->bo)
656		fd_bo_del(rsc->bo);
657	util_range_destroy(&rsc->valid_buffer_range);
658	FREE(rsc);
659}
660
661static boolean
662fd_resource_get_handle(struct pipe_screen *pscreen,
663		struct pipe_resource *prsc,
664		struct winsys_handle *handle)
665{
666	struct fd_resource *rsc = fd_resource(prsc);
667
668	return fd_screen_bo_get_handle(pscreen, rsc->bo,
669			rsc->slices[0].pitch * rsc->cpp, handle);
670}
671
672
673static const struct u_resource_vtbl fd_resource_vtbl = {
674		.resource_get_handle      = fd_resource_get_handle,
675		.resource_destroy         = fd_resource_destroy,
676		.transfer_map             = fd_resource_transfer_map,
677		.transfer_flush_region    = fd_resource_transfer_flush_region,
678		.transfer_unmap           = fd_resource_transfer_unmap,
679};
680
681static uint32_t
682setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
683{
684	struct pipe_resource *prsc = &rsc->base.b;
685	enum util_format_layout layout = util_format_description(format)->layout;
686	uint32_t level, size = 0;
687	uint32_t width = prsc->width0;
688	uint32_t height = prsc->height0;
689	uint32_t depth = prsc->depth0;
690	/* in layer_first layout, the level (slice) contains just one
691	 * layer (since in fact the layer contains the slices)
692	 */
693	uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size;
694
695	for (level = 0; level <= prsc->last_level; level++) {
696		struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
697		uint32_t blocks;
698
699		if (layout == UTIL_FORMAT_LAYOUT_ASTC)
700			slice->pitch = width =
701				util_align_npot(width, 32 * util_format_get_blockwidth(format));
702		else
703			slice->pitch = width = align(width, 32);
704		slice->offset = size;
705		blocks = util_format_get_nblocks(format, width, height);
706		/* 1d array and 2d array textures must all have the same layer size
707		 * for each miplevel on a3xx. 3d textures can have different layer
708		 * sizes for high levels, but the hw auto-sizer is buggy (or at least
709		 * different than what this code does), so as soon as the layer size
710		 * range gets into range, we stop reducing it.
711		 */
712		if (prsc->target == PIPE_TEXTURE_3D && (
713					level == 1 ||
714					(level > 1 && rsc->slices[level - 1].size0 > 0xf000)))
715			slice->size0 = align(blocks * rsc->cpp, alignment);
716		else if (level == 0 || rsc->layer_first || alignment == 1)
717			slice->size0 = align(blocks * rsc->cpp, alignment);
718		else
719			slice->size0 = rsc->slices[level - 1].size0;
720
721		size += slice->size0 * depth * layers_in_level;
722
723		width = u_minify(width, 1);
724		height = u_minify(height, 1);
725		depth = u_minify(depth, 1);
726	}
727
728	return size;
729}
730
731static uint32_t
732slice_alignment(struct pipe_screen *pscreen, const struct pipe_resource *tmpl)
733{
734	/* on a3xx, 2d array and 3d textures seem to want their
735	 * layers aligned to page boundaries:
736	 */
737	switch (tmpl->target) {
738	case PIPE_TEXTURE_3D:
739	case PIPE_TEXTURE_1D_ARRAY:
740	case PIPE_TEXTURE_2D_ARRAY:
741		return 4096;
742	default:
743		return 1;
744	}
745}
746
747/* special case to resize query buf after allocated.. */
748void
749fd_resource_resize(struct pipe_resource *prsc, uint32_t sz)
750{
751	struct fd_resource *rsc = fd_resource(prsc);
752
753	debug_assert(prsc->width0 == 0);
754	debug_assert(prsc->target == PIPE_BUFFER);
755	debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
756
757	prsc->width0 = sz;
758	realloc_bo(rsc, setup_slices(rsc, 1, prsc->format));
759}
760
761/**
762 * Create a new texture object, using the given template info.
763 */
764static struct pipe_resource *
765fd_resource_create(struct pipe_screen *pscreen,
766		const struct pipe_resource *tmpl)
767{
768	struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
769	struct pipe_resource *prsc = &rsc->base.b;
770	enum pipe_format format = tmpl->format;
771	uint32_t size, alignment;
772
773	DBG("%p: target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
774			"nr_samples=%u, usage=%u, bind=%x, flags=%x", prsc,
775			tmpl->target, util_format_name(format),
776			tmpl->width0, tmpl->height0, tmpl->depth0,
777			tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
778			tmpl->usage, tmpl->bind, tmpl->flags);
779
780	if (!rsc)
781		return NULL;
782
783	*prsc = *tmpl;
784
785	pipe_reference_init(&prsc->reference, 1);
786
787	prsc->screen = pscreen;
788
789	util_range_init(&rsc->valid_buffer_range);
790
791	rsc->base.vtbl = &fd_resource_vtbl;
792
793	if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
794		format = PIPE_FORMAT_Z32_FLOAT;
795	else if (fd_screen(pscreen)->gpu_id < 400 &&
796			 util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
797		format = PIPE_FORMAT_R8G8B8A8_UNORM;
798	rsc->internal_format = format;
799	rsc->cpp = util_format_get_blocksize(format);
800
801	assert(rsc->cpp);
802
803	alignment = slice_alignment(pscreen, tmpl);
804	if (is_a4xx(fd_screen(pscreen))) {
805		switch (tmpl->target) {
806		case PIPE_TEXTURE_3D:
807			rsc->layer_first = false;
808			break;
809		default:
810			rsc->layer_first = true;
811			alignment = 1;
812			break;
813		}
814	}
815
816	size = setup_slices(rsc, alignment, format);
817
818	/* special case for hw-query buffer, which we need to allocate before we
819	 * know the size:
820	 */
821	if (size == 0) {
822		/* note, semi-intention == instead of & */
823		debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
824		return prsc;
825	}
826
827	if (rsc->layer_first) {
828		rsc->layer_size = align(size, 4096);
829		size = rsc->layer_size * prsc->array_size;
830	}
831
832	realloc_bo(rsc, size);
833	if (!rsc->bo)
834		goto fail;
835
836	/* There is no native Z32F_S8 sampling or rendering format, so this must
837	 * be emulated via two separate textures. The depth texture still keeps
838	 * its Z32F_S8 format though, and we also keep a reference to a separate
839	 * S8 texture.
840	 */
841	if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
842		struct pipe_resource stencil = *tmpl;
843		stencil.format = PIPE_FORMAT_S8_UINT;
844		rsc->stencil = fd_resource(fd_resource_create(pscreen, &stencil));
845		if (!rsc->stencil)
846			goto fail;
847	}
848
849	return prsc;
850fail:
851	fd_resource_destroy(pscreen, prsc);
852	return NULL;
853}
854
855/**
856 * Create a texture from a winsys_handle. The handle is often created in
857 * another process by first creating a pipe texture and then calling
858 * resource_get_handle.
859 */
860static struct pipe_resource *
861fd_resource_from_handle(struct pipe_screen *pscreen,
862		const struct pipe_resource *tmpl,
863		struct winsys_handle *handle, unsigned usage)
864{
865	struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
866	struct fd_resource_slice *slice = &rsc->slices[0];
867	struct pipe_resource *prsc = &rsc->base.b;
868
869	DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
870			"nr_samples=%u, usage=%u, bind=%x, flags=%x",
871			tmpl->target, util_format_name(tmpl->format),
872			tmpl->width0, tmpl->height0, tmpl->depth0,
873			tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
874			tmpl->usage, tmpl->bind, tmpl->flags);
875
876	if (!rsc)
877		return NULL;
878
879	*prsc = *tmpl;
880
881	pipe_reference_init(&prsc->reference, 1);
882
883	prsc->screen = pscreen;
884
885	util_range_init(&rsc->valid_buffer_range);
886
887	rsc->bo = fd_screen_bo_from_handle(pscreen, handle, &slice->pitch);
888	if (!rsc->bo)
889		goto fail;
890
891	rsc->base.vtbl = &fd_resource_vtbl;
892	rsc->cpp = util_format_get_blocksize(tmpl->format);
893	slice->pitch /= rsc->cpp;
894	slice->offset = handle->offset;
895
896	assert(rsc->cpp);
897
898	return prsc;
899
900fail:
901	fd_resource_destroy(pscreen, prsc);
902	return NULL;
903}
904
905/**
906 * _copy_region using pipe (3d engine)
907 */
908static bool
909fd_blitter_pipe_copy_region(struct fd_context *ctx,
910		struct pipe_resource *dst,
911		unsigned dst_level,
912		unsigned dstx, unsigned dsty, unsigned dstz,
913		struct pipe_resource *src,
914		unsigned src_level,
915		const struct pipe_box *src_box)
916{
917	/* not until we allow rendertargets to be buffers */
918	if (dst->target == PIPE_BUFFER || src->target == PIPE_BUFFER)
919		return false;
920
921	if (!util_blitter_is_copy_supported(ctx->blitter, dst, src))
922		return false;
923
924	/* TODO we could discard if dst box covers dst level fully.. */
925	fd_blitter_pipe_begin(ctx, false, false);
926	util_blitter_copy_texture(ctx->blitter,
927			dst, dst_level, dstx, dsty, dstz,
928			src, src_level, src_box);
929	fd_blitter_pipe_end(ctx);
930
931	return true;
932}
933
/**
 * Copy a block of pixels from one resource to another.
 * The resource must be of the same format.
 * Resources with nr_samples > 1 are not allowed.
 *
 * The 3d pipe is tried first, with a pure software copy as fallback.
 */
static void
fd_resource_copy_region(struct pipe_context *pctx,
		struct pipe_resource *dst,
		unsigned dst_level,
		unsigned dstx, unsigned dsty, unsigned dstz,
		struct pipe_resource *src,
		unsigned src_level,
		const struct pipe_box *src_box)
{
	struct fd_context *ctx = fd_context(pctx);
	bool copied;

	/* TODO if we have 2d core, or other DMA engine that could be used
	 * for simple copies and reasonably easily synchronized with the 3d
	 * core, this is where we'd plug it in..
	 */

	/* first choice, blit on 3d pipe: */
	copied = fd_blitter_pipe_copy_region(ctx,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box);

	/* otherwise do it in sw: */
	if (!copied) {
		util_resource_copy_region(pctx,
				dst, dst_level, dstx, dsty, dstz,
				src, src_level, src_box);
	}
}
966
967bool
968fd_render_condition_check(struct pipe_context *pctx)
969{
970	struct fd_context *ctx = fd_context(pctx);
971
972	if (!ctx->cond_query)
973		return true;
974
975	union pipe_query_result res = { 0 };
976	bool wait =
977		ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT &&
978		ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
979
980	if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res))
981			return (bool)res.u64 != ctx->cond_cond;
982
983	return true;
984}
985
986/**
987 * Optimal hardware path for blitting pixels.
988 * Scaling, format conversion, up- and downsampling (resolve) are allowed.
989 */
990static void
991fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
992{
993	struct fd_context *ctx = fd_context(pctx);
994	struct pipe_blit_info info = *blit_info;
995	bool discard = false;
996
997	if (info.src.resource->nr_samples > 1 &&
998			info.dst.resource->nr_samples <= 1 &&
999			!util_format_is_depth_or_stencil(info.src.resource->format) &&
1000			!util_format_is_pure_integer(info.src.resource->format)) {
1001		DBG("color resolve unimplemented");
1002		return;
1003	}
1004
1005	if (info.render_condition_enable && !fd_render_condition_check(pctx))
1006		return;
1007
1008	if (!info.scissor_enable && !info.alpha_blend) {
1009		discard = util_texrange_covers_whole_level(info.dst.resource,
1010				info.dst.level, info.dst.box.x, info.dst.box.y,
1011				info.dst.box.z, info.dst.box.width,
1012				info.dst.box.height, info.dst.box.depth);
1013	}
1014
1015	if (util_try_blit_via_copy_region(pctx, &info)) {
1016		return; /* done */
1017	}
1018
1019	if (info.mask & PIPE_MASK_S) {
1020		DBG("cannot blit stencil, skipping");
1021		info.mask &= ~PIPE_MASK_S;
1022	}
1023
1024	if (!util_blitter_is_blit_supported(ctx->blitter, &info)) {
1025		DBG("blit unsupported %s -> %s",
1026				util_format_short_name(info.src.resource->format),
1027				util_format_short_name(info.dst.resource->format));
1028		return;
1029	}
1030
1031	fd_blitter_pipe_begin(ctx, info.render_condition_enable, discard);
1032	util_blitter_blit(ctx->blitter, &info);
1033	fd_blitter_pipe_end(ctx);
1034}
1035
1036static void
1037fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard)
1038{
1039	util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb);
1040	util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx);
1041	util_blitter_save_vertex_shader(ctx->blitter, ctx->prog.vp);
1042	util_blitter_save_so_targets(ctx->blitter, ctx->streamout.num_targets,
1043			ctx->streamout.targets);
1044	util_blitter_save_rasterizer(ctx->blitter, ctx->rasterizer);
1045	util_blitter_save_viewport(ctx->blitter, &ctx->viewport);
1046	util_blitter_save_scissor(ctx->blitter, &ctx->scissor);
1047	util_blitter_save_fragment_shader(ctx->blitter, ctx->prog.fp);
1048	util_blitter_save_blend(ctx->blitter, ctx->blend);
1049	util_blitter_save_depth_stencil_alpha(ctx->blitter, ctx->zsa);
1050	util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref);
1051	util_blitter_save_sample_mask(ctx->blitter, ctx->sample_mask);
1052	util_blitter_save_framebuffer(ctx->blitter,
1053			ctx->batch ? &ctx->batch->framebuffer : NULL);
1054	util_blitter_save_fragment_sampler_states(ctx->blitter,
1055			ctx->fragtex.num_samplers,
1056			(void **)ctx->fragtex.samplers);
1057	util_blitter_save_fragment_sampler_views(ctx->blitter,
1058			ctx->fragtex.num_textures, ctx->fragtex.textures);
1059	if (!render_cond)
1060		util_blitter_save_render_condition(ctx->blitter,
1061			ctx->cond_query, ctx->cond_cond, ctx->cond_mode);
1062
1063	if (ctx->batch)
1064		fd_hw_query_set_stage(ctx->batch, ctx->batch->draw, FD_STAGE_BLIT);
1065
1066	ctx->in_blit = discard;
1067}
1068
1069static void
1070fd_blitter_pipe_end(struct fd_context *ctx)
1071{
1072	if (ctx->batch)
1073		fd_hw_query_set_stage(ctx->batch, ctx->batch->draw, FD_STAGE_NULL);
1074	ctx->in_blit = false;
1075}
1076
1077static void
1078fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
1079{
1080	struct fd_resource *rsc = fd_resource(prsc);
1081
1082	if (rsc->write_batch)
1083		fd_batch_flush(rsc->write_batch);
1084
1085	assert(!rsc->write_batch);
1086}
1087
1088void
1089fd_resource_screen_init(struct pipe_screen *pscreen)
1090{
1091	pscreen->resource_create = fd_resource_create;
1092	pscreen->resource_from_handle = fd_resource_from_handle;
1093	pscreen->resource_get_handle = u_resource_get_handle_vtbl;
1094	pscreen->resource_destroy = u_resource_destroy_vtbl;
1095}
1096
1097void
1098fd_resource_context_init(struct pipe_context *pctx)
1099{
1100	pctx->transfer_map = u_transfer_map_vtbl;
1101	pctx->transfer_flush_region = u_transfer_flush_region_vtbl;
1102	pctx->transfer_unmap = u_transfer_unmap_vtbl;
1103	pctx->buffer_subdata = u_default_buffer_subdata;
1104        pctx->texture_subdata = u_default_texture_subdata;
1105	pctx->create_surface = fd_create_surface;
1106	pctx->surface_destroy = fd_surface_destroy;
1107	pctx->resource_copy_region = fd_resource_copy_region;
1108	pctx->blit = fd_blit;
1109	pctx->flush_resource = fd_flush_resource;
1110}
1111