freedreno_resource.c revision 00bed8a794de3d80a46b65b9ab23c6df83e416a8
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "util/u_format.h"
#include "util/u_format_rgtc.h"
#include "util/u_format_zs.h"
#include "util/u_inlines.h"
#include "util/u_transfer.h"
#include "util/u_string.h"
#include "util/u_surface.h"
#include "util/set.h"

#include "freedreno_resource.h"
#include "freedreno_batch_cache.h"
#include "freedreno_screen.h"
#include "freedreno_surface.h"
#include "freedreno_context.h"
#include "freedreno_query_hw.h"
#include "freedreno_util.h"

#include <errno.h>

/* XXX this should go away, needed for 'struct winsys_handle' */
#include "state_tracker/drm_driver.h"
static void
fd_invalidate_resource(struct fd_context *ctx, struct pipe_resource *prsc)
{
	int i;

	/* Go through the entire state and see if the resource is bound
	 * anywhere. If it is, mark the relevant state as dirty. This is
	 * called on realloc_bo.
	 */

	/* Constbufs */
	for (i = 1; i < PIPE_MAX_CONSTANT_BUFFERS && !(ctx->dirty & FD_DIRTY_CONSTBUF); i++) {
		if (ctx->constbuf[PIPE_SHADER_VERTEX].cb[i].buffer == prsc)
			ctx->dirty |= FD_DIRTY_CONSTBUF;
		if (ctx->constbuf[PIPE_SHADER_FRAGMENT].cb[i].buffer == prsc)
			ctx->dirty |= FD_DIRTY_CONSTBUF;
	}

	/* VBOs */
	for (i = 0; i < ctx->vtx.vertexbuf.count && !(ctx->dirty & FD_DIRTY_VTXBUF); i++) {
		if (ctx->vtx.vertexbuf.vb[i].buffer == prsc)
			ctx->dirty |= FD_DIRTY_VTXBUF;
	}

	/* Index buffer */
	if (ctx->indexbuf.buffer == prsc)
		ctx->dirty |= FD_DIRTY_INDEXBUF;

	/* Textures */
	for (i = 0; i < ctx->verttex.num_textures && !(ctx->dirty & FD_DIRTY_VERTTEX); i++) {
		if (ctx->verttex.textures[i] && (ctx->verttex.textures[i]->texture == prsc))
			ctx->dirty |= FD_DIRTY_VERTTEX;
	}
	for (i = 0; i < ctx->fragtex.num_textures && !(ctx->dirty & FD_DIRTY_FRAGTEX); i++) {
		if (ctx->fragtex.textures[i] && (ctx->fragtex.textures[i]->texture == prsc))
			ctx->dirty |= FD_DIRTY_FRAGTEX;
	}
}

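/* (Re)allocate the backing bo for a resource.  Any previous contents are
 * lost, so bound state referencing the resource needs to be re-emitted
 * (see fd_invalidate_resource()).
 */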
static void
realloc_bo(struct fd_resource *rsc, uint32_t size)
{
	struct fd_screen *screen = fd_screen(rsc->base.b.screen);
	uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
			DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */

	/* if we start using things other than write-combine,
	 * be sure to check for PIPE_RESOURCE_FLAG_MAP_COHERENT
	 */

	if (rsc->bo)
		fd_bo_del(rsc->bo);

	rsc->bo = fd_bo_new(screen->dev, size, flags);
	rsc->timestamp = 0;
	util_range_set_empty(&rsc->valid_buffer_range);
	fd_bc_invalidate_resource(rsc, true);
}

static void fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard);
static void fd_blitter_pipe_end(struct fd_context *ctx);

static void
do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit, bool fallback)
{
	/* TODO size threshold too?? */
	if ((blit->src.resource->target != PIPE_BUFFER) && !fallback) {
		/* do blit on gpu: */
		fd_blitter_pipe_begin(ctx, false, true);
		util_blitter_blit(ctx->blitter, blit);
		fd_blitter_pipe_end(ctx);
	} else {
		/* do blit on cpu: */
		util_resource_copy_region(&ctx->base,
				blit->dst.resource, blit->dst.level, blit->dst.box.x,
				blit->dst.box.y, blit->dst.box.z,
				blit->src.resource, blit->src.level, &blit->src.box);
	}
}

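/* Shadowing: if the GPU is still busy with the resource's current bo, we
 * can sometimes avoid stalling by allocating a fresh bo and swapping it
 * in ("shadowing" the old one), then blitting the still-valid contents
 * from the old bo back into the new one.  The write that triggered this
 * can then proceed unsynchronized against the new bo.
 */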
static bool
fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc,
		unsigned level, unsigned usage, const struct pipe_box *box)
{
	struct pipe_context *pctx = &ctx->base;
	struct pipe_resource *prsc = &rsc->base.b;
	bool fallback = false;

	/* TODO: somehow munge dimensions and format to copy unsupported
	 * render target format to something that is supported?
	 */
	if (!pctx->screen->is_format_supported(pctx->screen,
			prsc->format, prsc->target, prsc->nr_samples,
			PIPE_BIND_RENDER_TARGET))
		fallback = true;

	/* these cases should be handled elsewhere.. just for future
	 * reference in case this gets split into a more generic(ish)
	 * helper.
	 */
	debug_assert(!(usage & PIPE_TRANSFER_READ));
	debug_assert(!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE));

	/* if we do a gpu blit to clone the whole resource, we'll just
	 * end up stalling on that.. so only allow if we can discard the
	 * current range (and blit, possibly cpu or gpu, the rest)
	 */
	if (!(usage & PIPE_TRANSFER_DISCARD_RANGE))
		return false;

	bool whole_level = util_texrange_covers_whole_level(prsc, level,
		box->x, box->y, box->z, box->width, box->height, box->depth);

	/* TODO need to be more clever about current level */
	if ((prsc->target >= PIPE_TEXTURE_2D) && !whole_level)
		return false;

	struct pipe_resource *pshadow =
		pctx->screen->resource_create(pctx->screen, prsc);

	if (!pshadow)
		return false;

	assert(!ctx->in_shadow);
	ctx->in_shadow = true;

	/* get rid of any references that batch-cache might have to us (which
	 * should empty/destroy rsc->batches hashset)
	 */
	fd_bc_invalidate_resource(rsc, false);

	/* Swap the backing bo's, so shadow becomes the old buffer,
	 * blit from shadow to new buffer.  From here on out, we
	 * cannot fail.
	 *
	 * Note that we need to do it in this order, otherwise if
	 * we go down the cpu blit path, the recursive transfer_map()
	 * sees the wrong status..
	 */
	struct fd_resource *shadow = fd_resource(pshadow);

	DBG("shadow: %p (%d) -> %p (%d)\n", rsc, rsc->base.b.reference.count,
			shadow, shadow->base.b.reference.count);

	/* TODO valid_buffer_range?? */
	swap(rsc->bo,        shadow->bo);
	swap(rsc->timestamp, shadow->timestamp);
	swap(rsc->write_batch,   shadow->write_batch);

	/* at this point, the newly created shadow buffer is not referenced
	 * by any batches, but the existing rsc (probably) is.  We need to
	 * transfer those references over:
	 */
	debug_assert(shadow->batch_mask == 0);
	struct fd_batch *batch;
	foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
		struct set_entry *entry = _mesa_set_search(batch->resources, rsc);
		_mesa_set_remove(batch->resources, entry);
		_mesa_set_add(batch->resources, shadow);
	}
	swap(rsc->batch_mask, shadow->batch_mask);

	struct pipe_blit_info blit = {0};
	blit.dst.resource = prsc;
	blit.dst.format   = prsc->format;
	blit.src.resource = pshadow;
	blit.src.format   = pshadow->format;
	blit.mask = util_format_get_mask(prsc->format);
	blit.filter = PIPE_TEX_FILTER_NEAREST;

#define set_box(field, val) do {     \
		blit.dst.field = (val);      \
		blit.src.field = (val);      \
	} while (0)

	/* blit the other levels in their entirety: */
	for (unsigned l = 0; l <= prsc->last_level; l++) {
		if (l == level)
			continue;

		/* just blit whole level: */
		set_box(level, l);
		set_box(box.width,  u_minify(prsc->width0, l));
		set_box(box.height, u_minify(prsc->height0, l));
		set_box(box.depth,  u_minify(prsc->depth0, l));

		do_blit(ctx, &blit, fallback);
	}

	/* deal w/ current level specially, since we might need to split
	 * it up into a couple blits:
	 */
	if (!whole_level) {
		set_box(level, level);

		switch (prsc->target) {
		case PIPE_BUFFER:
		case PIPE_TEXTURE_1D:
			set_box(box.y, 0);
			set_box(box.z, 0);
			set_box(box.height, 1);
			set_box(box.depth, 1);

			if (box->x > 0) {
				set_box(box.x, 0);
				set_box(box.width, box->x);

				do_blit(ctx, &blit, fallback);
			}
			if ((box->x + box->width) < u_minify(prsc->width0, level)) {
				set_box(box.x, box->x + box->width);
				set_box(box.width, u_minify(prsc->width0, level) - (box->x + box->width));

				do_blit(ctx, &blit, fallback);
			}
			break;
		case PIPE_TEXTURE_2D:
			/* TODO */
		default:
			unreachable("TODO");
		}
	}

	ctx->in_shadow = false;

	pipe_resource_reference(&pshadow, NULL);

	return true;
}

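/* byte offset of a given layer within a level's data, accounting for
 * layer-first vs level-first layouts:
 */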
static unsigned
fd_resource_layer_offset(struct fd_resource *rsc,
						 struct fd_resource_slice *slice,
						 unsigned layer)
{
	if (rsc->layer_first)
		return layer * rsc->layer_size;
	else
		return layer * slice->size0;
}

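/* Flush staging data for the emulated Z32F_S8 formats: unpack the
 * interleaved z32_float_s8x24 staging buffer into the separate depth
 * (z32 float) and stencil (s8) backing textures.
 */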
static void
fd_resource_flush_z32s8(struct fd_transfer *trans, const struct pipe_box *box)
{
	struct fd_resource *rsc = fd_resource(trans->base.resource);
	struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
	struct fd_resource_slice *sslice = fd_resource_slice(rsc->stencil, trans->base.level);
	enum pipe_format format = trans->base.resource->format;

	float *depth = fd_bo_map(rsc->bo) + slice->offset +
		fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
		(trans->base.box.y + box->y) * slice->pitch * 4 + (trans->base.box.x + box->x) * 4;
	uint8_t *stencil = fd_bo_map(rsc->stencil->bo) + sslice->offset +
		fd_resource_layer_offset(rsc->stencil, sslice, trans->base.box.z) +
		(trans->base.box.y + box->y) * sslice->pitch + trans->base.box.x + box->x;

	if (format != PIPE_FORMAT_X32_S8X24_UINT)
		util_format_z32_float_s8x24_uint_unpack_z_float(
				depth, slice->pitch * 4,
				trans->staging, trans->base.stride,
				box->width, box->height);

	util_format_z32_float_s8x24_uint_unpack_s_8uint(
			stencil, sslice->pitch,
			trans->staging, trans->base.stride,
			box->width, box->height);
}

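/* Flush staging data for RGTC/LATC formats, which pre-a4xx hardware
 * cannot sample natively: decompress the staging buffer into the RGBA8
 * backing texture.
 */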
static void
fd_resource_flush_rgtc(struct fd_transfer *trans, const struct pipe_box *box)
{
	struct fd_resource *rsc = fd_resource(trans->base.resource);
	struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
	enum pipe_format format = trans->base.resource->format;

	uint8_t *data = fd_bo_map(rsc->bo) + slice->offset +
		fd_resource_layer_offset(rsc, slice, trans->base.box.z) +
		((trans->base.box.y + box->y) * slice->pitch +
		 trans->base.box.x + box->x) * rsc->cpp;

	uint8_t *source = trans->staging +
		util_format_get_nblocksy(format, box->y) * trans->base.stride +
		util_format_get_stride(format, box->x);

	switch (format) {
	case PIPE_FORMAT_RGTC1_UNORM:
	case PIPE_FORMAT_RGTC1_SNORM:
	case PIPE_FORMAT_LATC1_UNORM:
	case PIPE_FORMAT_LATC1_SNORM:
		util_format_rgtc1_unorm_unpack_rgba_8unorm(
				data, slice->pitch * rsc->cpp,
				source, trans->base.stride,
				box->width, box->height);
		break;
	case PIPE_FORMAT_RGTC2_UNORM:
	case PIPE_FORMAT_RGTC2_SNORM:
	case PIPE_FORMAT_LATC2_UNORM:
	case PIPE_FORMAT_LATC2_SNORM:
		util_format_rgtc2_unorm_unpack_rgba_8unorm(
				data, slice->pitch * rsc->cpp,
				source, trans->base.stride,
				box->width, box->height);
		break;
	default:
		assert(!"Unexpected format");
		break;
	}
}

static void
fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
{
	enum pipe_format format = trans->base.resource->format;

	switch (format) {
	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
	case PIPE_FORMAT_X32_S8X24_UINT:
		fd_resource_flush_z32s8(trans, box);
		break;
	case PIPE_FORMAT_RGTC1_UNORM:
	case PIPE_FORMAT_RGTC1_SNORM:
	case PIPE_FORMAT_RGTC2_UNORM:
	case PIPE_FORMAT_RGTC2_SNORM:
	case PIPE_FORMAT_LATC1_UNORM:
	case PIPE_FORMAT_LATC1_SNORM:
	case PIPE_FORMAT_LATC2_UNORM:
	case PIPE_FORMAT_LATC2_SNORM:
		fd_resource_flush_rgtc(trans, box);
		break;
	default:
		assert(!"Unexpected staging transfer type");
		break;
	}
}

static void fd_resource_transfer_flush_region(struct pipe_context *pctx,
		struct pipe_transfer *ptrans,
		const struct pipe_box *box)
{
	struct fd_resource *rsc = fd_resource(ptrans->resource);
	struct fd_transfer *trans = fd_transfer(ptrans);

	if (ptrans->resource->target == PIPE_BUFFER)
		util_range_add(&rsc->valid_buffer_range,
					   ptrans->box.x + box->x,
					   ptrans->box.x + box->x + box->width);

	if (trans->staging)
		fd_resource_flush(trans, box);
}

static void
fd_resource_transfer_unmap(struct pipe_context *pctx,
		struct pipe_transfer *ptrans)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd_resource *rsc = fd_resource(ptrans->resource);
	struct fd_transfer *trans = fd_transfer(ptrans);

	if (trans->staging && !(ptrans->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
		struct pipe_box box;
		u_box_2d(0, 0, ptrans->box.width, ptrans->box.height, &box);
		fd_resource_flush(trans, &box);
	}

	if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		fd_bo_cpu_fini(rsc->bo);
		if (rsc->stencil)
			fd_bo_cpu_fini(rsc->stencil->bo);
	}

	util_range_add(&rsc->valid_buffer_range,
				   ptrans->box.x,
				   ptrans->box.x + ptrans->box.width);

	/* free the staging buffer *before* returning the transfer to the
	 * slab, since trans points into the slab allocation:
	 */
	free(trans->staging);

	pipe_resource_reference(&ptrans->resource, NULL);
	util_slab_free(&ctx->transfer_pool, ptrans);
}

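/* Map a resource for CPU access.  The interesting part is deciding how
 * much synchronization is required:
 *
 *  - DISCARD_WHOLE_RESOURCE: swap in a fresh bo (realloc_bo()) and let
 *    any pending rendering finish against the old one
 *  - a write to a not-yet-valid range of a buffer: no sync needed
 *  - otherwise: flush any batches touching the resource and stall until
 *    the bo is idle, unless we can shadow the resource instead (see
 *    fd_try_shadow_resource())
 *
 * Z32F_S8 and (pre-a4xx) RGTC formats additionally go through a malloc'd
 * staging buffer, which gets written back to the bo(s) in
 * fd_resource_flush().
 */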
static void *
fd_resource_transfer_map(struct pipe_context *pctx,
		struct pipe_resource *prsc,
		unsigned level, unsigned usage,
		const struct pipe_box *box,
		struct pipe_transfer **pptrans)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd_resource *rsc = fd_resource(prsc);
	struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
	struct fd_transfer *trans;
	struct pipe_transfer *ptrans;
	enum pipe_format format = prsc->format;
	uint32_t op = 0;
	uint32_t offset;
	char *buf;
	int ret = 0;

	DBG("prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d", prsc, level, usage,
		box->width, box->height, box->x, box->y);

	ptrans = util_slab_alloc(&ctx->transfer_pool);
	if (!ptrans)
		return NULL;

	/* util_slab_alloc() doesn't zero: */
	trans = fd_transfer(ptrans);
	memset(trans, 0, sizeof(*trans));

	pipe_resource_reference(&ptrans->resource, prsc);
	ptrans->level = level;
	ptrans->usage = usage;
	ptrans->box = *box;
	ptrans->stride = util_format_get_nblocksx(format, slice->pitch) * rsc->cpp;
	ptrans->layer_stride = rsc->layer_first ? rsc->layer_size : slice->size0;

	if (ctx->in_shadow && !(usage & PIPE_TRANSFER_READ))
		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;

	if (usage & PIPE_TRANSFER_READ)
		op |= DRM_FREEDRENO_PREP_READ;

	if (usage & PIPE_TRANSFER_WRITE)
		op |= DRM_FREEDRENO_PREP_WRITE;

	if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
		realloc_bo(rsc, fd_bo_size(rsc->bo));
		if (rsc->stencil)
			realloc_bo(rsc->stencil, fd_bo_size(rsc->stencil->bo));
		fd_invalidate_resource(ctx, prsc);
	} else if ((usage & PIPE_TRANSFER_WRITE) &&
			   prsc->target == PIPE_BUFFER &&
			   !util_ranges_intersect(&rsc->valid_buffer_range,
									  box->x, box->x + box->width)) {
		/* We are trying to write to a previously uninitialized range. No need
		 * to wait.
		 */
	} else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		if ((usage & PIPE_TRANSFER_WRITE) && rsc->write_batch &&
				rsc->write_batch->back_blit) {
			/* if only thing pending is a back-blit, we can discard it: */
			fd_batch_reset(rsc->write_batch);
		}

		/* If the GPU is writing to the resource, or if it is reading from the
		 * resource and we're trying to write to it, flush the renders.
		 */
		bool needs_flush = pending(rsc, !!(usage & PIPE_TRANSFER_WRITE));
		bool busy = needs_flush || (0 != fd_bo_cpu_prep(rsc->bo,
				ctx->screen->pipe, op | DRM_FREEDRENO_PREP_NOSYNC));

		/* if we need to flush/stall, see if we can make a shadow buffer
		 * to avoid this:
		 *
		 * TODO we could also go down this path when !reorder && !busy_for_read,
		 * ie. the only case we really *don't* want to take this path is when
		 * the blit itself would trigger a flush!
		 */
		if (ctx->screen->reorder && busy && !(usage & PIPE_TRANSFER_READ)) {
			if (fd_try_shadow_resource(ctx, rsc, level, usage, box)) {
				needs_flush = busy = false;
				fd_invalidate_resource(ctx, prsc);
			}
		}

		if (needs_flush) {
			if (usage & PIPE_TRANSFER_WRITE) {
				struct fd_batch *batch, *last_batch = NULL;
				foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
					fd_batch_reference(&last_batch, batch);
					fd_batch_flush(batch, false);
				}
				if (last_batch) {
					fd_batch_sync(last_batch);
					fd_batch_reference(&last_batch, NULL);
				}
				assert(rsc->batch_mask == 0);
			} else {
				fd_batch_flush(rsc->write_batch, true);
			}
			assert(!rsc->write_batch);
		}

		/* The GPU keeps track of how the various bo's are being used, and
		 * will wait if necessary for the proper operation to have
		 * completed.
		 */
		if (busy) {
			ret = fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, op);
			if (ret)
				goto fail;
		}
	}

	buf = fd_bo_map(rsc->bo);
	if (!buf)
		goto fail;

	offset = slice->offset +
		box->y / util_format_get_blockheight(format) * ptrans->stride +
		box->x / util_format_get_blockwidth(format) * rsc->cpp +
		fd_resource_layer_offset(rsc, slice, box->z);

	if (prsc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
		prsc->format == PIPE_FORMAT_X32_S8X24_UINT) {
		assert(trans->base.box.depth == 1);

		trans->base.stride = trans->base.box.width * rsc->cpp * 2;
		trans->staging = malloc(trans->base.stride * trans->base.box.height);
		if (!trans->staging)
			goto fail;

		/* if we're not discarding the whole range (or resource), we must copy
		 * the real data in.
		 */
		if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
					   PIPE_TRANSFER_DISCARD_RANGE))) {
			struct fd_resource_slice *sslice =
				fd_resource_slice(rsc->stencil, level);
			void *sbuf = fd_bo_map(rsc->stencil->bo);
			if (!sbuf)
				goto fail;

			float *depth = (float *)(buf + slice->offset +
				fd_resource_layer_offset(rsc, slice, box->z) +
				box->y * slice->pitch * 4 + box->x * 4);
			uint8_t *stencil = sbuf + sslice->offset +
				fd_resource_layer_offset(rsc->stencil, sslice, box->z) +
				box->y * sslice->pitch + box->x;

			if (format != PIPE_FORMAT_X32_S8X24_UINT)
				util_format_z32_float_s8x24_uint_pack_z_float(
						trans->staging, trans->base.stride,
						depth, slice->pitch * 4,
						box->width, box->height);

			util_format_z32_float_s8x24_uint_pack_s_8uint(
					trans->staging, trans->base.stride,
					stencil, sslice->pitch,
					box->width, box->height);
		}

		buf = trans->staging;
		offset = 0;
	} else if (rsc->internal_format != format &&
			   util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC) {
		assert(trans->base.box.depth == 1);

		trans->base.stride = util_format_get_stride(
				format, trans->base.box.width);
		trans->staging = malloc(
				util_format_get_2d_size(format, trans->base.stride,
										trans->base.box.height));
		if (!trans->staging)
			goto fail;

		/* if we're not discarding the whole range (or resource), we must copy
		 * the real data in.
		 */
		if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
					   PIPE_TRANSFER_DISCARD_RANGE))) {
			uint8_t *rgba8 = (uint8_t *)buf + slice->offset +
				fd_resource_layer_offset(rsc, slice, box->z) +
				box->y * slice->pitch * rsc->cpp + box->x * rsc->cpp;

			switch (format) {
			case PIPE_FORMAT_RGTC1_UNORM:
			case PIPE_FORMAT_RGTC1_SNORM:
			case PIPE_FORMAT_LATC1_UNORM:
			case PIPE_FORMAT_LATC1_SNORM:
				util_format_rgtc1_unorm_pack_rgba_8unorm(
					trans->staging, trans->base.stride,
					rgba8, slice->pitch * rsc->cpp,
					box->width, box->height);
				break;
			case PIPE_FORMAT_RGTC2_UNORM:
			case PIPE_FORMAT_RGTC2_SNORM:
			case PIPE_FORMAT_LATC2_UNORM:
			case PIPE_FORMAT_LATC2_SNORM:
				util_format_rgtc2_unorm_pack_rgba_8unorm(
					trans->staging, trans->base.stride,
					rgba8, slice->pitch * rsc->cpp,
					box->width, box->height);
				break;
			default:
				assert(!"Unexpected format");
				break;
			}
		}

		buf = trans->staging;
		offset = 0;
	}

	*pptrans = ptrans;

	return buf + offset;

fail:
	fd_resource_transfer_unmap(pctx, ptrans);
	return NULL;
}

static void
fd_resource_destroy(struct pipe_screen *pscreen,
		struct pipe_resource *prsc)
{
	struct fd_resource *rsc = fd_resource(prsc);
	fd_bc_invalidate_resource(rsc, true);
	if (rsc->bo)
		fd_bo_del(rsc->bo);
	util_range_destroy(&rsc->valid_buffer_range);
	FREE(rsc);
}

static boolean
fd_resource_get_handle(struct pipe_screen *pscreen,
		struct pipe_resource *prsc,
		struct winsys_handle *handle)
{
	struct fd_resource *rsc = fd_resource(prsc);

	return fd_screen_bo_get_handle(pscreen, rsc->bo,
			rsc->slices[0].pitch * rsc->cpp, handle);
}

static const struct u_resource_vtbl fd_resource_vtbl = {
		.resource_get_handle      = fd_resource_get_handle,
		.resource_destroy         = fd_resource_destroy,
		.transfer_map             = fd_resource_transfer_map,
		.transfer_flush_region    = fd_resource_transfer_flush_region,
		.transfer_unmap           = fd_resource_transfer_unmap,
};

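/* Lay out the miplevel slices within the bo.  Each level's pitch is its
 * width aligned to 32 pixels (to 32 blocks worth of pixels for ASTC),
 * and each slice records its byte offset and per-layer size.  Returns
 * the size in bytes of one layer for layer_first resources, or of the
 * whole resource otherwise.
 */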
static uint32_t
setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
{
	struct pipe_resource *prsc = &rsc->base.b;
	enum util_format_layout layout = util_format_description(format)->layout;
	uint32_t level, size = 0;
	uint32_t width = prsc->width0;
	uint32_t height = prsc->height0;
	uint32_t depth = prsc->depth0;
	/* in layer_first layout, the level (slice) contains just one
	 * layer (since in fact the layer contains the slices)
	 */
	uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size;

	for (level = 0; level <= prsc->last_level; level++) {
		struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
		uint32_t blocks;

		if (layout == UTIL_FORMAT_LAYOUT_ASTC)
			slice->pitch = width =
				util_align_npot(width, 32 * util_format_get_blockwidth(format));
		else
			slice->pitch = width = align(width, 32);
		slice->offset = size;
		blocks = util_format_get_nblocks(format, width, height);
		/* 1d array and 2d array textures must all have the same layer size
		 * for each miplevel on a3xx. 3d textures can have different layer
		 * sizes for high levels, but the hw auto-sizer is buggy (or at least
		 * different than what this code does), so once the layer size drops
		 * to 0xf000 bytes or less, we stop reducing it.
		 */
		if (prsc->target == PIPE_TEXTURE_3D && (
					level == 1 ||
					(level > 1 && rsc->slices[level - 1].size0 > 0xf000)))
			slice->size0 = align(blocks * rsc->cpp, alignment);
		else if (level == 0 || rsc->layer_first || alignment == 1)
			slice->size0 = align(blocks * rsc->cpp, alignment);
		else
			slice->size0 = rsc->slices[level - 1].size0;

		size += slice->size0 * depth * layers_in_level;

		width = u_minify(width, 1);
		height = u_minify(height, 1);
		depth = u_minify(depth, 1);
	}

	return size;
}

static uint32_t
slice_alignment(struct pipe_screen *pscreen, const struct pipe_resource *tmpl)
{
	/* on a3xx, 2d array and 3d textures seem to want their
	 * layers aligned to page boundaries:
	 */
	switch (tmpl->target) {
	case PIPE_TEXTURE_3D:
	case PIPE_TEXTURE_1D_ARRAY:
	case PIPE_TEXTURE_2D_ARRAY:
		return 4096;
	default:
		return 1;
	}
}

/* special case to resize query buf after allocated.. */
void
fd_resource_resize(struct pipe_resource *prsc, uint32_t sz)
{
	struct fd_resource *rsc = fd_resource(prsc);

	debug_assert(prsc->width0 == 0);
	debug_assert(prsc->target == PIPE_BUFFER);
	debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);

	prsc->width0 = sz;
	realloc_bo(rsc, setup_slices(rsc, 1, prsc->format));
}

/**
 * Create a new texture object, using the given template info.
 */
static struct pipe_resource *
fd_resource_create(struct pipe_screen *pscreen,
		const struct pipe_resource *tmpl)
{
	struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
	struct pipe_resource *prsc = &rsc->base.b;
	enum pipe_format format = tmpl->format;
	uint32_t size, alignment;

	DBG("%p: target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
			"nr_samples=%u, usage=%u, bind=%x, flags=%x", prsc,
			tmpl->target, util_format_name(format),
			tmpl->width0, tmpl->height0, tmpl->depth0,
			tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
			tmpl->usage, tmpl->bind, tmpl->flags);

	if (!rsc)
		return NULL;

	*prsc = *tmpl;

	pipe_reference_init(&prsc->reference, 1);

	prsc->screen = pscreen;

	util_range_init(&rsc->valid_buffer_range);

	rsc->base.vtbl = &fd_resource_vtbl;

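	/* emulated formats: Z32F_S8 is split into separate z32 and s8
	 * textures (see below), and pre-a4xx parts cannot sample RGTC/LATC,
	 * so those get decompressed into an RGBA8 texture (with the
	 * compressed data staged at transfer_map() time):
	 */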
	if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
		format = PIPE_FORMAT_Z32_FLOAT;
	else if (fd_screen(pscreen)->gpu_id < 400 &&
			 util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_RGTC)
		format = PIPE_FORMAT_R8G8B8A8_UNORM;
	rsc->internal_format = format;
	rsc->cpp = util_format_get_blocksize(format);

	assert(rsc->cpp);

	alignment = slice_alignment(pscreen, tmpl);
	if (is_a4xx(fd_screen(pscreen))) {
		switch (tmpl->target) {
		case PIPE_TEXTURE_3D:
			rsc->layer_first = false;
			break;
		default:
			rsc->layer_first = true;
			alignment = 1;
			break;
		}
	}

	size = setup_slices(rsc, alignment, format);

	/* special case for hw-query buffer, which we need to allocate before we
	 * know the size:
	 */
	if (size == 0) {
		/* note: semi-intentional use of == instead of & */
		debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
		return prsc;
	}

	if (rsc->layer_first) {
		rsc->layer_size = align(size, 4096);
		size = rsc->layer_size * prsc->array_size;
	}

	realloc_bo(rsc, size);
	if (!rsc->bo)
		goto fail;

	/* There is no native Z32F_S8 sampling or rendering format, so this must
	 * be emulated via two separate textures. The depth texture still keeps
	 * its Z32F_S8 format though, and we also keep a reference to a separate
	 * S8 texture.
	 */
	if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
		struct pipe_resource stencil = *tmpl;
		stencil.format = PIPE_FORMAT_S8_UINT;
		rsc->stencil = fd_resource(fd_resource_create(pscreen, &stencil));
		if (!rsc->stencil)
			goto fail;
	}

	return prsc;
fail:
	fd_resource_destroy(pscreen, prsc);
	return NULL;
}

/**
 * Create a texture from a winsys_handle. The handle is often created in
 * another process by first creating a pipe texture and then calling
 * resource_get_handle.
 */
static struct pipe_resource *
fd_resource_from_handle(struct pipe_screen *pscreen,
		const struct pipe_resource *tmpl,
		struct winsys_handle *handle, unsigned usage)
{
	struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
	struct fd_resource_slice *slice = &rsc->slices[0];
	struct pipe_resource *prsc = &rsc->base.b;

	DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
			"nr_samples=%u, usage=%u, bind=%x, flags=%x",
			tmpl->target, util_format_name(tmpl->format),
			tmpl->width0, tmpl->height0, tmpl->depth0,
			tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
			tmpl->usage, tmpl->bind, tmpl->flags);

	if (!rsc)
		return NULL;

	*prsc = *tmpl;

	pipe_reference_init(&prsc->reference, 1);

	prsc->screen = pscreen;

	util_range_init(&rsc->valid_buffer_range);

	rsc->bo = fd_screen_bo_from_handle(pscreen, handle, &slice->pitch);
	if (!rsc->bo)
		goto fail;

	rsc->base.vtbl = &fd_resource_vtbl;
	rsc->cpp = util_format_get_blocksize(tmpl->format);
	slice->pitch /= rsc->cpp;
	slice->offset = handle->offset;

	assert(rsc->cpp);

	return prsc;

fail:
	fd_resource_destroy(pscreen, prsc);
	return NULL;
}

/**
 * _copy_region using pipe (3d engine)
 */
static bool
fd_blitter_pipe_copy_region(struct fd_context *ctx,
		struct pipe_resource *dst,
		unsigned dst_level,
		unsigned dstx, unsigned dsty, unsigned dstz,
		struct pipe_resource *src,
		unsigned src_level,
		const struct pipe_box *src_box)
{
	/* not until we allow rendertargets to be buffers */
	if (dst->target == PIPE_BUFFER || src->target == PIPE_BUFFER)
		return false;

	if (!util_blitter_is_copy_supported(ctx->blitter, dst, src))
		return false;

	/* TODO we could discard if dst box covers dst level fully.. */
	fd_blitter_pipe_begin(ctx, false, false);
	util_blitter_copy_texture(ctx->blitter,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box);
	fd_blitter_pipe_end(ctx);

	return true;
}

/**
 * Copy a block of pixels from one resource to another.
 * Both resources must be of the same format.
 * Resources with nr_samples > 1 are not allowed.
 */
static void
fd_resource_copy_region(struct pipe_context *pctx,
		struct pipe_resource *dst,
		unsigned dst_level,
		unsigned dstx, unsigned dsty, unsigned dstz,
		struct pipe_resource *src,
		unsigned src_level,
		const struct pipe_box *src_box)
{
	struct fd_context *ctx = fd_context(pctx);

	/* TODO if we have 2d core, or other DMA engine that could be used
	 * for simple copies and reasonably easily synchronized with the 3d
	 * core, this is where we'd plug it in..
	 */

	/* try blit on 3d pipe: */
	if (fd_blitter_pipe_copy_region(ctx,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box))
		return;

	/* else fallback to pure sw: */
	util_resource_copy_region(pctx,
			dst, dst_level, dstx, dsty, dstz,
			src, src_level, src_box);
}

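/* Evaluate the current conditional-rendering predicate: returns true if
 * rendering should proceed.  With the NO_WAIT modes, an unavailable
 * query result also means "proceed".
 */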
bool
fd_render_condition_check(struct pipe_context *pctx)
{
	struct fd_context *ctx = fd_context(pctx);

	if (!ctx->cond_query)
		return true;

	union pipe_query_result res = { 0 };
	bool wait =
		ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT &&
		ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;

	if (pctx->get_query_result(pctx, ctx->cond_query, wait, &res))
		return (bool)res.u64 != ctx->cond_cond;

	return true;
}

/**
 * Optimal hardware path for blitting pixels.
 * Scaling, format conversion, up- and downsampling (resolve) are allowed.
 */
static void
fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
{
	struct fd_context *ctx = fd_context(pctx);
	struct pipe_blit_info info = *blit_info;
	bool discard = false;

	if (info.src.resource->nr_samples > 1 &&
			info.dst.resource->nr_samples <= 1 &&
			!util_format_is_depth_or_stencil(info.src.resource->format) &&
			!util_format_is_pure_integer(info.src.resource->format)) {
		DBG("color resolve unimplemented");
		return;
	}

	if (info.render_condition_enable && !fd_render_condition_check(pctx))
		return;

	if (!info.scissor_enable && !info.alpha_blend) {
		discard = util_texrange_covers_whole_level(info.dst.resource,
				info.dst.level, info.dst.box.x, info.dst.box.y,
				info.dst.box.z, info.dst.box.width,
				info.dst.box.height, info.dst.box.depth);
	}

	if (util_try_blit_via_copy_region(pctx, &info)) {
		return; /* done */
	}

	if (info.mask & PIPE_MASK_S) {
		DBG("cannot blit stencil, skipping");
		info.mask &= ~PIPE_MASK_S;
	}

	if (!util_blitter_is_blit_supported(ctx->blitter, &info)) {
		DBG("blit unsupported %s -> %s",
				util_format_short_name(info.src.resource->format),
				util_format_short_name(info.dst.resource->format));
		return;
	}

	fd_blitter_pipe_begin(ctx, info.render_condition_enable, discard);
	util_blitter_blit(ctx->blitter, &info);
	fd_blitter_pipe_end(ctx);
}

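/* Save off all the state that util_blitter will clobber; the blitter
 * draws through the normal pipe_context hooks with its own shaders and
 * state objects.  Paired with fd_blitter_pipe_end().
 */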
static void
fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard)
{
	util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb);
	util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx);
	util_blitter_save_vertex_shader(ctx->blitter, ctx->prog.vp);
	util_blitter_save_so_targets(ctx->blitter, ctx->streamout.num_targets,
			ctx->streamout.targets);
	util_blitter_save_rasterizer(ctx->blitter, ctx->rasterizer);
	util_blitter_save_viewport(ctx->blitter, &ctx->viewport);
	util_blitter_save_scissor(ctx->blitter, &ctx->scissor);
	util_blitter_save_fragment_shader(ctx->blitter, ctx->prog.fp);
	util_blitter_save_blend(ctx->blitter, ctx->blend);
	util_blitter_save_depth_stencil_alpha(ctx->blitter, ctx->zsa);
	util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref);
	util_blitter_save_sample_mask(ctx->blitter, ctx->sample_mask);
	util_blitter_save_framebuffer(ctx->blitter,
			ctx->batch ? &ctx->batch->framebuffer : NULL);
	util_blitter_save_fragment_sampler_states(ctx->blitter,
			ctx->fragtex.num_samplers,
			(void **)ctx->fragtex.samplers);
	util_blitter_save_fragment_sampler_views(ctx->blitter,
			ctx->fragtex.num_textures, ctx->fragtex.textures);
	if (!render_cond)
		util_blitter_save_render_condition(ctx->blitter,
			ctx->cond_query, ctx->cond_cond, ctx->cond_mode);

	if (ctx->batch)
		fd_hw_query_set_stage(ctx->batch, ctx->batch->draw, FD_STAGE_BLIT);

	ctx->in_blit = discard;
}

static void
fd_blitter_pipe_end(struct fd_context *ctx)
{
	if (ctx->batch)
		fd_hw_query_set_stage(ctx->batch, ctx->batch->draw, FD_STAGE_NULL);
	ctx->in_blit = false;
}

static void
fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
{
	struct fd_resource *rsc = fd_resource(prsc);

	if (rsc->write_batch)
		fd_batch_flush(rsc->write_batch, true);

	assert(!rsc->write_batch);
}

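/* Hook up the resource entry-points.  The transfer and handle hooks go
 * through the u_resource vtbl indirection, which dispatches to the
 * fd_resource_vtbl above.
 */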
void
fd_resource_screen_init(struct pipe_screen *pscreen)
{
	pscreen->resource_create = fd_resource_create;
	pscreen->resource_from_handle = fd_resource_from_handle;
	pscreen->resource_get_handle = u_resource_get_handle_vtbl;
	pscreen->resource_destroy = u_resource_destroy_vtbl;
}

void
fd_resource_context_init(struct pipe_context *pctx)
{
	pctx->transfer_map = u_transfer_map_vtbl;
	pctx->transfer_flush_region = u_transfer_flush_region_vtbl;
	pctx->transfer_unmap = u_transfer_unmap_vtbl;
	pctx->buffer_subdata = u_default_buffer_subdata;
	pctx->texture_subdata = u_default_texture_subdata;
	pctx->create_surface = fd_create_surface;
	pctx->surface_destroy = fd_surface_destroy;
	pctx->resource_copy_region = fd_resource_copy_region;
	pctx->blit = fd_blit;
	pctx->flush_resource = fd_flush_resource;
}