/*
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Adam Rak <adam.rak@streamnovation.com>
 */

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "util/u_blitter.h"
#include "util/u_double_list.h"
#include "util/u_transfer.h"
#include "util/u_surface.h"
#include "util/u_pack_color.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_framebuffer.h"
#include "r600.h"
#include "r600_resource.h"
#include "r600_shader.h"
#include "r600_pipe.h"
#include "r600_formats.h"
#include "compute_memory_pool.h"
#include "evergreen_compute_internal.h"

static struct r600_texture * create_pool_texture(struct r600_screen * screen,
		unsigned size_in_dw)
{
	struct pipe_resource templ;
	struct r600_texture * tex;

	if (size_in_dw == 0) {
		return NULL;
	}

	memset(&templ, 0, sizeof(templ));
	templ.target = PIPE_TEXTURE_1D;
	templ.format = PIPE_FORMAT_R32_UINT;
	templ.bind = PIPE_BIND_CUSTOM;
	templ.usage = PIPE_USAGE_IMMUTABLE;
	templ.flags = 0;
	templ.width0 = size_in_dw;
	templ.height0 = 1;
	templ.depth0 = 1;
	templ.array_size = 1;

	tex = (struct r600_texture *)r600_texture_create(
						&screen->screen, &templ);
	/* XXX: Propagate this error */
	assert(tex && "Out of memory");
	tex->is_rat = 1;
	return tex;
}
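
/* The pool is backed by a 1D R32_UINT texture, so one texel corresponds to
 * one dword of pool space.  Marking it as a RAT (random access target) lets
 * compute kernels read and write it directly. */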

/**
 * Creates a new pool.
 */
struct compute_memory_pool* compute_memory_pool_new(
	struct r600_screen * rscreen)
{
	struct compute_memory_pool* pool = (struct compute_memory_pool*)
				CALLOC(sizeof(struct compute_memory_pool), 1);

	COMPUTE_DBG("* compute_memory_pool_new()\n");

	pool->screen = rscreen;
	return pool;
}
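
/* Rough usage sketch of the pool API in this file (caller-side pseudocode;
 * the surrounding driver code is the authoritative user, and rscreen, pipe,
 * host_ptr, size_in_dw and size_in_bytes are placeholders for values the
 * caller already has):
 *
 *   struct compute_memory_pool *pool = compute_memory_pool_new(rscreen);
 *   struct compute_memory_item *item = compute_memory_alloc(pool, size_in_dw);
 *   compute_memory_finalize_pending(pool, pipe);   (gives item a start_in_dw)
 *   compute_memory_transfer(pool, pipe, 0, item, host_ptr, 0, size_in_bytes);
 *   ...
 *   compute_memory_free(pool, item->id);
 *   compute_memory_pool_delete(pool);
 */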

static void compute_memory_pool_init(struct compute_memory_pool * pool,
	unsigned initial_size_in_dw)
{
	COMPUTE_DBG("* compute_memory_pool_init() initial_size_in_dw = %u\n",
		initial_size_in_dw);

	/* XXX: pool->shadow is used when the buffer needs to be resized, but
	 * resizing does not work at the moment.
	 * pool->shadow = (uint32_t*)CALLOC(4, pool->size_in_dw);
	 */
	pool->next_id = 1;
	pool->size_in_dw = initial_size_in_dw;
	pool->bo = (struct r600_resource*)create_pool_texture(pool->screen,
							pool->size_in_dw);
}

/**
 * Frees every item in the pool, the backing resource, and the pool struct
 * itself.
 */
void compute_memory_pool_delete(struct compute_memory_pool* pool)
{
	COMPUTE_DBG("* compute_memory_pool_delete()\n");
	free(pool->shadow);
	if (pool->bo) {
		pool->screen->screen.resource_destroy((struct pipe_screen *)
			pool->screen, (struct pipe_resource *)pool->bo);
	}
	free(pool);
}

/**
 * Searches for a free range in the pool that is large enough for the request
 * and returns its offset in dwords; returns -1 on failure.
 */
int64_t compute_memory_prealloc_chunk(
	struct compute_memory_pool* pool,
	int64_t size_in_dw)
{
	struct compute_memory_item *item;

	int64_t last_end = 0;

	assert(size_in_dw <= pool->size_in_dw);

	COMPUTE_DBG("* compute_memory_prealloc_chunk() size_in_dw = %ld\n",
		size_in_dw);

	for (item = pool->item_list; item; item = item->next) {
		if (item->start_in_dw > -1) {
			if (item->start_in_dw - last_end > size_in_dw) {
				return last_end;
			}

			/* Start the next candidate at the following 1024-dw
			 * boundary after this item. */
			last_end = item->start_in_dw + item->size_in_dw;
			last_end += (1024 - last_end % 1024);
		}
	}

	if (pool->size_in_dw - last_end < size_in_dw) {
		return -1;
	}

	return last_end;
}
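
/* Worked example of the first-fit scan above: with a single placed item
 * occupying dwords [0, 100), last_end becomes 100 and is then bumped to the
 * next 1024-dw boundary, i.e. 1024.  A request for 512 dw is therefore
 * offered offset 1024, provided pool->size_in_dw - 1024 >= 512. */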

/**
 * Searches for the item after which the new chunk at start_in_dw should be
 * linked.
 */
struct compute_memory_item* compute_memory_postalloc_chunk(
	struct compute_memory_pool* pool,
	int64_t start_in_dw)
{
	struct compute_memory_item* item;

	COMPUTE_DBG("* compute_memory_postalloc_chunk() start_in_dw = %ld\n",
		start_in_dw);

	for (item = pool->item_list; item; item = item->next) {
		if (item->next) {
			if (item->start_in_dw < start_in_dw
				&& item->next->start_in_dw > start_in_dw) {
				return item;
			}
		}
		else {
			/* end of chain */
			assert(item->start_in_dw < start_in_dw);
			return item;
		}
	}

	assert(0 && "unreachable");
	return NULL;
}
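
/* Placed items appear in the list in increasing start_in_dw order, so the
 * walk above finds the predecessor after which the new chunk will be
 * linked by the caller. */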

/**
 * Grows the pool to new_size_in_dw, preserving its contents.
 */
void compute_memory_grow_pool(struct compute_memory_pool* pool,
	struct pipe_context * pipe, int new_size_in_dw)
{
	COMPUTE_DBG("* compute_memory_grow_pool() new_size_in_dw = %d\n",
		new_size_in_dw);

	assert(new_size_in_dw >= pool->size_in_dw);

	assert(!pool->bo && "Growing the global memory pool is not yet "
		"supported.  You will see this message if you are trying to "
		"use more than 64 kb of memory");

	if (!pool->bo) {
		compute_memory_pool_init(pool, 1024 * 16);
	} else {
		/* XXX: Growing memory pools does not work at the moment.  I think
		 * it is because we are using fragment shaders to copy data to
		 * the new texture and some of the compute registers are being
		 * included in the 3D command stream. */
		fprintf(stderr, "Warning: growing the global memory pool to "
				"more than 64 kb is not yet supported\n");
		new_size_in_dw += 1024 - (new_size_in_dw % 1024);

		COMPUTE_DBG("  Aligned size = %d\n", new_size_in_dw);

		compute_memory_shadow(pool, pipe, 1);
		pool->shadow = (uint32_t*)realloc(pool->shadow, new_size_in_dw*4);
		pool->size_in_dw = new_size_in_dw;
		pool->screen->screen.resource_destroy(
			(struct pipe_screen *)pool->screen,
			(struct pipe_resource *)pool->bo);
		pool->bo = (struct r600_resource*)create_pool_texture(
							pool->screen,
							pool->size_in_dw);
		compute_memory_shadow(pool, pipe, 0);
	}
}
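
/* At the moment only the first-allocation path above is really exercised:
 * the initial grow creates the pool with 1024 * 16 dwords (64 KiB), while the
 * shadow-copy resize path is guarded by the assertion and known not to work
 * yet. */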

/**
 * Copies the pool from device to host, or host to device.
 */
void compute_memory_shadow(struct compute_memory_pool* pool,
	struct pipe_context * pipe, int device_to_host)
{
	struct compute_memory_item chunk;

	COMPUTE_DBG("* compute_memory_shadow() device_to_host = %d\n",
		device_to_host);

	chunk.id = 0;
	chunk.start_in_dw = 0;
	chunk.size_in_dw = pool->size_in_dw;
	chunk.prev = chunk.next = NULL;
	compute_memory_transfer(pool, pipe, device_to_host, &chunk,
				pool->shadow, 0, pool->size_in_dw*4);
}
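
/* The whole pool is shadowed as a single temporary chunk covering
 * [0, size_in_dw), so pool->shadow must hold size_in_dw * 4 bytes. */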

/**
 * Assigns pool offsets to all pending allocations.
 */
void compute_memory_finalize_pending(struct compute_memory_pool* pool,
	struct pipe_context * pipe)
{
	struct compute_memory_item *pending_list = NULL, *end_p = NULL;
	struct compute_memory_item *item, *next;

	int64_t allocated = 0;
	int64_t unallocated = 0;

	COMPUTE_DBG("* compute_memory_finalize_pending()\n");

	for (item = pool->item_list; item; item = item->next) {
		COMPUTE_DBG("list: %ld %p\n", item->start_in_dw, item->next);
	}

	/* Unlink every pending item (start_in_dw == -1) from item_list and
	 * append it to pending_list, keeping track of how much space the
	 * placed items and the pending ones will need. */
	for (item = pool->item_list; item; item = next) {
		next = item->next;

		if (item->start_in_dw == -1) {
			if (end_p) {
				end_p->next = item;
			}
			else {
				pending_list = item;
			}

			if (item->prev) {
				item->prev->next = next;
			}
			else {
				pool->item_list = next;
			}

			if (next) {
				next->prev = item->prev;
			}

			item->prev = end_p;
			item->next = NULL;
			end_p = item;

			unallocated += item->size_in_dw+1024;
		}
		else {
			allocated += item->size_in_dw;
		}
	}

	if (pool->size_in_dw < allocated+unallocated) {
		compute_memory_grow_pool(pool, pipe, allocated+unallocated);
	}

	/* Place each pending item and link it back into item_list in
	 * address order. */
	for (item = pending_list; item; item = next) {
		next = item->next;

		int64_t start_in_dw;

		while ((start_in_dw=compute_memory_prealloc_chunk(pool,
						item->size_in_dw)) == -1) {
			int64_t need = item->size_in_dw+2048 -
						(pool->size_in_dw - allocated);

			need += 1024 - (need % 1024);

			if (need > 0) {
				compute_memory_grow_pool(pool,
						pipe,
						pool->size_in_dw + need);
			}
			else {
				need = pool->size_in_dw / 10;
				need += 1024 - (need % 1024);
				compute_memory_grow_pool(pool,
						pipe,
						pool->size_in_dw + need);
			}
		}

		item->start_in_dw = start_in_dw;
		item->next = NULL;
		item->prev = NULL;

		if (pool->item_list) {
			struct compute_memory_item *pos;

			pos = compute_memory_postalloc_chunk(pool, start_in_dw);
			item->prev = pos;
			item->next = pos->next;
			pos->next = item;

			if (item->next) {
				item->next->prev = item;
			}
		}
		else {
			pool->item_list = item;
		}

		allocated += item->size_in_dw;
	}
}
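
/* Arithmetic of the grow heuristic above, with illustrative numbers: for a
 * 16384-dw pool of which 10000 dw are counted as allocated, a pending
 * 8192-dw item gives need = 8192 + 2048 - (16384 - 10000) = 3856, rounded up
 * to 4096, so the pool would be grown to 20480 dw before the placement is
 * retried. */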

void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
{
	struct compute_memory_item *item, *next;

	COMPUTE_DBG("* compute_memory_free() id = %ld \n", id);

	for (item = pool->item_list; item; item = next) {
		next = item->next;

		if (item->id == id) {
			if (item->prev) {
				item->prev->next = item->next;
			}
			else {
				pool->item_list = item->next;
			}

			if (item->next) {
				item->next->prev = item->prev;
			}

			free(item);

			return;
		}
	}

	fprintf(stderr, "Internal error, invalid id %ld "
		"for compute_memory_free\n", id);

	assert(0 && "error");
}

/**
 * Creates a new pending allocation; it gets a pool offset only once
 * compute_memory_finalize_pending() is called.
 */
struct compute_memory_item* compute_memory_alloc(
	struct compute_memory_pool* pool,
	int64_t size_in_dw)
{
	struct compute_memory_item *new_item;
	struct compute_memory_item *last_item;

	COMPUTE_DBG("* compute_memory_alloc() size_in_dw = %ld\n", size_in_dw);

	new_item = (struct compute_memory_item *)
				CALLOC(sizeof(struct compute_memory_item), 1);
	new_item->size_in_dw = size_in_dw;
	new_item->start_in_dw = -1; /* mark pending */
	new_item->id = pool->next_id++;
	new_item->pool = pool;

	if (pool->item_list) {
		for (last_item = pool->item_list; last_item->next;
						last_item = last_item->next);

		last_item->next = new_item;
		new_item->prev = last_item;
	}
	else {
		pool->item_list = new_item;
	}

	return new_item;
}
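
/* Sizes are in dwords: asking for 1 KiB of buffer space, for example, means
 * calling compute_memory_alloc(pool, 256).  The item stays pending
 * (start_in_dw == -1) until compute_memory_finalize_pending() runs. */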

/**
 * Transfers data between host and device; offset and size are in bytes.
 */
void compute_memory_transfer(
	struct compute_memory_pool* pool,
	struct pipe_context * pipe,
	int device_to_host,
	struct compute_memory_item* chunk,
	void* data,
	int offset_in_chunk,
	int size)
{
	int64_t aligned_size = pool->size_in_dw;
	struct pipe_resource* gart = (struct pipe_resource*)pool->bo;
	/* Byte offset of the requested range from the start of the pool. */
	int64_t internal_offset = chunk->start_in_dw*4 + offset_in_chunk;

	struct pipe_transfer *xfer;
	uint32_t *map;

	assert(gart);

	COMPUTE_DBG("* compute_memory_transfer() device_to_host = %d, "
		"offset_in_chunk = %d, size = %d\n", device_to_host,
		offset_in_chunk, size);

	if (device_to_host) {
		xfer = pipe->get_transfer(pipe, gart, 0, PIPE_TRANSFER_READ,
			&(struct pipe_box) { .width = aligned_size,
			.height = 1, .depth = 1 });
		assert(xfer);
		map = pipe->transfer_map(pipe, xfer);
		assert(map);
		/* internal_offset is in bytes, so do the pointer arithmetic
		 * on a byte pointer rather than on the uint32_t map. */
		memcpy(data, (char *)map + internal_offset, size);
		pipe->transfer_unmap(pipe, xfer);
		pipe->transfer_destroy(pipe, xfer);
	} else {
		xfer = pipe->get_transfer(pipe, gart, 0, PIPE_TRANSFER_WRITE,
			&(struct pipe_box) { .width = aligned_size,
			.height = 1, .depth = 1 });
		assert(xfer);
		map = pipe->transfer_map(pipe, xfer);
		assert(map);
		memcpy((char *)map + internal_offset, data, size);
		pipe->transfer_unmap(pipe, xfer);
		pipe->transfer_destroy(pipe, xfer);
	}
}
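
/* Note that each transfer maps the entire pool resource and only then copies
 * the requested byte range; offset_in_chunk is relative to the start of the
 * chunk, not to the start of the pool. */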

/**
 * Transfers data directly between a chunk and another resource; intended for
 * VRAM<->GART copies.
 */
void compute_memory_transfer_direct(
	struct compute_memory_pool* pool,
	int chunk_to_data,
	struct compute_memory_item* chunk,
	struct r600_resource* data,
	int offset_in_chunk,
	int offset_in_data,
	int size)
{
	/* TODO: DMA */
}