radeon_drm_cs.c revision 6caac3ecb8bc32d92c35fdb1f0a67541ffa8af29
1/*
2 * Copyright © 2008 Jérôme Glisse
3 * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
16 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
18 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * The above copyright notice and this permission notice (including the
24 * next paragraph) shall be included in all copies or substantial portions
25 * of the Software.
26 */
27/*
28 * Authors:
29 *      Marek Olšák <maraeo@gmail.com>
30 *
31 * Based on work from libdrm_radeon by:
32 *      Aapo Tahkola <aet@rasterburn.org>
33 *      Nicolai Haehnle <prefect_@gmx.net>
34 *      Jérôme Glisse <glisse@freedesktop.org>
35 */
36
37/*
    This file replaces libdrm's radeon_cs_gem with our own implementation.
39    It's optimized specifically for r300g, but r600g could use it as well.
40    Reloc writes and space checking are faster and simpler than their
41    counterparts in libdrm (the time complexity of all the functions
42    is O(1) in nearly all scenarios, thanks to hashing).
43
44    It works like this:
45
46    cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and
47    also adds the size of 'buf' to the used_gart and used_vram winsys variables
48    based on the domains, which are simply or'd for the accounting purposes.
    The adding is skipped if the reloc is already present in the list, but it
    still accounts for any newly-referenced domains.
51
52    cs_validate is then called, which just checks:
53        used_vram/gart < vram/gart_size * 0.8
54    The 0.8 number allows for some memory fragmentation. If the validation
    fails, the pipe driver flushes CS and tries to do the validation again,
56    i.e. it validates only that one operation. If it fails again, it drops
57    the operation on the floor and prints some nasty message to stderr.
58    (done in the pipe driver)
59
60    cs_write_reloc(cs, buf) just writes a reloc that has been added using
61    cs_add_reloc. The read_domain and write_domain parameters have been removed,
62    because we already specify them in cs_add_reloc.
63*/
64
65#include "radeon_drm_cs.h"
66
67#include "util/u_memory.h"
68
69#include <stdio.h>
70#include <stdlib.h>
71#include <stdint.h>
72#include <xf86drm.h>
73
/* Size of one struct drm_radeon_cs_reloc expressed in 32-bit words.
 * It is the unit by which the relocs chunk length grows and the factor
 * that turns a relocation index into the dword offset written to the CS. */
#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))
75
76static boolean radeon_init_cs_context(struct radeon_cs_context *csc, int fd)
77{
78    csc->fd = fd;
79    csc->nrelocs = 512;
80    csc->relocs_bo = (struct radeon_bo**)
81                     CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
82    if (!csc->relocs_bo) {
83        return FALSE;
84    }
85
86    csc->relocs = (struct drm_radeon_cs_reloc*)
87                  CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
88    if (!csc->relocs) {
89        FREE(csc->relocs_bo);
90        return FALSE;
91    }
92
93    csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
94    csc->chunks[0].length_dw = 0;
95    csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
96    csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
97    csc->chunks[1].length_dw = 0;
98    csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
99
100    csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
101    csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
102
103    csc->cs.num_chunks = 2;
104    csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;
105    return TRUE;
106}
107
108static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
109{
110    unsigned i;
111
112    for (i = 0; i < csc->crelocs; i++) {
113        p_atomic_dec(&csc->relocs_bo[i]->num_cs_references);
114        radeon_bo_reference(&csc->relocs_bo[i], NULL);
115    }
116
117    csc->crelocs = 0;
118    csc->chunks[0].length_dw = 0;
119    csc->chunks[1].length_dw = 0;
120    csc->used_gart = 0;
121    csc->used_vram = 0;
122    memset(csc->is_handle_added, 0, sizeof(csc->is_handle_added));
123}
124
125static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
126{
127    radeon_cs_context_cleanup(csc);
128    FREE(csc->relocs_bo);
129    FREE(csc->relocs);
130}
131
132static struct r300_winsys_cs *radeon_drm_cs_create(struct r300_winsys_screen *rws)
133{
134    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
135    struct radeon_drm_cs *cs;
136
137    cs = CALLOC_STRUCT(radeon_drm_cs);
138    if (!cs) {
139        return NULL;
140    }
141
142    cs->ws = ws;
143
144    if (!radeon_init_cs_context(&cs->csc1, cs->ws->fd)) {
145        FREE(cs);
146        return NULL;
147    }
148    if (!radeon_init_cs_context(&cs->csc2, cs->ws->fd)) {
149        radeon_destroy_cs_context(&cs->csc1);
150        FREE(cs);
151        return NULL;
152    }
153
154    /* Set the first command buffer as current. */
155    cs->csc = &cs->csc1;
156    cs->cst = &cs->csc2;
157    cs->base.buf = cs->csc->buf;
158
159    p_atomic_inc(&ws->num_cs);
160    return &cs->base;
161}
162
/* Append one dword to a command stream: store 'value' at buf[cdw] and
 * advance cdw. 'cs' is evaluated twice, so it must not have side effects.
 * The whole expansion is parenthesized so the macro can be used safely
 * inside a larger expression. */
#define OUT_CS(cs, value) ((cs)->buf[(cs)->cdw++] = (value))
164
165static INLINE void update_domains(struct drm_radeon_cs_reloc *reloc,
166                                  enum r300_buffer_domain rd,
167                                  enum r300_buffer_domain wd,
168                                  enum r300_buffer_domain *added_domains)
169{
170    *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);
171
172    if (reloc->read_domains & wd) {
173        reloc->read_domains = rd;
174        reloc->write_domain = wd;
175    } else if (rd & reloc->write_domain) {
176        reloc->read_domains = rd;
177        reloc->write_domain |= wd;
178    } else {
179        reloc->read_domains |= rd;
180        reloc->write_domain |= wd;
181    }
182}
183
184int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
185{
186    struct drm_radeon_cs_reloc *reloc;
187    unsigned i;
188    unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
189
190    if (csc->is_handle_added[hash]) {
191        reloc = csc->relocs_hashlist[hash];
192        if (reloc->handle == bo->handle) {
193            return csc->reloc_indices_hashlist[hash];
194        }
195
196        /* Hash collision, look for the BO in the list of relocs linearly. */
197        for (i = csc->crelocs; i != 0;) {
198            --i;
199            reloc = &csc->relocs[i];
200            if (reloc->handle == bo->handle) {
201                /* Put this reloc in the hash list.
202                 * This will prevent additional hash collisions if there are
203                 * several subsequent get_reloc calls of the same buffer.
204                 *
205                 * Example: Assuming buffers A,B,C collide in the hash list,
206                 * the following sequence of relocs:
207                 *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
208                 * will collide here: ^ and here:   ^,
209                 * meaning that we should get very few collisions in the end. */
210                csc->relocs_hashlist[hash] = reloc;
211                csc->reloc_indices_hashlist[hash] = i;
212                /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
213                return i;
214            }
215        }
216    }
217
218    return -1;
219}
220
221static void radeon_add_reloc(struct radeon_cs_context *csc,
222                             struct radeon_bo *bo,
223                             enum r300_buffer_domain rd,
224                             enum r300_buffer_domain wd,
225                             enum r300_buffer_domain *added_domains)
226{
227    struct drm_radeon_cs_reloc *reloc;
228    unsigned i;
229    unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
230
231    if (csc->is_handle_added[hash]) {
232        reloc = csc->relocs_hashlist[hash];
233        if (reloc->handle == bo->handle) {
234            update_domains(reloc, rd, wd, added_domains);
235            return;
236        }
237
238        /* Hash collision, look for the BO in the list of relocs linearly. */
239        for (i = csc->crelocs; i != 0;) {
240            --i;
241            reloc = &csc->relocs[i];
242            if (reloc->handle == bo->handle) {
243                update_domains(reloc, rd, wd, added_domains);
244
245                csc->relocs_hashlist[hash] = reloc;
246                csc->reloc_indices_hashlist[hash] = i;
247                /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
248                return;
249            }
250        }
251    }
252
253    /* New relocation, check if the backing array is large enough. */
254    if (csc->crelocs >= csc->nrelocs) {
255        uint32_t size;
256        csc->nrelocs += 10;
257
258        size = csc->nrelocs * sizeof(struct radeon_bo*);
259        csc->relocs_bo = (struct radeon_bo**)realloc(csc->relocs_bo, size);
260
261        size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
262        csc->relocs = (struct drm_radeon_cs_reloc*)realloc(csc->relocs, size);
263
264        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
265    }
266
267    /* Initialize the new relocation. */
268    p_atomic_inc(&bo->num_cs_references);
269    radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo);
270    reloc = &csc->relocs[csc->crelocs];
271    reloc->handle = bo->handle;
272    reloc->read_domains = rd;
273    reloc->write_domain = wd;
274    reloc->flags = 0;
275
276    csc->is_handle_added[hash] = TRUE;
277    csc->relocs_hashlist[hash] = reloc;
278    csc->reloc_indices_hashlist[hash] = csc->crelocs;
279
280    csc->chunks[1].length_dw += RELOC_DWORDS;
281    csc->crelocs++;
282
283    *added_domains = rd | wd;
284}
285
286static void radeon_drm_cs_add_reloc(struct r300_winsys_cs *rcs,
287                                    struct r300_winsys_cs_handle *buf,
288                                    enum r300_buffer_domain rd,
289                                    enum r300_buffer_domain wd)
290{
291    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
292    struct radeon_bo *bo = (struct radeon_bo*)buf;
293    enum r300_buffer_domain added_domains;
294
295    radeon_add_reloc(cs->csc, bo, rd, wd, &added_domains);
296
297    if (!added_domains)
298        return;
299
300    if (added_domains & R300_DOMAIN_GTT)
301        cs->csc->used_gart += bo->size;
302    if (added_domains & R300_DOMAIN_VRAM)
303        cs->csc->used_vram += bo->size;
304}
305
306static boolean radeon_drm_cs_validate(struct r300_winsys_cs *rcs)
307{
308    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
309
310    return cs->csc->used_gart < cs->ws->gart_size * 0.8 &&
311           cs->csc->used_vram < cs->ws->vram_size * 0.8;
312}
313
314static void radeon_drm_cs_write_reloc(struct r300_winsys_cs *rcs,
315                                      struct r300_winsys_cs_handle *buf)
316{
317    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
318    struct radeon_bo *bo = (struct radeon_bo*)buf;
319
320    unsigned index = radeon_get_reloc(cs->csc, bo);
321
322    if (index == -1) {
323        fprintf(stderr, "r300: Cannot get a relocation in %s.\n", __func__);
324        return;
325    }
326
327    OUT_CS(&cs->base, 0xc0001000);
328    OUT_CS(&cs->base, index * RELOC_DWORDS);
329}
330
331static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param)
332{
333    struct radeon_cs_context *csc = (struct radeon_cs_context*)param;
334    unsigned i;
335
336    if (drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
337                            &csc->cs, sizeof(struct drm_radeon_cs))) {
338        if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
339            unsigned i;
340
341            fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
342            for (i = 0; i < csc->chunks[0].length_dw; i++) {
343                fprintf(stderr, "0x%08X\n", csc->buf[i]);
344            }
345        } else {
346            fprintf(stderr, "radeon: The kernel rejected CS, "
347                    "see dmesg for more information.\n");
348        }
349    }
350
351    for (i = 0; i < csc->crelocs; i++)
352        p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls);
353    return NULL;
354}
355
356void radeon_drm_cs_sync_flush(struct radeon_drm_cs *cs)
357{
358    /* Wait for any pending ioctl to complete. */
359    if (cs->thread) {
360        pipe_thread_wait(cs->thread);
361        cs->thread = 0;
362    }
363}
364
/* Boolean option named "RADEON_THREAD" with a default of TRUE.
 * NOTE(review): this macro presumably generates debug_get_option_thread(),
 * which radeon_drm_cs_flush consults before submitting a CS from a
 * separate thread; confirm against the macro's definition. */
DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", TRUE)
366
367static void radeon_drm_cs_flush(struct r300_winsys_cs *rcs, unsigned flags)
368{
369    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
370    struct radeon_cs_context *tmp;
371
372    radeon_drm_cs_sync_flush(cs);
373
374    /* If the CS is not empty, emit it in a newly-spawned thread. */
375    if (cs->base.cdw) {
376        unsigned i, crelocs = cs->csc->crelocs;
377
378        cs->csc->chunks[0].length_dw = cs->base.cdw;
379
380        for (i = 0; i < crelocs; i++) {
381            /* Update the number of active asynchronous CS ioctls for the buffer. */
382            p_atomic_inc(&cs->csc->relocs_bo[i]->num_active_ioctls);
383
384            /* Update whether the buffer is busy for write. */
385            if (cs->csc->relocs[i].write_domain) {
386                cs->csc->relocs_bo[i]->busy_for_write = TRUE;
387            }
388        }
389
390        if (cs->ws->num_cpus > 1 && debug_get_option_thread() &&
391            (flags & R300_FLUSH_ASYNC)) {
392            cs->thread = pipe_thread_create(radeon_drm_cs_emit_ioctl, cs->csc);
393            assert(cs->thread);
394        } else {
395            radeon_drm_cs_emit_ioctl(cs->csc);
396        }
397    }
398
399    /* Flip command streams. */
400    tmp = cs->csc;
401    cs->csc = cs->cst;
402    cs->cst = tmp;
403
404    /* Prepare a new CS. */
405    radeon_cs_context_cleanup(cs->csc);
406
407    cs->base.buf = cs->csc->buf;
408    cs->base.cdw = 0;
409}
410
411static void radeon_drm_cs_destroy(struct r300_winsys_cs *rcs)
412{
413    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
414    radeon_drm_cs_sync_flush(cs);
415    radeon_cs_context_cleanup(&cs->csc1);
416    radeon_cs_context_cleanup(&cs->csc2);
417    p_atomic_dec(&cs->ws->num_cs);
418    radeon_destroy_cs_context(&cs->csc1);
419    radeon_destroy_cs_context(&cs->csc2);
420    FREE(cs);
421}
422
423static void radeon_drm_cs_set_flush(struct r300_winsys_cs *rcs,
424                                    void (*flush)(void *ctx, unsigned flags),
425                                    void *user)
426{
427    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
428    cs->flush_cs = flush;
429    cs->flush_data = user;
430}
431
432static boolean radeon_bo_is_referenced(struct r300_winsys_cs *rcs,
433                                       struct r300_winsys_cs_handle *_buf)
434{
435    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
436    struct radeon_bo *bo = (struct radeon_bo*)_buf;
437
438    return radeon_bo_is_referenced_by_cs(cs, bo);
439}
440
441void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
442{
443    ws->base.cs_create = radeon_drm_cs_create;
444    ws->base.cs_destroy = radeon_drm_cs_destroy;
445    ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
446    ws->base.cs_validate = radeon_drm_cs_validate;
447    ws->base.cs_write_reloc = radeon_drm_cs_write_reloc;
448    ws->base.cs_flush = radeon_drm_cs_flush;
449    ws->base.cs_set_flush = radeon_drm_cs_set_flush;
450    ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
451}
452