radeon_drm_cs.c revision 076db67217741aa820feadccc66067516d4cf4ca
/*
 * Copyright © 2008 Jérôme Glisse
 * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 *
 * Based on work from libdrm_radeon by:
 *      Aapo Tahkola <aet@rasterburn.org>
 *      Nicolai Haehnle <prefect_@gmx.net>
 *      Jérôme Glisse <glisse@freedesktop.org>
 */

/*
    This file replaces libdrm's radeon_cs_gem with our own implementation.
    It's optimized specifically for Radeon DRM.
    Adding buffers and space checking are faster and simpler than their
    counterparts in libdrm (the time complexity of all the functions
    is O(1) in nearly all scenarios, thanks to hashing).

    It works like this:

    cs_add_buffer(cs, buf, usage, domains, priority) adds a new relocation and
    also adds the size of 'buf' to the used_gart and used_vram winsys variables
    based on the domains, which are simply or'd for the accounting purposes.
    The addition is skipped if the reloc is already present in the list, but it
    still accounts for any newly-referenced domains.

    cs_validate is then called, which just checks:
        used_vram/gart < vram/gart_size * 0.8
    The 0.8 factor allows for some memory fragmentation. If the validation
    fails, the pipe driver flushes the CS and tries to do the validation again,
    i.e. it validates only that one operation. If it fails again, it drops
    the operation on the floor and prints some nasty message to stderr.
    (done in the pipe driver)

    cs_write_reloc(cs, buf) just writes a reloc that has been added using
    cs_add_buffer. The read_domain and write_domain parameters have been removed,
    because we already specify them in cs_add_buffer.
*/
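
/*
    A rough usage sketch of the flow described above, as seen from a pipe
    driver (illustrative pseudocode only, not part of this file; 'buf' and
    'prio' are hypothetical):

        cs_add_buffer(cs, buf, RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM, prio);
        if (!cs_validate(cs)) {
            cs_flush(cs, RADEON_FLUSH_ASYNC, NULL);    // flush, then retry once
            cs_add_buffer(cs, buf, RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM, prio);
            if (!cs_validate(cs))
                fprintf(stderr, "radeon: dropping the operation\n");
        }
*/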

#include "radeon_drm_cs.h"

#include "util/u_memory.h"
#include "os/os_time.h"

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <xf86drm.h>


#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))
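/* RELOC_DWORDS is the size of one kernel relocation entry in dwords;
 * radeon_add_buffer() grows the RELOCS chunk (csc->chunks[1].length_dw)
 * by this amount for every buffer it appends to csc->relocs. */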

static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs);
static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src);

static struct radeon_winsys_ctx *radeon_drm_ctx_create(struct radeon_winsys *ws)
{
    /* No context support here. Just return the winsys pointer
     * as the "context". */
    return (struct radeon_winsys_ctx*)ws;
}

static void radeon_drm_ctx_destroy(struct radeon_winsys_ctx *ctx)
{
    /* No context support here. */
}

static bool radeon_init_cs_context(struct radeon_cs_context *csc,
                                   struct radeon_drm_winsys *ws)
{
    int i;

    csc->fd = ws->fd;
    csc->nrelocs = 512;
    csc->relocs_bo = (struct radeon_bo_item*)
                     CALLOC(1, csc->nrelocs * sizeof(csc->relocs_bo[0]));
    if (!csc->relocs_bo) {
        return false;
    }

    csc->relocs = (struct drm_radeon_cs_reloc*)
                  CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
    if (!csc->relocs) {
        FREE(csc->relocs_bo);
        return false;
    }

    csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
    csc->chunks[0].length_dw = 0;
    csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
    csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
    csc->chunks[1].length_dw = 0;
    csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
    csc->chunks[2].length_dw = 2;
    csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags;

    csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
    csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
    csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];

    csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;

    for (i = 0; i < ARRAY_SIZE(csc->reloc_indices_hashlist); i++) {
        csc->reloc_indices_hashlist[i] = -1;
    }
    return true;
}

static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
{
    unsigned i;

    for (i = 0; i < csc->crelocs; i++) {
        p_atomic_dec(&csc->relocs_bo[i].bo->num_cs_references);
        radeon_bo_reference(&csc->relocs_bo[i].bo, NULL);
    }

    csc->crelocs = 0;
    csc->validated_crelocs = 0;
    csc->chunks[0].length_dw = 0;
    csc->chunks[1].length_dw = 0;

    for (i = 0; i < ARRAY_SIZE(csc->reloc_indices_hashlist); i++) {
        csc->reloc_indices_hashlist[i] = -1;
    }
}

static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
{
    radeon_cs_context_cleanup(csc);
    FREE(csc->relocs_bo);
    FREE(csc->relocs);
}


static struct radeon_winsys_cs *
radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
                     enum ring_type ring_type,
                     void (*flush)(void *ctx, unsigned flags,
                                   struct pipe_fence_handle **fence),
                     void *flush_ctx)
{
    struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)ctx;
    struct radeon_drm_cs *cs;

    cs = CALLOC_STRUCT(radeon_drm_cs);
    if (!cs) {
        return NULL;
    }
    util_queue_fence_init(&cs->flush_completed);

    cs->ws = ws;
    cs->flush_cs = flush;
    cs->flush_data = flush_ctx;

    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
        FREE(cs);
        return NULL;
    }
    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
        radeon_destroy_cs_context(&cs->csc1);
        FREE(cs);
        return NULL;
    }
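
    /* The CS is double-buffered: 'csc' is the context being filled with
     * commands and relocations, while 'cst' holds the previous CS, which may
     * still be in flight in the submission thread. The two are swapped in
     * radeon_drm_cs_flush(). */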

    /* Set the first command buffer as current. */
    cs->csc = &cs->csc1;
    cs->cst = &cs->csc2;
    cs->base.current.buf = cs->csc->buf;
    cs->base.current.max_dw = ARRAY_SIZE(cs->csc->buf);
    cs->ring_type = ring_type;

    p_atomic_inc(&ws->num_cs);
    return &cs->base;
}

#define OUT_CS(cs, value) (cs)->current.buf[(cs)->current.cdw++] = (value)

static inline void update_reloc(struct drm_radeon_cs_reloc *reloc,
                                enum radeon_bo_domain rd,
                                enum radeon_bo_domain wd,
                                unsigned priority,
                                enum radeon_bo_domain *added_domains)
{
    *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);

    reloc->read_domains |= rd;
    reloc->write_domain |= wd;
    reloc->flags = MAX2(reloc->flags, priority);
}

int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo)
{
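    /* This relies on the hashlist size being a power of two, so that masking
     * the handle below is equivalent to a modulo. */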
    unsigned hash = bo->handle & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
    int i = csc->reloc_indices_hashlist[hash];

    /* Either the slot is empty (not found) or it already holds this BO (found). */
    if (i == -1 || csc->relocs_bo[i].bo == bo)
        return i;

    /* Hash collision, look for the BO in the list of relocs linearly. */
    for (i = csc->crelocs - 1; i >= 0; i--) {
        if (csc->relocs_bo[i].bo == bo) {
            /* Put this reloc in the hash list.
             * This will prevent additional hash collisions if there are
             * several consecutive lookup_buffer calls for the same buffer.
             *
             * Example: Assuming buffers A,B,C collide in the hash list,
             * the following sequence of relocs:
             *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
             * will collide here: ^ and here:   ^,
             * meaning that we should get very few collisions in the end. */
            csc->reloc_indices_hashlist[hash] = i;
            return i;
        }
    }
    return -1;
}

static unsigned radeon_add_buffer(struct radeon_drm_cs *cs,
                                  struct radeon_bo *bo,
                                  enum radeon_bo_usage usage,
                                  enum radeon_bo_domain domains,
                                  unsigned priority,
                                  enum radeon_bo_domain *added_domains)
{
    struct radeon_cs_context *csc = cs->csc;
    struct drm_radeon_cs_reloc *reloc;
    unsigned hash = bo->handle & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
    int i = -1;

    assert(priority < 64);
    *added_domains = 0;

    i = radeon_lookup_buffer(csc, bo);

    if (i >= 0) {
        reloc = &csc->relocs[i];
        update_reloc(reloc, rd, wd, priority / 4, added_domains);
        csc->relocs_bo[i].priority_usage |= 1llu << priority;

        /* For async DMA, every add_buffer call must add a buffer to the list
         * no matter how many duplicates there are. This is due to the fact
         * that the DMA CS checker doesn't use NOP packets for offset patching,
         * but always uses the i-th buffer from the list to patch the i-th
         * offset. If there are N offsets in a DMA CS, there must also be N
         * buffers in the relocation list.
         *
         * This doesn't have to be done if virtual memory is enabled,
         * because there is no offset patching with virtual memory.
         */
        if (cs->ring_type != RING_DMA || cs->ws->info.has_virtual_memory) {
            return i;
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->crelocs >= csc->nrelocs) {
        uint32_t size;
        csc->nrelocs += 10;

        size = csc->nrelocs * sizeof(csc->relocs_bo[0]);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->crelocs].bo = NULL;
    csc->relocs_bo[csc->crelocs].priority_usage = 1llu << priority;
    radeon_bo_reference(&csc->relocs_bo[csc->crelocs].bo, bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->crelocs];
    reloc->handle = bo->handle;
    reloc->read_domains = rd;
    reloc->write_domain = wd;
    reloc->flags = priority / 4;

    csc->reloc_indices_hashlist[hash] = csc->crelocs;

    csc->chunks[1].length_dw += RELOC_DWORDS;

    *added_domains = rd | wd;
    return csc->crelocs++;
}

static unsigned radeon_drm_cs_add_buffer(struct radeon_winsys_cs *rcs,
                                         struct pb_buffer *buf,
                                         enum radeon_bo_usage usage,
                                         enum radeon_bo_domain domains,
                                         enum radeon_bo_priority priority)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    enum radeon_bo_domain added_domains;
    unsigned index = radeon_add_buffer(cs, bo, usage, domains, priority,
                                       &added_domains);

    if (added_domains & RADEON_DOMAIN_VRAM)
        cs->base.used_vram += bo->base.size;
    else if (added_domains & RADEON_DOMAIN_GTT)
        cs->base.used_gart += bo->base.size;

    return index;
}

static int radeon_drm_cs_lookup_buffer(struct radeon_winsys_cs *rcs,
                                       struct pb_buffer *buf)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    return radeon_lookup_buffer(cs->csc, (struct radeon_bo*)buf);
}

static bool radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    bool status =
        cs->base.used_gart < cs->ws->info.gart_size * 0.8 &&
        cs->base.used_vram < cs->ws->info.vram_size * 0.8;

    if (status) {
        cs->csc->validated_crelocs = cs->csc->crelocs;
    } else {
        /* Remove the recently-added buffers. The validation failed with them
         * and the CS is about to be flushed because of that. Keep only
         * the already-validated buffers. */
        unsigned i;

        for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
            p_atomic_dec(&cs->csc->relocs_bo[i].bo->num_cs_references);
            radeon_bo_reference(&cs->csc->relocs_bo[i].bo, NULL);
        }
        cs->csc->crelocs = cs->csc->validated_crelocs;

        /* Flush if there are any relocs. Clean up otherwise. */
        if (cs->csc->crelocs) {
            cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
        } else {
            radeon_cs_context_cleanup(cs->csc);
            cs->base.used_vram = 0;
            cs->base.used_gart = 0;

            assert(cs->base.current.cdw == 0);
            if (cs->base.current.cdw != 0) {
                fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
            }
        }
    }
    return status;
}

static bool radeon_drm_cs_check_space(struct radeon_winsys_cs *rcs, unsigned dw)
{
   assert(rcs->current.cdw <= rcs->current.max_dw);
   return rcs->current.max_dw - rcs->current.cdw >= dw;
}

static bool radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    vram += cs->base.used_vram;
    gtt += cs->base.used_gart;

    /* Anything that goes above the VRAM size should go to GTT. */
    if (vram > cs->ws->info.vram_size)
        gtt += vram - cs->ws->info.vram_size;

    /* Now we just need to check if we have enough GTT. */
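    /* Worked example: with vram_size = 1024 MB, gart_size = 1024 MB,
     * vram = 1200 MB and gtt = 100 MB, the 176 MB of VRAM overflow is
     * counted against GTT, so gtt becomes 276 MB, which is below the
     * 0.7 * 1024 MB limit and the call returns true. */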
    return gtt < cs->ws->info.gart_size * 0.7;
}

static unsigned radeon_drm_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
                                              struct radeon_bo_list_item *list)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    int i;

    if (list) {
        for (i = 0; i < cs->csc->crelocs; i++) {
            list[i].bo_size = cs->csc->relocs_bo[i].bo->base.size;
            list[i].vm_address = cs->csc->relocs_bo[i].bo->va;
            list[i].priority_usage = cs->csc->relocs_bo[i].priority_usage;
        }
    }
    return cs->csc->crelocs;
}

void radeon_drm_cs_emit_ioctl_oneshot(void *job, int thread_index)
{
    struct radeon_cs_context *csc = ((struct radeon_drm_cs*)job)->cst;
    unsigned i;
    int r;

    r = drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
                            &csc->cs, sizeof(struct drm_radeon_cs));
    if (r) {
        if (r == -ENOMEM)
            fprintf(stderr, "radeon: Not enough memory for command submission.\n");
        else if (debug_get_bool_option("RADEON_DUMP_CS", false)) {
            unsigned i;

            fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
            for (i = 0; i < csc->chunks[0].length_dw; i++) {
                fprintf(stderr, "0x%08X\n", csc->buf[i]);
            }
        } else {
            fprintf(stderr, "radeon: The kernel rejected CS, "
                    "see dmesg for more information (%i).\n", r);
        }
    }

    for (i = 0; i < csc->crelocs; i++)
        p_atomic_dec(&csc->relocs_bo[i].bo->num_active_ioctls);

    radeon_cs_context_cleanup(csc);
}

/*
 * Make sure previous submissions of this CS have completed.
 */
void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    /* Wait for any pending ioctl of this CS to complete. */
    if (util_queue_is_initialized(&cs->ws->cs_queue))
        util_queue_job_wait(&cs->flush_completed);
}

DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", false)

static int radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
                               unsigned flags,
                               struct pipe_fence_handle **fence)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->current.cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->current.cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
         * r6xx requires at least 4 DW alignment to avoid a hw bug.
         */
        if (cs->ws->info.gfx_ib_pad_with_type2) {
            while (rcs->current.cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->current.cdw & 7)
                OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->current.cdw & 15)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->current.cdw > rcs->current.max_dw) {
       fprintf(stderr, "radeon: command stream overflowed\n");
    }

    if (fence) {
        radeon_fence_reference(fence, NULL);
        *fence = radeon_cs_create_fence(rcs);
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Swap command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    /* If the CS is not empty and has not overflowed, emit it in a separate thread. */
    if (cs->base.current.cdw && cs->base.current.cdw <= cs->base.current.max_dw && !debug_get_option_noop()) {
        unsigned i, crelocs;

        crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.current.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i].bo->num_active_ioctls);
        }

        switch (cs->ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.has_virtual_memory) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
        case RING_COMPUTE:
            cs->cst->flags[0] = RADEON_CS_KEEP_TILING_FLAGS;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 3;

            if (cs->ws->info.has_virtual_memory) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ring_type == RING_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (util_queue_is_initialized(&cs->ws->cs_queue)) {
            util_queue_add_job(&cs->ws->cs_queue, cs, &cs->flush_completed,
                               radeon_drm_cs_emit_ioctl_oneshot, NULL);
            if (!(flags & RADEON_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, 0);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.current.buf = cs->csc->buf;
    cs->base.current.cdw = 0;
    cs->base.used_vram = 0;
    cs->base.used_gart = 0;

    cs->ws->num_cs_flushes++;
    return 0;
}

static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    radeon_drm_cs_sync_flush(rcs);
    util_queue_fence_destroy(&cs->flush_completed);
    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);
    p_atomic_dec(&cs->ws->num_cs);
    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);
    FREE(cs);
}

static bool radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
                                    struct pb_buffer *_buf,
                                    enum radeon_bo_usage usage)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)_buf;
    int index;

    if (!bo->num_cs_references)
        return false;

    index = radeon_lookup_buffer(cs->csc, bo);
    if (index == -1)
        return false;

    if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
        return true;
    if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
        return true;

    return false;
}

/* FENCES */

static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct pb_buffer *fence;

    /* Create a fence, which is a dummy BO. */
    fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1,
                                       RADEON_DOMAIN_GTT, 0);
    /* Add the fence as a dummy relocation. */
    cs->ws->base.cs_add_buffer(rcs, fence,
                              RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
                              RADEON_PRIO_FENCE);
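    /* Because the CS references this BO, radeon_fence_wait() can simply call
     * buffer_wait() on it: the wait won't return until the CS using the BO
     * has completed. */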
    return (struct pipe_fence_handle*)fence;
}

static bool radeon_fence_wait(struct radeon_winsys *ws,
                              struct pipe_fence_handle *fence,
                              uint64_t timeout)
{
    return ws->buffer_wait((struct pb_buffer*)fence, timeout,
                           RADEON_USAGE_READWRITE);
}

static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src)
{
    pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
}

void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
{
    ws->base.ctx_create = radeon_drm_ctx_create;
    ws->base.ctx_destroy = radeon_drm_ctx_destroy;
    ws->base.cs_create = radeon_drm_cs_create;
    ws->base.cs_destroy = radeon_drm_cs_destroy;
    ws->base.cs_add_buffer = radeon_drm_cs_add_buffer;
    ws->base.cs_lookup_buffer = radeon_drm_cs_lookup_buffer;
    ws->base.cs_validate = radeon_drm_cs_validate;
    ws->base.cs_check_space = radeon_drm_cs_check_space;
    ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
    ws->base.cs_get_buffer_list = radeon_drm_cs_get_buffer_list;
    ws->base.cs_flush = radeon_drm_cs_flush;
    ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
    ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
    ws->base.fence_wait = radeon_fence_wait;
    ws->base.fence_reference = radeon_fence_reference;
}