radeon_drm_cs.c revision d66bbfbeded04cb728a2d8e4356c8e2b5e67ace1
/*
 * Copyright © 2008 Jérôme Glisse
 * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 *
 * Based on work from libdrm_radeon by:
 *      Aapo Tahkola <aet@rasterburn.org>
 *      Nicolai Haehnle <prefect_@gmx.net>
 *      Jérôme Glisse <glisse@freedesktop.org>
 */

/*
    This file replaces libdrm's radeon_cs_gem with our own implementation.
    It's optimized specifically for Radeon DRM.
    Adding buffers and space checking are faster and simpler than their
    counterparts in libdrm (the time complexity of all the functions
    is O(1) in nearly all scenarios, thanks to hashing).

    It works like this:

    cs_add_buffer(cs, buf, read_domain, write_domain) adds a new relocation and
    also adds the size of 'buf' to the used_gart and used_vram winsys variables
    based on the domains, which are simply or'd for the accounting purposes.
    The adding is skipped if the reloc is already present in the list, but it
    accounts for any newly-referenced domains.

    cs_validate is then called, which just checks:
        used_vram/gart < vram/gart_size * 0.8
    The 0.8 number allows for some memory fragmentation. If the validation
    fails, the pipe driver flushes the CS and tries the validation again,
    i.e. it validates only that one operation. If it fails again, it drops
    the operation on the floor and prints some nasty message to stderr.
    (done in the pipe driver)

    cs_write_reloc(cs, buf) just writes a reloc that has been added using
    cs_add_buffer. The read_domain and write_domain parameters have been removed,
    because we already specify them in cs_add_buffer.
*/
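
/*
 * Rough illustrative sketch (pseudocode, not actual driver code) of how a
 * pipe driver is expected to use the interface described above:
 *
 *    ws->cs_add_buffer(cs, buf, usage, domains, priority);
 *    if (!ws->cs_validate(cs)) {
 *        // flush, then re-add and validate just this one operation
 *        ws->cs_flush(cs, RADEON_FLUSH_ASYNC, NULL);
 *        ws->cs_add_buffer(cs, buf, usage, domains, priority);
 *        if (!ws->cs_validate(cs))
 *            ; // drop the operation and complain on stderr
 *    }
 */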

#include "radeon_drm_cs.h"

#include "util/u_memory.h"
#include "os/os_time.h"

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <xf86drm.h>


#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))

static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs);
static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src);

static struct radeon_winsys_ctx *radeon_drm_ctx_create(struct radeon_winsys *ws)
{
    /* No context support here. Just return the winsys pointer
     * as the "context". */
    return (struct radeon_winsys_ctx*)ws;
}

static void radeon_drm_ctx_destroy(struct radeon_winsys_ctx *ctx)
{
    /* No context support here. */
}

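/* Initialize the constant parts of the CS ioctl structure: the IB, relocation
 * and flags chunks, the chunk array pointing at them, and an empty reloc
 * hash table. */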
static bool radeon_init_cs_context(struct radeon_cs_context *csc,
                                   struct radeon_drm_winsys *ws)
{
    int i;

    csc->fd = ws->fd;

    csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
    csc->chunks[0].length_dw = 0;
    csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
    csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
    csc->chunks[1].length_dw = 0;
    csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
    csc->chunks[2].length_dw = 2;
    csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags;

    csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
    csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
    csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];

    csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;

    for (i = 0; i < ARRAY_SIZE(csc->reloc_indices_hashlist); i++) {
        csc->reloc_indices_hashlist[i] = -1;
    }
    return true;
}

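/* Drop all buffer references held by the context and reset it, so it can be
 * reused for the next CS. */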
static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
{
    unsigned i;

    for (i = 0; i < csc->crelocs; i++) {
        p_atomic_dec(&csc->relocs_bo[i].bo->num_cs_references);
        radeon_bo_reference(&csc->relocs_bo[i].bo, NULL);
    }

    csc->crelocs = 0;
    csc->validated_crelocs = 0;
    csc->chunks[0].length_dw = 0;
    csc->chunks[1].length_dw = 0;

    for (i = 0; i < ARRAY_SIZE(csc->reloc_indices_hashlist); i++) {
        csc->reloc_indices_hashlist[i] = -1;
    }
}

static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
{
    radeon_cs_context_cleanup(csc);
    FREE(csc->relocs_bo);
    FREE(csc->relocs);
}


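/* Create a command stream. Two CS contexts are allocated: 'csc' is filled by
 * the driver while 'cst' can be submitted by the flush thread. */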
static struct radeon_winsys_cs *
radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
                     enum ring_type ring_type,
                     void (*flush)(void *ctx, unsigned flags,
                                   struct pipe_fence_handle **fence),
                     void *flush_ctx)
{
    struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)ctx;
    struct radeon_drm_cs *cs;

    cs = CALLOC_STRUCT(radeon_drm_cs);
    if (!cs) {
        return NULL;
    }
    util_queue_fence_init(&cs->flush_completed);

    cs->ws = ws;
    cs->flush_cs = flush;
    cs->flush_data = flush_ctx;

    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
        FREE(cs);
        return NULL;
    }
    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
        radeon_destroy_cs_context(&cs->csc1);
        FREE(cs);
        return NULL;
    }

    /* Set the first command buffer as current. */
    cs->csc = &cs->csc1;
    cs->cst = &cs->csc2;
    cs->base.current.buf = cs->csc->buf;
    cs->base.current.max_dw = ARRAY_SIZE(cs->csc->buf);
    cs->ring_type = ring_type;

    p_atomic_inc(&ws->num_cs);
    return &cs->base;
}

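/* Merge new read/write domains and priority into an existing relocation and
 * return (in *added_domains) the domains that were not referenced before. */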
static inline void update_reloc(struct drm_radeon_cs_reloc *reloc,
                                enum radeon_bo_domain rd,
                                enum radeon_bo_domain wd,
                                unsigned priority,
                                enum radeon_bo_domain *added_domains)
{
    *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);

    reloc->read_domains |= rd;
    reloc->write_domain |= wd;
    reloc->flags = MAX2(reloc->flags, priority);
}

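/* Return the index of the relocation for 'bo', or -1 if the buffer is not
 * referenced by the CS. The hash table is checked first; on a collision,
 * fall back to a linear search. */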
int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo)
{
    unsigned hash = bo->handle & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
    int i = csc->reloc_indices_hashlist[hash];

    /* not found or found */
    if (i == -1 || csc->relocs_bo[i].bo == bo)
        return i;

    /* Hash collision, look for the BO in the list of relocs linearly. */
    for (i = csc->crelocs - 1; i >= 0; i--) {
        if (csc->relocs_bo[i].bo == bo) {
            /* Put this reloc in the hash list.
             * This will prevent additional hash collisions if there are
             * several consecutive lookup_buffer calls for the same buffer.
             *
             * Example: Assuming buffers A,B,C collide in the hash list,
             * the following sequence of relocs:
             *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
             * will collide here: ^ and here:   ^,
             * meaning that we should get very few collisions in the end. */
            csc->reloc_indices_hashlist[hash] = i;
            return i;
        }
    }
    return -1;
}

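/* Add 'bo' to the relocation list (or update its existing entry) and return
 * its index. *added_domains receives the newly-referenced domains so that the
 * caller can update the GTT/VRAM memory accounting. */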
static unsigned radeon_add_buffer(struct radeon_drm_cs *cs,
                                 struct radeon_bo *bo,
                                 enum radeon_bo_usage usage,
                                 enum radeon_bo_domain domains,
                                 unsigned priority,
                                 enum radeon_bo_domain *added_domains)
{
    struct radeon_cs_context *csc = cs->csc;
    struct drm_radeon_cs_reloc *reloc;
    unsigned hash = bo->handle & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
    int i = -1;

    assert(priority < 64);
    *added_domains = 0;

    i = radeon_lookup_buffer(csc, bo);

    if (i >= 0) {
        reloc = &csc->relocs[i];
        update_reloc(reloc, rd, wd, priority / 4, added_domains);
        csc->relocs_bo[i].priority_usage |= 1llu << priority;

        /* For async DMA, every add_buffer call must add a buffer to the list
         * no matter how many duplicates there are. This is because the
         * DMA CS checker doesn't use NOP packets for offset patching,
         * but always uses the i-th buffer from the list to patch the i-th
         * offset. If there are N offsets in a DMA CS, there must also be N
         * buffers in the relocation list.
         *
         * This doesn't have to be done if virtual memory is enabled,
         * because there is no offset patching with virtual memory.
         */
        if (cs->ring_type != RING_DMA || cs->ws->info.has_virtual_memory) {
            return i;
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->crelocs >= csc->nrelocs) {
        uint32_t size;
        csc->nrelocs = MAX2(csc->nrelocs + 16, (unsigned)(csc->nrelocs * 1.3));

        size = csc->nrelocs * sizeof(csc->relocs_bo[0]);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->crelocs].bo = NULL;
    csc->relocs_bo[csc->crelocs].priority_usage = 1llu << priority;
    radeon_bo_reference(&csc->relocs_bo[csc->crelocs].bo, bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->crelocs];
    reloc->handle = bo->handle;
    reloc->read_domains = rd;
    reloc->write_domain = wd;
    reloc->flags = priority / 4;

    csc->reloc_indices_hashlist[hash] = csc->crelocs;

    csc->chunks[1].length_dw += RELOC_DWORDS;

    *added_domains = rd | wd;
    return csc->crelocs++;
}

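/* Winsys entry point: add a buffer to the CS and charge its size to the
 * used_vram/used_gart counters for any newly-referenced domains. */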
static unsigned radeon_drm_cs_add_buffer(struct radeon_winsys_cs *rcs,
                                        struct pb_buffer *buf,
                                        enum radeon_bo_usage usage,
                                        enum radeon_bo_domain domains,
                                        enum radeon_bo_priority priority)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    enum radeon_bo_domain added_domains;
    unsigned index = radeon_add_buffer(cs, bo, usage, domains, priority,
                                       &added_domains);

    if (added_domains & RADEON_DOMAIN_VRAM)
        cs->base.used_vram += bo->base.size;
    else if (added_domains & RADEON_DOMAIN_GTT)
        cs->base.used_gart += bo->base.size;

    return index;
}

static int radeon_drm_cs_lookup_buffer(struct radeon_winsys_cs *rcs,
                                   struct pb_buffer *buf)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    return radeon_lookup_buffer(cs->csc, (struct radeon_bo*)buf);
}

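/* Check that the referenced buffers fit into 80% of GTT and VRAM. On failure,
 * drop the relocs added since the last successful validation, then flush the
 * CS if it still holds validated relocs, or reset it otherwise. */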
static bool radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    bool status =
        cs->base.used_gart < cs->ws->info.gart_size * 0.8 &&
        cs->base.used_vram < cs->ws->info.vram_size * 0.8;

    if (status) {
        cs->csc->validated_crelocs = cs->csc->crelocs;
    } else {
        /* Remove the recently-added buffers. The validation failed with them
         * and the CS is about to be flushed because of that. Keep only
         * the already-validated buffers. */
        unsigned i;

        for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
            p_atomic_dec(&cs->csc->relocs_bo[i].bo->num_cs_references);
            radeon_bo_reference(&cs->csc->relocs_bo[i].bo, NULL);
        }
        cs->csc->crelocs = cs->csc->validated_crelocs;

        /* Flush if there are any relocs. Clean up otherwise. */
        if (cs->csc->crelocs) {
            cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
        } else {
            radeon_cs_context_cleanup(cs->csc);
            cs->base.used_vram = 0;
            cs->base.used_gart = 0;

            assert(cs->base.current.cdw == 0);
            if (cs->base.current.cdw != 0) {
                fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
            }
        }
    }
    return status;
}

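/* Return true if at least 'dw' dwords are still free in the current IB. */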
static bool radeon_drm_cs_check_space(struct radeon_winsys_cs *rcs, unsigned dw)
{
   assert(rcs->current.cdw <= rcs->current.max_dw);
   return rcs->current.max_dw - rcs->current.cdw >= dw;
}

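/* Fill 'list' (if non-NULL) with the referenced buffers and return their
 * count. */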
static unsigned radeon_drm_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
                                              struct radeon_bo_list_item *list)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    int i;

    if (list) {
        for (i = 0; i < cs->csc->crelocs; i++) {
            list[i].bo_size = cs->csc->relocs_bo[i].bo->base.size;
            list[i].vm_address = cs->csc->relocs_bo[i].bo->va;
            list[i].priority_usage = cs->csc->relocs_bo[i].priority_usage;
        }
    }
    return cs->csc->crelocs;
}

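/* Submit the 'cst' context to the kernel via the CS ioctl and clean it up.
 * Runs either directly or as a job on the winsys submission queue. */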
void radeon_drm_cs_emit_ioctl_oneshot(void *job, int thread_index)
{
    struct radeon_cs_context *csc = ((struct radeon_drm_cs*)job)->cst;
    unsigned i;
    int r;

    r = drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
                            &csc->cs, sizeof(struct drm_radeon_cs));
    if (r) {
        if (r == -ENOMEM)
            fprintf(stderr, "radeon: Not enough memory for command submission.\n");
        else if (debug_get_bool_option("RADEON_DUMP_CS", false)) {
            unsigned i;

            fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
            for (i = 0; i < csc->chunks[0].length_dw; i++) {
                fprintf(stderr, "0x%08X\n", csc->buf[i]);
            }
        } else {
            fprintf(stderr, "radeon: The kernel rejected CS, "
                    "see dmesg for more information (%i).\n", r);
        }
    }

    for (i = 0; i < csc->crelocs; i++)
        p_atomic_dec(&csc->relocs_bo[i].bo->num_active_ioctls);

    radeon_cs_context_cleanup(csc);
}

/*
 * Make sure previous submissions of this CS have completed.
 */
void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    /* Wait for any pending ioctl of this CS to complete. */
    if (util_queue_is_initialized(&cs->ws->cs_queue))
        util_queue_job_wait(&cs->flush_completed);
}

DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", false)

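/* Flush the command stream: pad the IB to the alignment required by the ring,
 * swap the two CS contexts, and submit the filled one (asynchronously if a
 * submission thread is available). A fresh, empty CS is ready on return. */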
static int radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
                               unsigned flags,
                               struct pipe_fence_handle **fence)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->current.cdw & 7)
                radeon_emit(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->current.cdw & 7)
                radeon_emit(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
         * r6xx requires at least 4 DW alignment to avoid a hw bug.
         */
        if (cs->ws->info.gfx_ib_pad_with_type2) {
            while (rcs->current.cdw & 7)
                radeon_emit(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->current.cdw & 7)
                radeon_emit(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->current.cdw & 15)
            radeon_emit(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->current.cdw > rcs->current.max_dw) {
       fprintf(stderr, "radeon: command stream overflowed\n");
    }

    if (fence) {
       if (cs->next_fence) {
          radeon_fence_reference(fence, cs->next_fence);
       } else {
          radeon_fence_reference(fence, NULL);
          *fence = radeon_cs_create_fence(rcs);
       }
    }
    radeon_fence_reference(&cs->next_fence, NULL);

    radeon_drm_cs_sync_flush(rcs);

    /* Swap command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    /* If the CS is not empty and has not overflowed, emit it in a separate thread. */
    if (cs->base.current.cdw && cs->base.current.cdw <= cs->base.current.max_dw && !debug_get_option_noop()) {
        unsigned i, crelocs;

        crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.current.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i].bo->num_active_ioctls);
        }

        switch (cs->ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.has_virtual_memory) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
        case RING_COMPUTE:
            cs->cst->flags[0] = RADEON_CS_KEEP_TILING_FLAGS;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 3;

            if (cs->ws->info.has_virtual_memory) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ring_type == RING_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (util_queue_is_initialized(&cs->ws->cs_queue)) {
            util_queue_add_job(&cs->ws->cs_queue, cs, &cs->flush_completed,
                               radeon_drm_cs_emit_ioctl_oneshot, NULL);
            if (!(flags & RADEON_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, 0);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.current.buf = cs->csc->buf;
    cs->base.current.cdw = 0;
    cs->base.used_vram = 0;
    cs->base.used_gart = 0;

    cs->ws->num_cs_flushes++;
    return 0;
}

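/* Destroy the CS: wait for any in-flight submission, then release both CS
 * contexts and the pending fence. */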
static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    radeon_drm_cs_sync_flush(rcs);
    util_queue_fence_destroy(&cs->flush_completed);
    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);
    p_atomic_dec(&cs->ws->num_cs);
    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);
    radeon_fence_reference(&cs->next_fence, NULL);
    FREE(cs);
}

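/* Return whether 'buf' is referenced by the current CS with the given usage
 * (read and/or write). */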
static bool radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
                                    struct pb_buffer *_buf,
                                    enum radeon_bo_usage usage)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)_buf;
    int index;

    if (!bo->num_cs_references)
        return false;

    index = radeon_lookup_buffer(cs->csc, bo);
    if (index == -1)
        return false;

    if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
        return true;
    if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
        return true;

    return false;
}

/* FENCES */

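/* A fence is a dummy 1-byte GTT buffer added to the CS as a relocation;
 * waiting for the fence means waiting until the kernel stops using that
 * buffer. */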
static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct pb_buffer *fence;

    /* Create a fence, which is a dummy BO. */
    fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1,
                                       RADEON_DOMAIN_GTT, 0);
    /* Add the fence as a dummy relocation. */
    cs->ws->base.cs_add_buffer(rcs, fence,
                              RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
                              RADEON_PRIO_FENCE);
    return (struct pipe_fence_handle*)fence;
}

static bool radeon_fence_wait(struct radeon_winsys *ws,
                              struct pipe_fence_handle *fence,
                              uint64_t timeout)
{
    return ws->buffer_wait((struct pb_buffer*)fence, timeout,
                           RADEON_USAGE_READWRITE);
}

static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src)
{
    pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
}

static struct pipe_fence_handle *
radeon_drm_cs_get_next_fence(struct radeon_winsys_cs *rcs)
{
   struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
   struct pipe_fence_handle *fence = NULL;

   if (cs->next_fence) {
      radeon_fence_reference(&fence, cs->next_fence);
      return fence;
   }

   fence = radeon_cs_create_fence(rcs);
   if (!fence)
      return NULL;

   radeon_fence_reference(&cs->next_fence, fence);
   return fence;
}

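/* Plug the CS and fence entry points into the winsys vtable. */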
void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
{
    ws->base.ctx_create = radeon_drm_ctx_create;
    ws->base.ctx_destroy = radeon_drm_ctx_destroy;
    ws->base.cs_create = radeon_drm_cs_create;
    ws->base.cs_destroy = radeon_drm_cs_destroy;
    ws->base.cs_add_buffer = radeon_drm_cs_add_buffer;
    ws->base.cs_lookup_buffer = radeon_drm_cs_lookup_buffer;
    ws->base.cs_validate = radeon_drm_cs_validate;
    ws->base.cs_check_space = radeon_drm_cs_check_space;
    ws->base.cs_get_buffer_list = radeon_drm_cs_get_buffer_list;
    ws->base.cs_flush = radeon_drm_cs_flush;
    ws->base.cs_get_next_fence = radeon_drm_cs_get_next_fence;
    ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
    ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
    ws->base.fence_wait = radeon_fence_wait;
    ws->base.fence_reference = radeon_fence_reference;
}
