radeon_drm_cs.c revision 07c65b85eada8dd34019763b6e82ed4257a9b4a6
/*
 * Copyright © 2008 Jérôme Glisse
 * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 *
 * Based on work from libdrm_radeon by:
 *      Aapo Tahkola <aet@rasterburn.org>
 *      Nicolai Haehnle <prefect_@gmx.net>
 *      Jérôme Glisse <glisse@freedesktop.org>
 */

/*
    This file replaces libdrm's radeon_cs_gem with our own implementation.
    It's optimized specifically for Radeon DRM.
    Reloc writes and space checking are faster and simpler than their
    counterparts in libdrm (the time complexity of all the functions
    is O(1) in nearly all scenarios, thanks to hashing).

    It works like this:

    cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and
    also adds the size of 'buf' to the used_gart and used_vram winsys variables
    based on the domains, which are simply or'd for the accounting purposes.
    The adding is skipped if the reloc is already present in the list, but any
    newly referenced domains are still accounted for.

    cs_validate is then called, which just checks:
        used_vram/gart < vram/gart_size * 0.8
    The 0.8 factor allows for some memory fragmentation. If the validation
    fails, the pipe driver flushes the CS and tries to do the validation again,
    i.e. it validates only that one operation. If it fails again, it drops
    the operation on the floor and prints a nasty message to stderr.
    (done in the pipe driver)

    cs_write_reloc(cs, buf) just writes a reloc that has been added using
    cs_add_reloc. The read_domain and write_domain parameters have been removed,
    because we already specify them in cs_add_reloc.
*/
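
/*
 * Illustrative usage sketch (not part of the winsys itself): roughly how a
 * pipe driver is expected to drive these hooks. The 'ws', 'cs', 'buf' and
 * 'flush_current_cs' names below are hypothetical; the entry points are the
 * ones installed by radeon_drm_cs_init_functions() at the end of this file.
 *
 *     ws->cs_add_reloc(cs, buf, RADEON_USAGE_READWRITE,
 *                      RADEON_DOMAIN_VRAM, RADEON_PRIO_MIN);
 *     if (!ws->cs_validate(cs)) {
 *         // Too much memory referenced: flush and validate just this operation.
 *         flush_current_cs();
 *         ws->cs_add_reloc(cs, buf, RADEON_USAGE_READWRITE,
 *                          RADEON_DOMAIN_VRAM, RADEON_PRIO_MIN);
 *         if (!ws->cs_validate(cs))
 *             fprintf(stderr, "radeon: operation dropped\n");
 *     }
 */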

#include "radeon_drm_cs.h"

#include "util/u_memory.h"
#include "os/os_time.h"

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <xf86drm.h>


#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))

static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs);
static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src);

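/* Allocate the relocation arrays (512 entries to start with), describe the
 * three CS chunks (IB, relocs, flags) that the kernel expects, and clear the
 * reloc hash list. Returns FALSE on allocation failure. */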
static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
                                      struct radeon_drm_winsys *ws)
{
    int i;

    csc->fd = ws->fd;
    csc->nrelocs = 512;
    csc->relocs_bo = (struct radeon_bo**)
                     CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
    if (!csc->relocs_bo) {
        return FALSE;
    }

    csc->relocs = (struct drm_radeon_cs_reloc*)
                  CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
    if (!csc->relocs) {
        FREE(csc->relocs_bo);
        return FALSE;
    }

    csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
    csc->chunks[0].length_dw = 0;
    csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
    csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
    csc->chunks[1].length_dw = 0;
    csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
    csc->chunks[2].length_dw = 2;
    csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags;

    csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
    csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
    csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];

    csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;

    for (i = 0; i < Elements(csc->reloc_indices_hashlist); i++) {
        csc->reloc_indices_hashlist[i] = -1;
    }
    return TRUE;
}

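/* Drop the CS reference and the winsys reference on every relocated buffer
 * and reset the context (reloc counts, chunk sizes, memory usage, hash list)
 * so it can be reused for the next CS. */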
static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
{
    unsigned i;

    for (i = 0; i < csc->crelocs; i++) {
        p_atomic_dec(&csc->relocs_bo[i]->num_cs_references);
        radeon_bo_reference(&csc->relocs_bo[i], NULL);
    }

    csc->crelocs = 0;
    csc->validated_crelocs = 0;
    csc->chunks[0].length_dw = 0;
    csc->chunks[1].length_dw = 0;
    csc->used_gart = 0;
    csc->used_vram = 0;

    for (i = 0; i < Elements(csc->reloc_indices_hashlist); i++) {
        csc->reloc_indices_hashlist[i] = -1;
    }
}

static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
{
    radeon_cs_context_cleanup(csc);
    FREE(csc->relocs_bo);
    FREE(csc->relocs);
}


static struct radeon_winsys_cs *
radeon_drm_cs_create(struct radeon_winsys *rws,
                     enum ring_type ring_type,
                     void (*flush)(void *ctx, unsigned flags,
                                   struct pipe_fence_handle **fence),
                     void *flush_ctx,
                     struct radeon_winsys_cs_handle *trace_buf)
{
    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
    struct radeon_drm_cs *cs;

    cs = CALLOC_STRUCT(radeon_drm_cs);
    if (!cs) {
        return NULL;
    }
    pipe_semaphore_init(&cs->flush_completed, 1);

    cs->ws = ws;
    cs->flush_cs = flush;
    cs->flush_data = flush_ctx;
    cs->trace_buf = (struct radeon_bo*)trace_buf;

    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
        FREE(cs);
        return NULL;
    }
    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
        radeon_destroy_cs_context(&cs->csc1);
        FREE(cs);
        return NULL;
    }

    /* Set the first command buffer as current. */
    cs->csc = &cs->csc1;
    cs->cst = &cs->csc2;
    cs->base.buf = cs->csc->buf;
    cs->base.ring_type = ring_type;

    p_atomic_inc(&ws->num_cs);
    return &cs->base;
}

#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)

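/* Merge new read/write domains into an existing relocation, raise its
 * priority if needed, and report which domains were not referenced before
 * through 'added_domains'. */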
static INLINE void update_reloc(struct drm_radeon_cs_reloc *reloc,
                                enum radeon_bo_domain rd,
                                enum radeon_bo_domain wd,
                                unsigned priority,
                                enum radeon_bo_domain *added_domains)
{
    *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);

    reloc->read_domains |= rd;
    reloc->write_domain |= wd;
    reloc->flags = MAX2(reloc->flags, priority);
}

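/* Return the index of the relocation for 'bo', or -1 if it isn't in the CS.
 * A small hash of the buffer handle is used to avoid the linear search in
 * the common case. */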
int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
{
    unsigned hash = bo->handle & (Elements(csc->reloc_indices_hashlist)-1);
    int i = csc->reloc_indices_hashlist[hash];

    /* Either not found (i == -1) or found directly via the hash list. */
    if (i == -1 || csc->relocs_bo[i] == bo)
        return i;

    /* Hash collision, look for the BO in the list of relocs linearly. */
    for (i = csc->crelocs - 1; i >= 0; i--) {
        if (csc->relocs_bo[i] == bo) {
            /* Put this reloc in the hash list.
             * This will prevent additional hash collisions if there are
             * several consecutive get_reloc calls for the same buffer.
             *
             * Example: Assuming buffers A,B,C collide in the hash list,
             * the following sequence of relocs:
             *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
             * will collide here: ^ and here:   ^,
             * meaning that we should get very few collisions in the end. */
            csc->reloc_indices_hashlist[hash] = i;
            return i;
        }
    }
    return -1;
}

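/* Add a relocation for 'bo' (or merge the domains into an existing one),
 * growing the reloc arrays if necessary, and return its index. Domains that
 * were not referenced before are returned through 'added_domains' so the
 * caller can update the used_gart/used_vram accounting. */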
static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
                                 struct radeon_bo *bo,
                                 enum radeon_bo_usage usage,
                                 enum radeon_bo_domain domains,
                                 unsigned priority,
                                 enum radeon_bo_domain *added_domains)
{
    struct radeon_cs_context *csc = cs->csc;
    struct drm_radeon_cs_reloc *reloc;
    unsigned hash = bo->handle & (Elements(csc->reloc_indices_hashlist)-1);
    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
    int i = -1;

    priority = MIN2(priority, 15);
    *added_domains = 0;

    i = radeon_get_reloc(csc, bo);

    if (i >= 0) {
        reloc = &csc->relocs[i];
        update_reloc(reloc, rd, wd, priority, added_domains);

        /* For async DMA, every add_reloc call must add a buffer to the list
         * no matter how many duplicates there are. This is due to the fact
         * the DMA CS checker doesn't use NOP packets for offset patching,
         * but always uses the i-th buffer from the list to patch the i-th
         * offset. If there are N offsets in a DMA CS, there must also be N
         * buffers in the relocation list.
         *
         * This doesn't have to be done if virtual memory is enabled,
         * because there is no offset patching with virtual memory.
         */
        if (cs->base.ring_type != RING_DMA || cs->ws->info.r600_virtual_address) {
            return i;
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->crelocs >= csc->nrelocs) {
        uint32_t size;
        csc->nrelocs += 10;

        size = csc->nrelocs * sizeof(struct radeon_bo*);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->crelocs] = NULL;
    radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->crelocs];
    reloc->handle = bo->handle;
    reloc->read_domains = rd;
    reloc->write_domain = wd;
    reloc->flags = priority;

    csc->reloc_indices_hashlist[hash] = csc->crelocs;

    csc->chunks[1].length_dw += RELOC_DWORDS;

    *added_domains = rd | wd;
    return csc->crelocs++;
}

static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
                                        struct radeon_winsys_cs_handle *buf,
                                        enum radeon_bo_usage usage,
                                        enum radeon_bo_domain domains,
                                        enum radeon_bo_priority priority)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    enum radeon_bo_domain added_domains;
    unsigned index = radeon_add_reloc(cs, bo, usage, domains, priority, &added_domains);

    if (added_domains & RADEON_DOMAIN_GTT)
        cs->csc->used_gart += bo->base.size;
    if (added_domains & RADEON_DOMAIN_VRAM)
        cs->csc->used_vram += bo->base.size;

    return index;
}

static int radeon_drm_cs_get_reloc(struct radeon_winsys_cs *rcs,
                                   struct radeon_winsys_cs_handle *buf)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    return radeon_get_reloc(cs->csc, (struct radeon_bo*)buf);
}

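/* Check that the buffers referenced by the current CS still fit in 80% of
 * GART and VRAM. On failure, drop the relocations added since the last
 * successful validation and flush the already-validated part of the CS. */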
static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        cs->csc->used_gart < cs->ws->info.gart_size * 0.8 &&
        cs->csc->used_vram < cs->ws->info.vram_size * 0.8;

    if (status) {
        cs->csc->validated_crelocs = cs->csc->crelocs;
    } else {
        /* Remove the relocations added since the last successful validation.
         * The validation failed with them and the CS is about to be flushed
         * because of that. Keep only the already-validated relocations. */
        unsigned i;

        for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
            p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references);
            radeon_bo_reference(&cs->csc->relocs_bo[i], NULL);
        }
        cs->csc->crelocs = cs->csc->validated_crelocs;

        /* Flush if there are any relocs. Clean up otherwise. */
        if (cs->csc->crelocs) {
            cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
        } else {
            radeon_cs_context_cleanup(cs->csc);

            assert(cs->base.cdw == 0);
            if (cs->base.cdw != 0) {
                fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
            }
        }
    }
    return status;
}

static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        (cs->csc->used_gart + gtt) < cs->ws->info.gart_size * 0.7 &&
        (cs->csc->used_vram + vram) < cs->ws->info.vram_size * 0.7;

    return status;
}

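/* Submit the CS to the kernel with the DRM_RADEON_CS ioctl. If the kernel
 * rejects it, dump the IB when RADEON_DUMP_CS is set; afterwards drop the
 * per-buffer active-ioctl counters and reset the context. */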
void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
{
    unsigned i;

    if (drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
                            &csc->cs, sizeof(struct drm_radeon_cs))) {
        if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
            unsigned i;

            fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
            for (i = 0; i < csc->chunks[0].length_dw; i++) {
                fprintf(stderr, "0x%08X\n", csc->buf[i]);
            }
        } else {
            fprintf(stderr, "radeon: The kernel rejected CS, "
                    "see dmesg for more information.\n");
        }
    }

    if (cs->trace_buf) {
        radeon_dump_cs_on_lockup(cs, csc);
    }

    for (i = 0; i < csc->crelocs; i++)
        p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls);

    radeon_cs_context_cleanup(csc);
}

/*
 * Make sure all previous submissions of this CS have completed.
 */
void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    /* Wait for any pending ioctl to complete. */
    if (cs->ws->thread) {
        pipe_semaphore_wait(&cs->flush_completed);
        pipe_semaphore_signal(&cs->flush_completed);
    }
}

DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)

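/* Flush the current CS: pad it to the alignment required by the target ring,
 * optionally create a fence, swap the current and submitted contexts, and
 * submit either through the winsys thread or directly with the ioctl. */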
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
                                unsigned flags,
                                struct pipe_fence_handle **fence,
                                uint32_t cs_trace_id)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->base.ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* Pad the GFX ring to 8 DWs to meet CP fetch alignment requirements.
         * r6xx requires at least 4 DW alignment to avoid a hw bug.
         */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->cdw & 15)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }

    if (fence) {
        radeon_fence_reference(fence, NULL);
        *fence = radeon_cs_create_fence(rcs);
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Swap command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    cs->cst->cs_trace_id = cs_trace_id;

    /* If the CS is not empty and has not overflowed, emit it in a separate thread. */
    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
        unsigned i, crelocs;

        crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        switch (cs->base.ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 2;
            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (cs->ws->thread) {
            pipe_semaphore_wait(&cs->flush_completed);
            radeon_drm_ws_queue_cs(cs->ws, cs);
            if (!(flags & RADEON_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;

    cs->ws->num_cs_flushes++;
}

static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    radeon_drm_cs_sync_flush(rcs);
    pipe_semaphore_destroy(&cs->flush_completed);
    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);
    p_atomic_dec(&cs->ws->num_cs);
    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);
    FREE(cs);
}

static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
                                       struct radeon_winsys_cs_handle *_buf,
                                       enum radeon_bo_usage usage)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)_buf;
    int index;

    if (!bo->num_cs_references)
        return FALSE;

    index = radeon_get_reloc(cs->csc, bo);
    if (index == -1)
        return FALSE;

    if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
        return TRUE;
    if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
        return TRUE;

    return FALSE;
}

/* FENCES */

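/* A fence is modelled as a 1-byte GTT buffer added to the CS as a dummy
 * relocation; it is considered signalled once that buffer becomes idle. */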
static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct pb_buffer *fence;

    /* Create a fence, which is a dummy BO. */
    fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE,
                                       RADEON_DOMAIN_GTT, 0);
    /* Add the fence as a dummy relocation. */
    cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence),
                              RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
                              RADEON_PRIO_MIN);
    return (struct pipe_fence_handle*)fence;
}

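/* Wait for a fence. A zero timeout just queries the busy state; a finite
 * timeout (given in nanoseconds) polls the fence buffer, sleeping 10 us per
 * iteration; PIPE_TIMEOUT_INFINITE blocks in buffer_wait. */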
static bool radeon_fence_wait(struct radeon_winsys *ws,
                              struct pipe_fence_handle *fence,
                              uint64_t timeout)
{
    struct pb_buffer *rfence = (struct pb_buffer*)fence;

    if (timeout == 0)
        return !ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE);

    if (timeout != PIPE_TIMEOUT_INFINITE) {
        int64_t start_time = os_time_get();

        /* Convert to microseconds. */
        timeout /= 1000;

        /* Wait in a loop. */
        while (ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE)) {
            if (os_time_get() - start_time >= timeout) {
                return FALSE;
            }
            os_time_sleep(10);
        }
        return TRUE;
    }

    ws->buffer_wait(rfence, RADEON_USAGE_READWRITE);
    return TRUE;
}

static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src)
{
    pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
}

void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
{
    ws->base.cs_create = radeon_drm_cs_create;
    ws->base.cs_destroy = radeon_drm_cs_destroy;
    ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
    ws->base.cs_get_reloc = radeon_drm_cs_get_reloc;
    ws->base.cs_validate = radeon_drm_cs_validate;
    ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
    ws->base.cs_flush = radeon_drm_cs_flush;
    ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
    ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
    ws->base.fence_wait = radeon_fence_wait;
    ws->base.fence_reference = radeon_fence_reference;
}