radeon_drm_cs.c revision a2a1a5805fd617e7f3cc8be44dd79b50da07ebb9
/*
 * Copyright © 2008 Jérôme Glisse
 * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 *
 * Based on work from libdrm_radeon by:
 *      Aapo Tahkola <aet@rasterburn.org>
 *      Nicolai Haehnle <prefect_@gmx.net>
 *      Jérôme Glisse <glisse@freedesktop.org>
 */

/*
    This file replaces libdrm's radeon_cs_gem with our own implementation.
    It's optimized specifically for Radeon DRM.
    Reloc writes and space checking are faster and simpler than their
    counterparts in libdrm (the time complexity of all the functions
    is O(1) in nearly all scenarios, thanks to hashing).

    It works like this:

    cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and
    also adds the size of 'buf' to the used_gart and used_vram winsys variables
    based on the domains, which are simply or'd for the accounting purposes.
    The adding is skipped if the reloc is already present in the list, but it
    still accounts for any newly-referenced domains.

    cs_validate is then called, which just checks:
        used_vram/gart < vram/gart_size * 0.8
    The 0.8 number allows for some memory fragmentation. If the validation
    fails, the pipe driver flushes the CS and tries the validation again,
    i.e. it validates only that one operation. If it fails again, it drops
    the operation on the floor and prints some nasty message to stderr.
    (done in the pipe driver)

    cs_write_reloc(cs, buf) just writes a reloc that has been added using
    cs_add_reloc. The read_domain and write_domain parameters have been removed,
    because we already specify them in cs_add_reloc.
*/
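
/* A sketch of that flow as seen from the pipe driver (illustration only;
 * "buf" stands for the radeon_winsys_cs_handle of a previously created
 * buffer, "ws" is the radeon_winsys, and flush_cb/flush_ctx are the
 * driver's flush callback and its argument):
 *
 *     struct radeon_winsys_cs *cs =
 *         ws->cs_create(ws, RING_GFX, flush_cb, flush_ctx, NULL);
 *
 *     ws->cs_add_reloc(cs, buf, RADEON_USAGE_READWRITE,
 *                      RADEON_DOMAIN_VRAM, RADEON_PRIO_MIN);
 *
 *     if (!ws->cs_validate(cs)) {
 *         // On failure, cs_validate drops the relocs added since the last
 *         // successful validation and, if any validated work remains,
 *         // flushes it asynchronously. Re-add the relocs of this one
 *         // operation and validate again.
 *         ws->cs_add_reloc(cs, buf, RADEON_USAGE_READWRITE,
 *                          RADEON_DOMAIN_VRAM, RADEON_PRIO_MIN);
 *         if (!ws->cs_validate(cs)) {
 *             // Drop the operation and print an error (pipe driver's job).
 *         }
 *     }
 */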

#include "radeon_drm_cs.h"

#include "util/u_memory.h"
#include "os/os_time.h"

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <xf86drm.h>


#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))

static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs);
static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src);

static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
                                      struct radeon_drm_winsys *ws)
{
    int i;

    csc->fd = ws->fd;
    csc->nrelocs = 512;
    csc->relocs_bo = (struct radeon_bo**)
                     CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
    if (!csc->relocs_bo) {
        return FALSE;
    }

    csc->relocs = (struct drm_radeon_cs_reloc*)
                  CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
    if (!csc->relocs) {
        FREE(csc->relocs_bo);
        return FALSE;
    }

    csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
    csc->chunks[0].length_dw = 0;
    csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
    csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
    csc->chunks[1].length_dw = 0;
    csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
    csc->chunks[2].length_dw = 2;
    csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags;

    csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
    csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
    csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];

    csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;

    for (i = 0; i < Elements(csc->reloc_indices_hashlist); i++) {
        csc->reloc_indices_hashlist[i] = -1;
    }
    return TRUE;
}

static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
{
    unsigned i;

    for (i = 0; i < csc->crelocs; i++) {
        p_atomic_dec(&csc->relocs_bo[i]->num_cs_references);
        radeon_bo_reference(&csc->relocs_bo[i], NULL);
    }

    csc->crelocs = 0;
    csc->validated_crelocs = 0;
    csc->chunks[0].length_dw = 0;
    csc->chunks[1].length_dw = 0;
    csc->used_gart = 0;
    csc->used_vram = 0;

    for (i = 0; i < Elements(csc->reloc_indices_hashlist); i++) {
        csc->reloc_indices_hashlist[i] = -1;
    }
}

static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
{
    radeon_cs_context_cleanup(csc);
    FREE(csc->relocs_bo);
    FREE(csc->relocs);
}


static struct radeon_winsys_cs *
radeon_drm_cs_create(struct radeon_winsys *rws,
                     enum ring_type ring_type,
                     void (*flush)(void *ctx, unsigned flags,
                                   struct pipe_fence_handle **fence),
                     void *flush_ctx,
                     struct radeon_winsys_cs_handle *trace_buf)
{
    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
    struct radeon_drm_cs *cs;

    cs = CALLOC_STRUCT(radeon_drm_cs);
    if (!cs) {
        return NULL;
    }
    pipe_semaphore_init(&cs->flush_completed, 1);

    cs->ws = ws;
    cs->flush_cs = flush;
    cs->flush_data = flush_ctx;
    cs->trace_buf = (struct radeon_bo*)trace_buf;

    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
        FREE(cs);
        return NULL;
    }
    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
        radeon_destroy_cs_context(&cs->csc1);
        FREE(cs);
        return NULL;
    }

    /* Set the first command buffer as current. */
    cs->csc = &cs->csc1;
    cs->cst = &cs->csc2;
    cs->base.buf = cs->csc->buf;
    cs->base.ring_type = ring_type;

    p_atomic_inc(&ws->num_cs);
    return &cs->base;
}

#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)

static inline void update_reloc(struct drm_radeon_cs_reloc *reloc,
                                enum radeon_bo_domain rd,
                                enum radeon_bo_domain wd,
                                unsigned priority,
                                enum radeon_bo_domain *added_domains)
{
    /* Report only the domains not yet referenced by this reloc. */
    *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);

    reloc->read_domains |= rd;
    reloc->write_domain |= wd;
    reloc->flags = MAX2(reloc->flags, priority);
}

int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
{
    unsigned hash = bo->handle & (Elements(csc->reloc_indices_hashlist)-1);
    int i = csc->reloc_indices_hashlist[hash];

    /* Fast path: the hash slot is either empty (-1) or already points at
     * this BO. */
    if (i == -1 || csc->relocs_bo[i] == bo)
        return i;

    /* Hash collision, look for the BO in the list of relocs linearly. */
    for (i = csc->crelocs - 1; i >= 0; i--) {
        if (csc->relocs_bo[i] == bo) {
            /* Put this reloc in the hash list.
             * This will prevent additional hash collisions if there are
             * several consecutive get_reloc calls for the same buffer.
             *
             * Example: Assuming buffers A,B,C collide in the hash list,
             * the following sequence of relocs:
             *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
             * will collide here: ^ and here:   ^,
             * meaning that we should get very few collisions in the end. */
            csc->reloc_indices_hashlist[hash] = i;
            return i;
        }
    }
    return -1;
}

static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
                                 struct radeon_bo *bo,
                                 enum radeon_bo_usage usage,
                                 enum radeon_bo_domain domains,
                                 unsigned priority,
                                 enum radeon_bo_domain *added_domains)
{
    struct radeon_cs_context *csc = cs->csc;
    struct drm_radeon_cs_reloc *reloc;
    unsigned hash = bo->handle & (Elements(csc->reloc_indices_hashlist)-1);
    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
    int i = -1;

    priority = MIN2(priority, 15);
    *added_domains = 0;

    i = radeon_get_reloc(csc, bo);

    if (i >= 0) {
        reloc = &csc->relocs[i];
        update_reloc(reloc, rd, wd, priority, added_domains);

        /* For async DMA, every add_reloc call must add a buffer to the list
         * no matter how many duplicates there are. This is due to the fact
         * the DMA CS checker doesn't use NOP packets for offset patching,
         * but always uses the i-th buffer from the list to patch the i-th
         * offset. If there are N offsets in a DMA CS, there must also be N
         * buffers in the relocation list.
         *
         * This doesn't have to be done if virtual memory is enabled,
         * because there is no offset patching with virtual memory.
         */
        if (cs->base.ring_type != RING_DMA || cs->ws->info.r600_virtual_address) {
            return i;
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->crelocs >= csc->nrelocs) {
        uint32_t size;
        csc->nrelocs += 10;

        size = csc->nrelocs * sizeof(struct radeon_bo*);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->crelocs] = NULL;
    radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->crelocs];
    reloc->handle = bo->handle;
    reloc->read_domains = rd;
    reloc->write_domain = wd;
    reloc->flags = priority;

    csc->reloc_indices_hashlist[hash] = csc->crelocs;

    csc->chunks[1].length_dw += RELOC_DWORDS;

    *added_domains = rd | wd;
    return csc->crelocs++;
}

static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
                                        struct radeon_winsys_cs_handle *buf,
                                        enum radeon_bo_usage usage,
                                        enum radeon_bo_domain domains,
                                        enum radeon_bo_priority priority)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    enum radeon_bo_domain added_domains;
    unsigned index = radeon_add_reloc(cs, bo, usage, domains, priority, &added_domains);

    if (added_domains & RADEON_DOMAIN_GTT)
        cs->csc->used_gart += bo->base.size;
    if (added_domains & RADEON_DOMAIN_VRAM)
        cs->csc->used_vram += bo->base.size;

    return index;
}

static int radeon_drm_cs_get_reloc(struct radeon_winsys_cs *rcs,
                                   struct radeon_winsys_cs_handle *buf)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    return radeon_get_reloc(cs->csc, (struct radeon_bo*)buf);
}

static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        cs->csc->used_gart < cs->ws->info.gart_size * 0.8 &&
        cs->csc->used_vram < cs->ws->info.vram_size * 0.8;

    if (status) {
        cs->csc->validated_crelocs = cs->csc->crelocs;
    } else {
        /* Remove recently-added relocations. The validation failed with
         * them and the CS is about to be flushed because of that. Keep only
         * the already-validated relocations. */
        unsigned i;

        for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
            p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references);
            radeon_bo_reference(&cs->csc->relocs_bo[i], NULL);
        }
        cs->csc->crelocs = cs->csc->validated_crelocs;

        /* Flush if there are any relocs. Clean up otherwise. */
        if (cs->csc->crelocs) {
            cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
        } else {
            radeon_cs_context_cleanup(cs->csc);

            assert(cs->base.cdw == 0);
            if (cs->base.cdw != 0) {
                fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
            }
        }
    }
    return status;
}

static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        (cs->csc->used_gart + gtt) < cs->ws->info.gart_size * 0.7 &&
        (cs->csc->used_vram + vram) < cs->ws->info.vram_size * 0.7;

    return status;
}

void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
{
    unsigned i;

    if (drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
                            &csc->cs, sizeof(struct drm_radeon_cs))) {
        if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
            unsigned i;

            fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
            for (i = 0; i < csc->chunks[0].length_dw; i++) {
                fprintf(stderr, "0x%08X\n", csc->buf[i]);
            }
        } else {
            fprintf(stderr, "radeon: The kernel rejected CS, "
                    "see dmesg for more information.\n");
        }
    }

    if (cs->trace_buf) {
        radeon_dump_cs_on_lockup(cs, csc);
    }

    for (i = 0; i < csc->crelocs; i++)
        p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls);

    radeon_cs_context_cleanup(csc);
}

/*
 * Make sure any previous submission of this CS has completed.
 */
void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    /* Wait for any pending ioctl to complete. */
    if (cs->ws->thread) {
        pipe_semaphore_wait(&cs->flush_completed);
        pipe_semaphore_signal(&cs->flush_completed);
    }
}

DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)

static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
                                unsigned flags,
                                struct pipe_fence_handle **fence,
                                uint32_t cs_trace_id)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->base.ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
         * r6xx requires at least 4 DW alignment to avoid a hw bug.
         * hawaii with old firmware needs the type2 nop packet.
         * accel_working2 with value 3 indicates the new firmware.
         */
        if (cs->ws->info.chip_class <= SI ||
            (cs->ws->info.family == CHIP_HAWAII &&
             cs->ws->accel_working2 < 3)) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->cdw & 15)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
       fprintf(stderr, "radeon: command stream overflowed\n");
    }

    if (fence) {
        radeon_fence_reference(fence, NULL);
        *fence = radeon_cs_create_fence(rcs);
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Swap command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    cs->cst->cs_trace_id = cs_trace_id;

    /* If the CS is neither empty nor overflowed, submit it, asynchronously
     * in the winsys thread if there is one. */
    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
        unsigned i, crelocs;

        crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        switch (cs->base.ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 2;
            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (cs->ws->thread) {
            pipe_semaphore_wait(&cs->flush_completed);
            radeon_drm_ws_queue_cs(cs->ws, cs);
            if (!(flags & RADEON_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;

    cs->ws->num_cs_flushes++;
}

static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    radeon_drm_cs_sync_flush(rcs);
    pipe_semaphore_destroy(&cs->flush_completed);
    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);
    p_atomic_dec(&cs->ws->num_cs);
    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);
    FREE(cs);
}

static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
                                       struct radeon_winsys_cs_handle *_buf,
                                       enum radeon_bo_usage usage)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)_buf;
    int index;

    if (!bo->num_cs_references)
        return FALSE;

    index = radeon_get_reloc(cs->csc, bo);
    if (index == -1)
        return FALSE;

    if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
        return TRUE;
    if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
        return TRUE;

    return FALSE;
}

/* FENCES */

static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct pb_buffer *fence;

    /* Create a fence, which is a dummy BO. */
    fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE,
                                       RADEON_DOMAIN_GTT, 0);
    /* Add the fence as a dummy relocation. */
    cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence),
                              RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
                              RADEON_PRIO_MIN);
    return (struct pipe_fence_handle*)fence;
}

static bool radeon_fence_wait(struct radeon_winsys *ws,
                              struct pipe_fence_handle *fence,
                              uint64_t timeout)
{
    struct pb_buffer *rfence = (struct pb_buffer*)fence;

    if (timeout == 0)
        return !ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE);

    if (timeout != PIPE_TIMEOUT_INFINITE) {
        int64_t start_time = os_time_get();

        /* Convert from nanoseconds to microseconds. */
        timeout /= 1000;

        /* Wait in a loop. */
        while (ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE)) {
            if (os_time_get() - start_time >= timeout) {
                return FALSE;
            }
            os_time_sleep(10);
        }
        return TRUE;
    }

    ws->buffer_wait(rfence, RADEON_USAGE_READWRITE);
    return TRUE;
}
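
/* A sketch of the intended fence usage from the pipe driver's side
 * (illustration only; "ws" and "cs" come from the winsys setup shown in
 * the comment at the top of this file). The fence returned by cs_flush is
 * just the dummy BO created in radeon_cs_create_fence, so waiting on it
 * means waiting until that BO is idle:
 *
 *     struct pipe_fence_handle *fence = NULL;
 *     ws->cs_flush(cs, 0, &fence, 0);
 *     ws->fence_wait(ws, fence, PIPE_TIMEOUT_INFINITE);
 *     ws->fence_reference(&fence, NULL);
 */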

static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src)
{
    pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
}

void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
{
    ws->base.cs_create = radeon_drm_cs_create;
    ws->base.cs_destroy = radeon_drm_cs_destroy;
    ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
    ws->base.cs_get_reloc = radeon_drm_cs_get_reloc;
    ws->base.cs_validate = radeon_drm_cs_validate;
    ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
    ws->base.cs_flush = radeon_drm_cs_flush;
    ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
    ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
    ws->base.fence_wait = radeon_fence_wait;
    ws->base.fence_reference = radeon_fence_reference;
}