radeon_drm_cs.c revision 900ac63ee88a16b7fb7f0ca2b03a40259b8ebd84
/*
 * Copyright © 2008 Jérôme Glisse
 * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 *
 * Based on work from libdrm_radeon by:
 *      Aapo Tahkola <aet@rasterburn.org>
 *      Nicolai Haehnle <prefect_@gmx.net>
 *      Jérôme Glisse <glisse@freedesktop.org>
 */

/*
    This file replaces libdrm's radeon_cs_gem with our own implementation.
    It's optimized specifically for Radeon DRM.
    Reloc writes and space checking are faster and simpler than their
    counterparts in libdrm (the time complexity of all the functions
    is O(1) in nearly all scenarios, thanks to hashing).

    It works like this:

    cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and
    also adds the size of 'buf' to the used_gart and used_vram winsys variables
    based on the domains, which are simply OR'd for the accounting purposes.
    The adding is skipped if the reloc is already present in the list, but it
    still accounts for any newly-referenced domains.

    cs_validate is then called, which just checks:
        used_vram/gart < vram/gart_size * 0.8
    The 0.8 number allows for some memory fragmentation. If the validation
    fails, the pipe driver flushes the CS and tries to do the validation again,
    i.e. it validates only that one operation. If it fails again, it drops
    the operation on the floor and prints some nasty message to stderr.
    (done in the pipe driver)

    cs_write_reloc(cs, buf) just writes a reloc that has been added using
    cs_add_reloc. The read_domain and write_domain parameters have been removed,
    because we already specify them in cs_add_reloc.
*/
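/*
    A minimal usage sketch, as seen from a pipe driver (hedged: the variable
    names and control flow here are hypothetical; only the cs_* winsys entry
    points installed at the bottom of this file are real):

        // 1) Declare every buffer the next operation will use.
        ws->cs_add_reloc(cs, handle, RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);

        // 2) Check that the referenced buffers fit in memory.
        if (!ws->cs_validate(cs)) {
            // cs_validate dropped the relocs added since the last successful
            // validation and flushed the CS; add them back and re-validate.
            ws->cs_add_reloc(cs, handle, RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
            if (!ws->cs_validate(cs))
                fprintf(stderr, "driver: dropping the operation\n");
        }

        // 3) Emit packets; each buffer reference becomes a reloc packet.
        ws->cs_write_reloc(cs, handle);
*/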

#include "radeon_drm_cs.h"

#include "util/u_memory.h"
#include "os/os_time.h"

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <xf86drm.h>

/*
 * These are copied from radeon_drm.h. Once an updated libdrm is released,
 * we should bump the configure.ac requirement for it and remove the
 * following defines.
 */
#ifndef RADEON_CHUNK_ID_FLAGS
#define RADEON_CHUNK_ID_FLAGS       0x03

/* The first dword of RADEON_CHUNK_ID_FLAGS is a uint32 of these flags: */
#define RADEON_CS_KEEP_TILING_FLAGS 0x01
#endif

#ifndef RADEON_CS_USE_VM
#define RADEON_CS_USE_VM            0x02
/* The second dword of RADEON_CHUNK_ID_FLAGS is a uint32 that sets the ring type */
#define RADEON_CS_RING_GFX          0
#define RADEON_CS_RING_COMPUTE      1
#endif

#ifndef RADEON_CS_RING_DMA
#define RADEON_CS_RING_DMA          2
#endif

#ifndef RADEON_CS_RING_UVD
#define RADEON_CS_RING_UVD          3
#endif

#ifndef RADEON_CS_END_OF_FRAME
#define RADEON_CS_END_OF_FRAME      0x04
#endif


#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))

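/* Set up the three CS ioctl chunks: chunk 0 is the IB (the command dwords),
 * chunk 1 is the relocation list, and chunk 2 holds the two flags dwords
 * (CS flags and ring id). The kernel reads them through the chunk_array of
 * pointers referenced by csc->cs.chunks. */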
static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
                                      struct radeon_drm_winsys *ws)
{
    csc->fd = ws->fd;
    csc->nrelocs = 512;
    csc->relocs_bo = (struct radeon_bo**)
                     CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
    if (!csc->relocs_bo) {
        return FALSE;
    }

    csc->relocs = (struct drm_radeon_cs_reloc*)
                  CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
    if (!csc->relocs) {
        FREE(csc->relocs_bo);
        return FALSE;
    }

    csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
    csc->chunks[0].length_dw = 0;
    csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
    csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
    csc->chunks[1].length_dw = 0;
    csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
    csc->chunks[2].length_dw = 2;
    csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags;

    csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
    csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
    csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];

    csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;
    return TRUE;
}

static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
{
    unsigned i;

    for (i = 0; i < csc->crelocs; i++) {
        p_atomic_dec(&csc->relocs_bo[i]->num_cs_references);
        radeon_bo_reference(&csc->relocs_bo[i], NULL);
    }

    csc->crelocs = 0;
    csc->validated_crelocs = 0;
    csc->chunks[0].length_dw = 0;
    csc->chunks[1].length_dw = 0;
    csc->used_gart = 0;
    csc->used_vram = 0;
    memset(csc->is_handle_added, 0, sizeof(csc->is_handle_added));
}

static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
{
    radeon_cs_context_cleanup(csc);
    FREE(csc->relocs_bo);
    FREE(csc->relocs);
}


static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws,
                                                     enum ring_type ring_type,
                                                     struct radeon_winsys_cs_handle *trace_buf)
{
    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
    struct radeon_drm_cs *cs;

    cs = CALLOC_STRUCT(radeon_drm_cs);
    if (!cs) {
        return NULL;
    }
    pipe_semaphore_init(&cs->flush_completed, 1);

    cs->ws = ws;
    cs->trace_buf = (struct radeon_bo*)trace_buf;

    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
        FREE(cs);
        return NULL;
    }
    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
        radeon_destroy_cs_context(&cs->csc1);
        FREE(cs);
        return NULL;
    }

    /* Set the first command buffer as current. */
    cs->csc = &cs->csc1;
    cs->cst = &cs->csc2;
    cs->base.buf = cs->csc->buf;
    cs->base.ring_type = ring_type;

    p_atomic_inc(&ws->num_cs);
    return &cs->base;
}

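/* Append one dword to the current IB. Note that there is no bounds checking
 * here; overflow past RADEON_MAX_CMDBUF_DWORDS is detected and reported at
 * flush time. */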
#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)

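/* Example: if a reloc currently has read_domains = GTT and no write domain,
 * calling this with rd = 0, wd = VRAM yields *added_domains = VRAM, so the
 * caller bumps only the VRAM usage counter for this buffer. */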
static INLINE void update_reloc_domains(struct drm_radeon_cs_reloc *reloc,
                                        enum radeon_bo_domain rd,
                                        enum radeon_bo_domain wd,
                                        enum radeon_bo_domain *added_domains)
{
    *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);

    reloc->read_domains |= rd;
    reloc->write_domain |= wd;
}

int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
{
    struct drm_radeon_cs_reloc *reloc;
    unsigned i;
    unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);

    if (csc->is_handle_added[hash]) {
        i = csc->reloc_indices_hashlist[hash];
        reloc = &csc->relocs[i];
        if (reloc->handle == bo->handle) {
            return i;
        }

        /* Hash collision, look for the BO in the list of relocs linearly. */
        for (i = csc->crelocs; i != 0;) {
            --i;
            reloc = &csc->relocs[i];
            if (reloc->handle == bo->handle) {
                /* Put this reloc in the hash list.
                 * This will prevent additional hash collisions if there are
                 * several consecutive get_reloc calls for the same buffer.
                 *
                 * Example: Assuming buffers A,B,C collide in the hash list,
                 * the following sequence of relocs:
                 *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
                 * will collide here: ^ and here:   ^,
                 * meaning that we should get very few collisions in the end. */
                csc->reloc_indices_hashlist[hash] = i;
                /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
                return i;
            }
        }
    }

    return -1;
}

static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
                                 struct radeon_bo *bo,
                                 enum radeon_bo_usage usage,
                                 enum radeon_bo_domain domains,
                                 enum radeon_bo_domain *added_domains)
{
    struct radeon_cs_context *csc = cs->csc;
    struct drm_radeon_cs_reloc *reloc;
    unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
    bool update_hash = TRUE;
    int i;

    *added_domains = 0;
    if (csc->is_handle_added[hash]) {
        i = csc->reloc_indices_hashlist[hash];
        reloc = &csc->relocs[i];
        if (reloc->handle != bo->handle) {
            /* Hash collision, look for the BO in the list of relocs linearly. */
            for (i = csc->crelocs - 1; i >= 0; i--) {
                reloc = &csc->relocs[i];
                if (reloc->handle == bo->handle) {
                    /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
                    break;
                }
            }
        }

        if (i >= 0) {
            /* On the DMA ring we need to emit one relocation per use of the
             * BO, so each time this function is called we must add the BO to
             * the relocation buffer again.
             *
             * Don't update the hash table for the DMA ring, so that the hash
             * always points to the first relocation of the BO, which is the
             * one the kernel uses for memory placement. The following
             * relocations are ignored by the kernel's placement logic (but
             * still used by the kernel to patch the command stream with the
             * proper buffer offset).
             */
            update_hash = FALSE;
            update_reloc_domains(reloc, rd, wd, added_domains);
            if (cs->base.ring_type != RING_DMA) {
                csc->reloc_indices_hashlist[hash] = i;
                return i;
            }
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->crelocs >= csc->nrelocs) {
        uint32_t size;
        csc->nrelocs += 10;

        size = csc->nrelocs * sizeof(struct radeon_bo*);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->crelocs] = NULL;
    radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->crelocs];
    reloc->handle = bo->handle;
    reloc->read_domains = rd;
    reloc->write_domain = wd;
    reloc->flags = 0;

    csc->is_handle_added[hash] = TRUE;
    if (update_hash) {
        csc->reloc_indices_hashlist[hash] = csc->crelocs;
    }

    csc->chunks[1].length_dw += RELOC_DWORDS;

    *added_domains = rd | wd;
    return csc->crelocs++;
}

static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
                                        struct radeon_winsys_cs_handle *buf,
                                        enum radeon_bo_usage usage,
                                        enum radeon_bo_domain domains)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    enum radeon_bo_domain added_domains;
    unsigned index = radeon_add_reloc(cs, bo, usage, domains, &added_domains);

    if (added_domains & RADEON_DOMAIN_GTT)
        cs->csc->used_gart += bo->base.size;
    if (added_domains & RADEON_DOMAIN_VRAM)
        cs->csc->used_vram += bo->base.size;

    return index;
}

static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        cs->csc->used_gart < cs->ws->info.gart_size * 0.8 &&
        cs->csc->used_vram < cs->ws->info.vram_size * 0.8;

    if (status) {
        cs->csc->validated_crelocs = cs->csc->crelocs;
    } else {
        /* Remove the recently-added relocations. The validation failed with
         * them and the CS is about to be flushed because of that. Keep only
         * the already-validated relocations. */
        unsigned i;

        for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
            p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references);
            radeon_bo_reference(&cs->csc->relocs_bo[i], NULL);
        }
        cs->csc->crelocs = cs->csc->validated_crelocs;

        /* Flush if there are any relocs. Clean up otherwise. */
        if (cs->csc->crelocs) {
            cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC);
        } else {
            radeon_cs_context_cleanup(cs->csc);

            assert(cs->base.cdw == 0);
            if (cs->base.cdw != 0) {
                fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
            }
        }
    }
    return status;
}

static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        (cs->csc->used_gart + gtt) < cs->ws->info.gart_size * 0.7 &&
        (cs->csc->used_vram + vram) < cs->ws->info.vram_size * 0.7;

    return status;
}

static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs,
                                      struct radeon_winsys_cs_handle *buf)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    int index = radeon_get_reloc(cs->csc, bo);

    if (index == -1) {
        fprintf(stderr, "radeon: Cannot get a relocation in %s.\n", __func__);
        return;
    }

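    /* Emit a relocation packet: a PKT3 NOP header (0xc0001000) whose single
     * payload dword the kernel CS parser interprets as a dword offset into
     * the relocation chunk, identifying which reloc patches the preceding
     * packet. */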
    OUT_CS(&cs->base, 0xc0001000);
    OUT_CS(&cs->base, index * RELOC_DWORDS);
}

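/* Submit the CS to the kernel with the DRM_RADEON_CS ioctl. This runs either
 * directly from cs_flush, or asynchronously on the winsys flush thread while
 * the driver is already recording into the other CS context. */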
void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
{
    unsigned i;

    if (drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
                            &csc->cs, sizeof(struct drm_radeon_cs))) {
        if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
            unsigned i;

            fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
            for (i = 0; i < csc->chunks[0].length_dw; i++) {
                fprintf(stderr, "0x%08X\n", csc->buf[i]);
            }
        } else {
            fprintf(stderr, "radeon: The kernel rejected CS, "
                    "see dmesg for more information.\n");
        }
    }

    if (cs->trace_buf) {
        radeon_dump_cs_on_lockup(cs, csc);
    }

    for (i = 0; i < csc->crelocs; i++)
        p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls);

    radeon_cs_context_cleanup(csc);
}

/*
 * Make sure previous submissions of this CS have completed.
 */
void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    /* Wait for any pending ioctl to complete. */
    if (cs->ws->thread) {
        pipe_semaphore_wait(&cs->flush_completed);
        pipe_semaphore_signal(&cs->flush_completed);
    }
}

DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)

static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags, uint32_t cs_trace_id)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->base.ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* Pad the GFX ring to 8 DWs to meet CP fetch alignment requirements;
         * r6xx requires at least 4 DW alignment to avoid a hw bug.
         */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->cdw & 15)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Flip command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    cs->cst->cs_trace_id = cs_trace_id;

    /* If the CS is not empty or overflowed, emit it in a separate thread. */
    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
        unsigned i, crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

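        /* The flags chunk is only passed to the kernel when needed:
         * num_chunks = 2 omits it entirely (a plain GFX submission), while
         * num_chunks = 3 makes the kernel read flags[0] (CS flags) and
         * flags[1] (ring id). */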
        switch (cs->base.ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 2;
            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (cs->ws->thread) {
            pipe_semaphore_wait(&cs->flush_completed);
            radeon_drm_ws_queue_cs(cs->ws, cs);
            if (!(flags & RADEON_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;
}

static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    radeon_drm_cs_sync_flush(rcs);
    pipe_semaphore_destroy(&cs->flush_completed);
    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);
    p_atomic_dec(&cs->ws->num_cs);
    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);
    FREE(cs);
}

static void radeon_drm_cs_set_flush(struct radeon_winsys_cs *rcs,
                                    void (*flush)(void *ctx, unsigned flags),
                                    void *user)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    cs->flush_cs = flush;
    cs->flush_data = user;
}

static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
                                       struct radeon_winsys_cs_handle *_buf,
                                       enum radeon_bo_usage usage)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)_buf;
    int index;

    if (!bo->num_cs_references)
        return FALSE;

    index = radeon_get_reloc(cs->csc, bo);
    if (index == -1)
        return FALSE;

    if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
        return TRUE;
    if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
        return TRUE;

    return FALSE;
}

/* FENCES */

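/* A fence is implemented as a dummy 1-byte GTT buffer added to the CS as a
 * read/write relocation: it becomes busy when the CS is submitted and idle
 * once the CS has executed, so waiting on the fence reduces to polling the
 * buffer's busy status. */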
static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct pb_buffer *fence;

    /* Create a fence, which is a dummy BO. */
    fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE,
                                       RADEON_DOMAIN_GTT);
    /* Add the fence as a dummy relocation. */
    cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence),
                              RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT);
    return (struct pipe_fence_handle*)fence;
}

static bool radeon_fence_wait(struct radeon_winsys *ws,
                              struct pipe_fence_handle *fence,
                              uint64_t timeout)
{
    struct pb_buffer *rfence = (struct pb_buffer*)fence;

    if (timeout == 0)
        return !ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE);

    if (timeout != PIPE_TIMEOUT_INFINITE) {
        int64_t start_time = os_time_get();

        /* Convert to microseconds. */
        timeout /= 1000;

        /* Wait in a loop. */
        while (ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE)) {
            if (os_time_get() - start_time >= timeout) {
                return FALSE;
            }
            os_time_sleep(10);
        }
        return TRUE;
    }

    ws->buffer_wait(rfence, RADEON_USAGE_READWRITE);
    return TRUE;
}

static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src)
{
    pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
}

void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
{
    ws->base.cs_create = radeon_drm_cs_create;
    ws->base.cs_destroy = radeon_drm_cs_destroy;
    ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
    ws->base.cs_validate = radeon_drm_cs_validate;
    ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
    ws->base.cs_write_reloc = radeon_drm_cs_write_reloc;
    ws->base.cs_flush = radeon_drm_cs_flush;
    ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush;
    ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
    ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
    ws->base.cs_create_fence = radeon_cs_create_fence;
    ws->base.fence_wait = radeon_fence_wait;
    ws->base.fence_reference = radeon_fence_reference;
}