radeon_drm_cs.c revision 67aef6dafa29fed008ea6065c425a6a92a651be9
/*
 * Copyright © 2008 Jérôme Glisse
 * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 *
 * Based on work from libdrm_radeon by:
 *      Aapo Tahkola <aet@rasterburn.org>
 *      Nicolai Haehnle <prefect_@gmx.net>
 *      Jérôme Glisse <glisse@freedesktop.org>
 */

/*
    This file replaces libdrm's radeon_cs_gem with our own implementation.
    It's optimized specifically for Radeon DRM.
    Reloc writes and space checking are faster and simpler than their
    counterparts in libdrm (the time complexity of all the functions
    is O(1) in nearly all scenarios, thanks to hashing).

    It works like this:

    cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and
    also adds the size of 'buf' to the used_gart and used_vram winsys variables
    based on the domains, which are simply or'd for the accounting purposes.
    The adding is skipped if the reloc is already present in the list, but it
    still accounts for any newly-referenced domains.

    cs_validate is then called, which just checks:
        used_vram/gart < vram/gart_size * 0.8
    The 0.8 factor allows for some memory fragmentation. If the validation
    fails, the pipe driver flushes the CS and tries the validation again,
    i.e. it validates only that one operation. If it fails again, it drops
    the operation on the floor and prints a nasty message to stderr.
    (done in the pipe driver)

    cs_write_reloc(cs, buf) just writes a reloc that has been added using
    cs_add_reloc. The read_domain and write_domain parameters have been removed,
    because we already specify them in cs_add_reloc.
*/
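/*
    A minimal usage sketch of the flow above, from the point of view of a
    hypothetical pipe driver (the buffer 'buf', the winsys pointer 'ws' and
    the error handling are placeholders, not code from any real driver):

        struct radeon_winsys_cs *cs = ws->cs_create(ws, RING_GFX, NULL);

        ws->cs_add_reloc(cs, buf, RADEON_USAGE_READ, RADEON_DOMAIN_VRAM);
        if (!ws->cs_validate(cs)) {
            // flush and validate the single operation again, as described above
        }
        // ... emit packets referencing 'buf' into cs->buf ...
        ws->cs_write_reloc(cs, buf);
        ws->cs_flush(cs, 0, 0);
*/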

#include "radeon_drm_cs.h"

#include "util/u_memory.h"
#include "os/os_time.h"

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <xf86drm.h>

/*
 * These definitions are copied from radeon_drm.h. Once an updated libdrm
 * is released, we should bump the configure.ac requirement for it and
 * remove the fallback definitions below.
 */
#ifndef RADEON_CHUNK_ID_FLAGS
#define RADEON_CHUNK_ID_FLAGS       0x03

/* The first dword of RADEON_CHUNK_ID_FLAGS is a uint32 of these flags: */
#define RADEON_CS_KEEP_TILING_FLAGS 0x01
#endif

#ifndef RADEON_CS_USE_VM
#define RADEON_CS_USE_VM            0x02
/* The second dword of RADEON_CHUNK_ID_FLAGS is a uint32 that sets the ring type */
#define RADEON_CS_RING_GFX          0
#define RADEON_CS_RING_COMPUTE      1
#endif

#ifndef RADEON_CS_RING_DMA
#define RADEON_CS_RING_DMA          2
#endif

#ifndef RADEON_CS_RING_UVD
#define RADEON_CS_RING_UVD          3
#endif

#ifndef RADEON_CS_RING_VCE
#define RADEON_CS_RING_VCE          4
#endif

#ifndef RADEON_CS_END_OF_FRAME
#define RADEON_CS_END_OF_FRAME      0x04
#endif


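/* Size of one kernel relocation record (struct drm_radeon_cs_reloc:
 * handle, read_domains, write_domain, flags) in dwords. */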
#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))

static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
                                      struct radeon_drm_winsys *ws)
{
    csc->fd = ws->fd;
    csc->nrelocs = 512;
    csc->relocs_bo = (struct radeon_bo**)
                     CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
    if (!csc->relocs_bo) {
        return FALSE;
    }

    csc->relocs = (struct drm_radeon_cs_reloc*)
                  CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
    if (!csc->relocs) {
        FREE(csc->relocs_bo);
        return FALSE;
    }

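    /* Chunk 0 is the IB itself, chunk 1 is the relocation list, and chunk 2
     * holds two flag dwords (CS flags and the ring type). */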
    csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
    csc->chunks[0].length_dw = 0;
    csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
    csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
    csc->chunks[1].length_dw = 0;
    csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
    csc->chunks[2].length_dw = 2;
    csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags;

    csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
    csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
    csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];

    csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;
    return TRUE;
}

static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
{
    unsigned i;

    for (i = 0; i < csc->crelocs; i++) {
        p_atomic_dec(&csc->relocs_bo[i]->num_cs_references);
        radeon_bo_reference(&csc->relocs_bo[i], NULL);
    }

    csc->crelocs = 0;
    csc->validated_crelocs = 0;
    csc->chunks[0].length_dw = 0;
    csc->chunks[1].length_dw = 0;
    csc->used_gart = 0;
    csc->used_vram = 0;
    memset(csc->is_handle_added, 0, sizeof(csc->is_handle_added));
}

static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
{
    radeon_cs_context_cleanup(csc);
    FREE(csc->relocs_bo);
    FREE(csc->relocs);
}


static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws,
                                                     enum ring_type ring_type,
                                                     struct radeon_winsys_cs_handle *trace_buf)
{
    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
    struct radeon_drm_cs *cs;

    cs = CALLOC_STRUCT(radeon_drm_cs);
    if (!cs) {
        return NULL;
    }
    pipe_semaphore_init(&cs->flush_completed, 1);

    cs->ws = ws;
    cs->trace_buf = (struct radeon_bo*)trace_buf;

    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
        FREE(cs);
        return NULL;
    }
    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
        radeon_destroy_cs_context(&cs->csc1);
        FREE(cs);
        return NULL;
    }

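    /* The two contexts are double-buffered: 'csc' is filled by the driver
     * while 'cst' is being submitted, possibly from a separate thread;
     * radeon_drm_cs_flush flips the two. */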
    /* Set the first command buffer as current. */
    cs->csc = &cs->csc1;
    cs->cst = &cs->csc2;
    cs->base.buf = cs->csc->buf;
    cs->base.ring_type = ring_type;

    p_atomic_inc(&ws->num_cs);
    return &cs->base;
}

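/* Append one dword to the command stream; the caller is responsible for
 * making sure there is enough space. */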
#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)

static INLINE void update_reloc_domains(struct drm_radeon_cs_reloc *reloc,
                                        enum radeon_bo_usage usage,
                                        enum radeon_bo_domain new_domain,
                                        enum radeon_bo_domain *added_domains)
{
    enum radeon_bo_domain current = reloc->read_domains | reloc->write_domain;
    enum radeon_bo_domain final;

    /* If there is at least one command which wants the buffer to be in VRAM
     * only, keep it in VRAM. */
    if ((current & new_domain) == RADEON_DOMAIN_VRAM)
        final = RADEON_DOMAIN_VRAM;
    else
        final = current | new_domain;
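    /* For example: current = VRAM and new = VRAM|GTT intersect in VRAM only,
     * so the buffer stays in VRAM and *added_domains ends up 0. With
     * current = GTT and new = VRAM, the domains are merged and
     * *added_domains is VRAM. */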

    *added_domains = final & ~current;

    /* If we have at least one write usage... */
    if (usage & RADEON_USAGE_WRITE || reloc->write_domain) {
        reloc->write_domain = final;
        reloc->read_domains = 0;
    } else {
        /* write_domain is zero */
        reloc->read_domains = final;
    }
}

int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
{
    struct drm_radeon_cs_reloc *reloc;
    unsigned i;
    unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
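    /* The hash is the handle masked to the size of is_handle_added, which
     * must therefore be a power of two for the mask to act as a modulo. */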

    if (csc->is_handle_added[hash]) {
        i = csc->reloc_indices_hashlist[hash];
        reloc = &csc->relocs[i];
        if (reloc->handle == bo->handle) {
            return i;
        }

        /* Hash collision, look for the BO in the list of relocs linearly. */
        for (i = csc->crelocs; i != 0;) {
            --i;
            reloc = &csc->relocs[i];
            if (reloc->handle == bo->handle) {
                /* Put this reloc in the hash list.
                 * This will prevent additional hash collisions if there are
                 * several consecutive get_reloc calls for the same buffer.
                 *
                 * Example: Assuming buffers A,B,C collide in the hash list,
                 * the following sequence of relocs:
                 *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
                 * will collide here: ^ and here:   ^,
                 * meaning that we should get very few collisions in the end. */
                csc->reloc_indices_hashlist[hash] = i;
                /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
                return i;
            }
        }
    }

    return -1;
}

static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
                                 struct radeon_bo *bo,
                                 enum radeon_bo_usage usage,
                                 enum radeon_bo_domain domains,
                                 enum radeon_bo_domain *added_domains)
{
    struct radeon_cs_context *csc = cs->csc;
    struct drm_radeon_cs_reloc *reloc;
    unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
    bool update_hash = TRUE;
    int i;

    *added_domains = 0;
    if (csc->is_handle_added[hash]) {
        i = csc->reloc_indices_hashlist[hash];
        reloc = &csc->relocs[i];
        if (reloc->handle != bo->handle) {
            /* Hash collision, look for the BO in the list of relocs linearly. */
            for (i = csc->crelocs - 1; i >= 0; i--) {
                reloc = &csc->relocs[i];
                if (reloc->handle == bo->handle) {
                    /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
                    break;
                }
            }
        }

        if (i >= 0) {
            /* On the DMA ring, we need to emit one relocation for every use
             * of the BO, so each call to this function must add the BO to
             * the relocation buffer again.
             *
             * Do not update the hash table on the DMA ring, so that the hash
             * always points to the first relocation of the BO, which is the
             * one the kernel uses for memory placement. The following
             * relocations are ignored by the kernel for placement, but are
             * still used to patch the command stream with the proper buffer
             * offset. */
            update_hash = FALSE;
            update_reloc_domains(reloc, usage, domains, added_domains);
            if (cs->base.ring_type != RING_DMA) {
                csc->reloc_indices_hashlist[hash] = i;
                return i;
            }
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->crelocs >= csc->nrelocs) {
        uint32_t size;
        csc->nrelocs += 10;

        size = csc->nrelocs * sizeof(struct radeon_bo*);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->crelocs] = NULL;
    radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->crelocs];
    reloc->handle = bo->handle;
    if (usage & RADEON_USAGE_WRITE)
        reloc->write_domain = domains;
    else
        reloc->read_domains = domains;
    reloc->flags = 0;

    csc->is_handle_added[hash] = TRUE;
    if (update_hash) {
        csc->reloc_indices_hashlist[hash] = csc->crelocs;
    }

    csc->chunks[1].length_dw += RELOC_DWORDS;

    *added_domains = domains;
    return csc->crelocs++;
}

static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
                                        struct radeon_winsys_cs_handle *buf,
                                        enum radeon_bo_usage usage,
                                        enum radeon_bo_domain domains)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    enum radeon_bo_domain added_domains;
    unsigned index = radeon_add_reloc(cs, bo, usage, domains, &added_domains);

    if (added_domains & RADEON_DOMAIN_GTT)
        cs->csc->used_gart += bo->base.size;
    if (added_domains & RADEON_DOMAIN_VRAM)
        cs->csc->used_vram += bo->base.size;

    return index;
}

static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        cs->csc->used_gart < cs->ws->info.gart_size * 0.8 &&
        cs->csc->used_vram < cs->ws->info.vram_size * 0.8;

    if (status) {
        cs->csc->validated_crelocs = cs->csc->crelocs;
    } else {
        /* Remove recently-added relocations. The validation failed with them
         * and the CS is about to be flushed because of that. Keep only
         * the already-validated relocations. */
        unsigned i;

        for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
            p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references);
            radeon_bo_reference(&cs->csc->relocs_bo[i], NULL);
        }
        cs->csc->crelocs = cs->csc->validated_crelocs;

        /* Flush if there are any relocs. Clean up otherwise. */
        if (cs->csc->crelocs) {
            cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC);
        } else {
            radeon_cs_context_cleanup(cs->csc);

            assert(cs->base.cdw == 0);
            if (cs->base.cdw != 0) {
                fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
            }
        }
    }
    return status;
}

static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        (cs->csc->used_gart + gtt) < cs->ws->info.gart_size * 0.7 &&
        (cs->csc->used_vram + vram) < cs->ws->info.vram_size * 0.7;

    return status;
}

static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs,
                                      struct radeon_winsys_cs_handle *buf)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    int index = radeon_get_reloc(cs->csc, bo);

    if (index == -1) {
        fprintf(stderr, "radeon: Cannot get a relocation in %s.\n", __func__);
        return;
    }

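    /* Write the relocation as a NOP packet whose payload is the dword offset
     * of the entry in the relocation chunk; the kernel CS checker uses it to
     * patch the command stream with the final buffer address. */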
    OUT_CS(&cs->base, 0xc0001000);
    OUT_CS(&cs->base, index * RELOC_DWORDS);
}

void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
{
    unsigned i;

    if (drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
                            &csc->cs, sizeof(struct drm_radeon_cs))) {
        if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
            unsigned i;

            fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
            for (i = 0; i < csc->chunks[0].length_dw; i++) {
                fprintf(stderr, "0x%08X\n", csc->buf[i]);
            }
        } else {
            fprintf(stderr, "radeon: The kernel rejected CS, "
                    "see dmesg for more information.\n");
        }
    }

    if (cs->trace_buf) {
        radeon_dump_cs_on_lockup(cs, csc);
    }

    for (i = 0; i < csc->crelocs; i++)
        p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls);

    radeon_cs_context_cleanup(csc);
}

/*
 * Make sure previous submissions of this CS have completed.
 */
void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    /* Wait for any pending ioctl to complete. */
    if (cs->ws->thread) {
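        /* The flush thread signals the semaphore when the ioctl is done;
         * waiting and immediately re-signaling keeps it available for any
         * other caller that also wants to synchronize. */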
        pipe_semaphore_wait(&cs->flush_completed);
        pipe_semaphore_signal(&cs->flush_completed);
    }
}

DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)

static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags, uint32_t cs_trace_id)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->base.ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
         * r6xx requires at least 4-dword alignment to avoid a hw bug.
         */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->cdw & 15)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Flip command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    cs->cst->cs_trace_id = cs_trace_id;

    /* If the CS is not empty and not overflowed, emit it in a separate thread. */
    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
        unsigned i, crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        switch (cs->base.ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 2;
            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (cs->ws->thread) {
            pipe_semaphore_wait(&cs->flush_completed);
            radeon_drm_ws_queue_cs(cs->ws, cs);
            if (!(flags & RADEON_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;
}

static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    radeon_drm_cs_sync_flush(rcs);
    pipe_semaphore_destroy(&cs->flush_completed);
    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);
    p_atomic_dec(&cs->ws->num_cs);
    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);
    FREE(cs);
}

static void radeon_drm_cs_set_flush(struct radeon_winsys_cs *rcs,
                                    void (*flush)(void *ctx, unsigned flags),
                                    void *user)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    cs->flush_cs = flush;
    cs->flush_data = user;
}

static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
                                       struct radeon_winsys_cs_handle *_buf,
                                       enum radeon_bo_usage usage)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)_buf;
    int index;

    if (!bo->num_cs_references)
        return FALSE;

    index = radeon_get_reloc(cs->csc, bo);
    if (index == -1)
        return FALSE;

    if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
        return TRUE;
    if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
        return TRUE;

    return FALSE;
}

/* FENCES */

static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct pb_buffer *fence;

    /* Create a fence, which is a dummy BO. */
    fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE,
                                       RADEON_DOMAIN_GTT);
    /* Add the fence as a dummy relocation. */
    cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence),
                              RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT);
    return (struct pipe_fence_handle*)fence;
}
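
/* A hypothetical usage sketch of the fence API (driver-side code, not part
 * of this file; 'rcs' and 'ws' are assumed from the surrounding context):
 *
 *     struct pipe_fence_handle *f = ws->base.cs_create_fence(rcs);
 *     ws->base.cs_flush(rcs, RADEON_FLUSH_ASYNC, 0);
 *     if (ws->base.fence_wait(&ws->base, f, PIPE_TIMEOUT_INFINITE)) {
 *         // the CS containing the dummy BO has finished on the GPU
 *     }
 */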

static bool radeon_fence_wait(struct radeon_winsys *ws,
                              struct pipe_fence_handle *fence,
                              uint64_t timeout)
{
    struct pb_buffer *rfence = (struct pb_buffer*)fence;

    if (timeout == 0)
        return !ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE);

    if (timeout != PIPE_TIMEOUT_INFINITE) {
        int64_t start_time = os_time_get();

        /* Convert from nanoseconds to microseconds. */
        timeout /= 1000;

        /* Wait in a loop. */
        while (ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE)) {
            if (os_time_get() - start_time >= timeout) {
                return FALSE;
            }
            os_time_sleep(10);
        }
        return TRUE;
    }

    ws->buffer_wait(rfence, RADEON_USAGE_READWRITE);
    return TRUE;
}

static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src)
{
    pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
}

void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
{
    ws->base.cs_create = radeon_drm_cs_create;
    ws->base.cs_destroy = radeon_drm_cs_destroy;
    ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
    ws->base.cs_validate = radeon_drm_cs_validate;
    ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
    ws->base.cs_write_reloc = radeon_drm_cs_write_reloc;
    ws->base.cs_flush = radeon_drm_cs_flush;
    ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush;
    ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
    ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
    ws->base.cs_create_fence = radeon_cs_create_fence;
    ws->base.fence_wait = radeon_fence_wait;
    ws->base.fence_reference = radeon_fence_reference;
}