radeon_drm_cs.c revision 6ccab620a0e7364ab6c0d902b3ddf58ee988f7fa
1/*
2 * Copyright © 2008 Jérôme Glisse
3 * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
16 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
18 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * The above copyright notice and this permission notice (including the
24 * next paragraph) shall be included in all copies or substantial portions
25 * of the Software.
26 */
27/*
28 * Authors:
29 *      Marek Olšák <maraeo@gmail.com>
30 *
31 * Based on work from libdrm_radeon by:
32 *      Aapo Tahkola <aet@rasterburn.org>
33 *      Nicolai Haehnle <prefect_@gmx.net>
34 *      Jérôme Glisse <glisse@freedesktop.org>
35 */
36
37/*
    This file replaces libdrm's radeon_cs_gem with our own implementation.
39    It's optimized specifically for r300g, but r600g could use it as well.
40    Reloc writes and space checking are faster and simpler than their
41    counterparts in libdrm (the time complexity of all the functions
42    is O(1) in nearly all scenarios, thanks to hashing).
43
44    It works like this:
45
46    cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and
47    also adds the size of 'buf' to the used_gart and used_vram winsys variables
48    based on the domains, which are simply or'd for the accounting purposes.
    The adding is skipped if the reloc is already present in the list, but it
    still accounts for any newly-referenced domains.
51
52    cs_validate is then called, which just checks:
53        used_vram/gart < vram/gart_size * 0.8
54    The 0.8 number allows for some memory fragmentation. If the validation
    fails, the pipe driver flushes CS and tries to do the validation again,
56    i.e. it validates only that one operation. If it fails again, it drops
57    the operation on the floor and prints some nasty message to stderr.
58    (done in the pipe driver)
59
60    cs_write_reloc(cs, buf) just writes a reloc that has been added using
61    cs_add_reloc. The read_domain and write_domain parameters have been removed,
62    because we already specify them in cs_add_reloc.
63*/
64
65#include "radeon_drm_cs.h"
66
67#include "util/u_memory.h"
68
69#include <stdio.h>
70#include <stdlib.h>
71#include <stdint.h>
72#include <xf86drm.h>
73
/* Size of one relocation entry (struct drm_radeon_cs_reloc), in dwords. */
#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))
75
76static struct r300_winsys_cs *radeon_drm_cs_create(struct r300_winsys_screen *rws)
77{
78    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
79    struct radeon_drm_cs *cs;
80
81    cs = CALLOC_STRUCT(radeon_drm_cs);
82    if (!cs) {
83        return NULL;
84    }
85
86    cs->ws = ws;
87    cs->nrelocs = 256;
88    cs->relocs_bo = (struct radeon_bo**)
89                     CALLOC(1, cs->nrelocs * sizeof(struct radeon_bo*));
90    if (!cs->relocs_bo) {
91        FREE(cs);
92        return NULL;
93    }
94
95    cs->relocs = (struct drm_radeon_cs_reloc*)
96                 CALLOC(1, cs->nrelocs * sizeof(struct drm_radeon_cs_reloc));
97    if (!cs->relocs) {
98        FREE(cs->relocs_bo);
99        FREE(cs);
100        return NULL;
101    }
102
103    cs->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
104    cs->chunks[0].length_dw = 0;
105    cs->chunks[0].chunk_data = (uint64_t)(uintptr_t)cs->base.buf;
106    cs->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
107    cs->chunks[1].length_dw = 0;
108    cs->chunks[1].chunk_data = (uint64_t)(uintptr_t)cs->relocs;
109    return &cs->base;
110}
111
/* Append one dword to the command buffer of 'cs' and advance its write
 * position (cdw). 'cs' is evaluated twice, so it must not have side effects.
 * The whole expansion is parenthesized so the macro behaves as a single
 * expression wherever it is used. */
#define OUT_CS(cs, value) ((cs)->buf[(cs)->cdw++] = (value))
113
114static inline void update_domains(struct drm_radeon_cs_reloc *reloc,
115                                  enum r300_buffer_domain rd,
116                                  enum r300_buffer_domain wd,
117                                  enum r300_buffer_domain *added_domains)
118{
119    *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);
120
121    if (reloc->read_domains & wd) {
122        reloc->read_domains = rd;
123        reloc->write_domain = wd;
124    } else if (rd & reloc->write_domain) {
125        reloc->read_domains = rd;
126        reloc->write_domain |= wd;
127    } else {
128        reloc->read_domains |= rd;
129        reloc->write_domain |= wd;
130    }
131}
132
133int radeon_get_reloc(struct radeon_drm_cs *cs, struct radeon_bo *bo)
134{
135    struct drm_radeon_cs_reloc *reloc;
136    unsigned i;
137    unsigned hash = bo->handle & (sizeof(cs->is_handle_added)-1);
138
139    if (cs->is_handle_added[hash]) {
140        reloc = cs->relocs_hashlist[hash];
141        if (reloc->handle == bo->handle) {
142            return cs->reloc_indices_hashlist[hash];
143        }
144
145        /* Hash collision, look for the BO in the list of relocs linearly. */
146        for (i = cs->crelocs; i != 0;) {
147            --i;
148            reloc = &cs->relocs[i];
149            if (reloc->handle == bo->handle) {
150                /* Put this reloc in the hash list.
151                 * This will prevent additional hash collisions if there are
152                 * several subsequent get_reloc calls of the same buffer.
153                 *
154                 * Example: Assuming buffers A,B,C collide in the hash list,
155                 * the following sequence of relocs:
156                 *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
157                 * will collide here: ^ and here:   ^,
158                 * meaning that we should get very few collisions in the end. */
159                cs->relocs_hashlist[hash] = reloc;
160                cs->reloc_indices_hashlist[hash] = i;
161                /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
162                return i;
163            }
164        }
165    }
166
167    return -1;
168}
169
170static void radeon_add_reloc(struct radeon_drm_cs *cs,
171                             struct radeon_bo *bo,
172                             enum r300_buffer_domain rd,
173                             enum r300_buffer_domain wd,
174                             enum r300_buffer_domain *added_domains)
175{
176    struct drm_radeon_cs_reloc *reloc;
177    unsigned i;
178    unsigned hash = bo->handle & (sizeof(cs->is_handle_added)-1);
179
180    if (cs->is_handle_added[hash]) {
181        reloc = cs->relocs_hashlist[hash];
182        if (reloc->handle == bo->handle) {
183            update_domains(reloc, rd, wd, added_domains);
184            return;
185        }
186
187        /* Hash collision, look for the BO in the list of relocs linearly. */
188        for (i = cs->crelocs; i != 0;) {
189            --i;
190            reloc = &cs->relocs[i];
191            if (reloc->handle == bo->handle) {
192                update_domains(reloc, rd, wd, added_domains);
193
194                cs->relocs_hashlist[hash] = reloc;
195                cs->reloc_indices_hashlist[hash] = i;
196                /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
197                return;
198            }
199        }
200    }
201
202    /* New relocation, check if the backing array is large enough. */
203    if (cs->crelocs >= cs->nrelocs) {
204        uint32_t size;
205        cs->nrelocs += 10;
206
207        size = cs->nrelocs * sizeof(struct radeon_bo*);
208        cs->relocs_bo = (struct radeon_bo**)realloc(cs->relocs_bo, size);
209
210        size = cs->nrelocs * sizeof(struct drm_radeon_cs_reloc);
211        cs->relocs = (struct drm_radeon_cs_reloc*)realloc(cs->relocs, size);
212
213        cs->chunks[1].chunk_data = (uint64_t)(uintptr_t)cs->relocs;
214    }
215
216    /* Initialize the new relocation. */
217    radeon_bo_ref(bo);
218    cs->relocs_bo[cs->crelocs] = bo;
219    reloc = &cs->relocs[cs->crelocs];
220    reloc->handle = bo->handle;
221    reloc->read_domains = rd;
222    reloc->write_domain = wd;
223    reloc->flags = 0;
224
225    cs->is_handle_added[hash] = TRUE;
226    cs->relocs_hashlist[hash] = reloc;
227    cs->reloc_indices_hashlist[hash] = cs->crelocs;
228
229    cs->chunks[1].length_dw += RELOC_DWORDS;
230    cs->crelocs++;
231
232    *added_domains = rd | wd;
233}
234
235static void radeon_drm_cs_add_reloc(struct r300_winsys_cs *rcs,
236                                    struct r300_winsys_cs_handle *buf,
237                                    enum r300_buffer_domain rd,
238                                    enum r300_buffer_domain wd)
239{
240    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
241    struct radeon_bo *bo = (struct radeon_bo*)buf;
242    enum r300_buffer_domain added_domains;
243
244    radeon_add_reloc(cs, bo, rd, wd, &added_domains);
245
246    if (!added_domains)
247        return;
248
249    if (added_domains & R300_DOMAIN_GTT)
250        cs->used_gart += bo->size;
251    if (added_domains & R300_DOMAIN_VRAM)
252        cs->used_vram += bo->size;
253}
254
255static boolean radeon_drm_cs_validate(struct r300_winsys_cs *rcs)
256{
257    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
258
259    return cs->used_gart < cs->ws->gart_size * 0.8 &&
260           cs->used_vram < cs->ws->vram_size * 0.8;
261}
262
263static void radeon_drm_cs_write_reloc(struct r300_winsys_cs *rcs,
264                                      struct r300_winsys_cs_handle *buf)
265{
266    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
267    struct radeon_bo *bo = (struct radeon_bo*)buf;
268
269    unsigned index = radeon_get_reloc(cs, bo);
270
271    if (index == -1) {
272        fprintf(stderr, "r300: Cannot get a relocation in %s.\n", __func__);
273        return;
274    }
275
276    OUT_CS(&cs->base, 0xc0001000);
277    OUT_CS(&cs->base, index * RELOC_DWORDS);
278}
279
280static void radeon_drm_cs_emit(struct r300_winsys_cs *rcs)
281{
282    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
283    uint64_t chunk_array[2];
284    unsigned i;
285    int r;
286
287    if (cs->base.cdw) {
288        /* Prepare the arguments. */
289        cs->chunks[0].length_dw = cs->base.cdw;
290
291        chunk_array[0] = (uint64_t)(uintptr_t)&cs->chunks[0];
292        chunk_array[1] = (uint64_t)(uintptr_t)&cs->chunks[1];
293
294        cs->cs.num_chunks = 2;
295        cs->cs.chunks = (uint64_t)(uintptr_t)chunk_array;
296
297        /* Emit. */
298        r = drmCommandWriteRead(cs->ws->fd, DRM_RADEON_CS,
299                                &cs->cs, sizeof(struct drm_radeon_cs));
300        if (r) {
301            if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
302                fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
303                fprintf(stderr, "VENDORID:DEVICEID 0x%04X:0x%04X\n", 0x1002,
304                        cs->ws->pci_id);
305                for (i = 0; i < cs->base.cdw; i++) {
306                    fprintf(stderr, "0x%08X\n", cs->base.buf[i]);
307                }
308            } else {
309                fprintf(stderr, "radeon: The kernel rejected CS, "
310                                "see dmesg for more information.\n");
311            }
312        }
313    }
314
315    /* Unreference buffers, cleanup. */
316    for (i = 0; i < cs->crelocs; i++) {
317        radeon_bo_unref(cs->relocs_bo[i]);
318        cs->relocs_bo[i] = NULL;
319    }
320
321    cs->base.cdw = 0;
322    cs->crelocs = 0;
323    cs->chunks[0].length_dw = 0;
324    cs->chunks[1].length_dw = 0;
325    cs->used_gart = 0;
326    cs->used_vram = 0;
327    memset(cs->is_handle_added, 0, sizeof(cs->is_handle_added));
328}
329
330static void radeon_drm_cs_destroy(struct r300_winsys_cs *rcs)
331{
332    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
333    FREE(cs->relocs_bo);
334    FREE(cs->relocs);
335    FREE(cs);
336}
337
338static void radeon_drm_cs_set_flush(struct r300_winsys_cs *rcs,
339                                    void (*flush)(void *), void *user)
340{
341    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
342    cs->flush_cs = flush;
343    cs->flush_data = user;
344}
345
346static boolean radeon_bo_is_referenced(struct r300_winsys_cs *rcs,
347                                       struct r300_winsys_cs_handle *_buf)
348{
349    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
350    struct radeon_bo *bo = (struct radeon_bo*)_buf;
351
352    return radeon_bo_is_referenced_by_cs(cs, bo);
353}
354
355void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
356{
357    ws->base.cs_create = radeon_drm_cs_create;
358    ws->base.cs_destroy = radeon_drm_cs_destroy;
359    ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
360    ws->base.cs_validate = radeon_drm_cs_validate;
361    ws->base.cs_write_reloc = radeon_drm_cs_write_reloc;
362    ws->base.cs_flush = radeon_drm_cs_emit;
363    ws->base.cs_set_flush = radeon_drm_cs_set_flush;
364    ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
365}
366