radeon_dma.c revision 06d3732a9094030fc33120f16f162e0d405f132c
1/************************************************************************** 2 3Copyright (C) 2004 Nicolai Haehnle. 4Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. 5 6The Weather Channel (TM) funded Tungsten Graphics to develop the 7initial release of the Radeon 8500 driver under the XFree86 license. 8This notice must be preserved. 9 10All Rights Reserved. 11 12Permission is hereby granted, free of charge, to any person obtaining a 13copy of this software and associated documentation files (the "Software"), 14to deal in the Software without restriction, including without limitation 15on the rights to use, copy, modify, merge, publish, distribute, sub 16license, and/or sell copies of the Software, and to permit persons to whom 17the Software is furnished to do so, subject to the following conditions: 18 19The above copyright notice and this permission notice (including the next 20paragraph) shall be included in all copies or substantial portions of the 21Software. 22 23THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 26ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 27DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 28OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 29USE OR OTHER DEALINGS IN THE SOFTWARE. 
**************************************************************************/

#include <errno.h>
#include "radeon_common.h"
#include "main/simple_list.h"

/* COPY_DWORDS(dst, src, nr): copy 'nr' 32-bit words from 'src' to 'dst'.
 *
 * On x86 this is a "rep ; movsl" string copy.  NOTE(review): the asm ties
 * both the ECX and ESI outputs to the same dummy __tmp and re-lists
 * dst/src as plain "D"/"S" inputs alongside the matching outputs — this
 * is historical and fragile; confirm against GCC extended-asm constraint
 * rules before touching it.  The generic fallback also advances 'dst' by
 * 'nr' words as a side effect, which the asm version mirrors via the
 * "=D" output; callers below do not rely on that side effect.
 */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )		\
do {						\
	int __tmp;				\
	__asm__ __volatile__( "rep ; movsl"	\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
			      : "0" (nr),	\
				"D" ((long)dst),	\
				"S" ((long)src) );	\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )		\
do {						\
	int j;					\
	for ( j = 0 ; j < nr ; j++ )		\
		dst[j] = ((int *)src)[j];	\
	dst += nr;				\
} while (0)
#endif

/* Gather 'count' one-dword (4-byte) vertex elements from 'data' into the
 * contiguous buffer 'out'.  'stride' is the byte distance between source
 * elements; a stride of 4 (tightly packed) takes the fast block-copy path.
 * Note: arithmetic on the GLvoid* 'data' relies on the GCC extension that
 * treats sizeof(void) as 1.
 */
void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 4)
		COPY_DWORDS(out, data, count);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out++;
			data += stride;
		}
}

/* Same as radeonEmitVec4, but for two-dword (8-byte) elements; stride 8
 * means tightly packed and uses the block copy.
 */
void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 8)
		COPY_DWORDS(out, data, count * 2);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out += 2;
			data += stride;
		}
}

/* Same as radeonEmitVec4, but for three-dword (12-byte) elements; stride
 * 12 means tightly packed and uses the block copy.
 */
void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 12) {
		COPY_DWORDS(out, data, count * 3);
	}
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out += 3;
			data += stride;
		}
}
/* Same as radeonEmitVec4, but for four-dword (16-byte) elements; stride
 * 16 means tightly packed and uses the block copy.
 */
void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 16)
		COPY_DWORDS(out, data, count * 4);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out[3] = *(int *)(data + 12);
			out += 4;
			data += stride;
		}
}

/* Upload a client vertex array into a freshly allocated DMA region and
 * fill in the AOS descriptor (bo/offset/components/stride/count) used by
 * the TCL emit paths.  'size' is the number of dwords per element.
 * stride == 0 requests a single constant element: only one element is
 * copied and aos->stride is set to 0.
 */
void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
			 const GLvoid * data, int size, int stride, int count)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	uint32_t *out;

	if (stride == 0) {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
		count = 1;
		aos->stride = 0;
	} else {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
		aos->stride = size;
	}

	aos->components = size;
	aos->count = count;

	/* Map for writing (second argument is the write flag) and copy the
	 * elements with the size-specific gather helper. */
	radeon_bo_map(aos->bo, 1);
	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
	switch (size) {
	case 1: radeonEmitVec4(out, data, stride, count); break;
	case 2: radeonEmitVec8(out, data, stride, count); break;
	case 3: radeonEmitVec12(out, data, stride, count); break;
	case 4: radeonEmitVec16(out, data, stride, count); break;
	default:
		assert(0);
		break;
	}
	radeon_bo_unmap(aos->bo);
}

/* Initialize the three DMA buffer lists (free / wait / reserved) and the
 * default allocation size.
 */
void radeon_init_dma(radeonContextPtr rmesa)
{
	make_empty_list(&rmesa->dma.free);
	make_empty_list(&rmesa->dma.wait);
	make_empty_list(&rmesa->dma.reserved);
	rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}

/* Make a buffer of at least 'size' bytes the current DMA region (head of
 * the reserved list), either by recycling a large-enough BO from the free
 * list or by opening a new one.
 */
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
	struct radeon_dma_bo *dma_bo = NULL;
	/* we set minimum sizes to at least requested size
	   aligned to next 16 bytes. */
	if (size > rmesa->dma.minimum_size)
		rmesa->dma.minimum_size = (size + 15) & (~15);

	radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %d\n",
			__FUNCTION__, size, rmesa->dma.minimum_size);


	if (is_empty_list(&rmesa->dma.free)
	      || last_elem(&rmesa->dma.free)->bo->size < size) {
		dma_bo = CALLOC_STRUCT(radeon_dma_bo);
		assert(dma_bo);

again_alloc:
		/* Open in GTT; on failure flush the command buffer (which may
		 * release BO memory) and retry until it succeeds. */
		dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
					    0, rmesa->dma.minimum_size, 4,
					    RADEON_GEM_DOMAIN_GTT, 0);

		if (!dma_bo->bo) {
			rcommonFlushCmdBuf(rmesa, __FUNCTION__);
			goto again_alloc;
		}
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	} else {
		/* We push and pop buffers from end of list so we can keep
		   counter on unused buffers for later freeing them from
		   begin of list */
		dma_bo = last_elem(&rmesa->dma.free);
		assert(dma_bo->bo->cref == 1);
		remove_from_list(dma_bo);
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	}

	rmesa->dma.current_used = 0;
	rmesa->dma.current_vertexptr = 0;

	if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
					  first_elem(&rmesa->dma.reserved)->bo,
					  RADEON_GEM_DOMAIN_GTT, 0))
		fprintf(stderr,"failure to revalidate BOs - badness\n");

	if (is_empty_list(&rmesa->dma.reserved)) {
		/* Cmd buff have been flushed in radeon_revalidate_bos */
		/* NOTE(review): this goto jumps back into the first branch even
		 * when 'dma_bo' came from the free-list path, overwriting
		 * dma_bo->bo without dropping the old reference and re-running
		 * insert_at_head on a node the flush may have moved — looks
		 * like a potential leak/list corruption on this rare path;
		 * verify against the flush/release code before relying on it. */
		goto again_alloc;
	}
}

/* Allocates a region from rmesa->dma.current.  If there isn't enough
 * space in current, grab a new buffer (and discard what was left of current)
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
			  struct radeon_bo **pbo, int *poffset,
			  int bytes, int alignment)
{
	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

	/* Flush any pending software-TCL primitive first; it shares the
	 * current region. */
	if (rmesa->dma.flush)
		rmesa->dma.flush(rmesa->glCtx);

	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

	/* Align the allocation start.  'alignment' must be a power of two. */
	alignment--;
	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

	if (is_empty_list(&rmesa->dma.reserved)
	      || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
		radeonRefillCurrentDmaRegion(rmesa, bytes);

	*poffset = rmesa->dma.current_used;
	*pbo = first_elem(&rmesa->dma.reserved)->bo;
	/* Caller owns a reference on *pbo and must unref it when done. */
	radeon_bo_ref(*pbo);

	/* Always align to at least 16 bytes */
	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;

	assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}

/* Unconditionally free every buffer on all three DMA lists (context
 * teardown path).
 */
void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	if (RADEON_DEBUG & RADEON_DMA)
		fprintf(stderr, "%s\n", __FUNCTION__);

	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}

/* Give back the last 'return_bytes' bytes of the current region so the
 * next allocation can reuse them.  No-op when nothing is reserved.
 */
void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
	if (is_empty_list(&rmesa->dma.reserved))
		return;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n",
			__FUNCTION__, return_bytes);
	rmesa->dma.current_used -= return_bytes;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}

/* Return nonzero when the GPU is no longer using 'bo'.  A kernel/libdrm
 * without the busy query reports -EINVAL, which is treated as idle (with
 * a one-time warning).
 */
static int radeon_bo_is_idle(struct radeon_bo* bo)
{
	uint32_t domain;
	int ret = radeon_bo_is_busy(bo, &domain);
	if (ret == -EINVAL) {
		WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
			"This may cause small performance drop for you.\n");
	}
	return ret != -EBUSY;
}

/* Age the DMA buffer lists: reserved -> wait -> free -> released.  The
 * expire counter stored on the free-list head acts as a logical clock;
 * buffers idle for DMA_BO_FREE_TIME ticks are freed, and buffers smaller
 * than the current minimum_size are dropped immediately.
 */
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	/* Advance the clock once per call; 'time' is "now", 'expire_at' is
	 * the tick at which buffers stamped now will be freed. */
	const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
	const int time = rmesa->dma.free.expire_counter;

	if (RADEON_DEBUG & RADEON_DMA) {
		size_t free = 0,
		       wait = 0,
		       reserved = 0;
		foreach(dma_bo, &rmesa->dma.free)
			++free;

		foreach(dma_bo, &rmesa->dma.wait)
			++wait;

		foreach(dma_bo, &rmesa->dma.reserved)
			++reserved;

		/* NOTE(review): minimum_size is printed with %zu here but with
		 * %d in radeonRefillCurrentDmaRegion — confirm the field's type
		 * and make the format specifiers agree. */
		fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
			__FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
	}

	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
		/* request updated cs processing information from kernel */
		legacy_track_pending(rmesa->radeonScreen->bom, 0);
	}
	/* move waiting bos to free list.
	   wait list provides gpu time to handle data before reuse */
	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		/* An entry still stamped with the current tick never left the
		 * wait list through the normal aging path. */
		if (dma_bo->expire_counter == time) {
			WARN_ONCE("Leaking dma buffer object!\n");
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		/* free objects that are too small to be used because of large request */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		if (!radeon_bo_is_idle(dma_bo->bo))
			continue;
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.free, dma_bo);
	}

	/* move reserved to wait list */
	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		/* free objects that are too small to be used because of large request */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.wait, dma_bo);
	}

	/* free bos that have been unused for some time */
	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		/* The free list is kept in stamp order, so stop at the first
		 * entry that has not yet expired. */
		if (dma_bo->expire_counter != time)
			break;
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

}


/* Flush vertices in the current dma region.
387 */ 388void rcommon_flush_last_swtcl_prim( GLcontext *ctx ) 389{ 390 radeonContextPtr rmesa = RADEON_CONTEXT(ctx); 391 struct radeon_dma *dma = &rmesa->dma; 392 393 if (RADEON_DEBUG & RADEON_IOCTL) 394 fprintf(stderr, "%s\n", __FUNCTION__); 395 dma->flush = NULL; 396 397 radeon_bo_unmap(rmesa->swtcl.bo); 398 399 if (!is_empty_list(&dma->reserved)) { 400 GLuint current_offset = dma->current_used; 401 402 assert (dma->current_used + 403 rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == 404 dma->current_vertexptr); 405 406 if (dma->current_used != dma->current_vertexptr) { 407 dma->current_used = dma->current_vertexptr; 408 409 rmesa->vtbl.swtcl_flush(ctx, current_offset); 410 } 411 rmesa->swtcl.numverts = 0; 412 } 413 radeon_bo_unref(rmesa->swtcl.bo); 414 rmesa->swtcl.bo = NULL; 415} 416/* Alloc space in the current dma region. 417 */ 418void * 419rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize ) 420{ 421 GLuint bytes = vsize * nverts; 422 void *head; 423 if (RADEON_DEBUG & RADEON_IOCTL) 424 fprintf(stderr, "%s\n", __FUNCTION__); 425 426 if(is_empty_list(&rmesa->dma.reserved) 427 ||rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) { 428 if (rmesa->dma.flush) { 429 rmesa->dma.flush(rmesa->glCtx); 430 } 431 432 radeonRefillCurrentDmaRegion(rmesa, bytes); 433 434 return NULL; 435 } 436 437 if (!rmesa->dma.flush) { 438 /* if cmdbuf flushed DMA restart */ 439 rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; 440 rmesa->dma.flush = rcommon_flush_last_swtcl_prim; 441 } 442 443 ASSERT( vsize == rmesa->swtcl.vertex_size * 4 ); 444 ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim ); 445 ASSERT( rmesa->dma.current_used + 446 rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == 447 rmesa->dma.current_vertexptr ); 448 449 if (!rmesa->swtcl.bo) { 450 rmesa->swtcl.bo = first_elem(&rmesa->dma.reserved)->bo; 451 radeon_bo_ref(rmesa->swtcl.bo); 452 radeon_bo_map(rmesa->swtcl.bo, 1); 453 } 454 455 head 
= (rmesa->swtcl.bo->ptr + rmesa->dma.current_vertexptr); 456 rmesa->dma.current_vertexptr += bytes; 457 rmesa->swtcl.numverts += nverts; 458 return head; 459} 460 461void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs ) 462{ 463 radeonContextPtr radeon = RADEON_CONTEXT( ctx ); 464 int i; 465 if (RADEON_DEBUG & RADEON_IOCTL) 466 fprintf(stderr, "%s\n", __FUNCTION__); 467 468 if (radeon->dma.flush) { 469 radeon->dma.flush(radeon->glCtx); 470 } 471 for (i = 0; i < radeon->tcl.aos_count; i++) { 472 if (radeon->tcl.aos[i].bo) { 473 radeon_bo_unref(radeon->tcl.aos[i].bo); 474 radeon->tcl.aos[i].bo = NULL; 475 476 } 477 } 478} 479