radeon_dma.c revision 1c28073fdfb56a241424c739b57845f47fa05002
/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

#include <errno.h>
#include "radeon_common.h"
#include "main/simple_list.h"

#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
			      : "0" (nr),				\
				"D" ((long)dst),			\
				"S" ((long)src) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )		\
do {						\
	int j;					\
	for ( j = 0 ; j < nr ; j++ )		\
		dst[j] = ((int *)src)[j];	\
	dst += nr;				\
} while (0)
#endif

void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 4)
		COPY_DWORDS(out, data, count);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out++;
			data += stride;
		}
}

void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 8)
		COPY_DWORDS(out, data, count * 2);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out += 2;
			data += stride;
		}
}

void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 12) {
		COPY_DWORDS(out, data, count * 3);
	}
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out += 3;
			data += stride;
		}
}

void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 16)
		COPY_DWORDS(out, data, count * 4);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out[3] = *(int *)(data + 12);
			out += 4;
			data += stride;
		}
}

void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
			 const GLvoid * data, int size, int stride, int count)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	uint32_t *out;

	if (stride == 0) {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
		count = 1;
		aos->stride = 0;
	} else {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
		aos->stride = size;
	}

	aos->components = size;
	aos->count = count;

	radeon_bo_map(aos->bo, 1);
	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
	switch (size) {
	case 1: radeonEmitVec4(out, data, stride, count); break;
	case 2: radeonEmitVec8(out, data, stride, count); break;
	case 3: radeonEmitVec12(out, data, stride, count); break;
	case 4: radeonEmitVec16(out, data, stride, count); break;
	default:
		assert(0);
		break;
	}
	radeon_bo_unmap(aos->bo);
}
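/*
 * Usage sketch (hypothetical caller, not part of this file): the
 * radeonEmitVec* helpers above repack a possibly strided attribute array
 * into `size` tightly packed dwords per element, and rcommon_emit_vector()
 * wraps that with DMA allocation and BO mapping.  For example, uploading a
 * vec3 position attribute interleaved in the source at a 24-byte stride:
 *
 *   rcommon_emit_vector(ctx, &rmesa->tcl.aos[nr++],
 *                       vertex_positions,   // hypothetical source array
 *                       3,                  // components per vertex
 *                       24,                 // byte stride in the source
 *                       num_vertices);
 */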
void radeon_init_dma(radeonContextPtr rmesa)
{
	make_empty_list(&rmesa->dma.free);
	make_empty_list(&rmesa->dma.wait);
	make_empty_list(&rmesa->dma.reserved);
	rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}

void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
	struct radeon_dma_bo *dma_bo = NULL;
	/* Raise the minimum size to at least the requested size,
	   aligned up to the next 16 bytes. */
	if (size > rmesa->dma.minimum_size)
		rmesa->dma.minimum_size = (size + 15) & (~15);

	radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %d\n",
		     __FUNCTION__, size, rmesa->dma.minimum_size);


	if (is_empty_list(&rmesa->dma.free)
	      || last_elem(&rmesa->dma.free)->bo->size < size) {
		dma_bo = CALLOC_STRUCT(radeon_dma_bo);
		assert(dma_bo);

again_alloc:
		dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
					    0, rmesa->dma.minimum_size, 4,
					    RADEON_GEM_DOMAIN_GTT, 0);

		if (!dma_bo->bo) {
			rcommonFlushCmdBuf(rmesa, __FUNCTION__);
			goto again_alloc;
		}
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	} else {
		/* We push and pop buffers at the end of the list, so we can keep
		   a counter of unused buffers and later free them from the
		   beginning of the list. */
		dma_bo = last_elem(&rmesa->dma.free);
		remove_from_list(dma_bo);
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	}

	rmesa->dma.current_used = 0;
	rmesa->dma.current_vertexptr = 0;

	if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
					  first_elem(&rmesa->dma.reserved)->bo,
					  RADEON_GEM_DOMAIN_GTT, 0))
		fprintf(stderr, "failure to revalidate BOs - badness\n");

	if (is_empty_list(&rmesa->dma.reserved)) {
		/* The command buffer was flushed in radeon_revalidate_bos,
		   which emptied the reserved list; allocate a fresh buffer. */
		goto again_alloc;
	}
}

/* Allocate a region from the first buffer on the reserved list.  If there
 * isn't enough space left, grab a new buffer (and discard what was left of
 * the current one).
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
			  struct radeon_bo **pbo, int *poffset,
			  int bytes, int alignment)
{
	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

	if (rmesa->dma.flush)
		rmesa->dma.flush(rmesa->glCtx);

	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

	alignment--;
	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

	if (is_empty_list(&rmesa->dma.reserved)
	      || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
		radeonRefillCurrentDmaRegion(rmesa, bytes);

	*poffset = rmesa->dma.current_used;
	*pbo = first_elem(&rmesa->dma.reserved)->bo;
	radeon_bo_ref(*pbo);

	/* Always align to at least 16 bytes */
	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;

	assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}
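/*
 * Usage sketch (hypothetical caller, not part of this file): a typical
 * consumer asks for a scratch region and fills it through a mapping of the
 * returned BO, e.g. 64 bytes of index data aligned to 32 bytes:
 *
 *   struct radeon_bo *bo;
 *   int offset;
 *   radeonAllocDmaRegion(rmesa, &bo, &offset, 64, 32);
 *   radeon_bo_map(bo, 1);
 *   memcpy((char *)bo->ptr + offset, indices, 64);
 *   radeon_bo_unmap(bo);
 *   radeon_bo_unref(bo);   // drop the reference taken by the allocator
 */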
void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	if (RADEON_DEBUG & RADEON_DMA)
		fprintf(stderr, "%s\n", __FUNCTION__);

	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}

void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
	if (is_empty_list(&rmesa->dma.reserved))
		return;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
	rmesa->dma.current_used -= return_bytes;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}

static int radeon_bo_is_idle(struct radeon_bo* bo)
{
	uint32_t domain;
	int ret = radeon_bo_is_busy(bo, &domain);
	if (ret == -EINVAL) {
		WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
			  "This may cause a small performance drop for you.\n");
	}
	return ret != -EBUSY;
}
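/*
 * Editor's note on the buffer lifecycle implemented below (derived from the
 * code itself, not from upstream documentation): DMA buffers cycle through
 * three lists.  A buffer is carved up while it sits on `reserved`; when
 * radeonReleaseDmaRegions() is called it moves to `wait`, which gives the
 * GPU time to finish consuming its contents; once the kernel reports it
 * idle it moves to `free` for reuse, and buffers that stay unused on `free`
 * for DMA_BO_FREE_TIME release cycles are unreferenced entirely.
 */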
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
	const int time = rmesa->dma.free.expire_counter;

	if (RADEON_DEBUG & RADEON_DMA) {
		size_t free = 0,
		       wait = 0,
		       reserved = 0;
		foreach(dma_bo, &rmesa->dma.free)
			++free;

		foreach(dma_bo, &rmesa->dma.wait)
			++wait;

		foreach(dma_bo, &rmesa->dma.reserved)
			++reserved;

		fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
			__FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
	}

	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
		/* request updated cs processing information from kernel */
		legacy_track_pending(rmesa->radeonScreen->bom, 0);
	}
	/* Move waiting BOs to the free list; the wait list gives the GPU time
	   to finish with the data before a buffer is reused. */
	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		if (dma_bo->expire_counter == time) {
			WARN_ONCE("Leaking dma buffer object!\n");
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		/* Free objects that are too small to be used because of a large request. */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		if (!radeon_bo_is_idle(dma_bo->bo))
			continue;
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.free, dma_bo);
	}

	/* Move reserved BOs to the wait list. */
	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		/* Free objects that are too small to be used because of a large request. */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.wait, dma_bo);
	}

	/* Free BOs that have been unused for some time. */
	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		if (dma_bo->expire_counter != time)
			break;
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

}


/* Flush vertices in the current dma region.
 */
void rcommon_flush_last_swtcl_prim( GLcontext *ctx )
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	struct radeon_dma *dma = &rmesa->dma;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);
	dma->flush = NULL;

	radeon_bo_unmap(rmesa->swtcl.bo);

	if (!is_empty_list(&dma->reserved)) {
		GLuint current_offset = dma->current_used;

		assert (dma->current_used +
			rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
			dma->current_vertexptr);

		if (dma->current_used != dma->current_vertexptr) {
			dma->current_used = dma->current_vertexptr;

			rmesa->vtbl.swtcl_flush(ctx, current_offset);
		}
		rmesa->swtcl.numverts = 0;
	}
	radeon_bo_unref(rmesa->swtcl.bo);
	rmesa->swtcl.bo = NULL;
}

/* Allocate space in the current dma region.
 */
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
	GLuint bytes = vsize * nverts;
	void *head;
	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (is_empty_list(&rmesa->dma.reserved)
	      || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
		if (rmesa->dma.flush) {
			rmesa->dma.flush(rmesa->glCtx);
		}

		radeonRefillCurrentDmaRegion(rmesa, bytes);

		return NULL;
	}

	if (!rmesa->dma.flush) {
		/* if cmdbuf flushed DMA restart */
		rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
		rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
	}

	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
	ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
	ASSERT( rmesa->dma.current_used +
		rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		rmesa->dma.current_vertexptr );

	if (!rmesa->swtcl.bo) {
		rmesa->swtcl.bo = first_elem(&rmesa->dma.reserved)->bo;
		radeon_bo_ref(rmesa->swtcl.bo);
		radeon_bo_map(rmesa->swtcl.bo, 1);
	}

	head = (rmesa->swtcl.bo->ptr + rmesa->dma.current_vertexptr);
	rmesa->dma.current_vertexptr += bytes;
	rmesa->swtcl.numverts += nverts;
	return head;
}
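/*
 * Usage sketch (hypothetical caller, not part of this file): a software-TCL
 * path reserves room for a primitive's vertices and writes them straight
 * into the returned pointer.  As written above, the function returns NULL
 * after refilling the region, so one plausible way for a caller to handle
 * that is to simply re-issue the request:
 *
 *   GLuint vsize = rmesa->swtcl.vertex_size * 4;
 *   float *v = rcommonAllocDmaLowVerts(rmesa, 3, vsize);
 *   if (!v)   // region was refilled; retry once against the fresh buffer
 *           v = rcommonAllocDmaLowVerts(rmesa, 3, vsize);
 *   // ... emit three vertices of vertex_size dwords each into v ...
 */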
void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
{
	radeonContextPtr radeon = RADEON_CONTEXT( ctx );
	int i;
	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (radeon->dma.flush) {
		radeon->dma.flush(radeon->glCtx);
	}
	for (i = 0; i < radeon->tcl.aos_count; i++) {
		if (radeon->tcl.aos[i].bo) {
			radeon_bo_unref(radeon->tcl.aos[i].bo);
			radeon->tcl.aos[i].bo = NULL;
		}
	}
}