/* radeon_dma.c revision 66bbafb6f9d44da3baddac6d948ba361182dde2a */
1/************************************************************************** 2 3Copyright (C) 2004 Nicolai Haehnle. 4Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. 5 6The Weather Channel (TM) funded Tungsten Graphics to develop the 7initial release of the Radeon 8500 driver under the XFree86 license. 8This notice must be preserved. 9 10All Rights Reserved. 11 12Permission is hereby granted, free of charge, to any person obtaining a 13copy of this software and associated documentation files (the "Software"), 14to deal in the Software without restriction, including without limitation 15on the rights to use, copy, modify, merge, publish, distribute, sub 16license, and/or sell copies of the Software, and to permit persons to whom 17the Software is furnished to do so, subject to the following conditions: 18 19The above copyright notice and this permission notice (including the next 20paragraph) shall be included in all copies or substantial portions of the 21Software. 22 23THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 26ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 27DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 28OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 29USE OR OTHER DEALINGS IN THE SOFTWARE. 
30 31**************************************************************************/ 32 33#include <errno.h> 34#include "radeon_common.h" 35#include "main/simple_list.h" 36 37#if defined(USE_X86_ASM) 38#define COPY_DWORDS( dst, src, nr ) \ 39do { \ 40 int __tmp; \ 41 __asm__ __volatile__( "rep ; movsl" \ 42 : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \ 43 : "0" (nr), \ 44 "D" ((long)dst), \ 45 "S" ((long)src) ); \ 46} while (0) 47#else 48#define COPY_DWORDS( dst, src, nr ) \ 49do { \ 50 int j; \ 51 for ( j = 0 ; j < nr ; j++ ) \ 52 dst[j] = ((int *)src)[j]; \ 53 dst += nr; \ 54} while (0) 55#endif 56 57void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count) 58{ 59 int i; 60 61 if (RADEON_DEBUG & DEBUG_VERTS) 62 fprintf(stderr, "%s count %d stride %d out %p data %p\n", 63 __FUNCTION__, count, stride, (void *)out, (void *)data); 64 65 if (stride == 4) 66 COPY_DWORDS(out, data, count); 67 else 68 for (i = 0; i < count; i++) { 69 out[0] = *(int *)data; 70 out++; 71 data += stride; 72 } 73} 74 75void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count) 76{ 77 int i; 78 79 if (RADEON_DEBUG & DEBUG_VERTS) 80 fprintf(stderr, "%s count %d stride %d out %p data %p\n", 81 __FUNCTION__, count, stride, (void *)out, (void *)data); 82 83 if (stride == 8) 84 COPY_DWORDS(out, data, count * 2); 85 else 86 for (i = 0; i < count; i++) { 87 out[0] = *(int *)data; 88 out[1] = *(int *)(data + 4); 89 out += 2; 90 data += stride; 91 } 92} 93 94void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count) 95{ 96 int i; 97 98 if (RADEON_DEBUG & DEBUG_VERTS) 99 fprintf(stderr, "%s count %d stride %d out %p data %p\n", 100 __FUNCTION__, count, stride, (void *)out, (void *)data); 101 102 if (stride == 12) { 103 COPY_DWORDS(out, data, count * 3); 104 } 105 else 106 for (i = 0; i < count; i++) { 107 out[0] = *(int *)data; 108 out[1] = *(int *)(data + 4); 109 out[2] = *(int *)(data + 8); 110 out += 3; 111 data += stride; 112 } 113} 114 
115void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count) 116{ 117 int i; 118 119 if (RADEON_DEBUG & DEBUG_VERTS) 120 fprintf(stderr, "%s count %d stride %d out %p data %p\n", 121 __FUNCTION__, count, stride, (void *)out, (void *)data); 122 123 if (stride == 16) 124 COPY_DWORDS(out, data, count * 4); 125 else 126 for (i = 0; i < count; i++) { 127 out[0] = *(int *)data; 128 out[1] = *(int *)(data + 4); 129 out[2] = *(int *)(data + 8); 130 out[3] = *(int *)(data + 12); 131 out += 4; 132 data += stride; 133 } 134} 135 136void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos, 137 const GLvoid * data, int size, int stride, int count) 138{ 139 radeonContextPtr rmesa = RADEON_CONTEXT(ctx); 140 uint32_t *out; 141 142 if (stride == 0) { 143 radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32); 144 count = 1; 145 aos->stride = 0; 146 } else { 147 radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32); 148 aos->stride = size; 149 } 150 151 aos->components = size; 152 aos->count = count; 153 154 out = (uint32_t*)((char*)aos->bo->ptr + aos->offset); 155 switch (size) { 156 case 1: radeonEmitVec4(out, data, stride, count); break; 157 case 2: radeonEmitVec8(out, data, stride, count); break; 158 case 3: radeonEmitVec12(out, data, stride, count); break; 159 case 4: radeonEmitVec16(out, data, stride, count); break; 160 default: 161 assert(0); 162 break; 163 } 164} 165 166void radeon_init_dma(radeonContextPtr rmesa) 167{ 168 make_empty_list(&rmesa->dma.free); 169 make_empty_list(&rmesa->dma.wait); 170 make_empty_list(&rmesa->dma.reserved); 171 rmesa->dma.minimum_size = MAX_DMA_BUF_SZ; 172} 173 174void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size) 175{ 176 /* we set minimum sizes to at least requested size 177 aligned to next 16 bytes. 
*/ 178 if (size > rmesa->dma.minimum_size) 179 rmesa->dma.minimum_size = (size + 15) & (~15); 180 181 if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) 182 fprintf(stderr, "%s\n", __FUNCTION__); 183 184 if (rmesa->dma.flush) { 185 rmesa->dma.flush(rmesa->glCtx); 186 } 187 188 /* unmap old reserved bo */ 189 if (!is_empty_list(&rmesa->dma.reserved)) 190 radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo); 191 192 if (is_empty_list(&rmesa->dma.free) 193 || last_elem(&rmesa->dma.free)->bo->size < size) { 194 struct radeon_dma_bo *dma_bo = CALLOC(sizeof(struct radeon_dma_bo)); 195 assert(dma_bo); 196 197again_alloc: 198 dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom, 199 0, rmesa->dma.minimum_size, 4, 200 RADEON_GEM_DOMAIN_GTT, 0); 201 202 if (!dma_bo->bo) { 203 rcommonFlushCmdBuf(rmesa, __FUNCTION__); 204 goto again_alloc; 205 } 206 insert_at_head(&rmesa->dma.reserved, dma_bo); 207 } else { 208 /* We push and pop buffers from end of list so we can keep 209 counter on unused buffers for later freeing them from 210 begin of list */ 211 struct radeon_dma_bo *dma_bo = last_elem(&rmesa->dma.free); 212 assert(dma_bo->bo->cref == 1); 213 remove_from_list(dma_bo); 214 insert_at_head(&rmesa->dma.reserved, dma_bo); 215 } 216 217 rmesa->dma.current_used = 0; 218 rmesa->dma.current_vertexptr = 0; 219 220 if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs, 221 first_elem(&rmesa->dma.reserved)->bo, 222 RADEON_GEM_DOMAIN_GTT, 0)) 223 fprintf(stderr,"failure to revalidate BOs - badness\n"); 224 225 if (is_empty_list(&rmesa->dma.reserved)) { 226 /* Cmd buff have been flushed in radeon_revalidate_bos */ 227 goto again_alloc; 228 } 229 230 radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1); 231} 232 233/* Allocates a region from rmesa->dma.current. 
If there isn't enough 234 * space in current, grab a new buffer (and discard what was left of current) 235 */ 236void radeonAllocDmaRegion(radeonContextPtr rmesa, 237 struct radeon_bo **pbo, int *poffset, 238 int bytes, int alignment) 239{ 240 if (RADEON_DEBUG & DEBUG_IOCTL) 241 fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); 242 243 if (rmesa->dma.flush) 244 rmesa->dma.flush(rmesa->glCtx); 245 246 assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr); 247 248 alignment--; 249 rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment; 250 251 if (is_empty_list(&rmesa->dma.reserved) 252 || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size) 253 radeonRefillCurrentDmaRegion(rmesa, bytes); 254 255 *poffset = rmesa->dma.current_used; 256 *pbo = first_elem(&rmesa->dma.reserved)->bo; 257 radeon_bo_ref(*pbo); 258 259 /* Always align to at least 16 bytes */ 260 rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15; 261 rmesa->dma.current_vertexptr = rmesa->dma.current_used; 262 263 assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size); 264} 265 266void radeonFreeDmaRegions(radeonContextPtr rmesa) 267{ 268 struct radeon_dma_bo *dma_bo; 269 struct radeon_dma_bo *temp; 270 if (RADEON_DEBUG & DEBUG_DMA) 271 fprintf(stderr, "%s\n", __FUNCTION__); 272 273 foreach_s(dma_bo, temp, &rmesa->dma.free) { 274 remove_from_list(dma_bo); 275 radeon_bo_unref(dma_bo->bo); 276 FREE(dma_bo); 277 } 278 279 foreach_s(dma_bo, temp, &rmesa->dma.wait) { 280 remove_from_list(dma_bo); 281 radeon_bo_unref(dma_bo->bo); 282 FREE(dma_bo); 283 } 284 285 foreach_s(dma_bo, temp, &rmesa->dma.reserved) { 286 remove_from_list(dma_bo); 287 radeon_bo_unmap(dma_bo->bo); 288 radeon_bo_unref(dma_bo->bo); 289 FREE(dma_bo); 290 } 291} 292 293void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes) 294{ 295 if (is_empty_list(&rmesa->dma.reserved)) 296 return; 297 298 if (RADEON_DEBUG & DEBUG_IOCTL) 299 
fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes); 300 rmesa->dma.current_used -= return_bytes; 301 rmesa->dma.current_vertexptr = rmesa->dma.current_used; 302} 303 304static int radeon_bo_is_idle(struct radeon_bo* bo) 305{ 306 uint32_t domain; 307 int ret = radeon_bo_is_busy(bo, &domain); 308 if (ret == -EINVAL) { 309 WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n" 310 "This may cause small performance drop for you.\n"); 311 } 312 return ret != -EBUSY; 313} 314 315void radeonReleaseDmaRegions(radeonContextPtr rmesa) 316{ 317 struct radeon_dma_bo *dma_bo; 318 struct radeon_dma_bo *temp; 319 const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME; 320 const int time = rmesa->dma.free.expire_counter; 321 322 if (RADEON_DEBUG & DEBUG_DMA) { 323 size_t free = 0, 324 wait = 0, 325 reserved = 0; 326 foreach(dma_bo, &rmesa->dma.free) 327 ++free; 328 329 foreach(dma_bo, &rmesa->dma.wait) 330 ++wait; 331 332 foreach(dma_bo, &rmesa->dma.reserved) 333 ++reserved; 334 335 fprintf(stderr, "%s: free %u, wait %u, reserved %u, minimum_size: %u\n", 336 __FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size); 337 } 338 339 if (!rmesa->radeonScreen->driScreen->dri2.enabled) { 340 /* request updated cs processing information from kernel */ 341 legacy_track_pending(rmesa->radeonScreen->bom, 0); 342 } 343 /* move waiting bos to free list. 
344 wait list provides gpu time to handle data before reuse */ 345 foreach_s(dma_bo, temp, &rmesa->dma.wait) { 346 if (dma_bo->expire_counter == time) { 347 WARN_ONCE("Leaking dma buffer object!\n"); 348 radeon_bo_unref(dma_bo->bo); 349 remove_from_list(dma_bo); 350 FREE(dma_bo); 351 continue; 352 } 353 /* free objects that are too small to be used because of large request */ 354 if (dma_bo->bo->size < rmesa->dma.minimum_size) { 355 radeon_bo_unref(dma_bo->bo); 356 remove_from_list(dma_bo); 357 FREE(dma_bo); 358 continue; 359 } 360 if (!radeon_bo_is_idle(dma_bo->bo)) 361 continue; 362 remove_from_list(dma_bo); 363 dma_bo->expire_counter = expire_at; 364 insert_at_tail(&rmesa->dma.free, dma_bo); 365 } 366 367 /* unmap the last dma region */ 368 if (!is_empty_list(&rmesa->dma.reserved)) 369 radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo); 370 /* move reserved to wait list */ 371 foreach_s(dma_bo, temp, &rmesa->dma.reserved) { 372 /* free objects that are too small to be used because of large request */ 373 if (dma_bo->bo->size < rmesa->dma.minimum_size) { 374 radeon_bo_unref(dma_bo->bo); 375 remove_from_list(dma_bo); 376 FREE(dma_bo); 377 continue; 378 } 379 remove_from_list(dma_bo); 380 dma_bo->expire_counter = expire_at; 381 insert_at_tail(&rmesa->dma.wait, dma_bo); 382 } 383 384 /* free bos that have been unused for some time */ 385 foreach_s(dma_bo, temp, &rmesa->dma.free) { 386 if (dma_bo->expire_counter != time) 387 break; 388 remove_from_list(dma_bo); 389 radeon_bo_unref(dma_bo->bo); 390 FREE(dma_bo); 391 } 392 393} 394 395 396/* Flush vertices in the current dma region. 
397 */ 398void rcommon_flush_last_swtcl_prim( GLcontext *ctx ) 399{ 400 radeonContextPtr rmesa = RADEON_CONTEXT(ctx); 401 struct radeon_dma *dma = &rmesa->dma; 402 403 404 if (RADEON_DEBUG & DEBUG_IOCTL) 405 fprintf(stderr, "%s\n", __FUNCTION__); 406 dma->flush = NULL; 407 408 if (!is_empty_list(&dma->reserved)) { 409 GLuint current_offset = dma->current_used; 410 411 assert (dma->current_used + 412 rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == 413 dma->current_vertexptr); 414 415 if (dma->current_used != dma->current_vertexptr) { 416 dma->current_used = dma->current_vertexptr; 417 418 rmesa->vtbl.swtcl_flush(ctx, current_offset); 419 } 420 rmesa->swtcl.numverts = 0; 421 } 422} 423/* Alloc space in the current dma region. 424 */ 425void * 426rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize ) 427{ 428 GLuint bytes = vsize * nverts; 429 void *head; 430restart: 431 if (RADEON_DEBUG & DEBUG_IOCTL) 432 fprintf(stderr, "%s\n", __FUNCTION__); 433 if (is_empty_list(&rmesa->dma.reserved) 434 || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) { 435 radeonRefillCurrentDmaRegion(rmesa, bytes); 436 } 437 438 if (!rmesa->dma.flush) { 439 /* make sure we have enough space to use this in cmdbuf */ 440 rcommonEnsureCmdBufSpace(rmesa, 441 rmesa->hw.max_state_size + (20*sizeof(int)), 442 __FUNCTION__); 443 /* if cmdbuf flushed DMA restart */ 444 if (is_empty_list(&rmesa->dma.reserved)) 445 goto restart; 446 rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; 447 rmesa->dma.flush = rcommon_flush_last_swtcl_prim; 448 } 449 450 ASSERT( vsize == rmesa->swtcl.vertex_size * 4 ); 451 ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim ); 452 ASSERT( rmesa->dma.current_used + 453 rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == 454 rmesa->dma.current_vertexptr ); 455 456 head = (first_elem(&rmesa->dma.reserved)->bo->ptr + rmesa->dma.current_vertexptr); 457 rmesa->dma.current_vertexptr += bytes; 458 
rmesa->swtcl.numverts += nverts; 459 return head; 460} 461 462void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs ) 463{ 464 radeonContextPtr radeon = RADEON_CONTEXT( ctx ); 465 int i; 466 if (RADEON_DEBUG & DEBUG_IOCTL) 467 fprintf(stderr, "%s\n", __FUNCTION__); 468 469 if (radeon->dma.flush) { 470 radeon->dma.flush(radeon->glCtx); 471 } 472 for (i = 0; i < radeon->tcl.aos_count; i++) { 473 if (radeon->tcl.aos[i].bo) { 474 radeon_bo_unref(radeon->tcl.aos[i].bo); 475 radeon->tcl.aos[i].bo = NULL; 476 477 } 478 } 479} 480