radeon_dma.c revision 66e019c6c91e6ae3fb9e26a12d7b7782a0095a8d
/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

#include "radeon_common.h"
#include "main/simple_list.h"

#if defined(USE_X86_ASM)
/* Copy 'nr' dwords from src to dst with "rep movsl"; the dummy outputs
   tell GCC that ECX, EDI and ESI are modified by the instruction. */
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
				"D" ((long)dst),			\
				"S" ((long)src) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )		\
do {						\
	int j;					\
	for ( j = 0 ; j < nr ; j++ )		\
		dst[j] = ((int *)src)[j];	\
	dst += nr;				\
} while (0)
#endif

void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 4)
		COPY_DWORDS(out, data, count);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out++;
			data += stride;
		}
}

void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 8)
		COPY_DWORDS(out, data, count * 2);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out += 2;
			data += stride;
		}
}

void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 12) {
		COPY_DWORDS(out, data, count * 3);
	}
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out += 3;
			data += stride;
		}
}

void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 16)
		COPY_DWORDS(out, data, count * 4);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out[3] = *(int *)(data + 12);
			out += 4;
			data += stride;
		}
}
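/*
 * The radeonEmitVec* helpers above gather vertex components from a
 * (possibly strided) client array into a tightly packed DMA buffer,
 * taking the COPY_DWORDS fast path when the source stride already
 * matches the packed element size.  An illustrative sketch of the
 * access pattern (hypothetical caller, not part of this driver):
 *
 *	GLfloat positions[2][3] = { { 0, 0, 0 }, { 1, 0, 0 } };
 *	uint32_t packed[6];
 *	// stride == 12 equals sizeof(GLfloat[3]), so this hits the
 *	// COPY_DWORDS fast path and copies 2 * 3 dwords verbatim.
 *	radeonEmitVec12(packed, positions, 12, 2);
 */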
void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
			 const GLvoid * data, int size, int stride, int count)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	uint32_t *out;

	if (stride == 0) {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
		count = 1;
		aos->stride = 0;
	} else {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
		aos->stride = size;
	}

	aos->components = size;
	aos->count = count;

	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
	switch (size) {
	case 1: radeonEmitVec4(out, data, stride, count); break;
	case 2: radeonEmitVec8(out, data, stride, count); break;
	case 3: radeonEmitVec12(out, data, stride, count); break;
	case 4: radeonEmitVec16(out, data, stride, count); break;
	default:
		assert(0);
		break;
	}
}

void radeon_init_dma(radeonContextPtr rmesa)
{
	make_empty_list(&rmesa->dma.free);
	make_empty_list(&rmesa->dma.wait);
	make_empty_list(&rmesa->dma.reserved);
	rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}

void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
	/* Grow the minimum buffer size to at least the requested size,
	   aligned to the next 16-byte boundary. */
	if (size > rmesa->dma.minimum_size)
		rmesa->dma.minimum_size = (size + 15) & (~15);

	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (rmesa->dma.flush) {
		rmesa->dma.flush(rmesa->glCtx);
	}

	/* unmap the old reserved bo */
	if (!is_empty_list(&rmesa->dma.reserved))
		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);

	if (is_empty_list(&rmesa->dma.free)
	    || last_elem(&rmesa->dma.free)->bo->size < size) {
		struct radeon_dma_bo *dma_bo = CALLOC(sizeof(struct radeon_dma_bo));
		assert(dma_bo);

again_alloc:
		dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
					    0, rmesa->dma.minimum_size, 4,
					    RADEON_GEM_DOMAIN_GTT, 0);

		if (!dma_bo->bo) {
			rcommonFlushCmdBuf(rmesa, __FUNCTION__);
			goto again_alloc;
		}
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	} else {
		/* Reuse the most recently freed buffer. */
		struct radeon_dma_bo *dma_bo = last_elem(&rmesa->dma.free);
		assert(dma_bo->bo->cref == 1);
		remove_from_list(dma_bo);
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	}

	rmesa->dma.current_used = 0;
	rmesa->dma.current_vertexptr = 0;

	if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
					  first_elem(&rmesa->dma.reserved)->bo,
					  RADEON_GEM_DOMAIN_GTT, 0))
		fprintf(stderr, "failure to revalidate BOs - badness\n");

	if (is_empty_list(&rmesa->dma.reserved)) {
		/* The command buffer was flushed in radeon_revalidate_bos,
		   which released the reserved buffer; allocate a fresh one. */
		goto again_alloc;
	}

	radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}
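/*
 * Sizing note for radeonRefillCurrentDmaRegion() above: dma.minimum_size
 * only ever grows, and a larger request raises it rounded up to a
 * 16-byte multiple.  Worked example (plain arithmetic, not driver code):
 *
 *	size = 1000  ->  (1000 + 15) & ~15  ==  1008
 *	size = 1024  ->  (1024 + 15) & ~15  ==  1024
 *
 * so a 1000-byte request bumps minimum_size to 1008 if it was smaller.
 */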
/* Allocate a region from the currently reserved DMA buffer.  If there
 * isn't enough space in it, grab a new buffer (and discard what was
 * left of the current one).
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
			  struct radeon_bo **pbo, int *poffset,
			  int bytes, int alignment)
{
	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

	if (rmesa->dma.flush)
		rmesa->dma.flush(rmesa->glCtx);

	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

	alignment--;
	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

	if (is_empty_list(&rmesa->dma.reserved)
	    || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
		radeonRefillCurrentDmaRegion(rmesa, bytes);

	*poffset = rmesa->dma.current_used;
	*pbo = first_elem(&rmesa->dma.reserved)->bo;
	radeon_bo_ref(*pbo);

	/* Always align to at least 16 bytes */
	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;

	assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}

void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);

	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		remove_from_list(dma_bo);
		radeon_bo_unmap(dma_bo->bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}

void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
	if (is_empty_list(&rmesa->dma.reserved))
		return;

	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
	rmesa->dma.current_used -= return_bytes;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}

static int radeon_bo_is_idle(struct radeon_bo* bo)
{
	/* The buffer is idle once this context holds the only reference. */
	return bo->cref == 1;
}
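/*
 * Buffer lifecycle overview (a summary of the lists managed below):
 *
 *	reserved: the buffer currently mapped and being filled
 *	wait:     buffers handed to the GPU, parked until idle so the
 *	          GPU can finish reading them before they are recycled
 *	free:     idle buffers available for reuse; entries expire after
 *	          DMA_BO_FREE_TIME release cycles and are then unref'd
 *
 * radeon_bo_is_idle() above approximates "the GPU is done" by checking
 * that this context holds the only remaining reference (cref == 1).
 */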
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
	const int time = rmesa->dma.free.expire_counter;
	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);

	/* Move waiting bos to the free list; the wait list gives the GPU
	   time to finish with the data before the buffer is reused. */
	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		/* Still busy after DMA_BO_FREE_TIME release cycles: give up on it. */
		if (dma_bo->expire_counter == time) {
			WARN_ONCE("Leaking dma buffer object!\n");
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		/* Free objects that are too small to satisfy the grown
		   minimum request size. */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		if (!radeon_bo_is_idle(dma_bo->bo))
			continue;
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.free, dma_bo);
	}

	/* unmap the last dma region */
	if (!is_empty_list(&rmesa->dma.reserved))
		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);
	/* move reserved buffers to the wait list */
	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		/* Free objects that are too small to satisfy the grown
		   minimum request size. */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.wait, dma_bo);
	}

	/* free bos that have been unused for some time */
	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		if (dma_bo->expire_counter != time)
			break;
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}


/* Flush vertices in the current dma region.
 */
void rcommon_flush_last_swtcl_prim( GLcontext *ctx )
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	struct radeon_dma *dma = &rmesa->dma;

	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);
	dma->flush = NULL;

	if (!is_empty_list(&dma->reserved)) {
		GLuint current_offset = dma->current_used;

		assert (dma->current_used +
			rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
			dma->current_vertexptr);

		if (dma->current_used != dma->current_vertexptr) {
			dma->current_used = dma->current_vertexptr;

			rmesa->vtbl.swtcl_flush(ctx, current_offset);
		}
		rmesa->swtcl.numverts = 0;
	}
}
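/*
 * Interplay with rcommonAllocDmaLowVerts() below: the first vertex
 * allocation installs rcommon_flush_last_swtcl_prim() as dma.flush,
 * and any later operation that needs the region in a consistent state
 * invokes it.  A hypothetical caller (illustrative names, assuming a
 * 4-dword vertex format so vsize matches swtcl.vertex_size * 4):
 *
 *	GLfloat *v = rcommonAllocDmaLowVerts(rmesa, 3, 4 * sizeof(GLfloat));
 *	// ... write 3 vertices of 4 floats each through v ...
 *	// they reach the hardware when dma.flush fires, e.g. at the
 *	// next state change or command-buffer flush.
 */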
/* Alloc space in the current dma region.
 */
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
	GLuint bytes = vsize * nverts;
	void *head;
restart:
	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);
	if (is_empty_list(&rmesa->dma.reserved)
	    || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
		radeonRefillCurrentDmaRegion(rmesa, bytes);
	}

	if (!rmesa->dma.flush) {
		/* make sure we have enough space to use this in cmdbuf */
		rcommonEnsureCmdBufSpace(rmesa,
					 rmesa->hw.max_state_size + (20*sizeof(int)),
					 __FUNCTION__);
		/* if the cmdbuf flush released the DMA region, restart */
		if (is_empty_list(&rmesa->dma.reserved))
			goto restart;
		rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
		rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
	}

	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
	ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
	ASSERT( rmesa->dma.current_used +
		rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		rmesa->dma.current_vertexptr );

	head = (first_elem(&rmesa->dma.reserved)->bo->ptr + rmesa->dma.current_vertexptr);
	rmesa->dma.current_vertexptr += bytes;
	rmesa->swtcl.numverts += nverts;
	return head;
}

void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
{
	radeonContextPtr radeon = RADEON_CONTEXT( ctx );
	int i;
	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (radeon->dma.flush) {
		radeon->dma.flush(radeon->glCtx);
	}
	for (i = 0; i < radeon->tcl.aos_count; i++) {
		if (radeon->tcl.aos[i].bo) {
			radeon_bo_unref(radeon->tcl.aos[i].bo);
			radeon->tcl.aos[i].bo = NULL;
		}
	}
}
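/*
 * Typical per-draw flow through this file (illustrative summary, not a
 * fixed calling convention):
 *
 *	rcommon_emit_vector()      upload one vertex array into DMA space
 *	... emit draw commands ...
 *	radeonReleaseArrays()      drop the per-array BO references
 *	radeonReleaseDmaRegions()  retire buffers: reserved -> wait -> free
 *
 * Buffers only return to the free list once radeon_bo_is_idle() reports
 * that no other reference remains, so recycled memory is never
 * overwritten while the GPU may still be reading it.
 */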