/* radeon_dma.c revision f9b8562f32b77a27c872d4c70a86995032541107 */
1/************************************************************************** 2 3Copyright (C) 2004 Nicolai Haehnle. 4Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. 5 6The Weather Channel (TM) funded Tungsten Graphics to develop the 7initial release of the Radeon 8500 driver under the XFree86 license. 8This notice must be preserved. 9 10All Rights Reserved. 11 12Permission is hereby granted, free of charge, to any person obtaining a 13copy of this software and associated documentation files (the "Software"), 14to deal in the Software without restriction, including without limitation 15on the rights to use, copy, modify, merge, publish, distribute, sub 16license, and/or sell copies of the Software, and to permit persons to whom 17the Software is furnished to do so, subject to the following conditions: 18 19The above copyright notice and this permission notice (including the next 20paragraph) shall be included in all copies or substantial portions of the 21Software. 22 23THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 26ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 27DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 28OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 29USE OR OTHER DEALINGS IN THE SOFTWARE. 
30 31**************************************************************************/ 32 33#include "radeon_common.h" 34#include "main/simple_list.h" 35 36#if defined(USE_X86_ASM) 37#define COPY_DWORDS( dst, src, nr ) \ 38do { \ 39 int __tmp; \ 40 __asm__ __volatile__( "rep ; movsl" \ 41 : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \ 42 : "0" (nr), \ 43 "D" ((long)dst), \ 44 "S" ((long)src) ); \ 45} while (0) 46#else 47#define COPY_DWORDS( dst, src, nr ) \ 48do { \ 49 int j; \ 50 for ( j = 0 ; j < nr ; j++ ) \ 51 dst[j] = ((int *)src)[j]; \ 52 dst += nr; \ 53} while (0) 54#endif 55 56void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count) 57{ 58 int i; 59 60 if (RADEON_DEBUG & DEBUG_VERTS) 61 fprintf(stderr, "%s count %d stride %d out %p data %p\n", 62 __FUNCTION__, count, stride, (void *)out, (void *)data); 63 64 if (stride == 4) 65 COPY_DWORDS(out, data, count); 66 else 67 for (i = 0; i < count; i++) { 68 out[0] = *(int *)data; 69 out++; 70 data += stride; 71 } 72} 73 74void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count) 75{ 76 int i; 77 78 if (RADEON_DEBUG & DEBUG_VERTS) 79 fprintf(stderr, "%s count %d stride %d out %p data %p\n", 80 __FUNCTION__, count, stride, (void *)out, (void *)data); 81 82 if (stride == 8) 83 COPY_DWORDS(out, data, count * 2); 84 else 85 for (i = 0; i < count; i++) { 86 out[0] = *(int *)data; 87 out[1] = *(int *)(data + 4); 88 out += 2; 89 data += stride; 90 } 91} 92 93void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count) 94{ 95 int i; 96 97 if (RADEON_DEBUG & DEBUG_VERTS) 98 fprintf(stderr, "%s count %d stride %d out %p data %p\n", 99 __FUNCTION__, count, stride, (void *)out, (void *)data); 100 101 if (stride == 12) { 102 COPY_DWORDS(out, data, count * 3); 103 } 104 else 105 for (i = 0; i < count; i++) { 106 out[0] = *(int *)data; 107 out[1] = *(int *)(data + 4); 108 out[2] = *(int *)(data + 8); 109 out += 3; 110 data += stride; 111 } 112} 113 114void 
radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count) 115{ 116 int i; 117 118 if (RADEON_DEBUG & DEBUG_VERTS) 119 fprintf(stderr, "%s count %d stride %d out %p data %p\n", 120 __FUNCTION__, count, stride, (void *)out, (void *)data); 121 122 if (stride == 16) 123 COPY_DWORDS(out, data, count * 4); 124 else 125 for (i = 0; i < count; i++) { 126 out[0] = *(int *)data; 127 out[1] = *(int *)(data + 4); 128 out[2] = *(int *)(data + 8); 129 out[3] = *(int *)(data + 12); 130 out += 4; 131 data += stride; 132 } 133} 134 135void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos, 136 const GLvoid * data, int size, int stride, int count) 137{ 138 radeonContextPtr rmesa = RADEON_CONTEXT(ctx); 139 uint32_t *out; 140 141 if (stride == 0) { 142 radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32); 143 count = 1; 144 aos->stride = 0; 145 } else { 146 radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32); 147 aos->stride = size; 148 } 149 150 aos->components = size; 151 aos->count = count; 152 153 out = (uint32_t*)((char*)aos->bo->ptr + aos->offset); 154 switch (size) { 155 case 1: radeonEmitVec4(out, data, stride, count); break; 156 case 2: radeonEmitVec8(out, data, stride, count); break; 157 case 3: radeonEmitVec12(out, data, stride, count); break; 158 case 4: radeonEmitVec16(out, data, stride, count); break; 159 default: 160 assert(0); 161 break; 162 } 163} 164 165void radeon_init_dma(radeonContextPtr rmesa) 166{ 167 make_empty_list(&rmesa->dma.free); 168 make_empty_list(&rmesa->dma.wait); 169 make_empty_list(&rmesa->dma.reserved); 170} 171 172void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size) 173{ 174 size = MAX2(size, MAX_DMA_BUF_SZ); 175 176 if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) 177 fprintf(stderr, "%s\n", __FUNCTION__); 178 179 if (rmesa->dma.flush) { 180 rmesa->dma.flush(rmesa->glCtx); 181 } 182 183 /* unmap old reserved bo */ 184 if (!is_empty_list(&rmesa->dma.reserved)) 185 
radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo); 186 187 if (is_empty_list(&rmesa->dma.free)) { 188 struct radeon_dma_bo *dma_bo = CALLOC(sizeof(struct radeon_dma_bo)); 189 assert(dma_bo); 190 191again_alloc: 192 dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom, 193 0, size, 4, RADEON_GEM_DOMAIN_GTT, 194 0); 195 196 if (!dma_bo->bo) { 197 rcommonFlushCmdBuf(rmesa, __FUNCTION__); 198 goto again_alloc; 199 } 200 insert_at_head(&rmesa->dma.reserved, dma_bo); 201 } else { 202 struct radeon_dma_bo *dma_bo = last_elem(&rmesa->dma.free); 203 assert(dma_bo->bo->cref == 1); 204 remove_from_list(dma_bo); 205 insert_at_head(&rmesa->dma.reserved, dma_bo); 206 } 207 208 rmesa->dma.current_used = 0; 209 rmesa->dma.current_vertexptr = 0; 210 211 if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs, 212 first_elem(&rmesa->dma.reserved)->bo, 213 RADEON_GEM_DOMAIN_GTT, 0)) 214 fprintf(stderr,"failure to revalidate BOs - badness\n"); 215 216 if (is_empty_list(&rmesa->dma.reserved)) { 217 /* Cmd buff have been flushed in radeon_revalidate_bos */ 218 goto again_alloc; 219 } 220 221 radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1); 222} 223 224/* Allocates a region from rmesa->dma.current. 
If there isn't enough 225 * space in current, grab a new buffer (and discard what was left of current) 226 */ 227void radeonAllocDmaRegion(radeonContextPtr rmesa, 228 struct radeon_bo **pbo, int *poffset, 229 int bytes, int alignment) 230{ 231 if (RADEON_DEBUG & DEBUG_IOCTL) 232 fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); 233 234 if (rmesa->dma.flush) 235 rmesa->dma.flush(rmesa->glCtx); 236 237 assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr); 238 239 alignment--; 240 rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment; 241 242 if (is_empty_list(&rmesa->dma.reserved) 243 || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size) 244 radeonRefillCurrentDmaRegion(rmesa, (bytes + 15) & ~15); 245 246 *poffset = rmesa->dma.current_used; 247 *pbo = first_elem(&rmesa->dma.reserved)->bo; 248 radeon_bo_ref(*pbo); 249 250 /* Always align to at least 16 bytes */ 251 rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15; 252 rmesa->dma.current_vertexptr = rmesa->dma.current_used; 253 254 assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size); 255} 256 257void radeonFreeDmaRegions(radeonContextPtr rmesa) 258{ 259 struct radeon_dma_bo *dma_bo; 260 struct radeon_dma_bo *temp; 261 if (RADEON_DEBUG & DEBUG_IOCTL) 262 fprintf(stderr, "%s\n", __FUNCTION__); 263 264 foreach_s(dma_bo, temp, &rmesa->dma.free) { 265 remove_from_list(dma_bo); 266 radeon_bo_unref(dma_bo->bo); 267 FREE(dma_bo); 268 } 269 270 foreach_s(dma_bo, temp, &rmesa->dma.reserved) { 271 remove_from_list(dma_bo); 272 radeon_bo_unmap(dma_bo->bo); 273 radeon_bo_unref(dma_bo->bo); 274 FREE(dma_bo); 275 } 276} 277 278void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes) 279{ 280 if (is_empty_list(&rmesa->dma.reserved)) 281 return; 282 283 if (RADEON_DEBUG & DEBUG_IOCTL) 284 fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes); 285 rmesa->dma.current_used -= return_bytes; 286 
rmesa->dma.current_vertexptr = rmesa->dma.current_used; 287} 288 289void radeonReleaseDmaRegions(radeonContextPtr rmesa) 290{ 291 struct radeon_dma_bo *dma_bo; 292 struct radeon_dma_bo *temp; 293 const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME; 294 const int time = rmesa->dma.free.expire_counter; 295 if (RADEON_DEBUG & DEBUG_IOCTL) 296 fprintf(stderr, "%s\n", __FUNCTION__); 297 298 /* move waiting bos to free list. 299 wait list provides gpu time to handle data before reuse */ 300 foreach_s(dma_bo, temp, &rmesa->dma.wait) { 301 if (dma_bo->expire_counter == time) { 302 WARN_ONCE("Leaking dma buffer object!\n"); 303 radeon_bo_unref(dma_bo->bo); 304 remove_from_list(dma_bo); 305 FREE(dma_bo); 306 continue; 307 } 308 if (dma_bo->bo->cref > 1) 309 continue; 310 remove_from_list(dma_bo); 311 dma_bo->expire_counter = expire_at; 312 insert_at_tail(&rmesa->dma.free, dma_bo); 313 } 314 315 /* unmap the last dma region */ 316 if (!is_empty_list(&rmesa->dma.reserved)) 317 radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo); 318 /* move reserved to wait list */ 319 foreach_s(dma_bo, temp, &rmesa->dma.reserved) { 320 remove_from_list(dma_bo); 321 dma_bo->expire_counter = expire_at; 322 insert_at_tail(&rmesa->dma.wait, dma_bo); 323 } 324 325 /* free bos that have been unused for some time */ 326 foreach_s(dma_bo, temp, &rmesa->dma.free) { 327 if (dma_bo->expire_counter != time) 328 break; 329 remove_from_list(dma_bo); 330 radeon_bo_unref(dma_bo->bo); 331 FREE(dma_bo); 332 } 333 334} 335 336 337/* Flush vertices in the current dma region. 
338 */ 339void rcommon_flush_last_swtcl_prim( GLcontext *ctx ) 340{ 341 radeonContextPtr rmesa = RADEON_CONTEXT(ctx); 342 struct radeon_dma *dma = &rmesa->dma; 343 344 345 if (RADEON_DEBUG & DEBUG_IOCTL) 346 fprintf(stderr, "%s\n", __FUNCTION__); 347 dma->flush = NULL; 348 349 if (!is_empty_list(&dma->reserved)) { 350 GLuint current_offset = dma->current_used; 351 352 assert (dma->current_used + 353 rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == 354 dma->current_vertexptr); 355 356 if (dma->current_used != dma->current_vertexptr) { 357 dma->current_used = dma->current_vertexptr; 358 359 rmesa->vtbl.swtcl_flush(ctx, current_offset); 360 } 361 rmesa->swtcl.numverts = 0; 362 } 363} 364/* Alloc space in the current dma region. 365 */ 366void * 367rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize ) 368{ 369 GLuint bytes = vsize * nverts; 370 void *head; 371restart: 372 if (RADEON_DEBUG & DEBUG_IOCTL) 373 fprintf(stderr, "%s\n", __FUNCTION__); 374 if (is_empty_list(&rmesa->dma.reserved) 375 || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) { 376 radeonRefillCurrentDmaRegion(rmesa, bytes); 377 } 378 379 if (!rmesa->dma.flush) { 380 /* make sure we have enough space to use this in cmdbuf */ 381 rcommonEnsureCmdBufSpace(rmesa, 382 rmesa->hw.max_state_size + (20*sizeof(int)), 383 __FUNCTION__); 384 /* if cmdbuf flushed DMA restart */ 385 if (is_empty_list(&rmesa->dma.reserved)) 386 goto restart; 387 rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; 388 rmesa->dma.flush = rcommon_flush_last_swtcl_prim; 389 } 390 391 ASSERT( vsize == rmesa->swtcl.vertex_size * 4 ); 392 ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim ); 393 ASSERT( rmesa->dma.current_used + 394 rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == 395 rmesa->dma.current_vertexptr ); 396 397 head = (first_elem(&rmesa->dma.reserved)->bo->ptr + rmesa->dma.current_vertexptr); 398 rmesa->dma.current_vertexptr += bytes; 399 
rmesa->swtcl.numverts += nverts; 400 return head; 401} 402 403void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs ) 404{ 405 radeonContextPtr radeon = RADEON_CONTEXT( ctx ); 406 int i; 407 if (RADEON_DEBUG & DEBUG_IOCTL) 408 fprintf(stderr, "%s\n", __FUNCTION__); 409 410 if (radeon->dma.flush) { 411 radeon->dma.flush(radeon->glCtx); 412 } 413 for (i = 0; i < radeon->tcl.aos_count; i++) { 414 if (radeon->tcl.aos[i].bo) { 415 radeon_bo_unref(radeon->tcl.aos[i].bo); 416 radeon->tcl.aos[i].bo = NULL; 417 418 } 419 } 420} 421