/* radeon_dma.c revision cf24119d38360bfb25fa2683fe86a139826084f0 */
1cf24119d38360bfb25fa2683fe86a139826084f0Michel Dänzer/************************************************************************** 2 3Copyright (C) 2004 Nicolai Haehnle. 4Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. 5 6The Weather Channel (TM) funded Tungsten Graphics to develop the 7initial release of the Radeon 8500 driver under the XFree86 license. 8This notice must be preserved. 9 10All Rights Reserved. 11 12Permission is hereby granted, free of charge, to any person obtaining a 13copy of this software and associated documentation files (the "Software"), 14to deal in the Software without restriction, including without limitation 15on the rights to use, copy, modify, merge, publish, distribute, sub 16license, and/or sell copies of the Software, and to permit persons to whom 17the Software is furnished to do so, subject to the following conditions: 18 19The above copyright notice and this permission notice (including the next 20paragraph) shall be included in all copies or substantial portions of the 21Software. 22 23THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 26ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 27DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 28OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 29USE OR OTHER DEALINGS IN THE SOFTWARE. 
30 31**************************************************************************/ 32 33#include "radeon_common.h" 34 35#if defined(USE_X86_ASM) 36#define COPY_DWORDS( dst, src, nr ) \ 37do { \ 38 int __tmp; \ 39 __asm__ __volatile__( "rep ; movsl" \ 40 : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \ 41 : "0" (nr), \ 42 "D" ((long)dst), \ 43 "S" ((long)src) ); \ 44} while (0) 45#else 46#define COPY_DWORDS( dst, src, nr ) \ 47do { \ 48 int j; \ 49 for ( j = 0 ; j < nr ; j++ ) \ 50 dst[j] = ((int *)src)[j]; \ 51 dst += nr; \ 52} while (0) 53#endif 54 55static void radeonEmitVec4(uint32_t *out, GLvoid * data, int stride, int count) 56{ 57 int i; 58 59 if (RADEON_DEBUG & DEBUG_VERTS) 60 fprintf(stderr, "%s count %d stride %d out %p data %p\n", 61 __FUNCTION__, count, stride, (void *)out, (void *)data); 62 63 if (stride == 4) 64 COPY_DWORDS(out, data, count); 65 else 66 for (i = 0; i < count; i++) { 67 out[0] = *(int *)data; 68 out++; 69 data += stride; 70 } 71} 72 73void radeonEmitVec8(uint32_t *out, GLvoid * data, int stride, int count) 74{ 75 int i; 76 77 if (RADEON_DEBUG & DEBUG_VERTS) 78 fprintf(stderr, "%s count %d stride %d out %p data %p\n", 79 __FUNCTION__, count, stride, (void *)out, (void *)data); 80 81 if (stride == 8) 82 COPY_DWORDS(out, data, count * 2); 83 else 84 for (i = 0; i < count; i++) { 85 out[0] = *(int *)data; 86 out[1] = *(int *)(data + 4); 87 out += 2; 88 data += stride; 89 } 90} 91 92void radeonEmitVec12(uint32_t *out, GLvoid * data, int stride, int count) 93{ 94 int i; 95 96 if (RADEON_DEBUG & DEBUG_VERTS) 97 fprintf(stderr, "%s count %d stride %d out %p data %p\n", 98 __FUNCTION__, count, stride, (void *)out, (void *)data); 99 100 if (stride == 12) { 101 COPY_DWORDS(out, data, count * 3); 102 } 103 else 104 for (i = 0; i < count; i++) { 105 out[0] = *(int *)data; 106 out[1] = *(int *)(data + 4); 107 out[2] = *(int *)(data + 8); 108 out += 3; 109 data += stride; 110 } 111} 112 113static void radeonEmitVec16(uint32_t *out, GLvoid * data, int stride, 
int count) 114{ 115 int i; 116 117 if (RADEON_DEBUG & DEBUG_VERTS) 118 fprintf(stderr, "%s count %d stride %d out %p data %p\n", 119 __FUNCTION__, count, stride, (void *)out, (void *)data); 120 121 if (stride == 16) 122 COPY_DWORDS(out, data, count * 4); 123 else 124 for (i = 0; i < count; i++) { 125 out[0] = *(int *)data; 126 out[1] = *(int *)(data + 4); 127 out[2] = *(int *)(data + 8); 128 out[3] = *(int *)(data + 12); 129 out += 4; 130 data += stride; 131 } 132} 133 134void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos, 135 GLvoid * data, int size, int stride, int count) 136{ 137 radeonContextPtr rmesa = RADEON_CONTEXT(ctx); 138 uint32_t *out; 139 140 if (stride == 0) { 141 radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32); 142 count = 1; 143 aos->stride = 0; 144 } else { 145 radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32); 146 aos->stride = size; 147 } 148 149 aos->components = size; 150 aos->count = count; 151 152 out = (uint32_t*)((char*)aos->bo->ptr + aos->offset); 153 switch (size) { 154 case 1: radeonEmitVec4(out, data, stride, count); break; 155 case 2: radeonEmitVec8(out, data, stride, count); break; 156 case 3: radeonEmitVec12(out, data, stride, count); break; 157 case 4: radeonEmitVec16(out, data, stride, count); break; 158 default: 159 assert(0); 160 break; 161 } 162} 163 164void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size) 165{ 166 167 size = MAX2(size, MAX_DMA_BUF_SZ); 168 169 if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) 170 fprintf(stderr, "%s\n", __FUNCTION__); 171 172 if (rmesa->dma.flush) { 173 rmesa->dma.flush(rmesa->glCtx); 174 } 175 176 if (rmesa->dma.nr_released_bufs > 4) { 177 rcommonFlushCmdBuf(rmesa, __FUNCTION__); 178 rmesa->dma.nr_released_bufs = 0; 179 } 180 181 if (rmesa->dma.current) { 182 radeon_bo_unmap(rmesa->dma.current); 183 radeon_bo_unref(rmesa->dma.current); 184 rmesa->dma.current = 0; 185 } 186 187again_alloc: 188#ifdef RADEON_DEBUG_BO 189 
rmesa->dma.current = radeon_bo_open(rmesa->radeonScreen->bom, 190 0, size, 4, RADEON_GEM_DOMAIN_GTT, 191 0, "dma.current"); 192#else 193 rmesa->dma.current = radeon_bo_open(rmesa->radeonScreen->bom, 194 0, size, 4, RADEON_GEM_DOMAIN_GTT, 195 0); 196#endif /* RADEON_DEBUG_BO */ 197 198 if (!rmesa->dma.current) { 199 rcommonFlushCmdBuf(rmesa, __FUNCTION__); 200 rmesa->dma.nr_released_bufs = 0; 201 goto again_alloc; 202 } 203 204 rmesa->dma.current_used = 0; 205 rmesa->dma.current_vertexptr = 0; 206 207 if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs, 208 rmesa->dma.current, 209 RADEON_GEM_DOMAIN_GTT, 0)) 210 fprintf(stderr,"failure to revalidate BOs - badness\n"); 211 212 if (!rmesa->dma.current) { 213 /* Cmd buff have been flushed in radeon_revalidate_bos */ 214 rmesa->dma.nr_released_bufs = 0; 215 goto again_alloc; 216 } 217 218 radeon_bo_map(rmesa->dma.current, 1); 219} 220 221/* Allocates a region from rmesa->dma.current. If there isn't enough 222 * space in current, grab a new buffer (and discard what was left of current) 223 */ 224void radeonAllocDmaRegion(radeonContextPtr rmesa, 225 struct radeon_bo **pbo, int *poffset, 226 int bytes, int alignment) 227{ 228 if (RADEON_DEBUG & DEBUG_IOCTL) 229 fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); 230 231 if (rmesa->dma.flush) 232 rmesa->dma.flush(rmesa->glCtx); 233 234 assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr); 235 236 alignment--; 237 rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment; 238 239 if (!rmesa->dma.current || rmesa->dma.current_used + bytes > rmesa->dma.current->size) 240 radeonRefillCurrentDmaRegion(rmesa, (bytes + 15) & ~15); 241 242 *poffset = rmesa->dma.current_used; 243 *pbo = rmesa->dma.current; 244 radeon_bo_ref(*pbo); 245 246 /* Always align to at least 16 bytes */ 247 rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15; 248 rmesa->dma.current_vertexptr = rmesa->dma.current_used; 249 250 assert(rmesa->dma.current_used <= 
rmesa->dma.current->size); 251} 252 253void radeonReleaseDmaRegion(radeonContextPtr rmesa) 254{ 255 if (RADEON_DEBUG & DEBUG_IOCTL) 256 fprintf(stderr, "%s %p\n", __FUNCTION__, rmesa->dma.current); 257 if (rmesa->dma.current) { 258 rmesa->dma.nr_released_bufs++; 259 radeon_bo_unmap(rmesa->dma.current); 260 radeon_bo_unref(rmesa->dma.current); 261 } 262 rmesa->dma.current = NULL; 263} 264 265 266/* Flush vertices in the current dma region. 267 */ 268void rcommon_flush_last_swtcl_prim( GLcontext *ctx ) 269{ 270 radeonContextPtr rmesa = RADEON_CONTEXT(ctx); 271 struct radeon_dma *dma = &rmesa->dma; 272 273 274 if (RADEON_DEBUG & DEBUG_IOCTL) 275 fprintf(stderr, "%s %p\n", __FUNCTION__, dma->current); 276 dma->flush = NULL; 277 278 if (dma->current) { 279 GLuint current_offset = dma->current_used; 280 281 assert (dma->current_used + 282 rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == 283 dma->current_vertexptr); 284 285 if (dma->current_used != dma->current_vertexptr) { 286 dma->current_used = dma->current_vertexptr; 287 288 rmesa->vtbl.swtcl_flush(ctx, current_offset); 289 } 290 rmesa->swtcl.numverts = 0; 291 } 292} 293/* Alloc space in the current dma region. 
294 */ 295void * 296rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize ) 297{ 298 GLuint bytes = vsize * nverts; 299 void *head; 300restart: 301 if (!rmesa->dma.current || rmesa->dma.current_vertexptr + bytes > rmesa->dma.current->size) { 302 radeonRefillCurrentDmaRegion(rmesa, bytes); 303 } 304 305 if (!rmesa->dma.flush) { 306 /* make sure we have enough space to use this in cmdbuf */ 307 rcommonEnsureCmdBufSpace(rmesa, 308 rmesa->hw.max_state_size + (20*sizeof(int)), 309 __FUNCTION__); 310 /* if cmdbuf flushed DMA restart */ 311 if (!rmesa->dma.current) 312 goto restart; 313 rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; 314 rmesa->dma.flush = rcommon_flush_last_swtcl_prim; 315 } 316 317 ASSERT( vsize == rmesa->swtcl.vertex_size * 4 ); 318 ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim ); 319 ASSERT( rmesa->dma.current_used + 320 rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == 321 rmesa->dma.current_vertexptr ); 322 323 head = (rmesa->dma.current->ptr + rmesa->dma.current_vertexptr); 324 rmesa->dma.current_vertexptr += bytes; 325 rmesa->swtcl.numverts += nverts; 326 return head; 327} 328 329void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs ) 330{ 331 radeonContextPtr radeon = RADEON_CONTEXT( ctx ); 332 int i; 333 334 if (radeon->dma.flush) { 335 radeon->dma.flush(radeon->glCtx); 336 } 337 if (radeon->tcl.elt_dma_bo) { 338 radeon_bo_unref(radeon->tcl.elt_dma_bo); 339 radeon->tcl.elt_dma_bo = NULL; 340 } 341 for (i = 0; i < radeon->tcl.aos_count; i++) { 342 if (radeon->tcl.aos[i].bo) { 343 radeon_bo_unref(radeon->tcl.aos[i].bo); 344 radeon->tcl.aos[i].bo = NULL; 345 } 346 } 347} 348