/* radeon_dma.c — revision c3380ded10200f2df0cfba4abbe9a9eb892f7cbb */
/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc. 2002.  All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
30 31**************************************************************************/ 32 33#include "radeon_common.h" 34 35#if defined(USE_X86_ASM) 36#define COPY_DWORDS( dst, src, nr ) \ 37do { \ 38 int __tmp; \ 39 __asm__ __volatile__( "rep ; movsl" \ 40 : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \ 41 : "0" (nr), \ 42 "D" ((long)dst), \ 43 "S" ((long)src) ); \ 44} while (0) 45#else 46#define COPY_DWORDS( dst, src, nr ) \ 47do { \ 48 int j; \ 49 for ( j = 0 ; j < nr ; j++ ) \ 50 dst[j] = ((int *)src)[j]; \ 51 dst += nr; \ 52} while (0) 53#endif 54 55void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count) 56{ 57 int i; 58 59 if (RADEON_DEBUG & DEBUG_VERTS) 60 fprintf(stderr, "%s count %d stride %d out %p data %p\n", 61 __FUNCTION__, count, stride, (void *)out, (void *)data); 62 63 if (stride == 4) 64 COPY_DWORDS(out, data, count); 65 else 66 for (i = 0; i < count; i++) { 67 out[0] = *(int *)data; 68 out++; 69 data += stride; 70 } 71} 72 73void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count) 74{ 75 int i; 76 77 if (RADEON_DEBUG & DEBUG_VERTS) 78 fprintf(stderr, "%s count %d stride %d out %p data %p\n", 79 __FUNCTION__, count, stride, (void *)out, (void *)data); 80 81 if (stride == 8) 82 COPY_DWORDS(out, data, count * 2); 83 else 84 for (i = 0; i < count; i++) { 85 out[0] = *(int *)data; 86 out[1] = *(int *)(data + 4); 87 out += 2; 88 data += stride; 89 } 90} 91 92void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count) 93{ 94 int i; 95 96 if (RADEON_DEBUG & DEBUG_VERTS) 97 fprintf(stderr, "%s count %d stride %d out %p data %p\n", 98 __FUNCTION__, count, stride, (void *)out, (void *)data); 99 100 if (stride == 12) { 101 COPY_DWORDS(out, data, count * 3); 102 } 103 else 104 for (i = 0; i < count; i++) { 105 out[0] = *(int *)data; 106 out[1] = *(int *)(data + 4); 107 out[2] = *(int *)(data + 8); 108 out += 3; 109 data += stride; 110 } 111} 112 113void radeonEmitVec16(uint32_t *out, const GLvoid * data, 
int stride, int count) 114{ 115 int i; 116 117 if (RADEON_DEBUG & DEBUG_VERTS) 118 fprintf(stderr, "%s count %d stride %d out %p data %p\n", 119 __FUNCTION__, count, stride, (void *)out, (void *)data); 120 121 if (stride == 16) 122 COPY_DWORDS(out, data, count * 4); 123 else 124 for (i = 0; i < count; i++) { 125 out[0] = *(int *)data; 126 out[1] = *(int *)(data + 4); 127 out[2] = *(int *)(data + 8); 128 out[3] = *(int *)(data + 12); 129 out += 4; 130 data += stride; 131 } 132} 133 134void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos, 135 const GLvoid * data, int size, int stride, int count) 136{ 137 radeonContextPtr rmesa = RADEON_CONTEXT(ctx); 138 uint32_t *out; 139 140 if (stride == 0) { 141 radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32); 142 count = 1; 143 aos->stride = 0; 144 } else { 145 radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32); 146 aos->stride = size; 147 } 148 149 aos->components = size; 150 aos->count = count; 151 152 out = (uint32_t*)((char*)aos->bo->ptr + aos->offset); 153 switch (size) { 154 case 1: radeonEmitVec4(out, data, stride, count); break; 155 case 2: radeonEmitVec8(out, data, stride, count); break; 156 case 3: radeonEmitVec12(out, data, stride, count); break; 157 case 4: radeonEmitVec16(out, data, stride, count); break; 158 default: 159 assert(0); 160 break; 161 } 162} 163 164void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size) 165{ 166 167 size = MAX2(size, MAX_DMA_BUF_SZ); 168 169 if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) 170 fprintf(stderr, "%s\n", __FUNCTION__); 171 172 if (rmesa->dma.flush) { 173 rmesa->dma.flush(rmesa->glCtx); 174 } 175 176 if (rmesa->dma.nr_released_bufs > 4) { 177 rcommonFlushCmdBuf(rmesa, __FUNCTION__); 178 rmesa->dma.nr_released_bufs = 0; 179 } 180 181 if (rmesa->dma.current) { 182 radeon_bo_unmap(rmesa->dma.current); 183 radeon_bo_unref(rmesa->dma.current); 184 rmesa->dma.current = 0; 185 } 186 187again_alloc: 188 rmesa->dma.current 
= radeon_bo_open(rmesa->radeonScreen->bom, 189 0, size, 4, RADEON_GEM_DOMAIN_GTT, 190 0); 191 192 if (!rmesa->dma.current) { 193 rcommonFlushCmdBuf(rmesa, __FUNCTION__); 194 rmesa->dma.nr_released_bufs = 0; 195 goto again_alloc; 196 } 197 198 rmesa->dma.current_used = 0; 199 rmesa->dma.current_vertexptr = 0; 200 201 if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs, 202 rmesa->dma.current, 203 RADEON_GEM_DOMAIN_GTT, 0)) 204 fprintf(stderr,"failure to revalidate BOs - badness\n"); 205 206 if (!rmesa->dma.current) { 207 /* Cmd buff have been flushed in radeon_revalidate_bos */ 208 rmesa->dma.nr_released_bufs = 0; 209 goto again_alloc; 210 } 211 212 radeon_bo_map(rmesa->dma.current, 1); 213} 214 215/* Allocates a region from rmesa->dma.current. If there isn't enough 216 * space in current, grab a new buffer (and discard what was left of current) 217 */ 218void radeonAllocDmaRegion(radeonContextPtr rmesa, 219 struct radeon_bo **pbo, int *poffset, 220 int bytes, int alignment) 221{ 222 if (RADEON_DEBUG & DEBUG_IOCTL) 223 fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); 224 225 if (rmesa->dma.flush) 226 rmesa->dma.flush(rmesa->glCtx); 227 228 assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr); 229 230 alignment--; 231 rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment; 232 233 if (!rmesa->dma.current || rmesa->dma.current_used + bytes > rmesa->dma.current->size) 234 radeonRefillCurrentDmaRegion(rmesa, (bytes + 15) & ~15); 235 236 *poffset = rmesa->dma.current_used; 237 *pbo = rmesa->dma.current; 238 radeon_bo_ref(*pbo); 239 240 /* Always align to at least 16 bytes */ 241 rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15; 242 rmesa->dma.current_vertexptr = rmesa->dma.current_used; 243 244 assert(rmesa->dma.current_used <= rmesa->dma.current->size); 245} 246 247void radeonReleaseDmaRegion(radeonContextPtr rmesa) 248{ 249 if (RADEON_DEBUG & DEBUG_IOCTL) 250 fprintf(stderr, "%s %p\n", __FUNCTION__, 
rmesa->dma.current); 251 if (rmesa->dma.current) { 252 rmesa->dma.nr_released_bufs++; 253 radeon_bo_unmap(rmesa->dma.current); 254 radeon_bo_unref(rmesa->dma.current); 255 } 256 rmesa->dma.current = NULL; 257} 258 259 260/* Flush vertices in the current dma region. 261 */ 262void rcommon_flush_last_swtcl_prim( GLcontext *ctx ) 263{ 264 radeonContextPtr rmesa = RADEON_CONTEXT(ctx); 265 struct radeon_dma *dma = &rmesa->dma; 266 267 268 if (RADEON_DEBUG & DEBUG_IOCTL) 269 fprintf(stderr, "%s %p\n", __FUNCTION__, dma->current); 270 dma->flush = NULL; 271 272 if (dma->current) { 273 GLuint current_offset = dma->current_used; 274 275 assert (dma->current_used + 276 rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == 277 dma->current_vertexptr); 278 279 if (dma->current_used != dma->current_vertexptr) { 280 dma->current_used = dma->current_vertexptr; 281 282 rmesa->vtbl.swtcl_flush(ctx, current_offset); 283 } 284 rmesa->swtcl.numverts = 0; 285 } 286} 287/* Alloc space in the current dma region. 
288 */ 289void * 290rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize ) 291{ 292 GLuint bytes = vsize * nverts; 293 void *head; 294restart: 295 if (!rmesa->dma.current || rmesa->dma.current_vertexptr + bytes > rmesa->dma.current->size) { 296 radeonRefillCurrentDmaRegion(rmesa, bytes); 297 } 298 299 if (!rmesa->dma.flush) { 300 /* make sure we have enough space to use this in cmdbuf */ 301 rcommonEnsureCmdBufSpace(rmesa, 302 rmesa->hw.max_state_size + (20*sizeof(int)), 303 __FUNCTION__); 304 /* if cmdbuf flushed DMA restart */ 305 if (!rmesa->dma.current) 306 goto restart; 307 rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; 308 rmesa->dma.flush = rcommon_flush_last_swtcl_prim; 309 } 310 311 ASSERT( vsize == rmesa->swtcl.vertex_size * 4 ); 312 ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim ); 313 ASSERT( rmesa->dma.current_used + 314 rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == 315 rmesa->dma.current_vertexptr ); 316 317 head = (rmesa->dma.current->ptr + rmesa->dma.current_vertexptr); 318 rmesa->dma.current_vertexptr += bytes; 319 rmesa->swtcl.numverts += nverts; 320 return head; 321} 322 323void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs ) 324{ 325 radeonContextPtr radeon = RADEON_CONTEXT( ctx ); 326 int i; 327 328 if (radeon->dma.flush) { 329 radeon->dma.flush(radeon->glCtx); 330 } 331 if (radeon->tcl.elt_dma_bo) { 332 radeon_bo_unref(radeon->tcl.elt_dma_bo); 333 radeon->tcl.elt_dma_bo = NULL; 334 } 335 for (i = 0; i < radeon->tcl.aos_count; i++) { 336 if (radeon->tcl.aos[i].bo) { 337 radeon_bo_unref(radeon->tcl.aos[i].bo); 338 radeon->tcl.aos[i].bo = NULL; 339 } 340 } 341} 342