/* radeon_dma.c revision 1386e8a6ba9732c578e0122de245abfd578a7d1d */
1/************************************************************************** 2 3Copyright (C) 2004 Nicolai Haehnle. 4Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. 5 6The Weather Channel (TM) funded Tungsten Graphics to develop the 7initial release of the Radeon 8500 driver under the XFree86 license. 8This notice must be preserved. 9 10All Rights Reserved. 11 12Permission is hereby granted, free of charge, to any person obtaining a 13copy of this software and associated documentation files (the "Software"), 14to deal in the Software without restriction, including without limitation 15on the rights to use, copy, modify, merge, publish, distribute, sub 16license, and/or sell copies of the Software, and to permit persons to whom 17the Software is furnished to do so, subject to the following conditions: 18 19The above copyright notice and this permission notice (including the next 20paragraph) shall be included in all copies or substantial portions of the 21Software. 22 23THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 26ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 27DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 28OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 29USE OR OTHER DEALINGS IN THE SOFTWARE. 
30 31**************************************************************************/ 32 33#include "radeon_common.h" 34 35#if defined(USE_X86_ASM) 36#define COPY_DWORDS( dst, src, nr ) \ 37do { \ 38 int __tmp; \ 39 __asm__ __volatile__( "rep ; movsl" \ 40 : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \ 41 : "0" (nr), \ 42 "D" ((long)dst), \ 43 "S" ((long)src) ); \ 44} while (0) 45#else 46#define COPY_DWORDS( dst, src, nr ) \ 47do { \ 48 int j; \ 49 for ( j = 0 ; j < nr ; j++ ) \ 50 dst[j] = ((int *)src)[j]; \ 51 dst += nr; \ 52} while (0) 53#endif 54 55static void radeonEmitVec4(uint32_t *out, GLvoid * data, int stride, int count) 56{ 57 int i; 58 59 if (RADEON_DEBUG & DEBUG_VERTS) 60 fprintf(stderr, "%s count %d stride %d out %p data %p\n", 61 __FUNCTION__, count, stride, (void *)out, (void *)data); 62 63 if (stride == 4) 64 COPY_DWORDS(out, data, count); 65 else 66 for (i = 0; i < count; i++) { 67 out[0] = *(int *)data; 68 out++; 69 data += stride; 70 } 71} 72 73void radeonEmitVec8(uint32_t *out, GLvoid * data, int stride, int count) 74{ 75 int i; 76 77 if (RADEON_DEBUG & DEBUG_VERTS) 78 fprintf(stderr, "%s count %d stride %d out %p data %p\n", 79 __FUNCTION__, count, stride, (void *)out, (void *)data); 80 81 if (stride == 8) 82 COPY_DWORDS(out, data, count * 2); 83 else 84 for (i = 0; i < count; i++) { 85 out[0] = *(int *)data; 86 out[1] = *(int *)(data + 4); 87 out += 2; 88 data += stride; 89 } 90} 91 92void radeonEmitVec12(uint32_t *out, GLvoid * data, int stride, int count) 93{ 94 int i; 95 96 if (RADEON_DEBUG & DEBUG_VERTS) 97 fprintf(stderr, "%s count %d stride %d out %p data %p\n", 98 __FUNCTION__, count, stride, (void *)out, (void *)data); 99 100 if (stride == 12) { 101 COPY_DWORDS(out, data, count * 3); 102 } 103 else 104 for (i = 0; i < count; i++) { 105 out[0] = *(int *)data; 106 out[1] = *(int *)(data + 4); 107 out[2] = *(int *)(data + 8); 108 out += 3; 109 data += stride; 110 } 111} 112 113static void radeonEmitVec16(uint32_t *out, GLvoid * data, int stride, 
int count) 114{ 115 int i; 116 117 if (RADEON_DEBUG & DEBUG_VERTS) 118 fprintf(stderr, "%s count %d stride %d out %p data %p\n", 119 __FUNCTION__, count, stride, (void *)out, (void *)data); 120 121 if (stride == 16) 122 COPY_DWORDS(out, data, count * 4); 123 else 124 for (i = 0; i < count; i++) { 125 out[0] = *(int *)data; 126 out[1] = *(int *)(data + 4); 127 out[2] = *(int *)(data + 8); 128 out[3] = *(int *)(data + 12); 129 out += 4; 130 data += stride; 131 } 132} 133 134void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos, 135 GLvoid * data, int size, int stride, int count) 136{ 137 radeonContextPtr rmesa = RADEON_CONTEXT(ctx); 138 uint32_t *out; 139 140 if (stride == 0) { 141 radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32); 142 count = 1; 143 aos->stride = 0; 144 } else { 145 radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32); 146 aos->stride = size; 147 } 148 149 aos->components = size; 150 aos->count = count; 151 152 out = (uint32_t*)((char*)aos->bo->ptr + aos->offset); 153 switch (size) { 154 case 1: radeonEmitVec4(out, data, stride, count); break; 155 case 2: radeonEmitVec8(out, data, stride, count); break; 156 case 3: radeonEmitVec12(out, data, stride, count); break; 157 case 4: radeonEmitVec16(out, data, stride, count); break; 158 default: 159 assert(0); 160 break; 161 } 162} 163 164void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size) 165{ 166 struct radeon_cs_space_check bos[1]; 167 int flushed = 0, ret; 168 169 size = MAX2(size, MAX_DMA_BUF_SZ * 16); 170 171 if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) 172 fprintf(stderr, "%s\n", __FUNCTION__); 173 174 if (rmesa->dma.flush) { 175 rmesa->dma.flush(rmesa->glCtx); 176 } 177 178 if (rmesa->dma.nr_released_bufs > 4) { 179 rcommonFlushCmdBuf(rmesa, __FUNCTION__); 180 rmesa->dma.nr_released_bufs = 0; 181 } 182 183 if (rmesa->dma.current) { 184 radeon_bo_unmap(rmesa->dma.current); 185 radeon_bo_unref(rmesa->dma.current); 186 rmesa->dma.current 
= 0; 187 } 188 189again_alloc: 190 rmesa->dma.current = radeon_bo_open(rmesa->radeonScreen->bom, 191 0, size, 4, RADEON_GEM_DOMAIN_GTT, 192 0); 193 194 if (!rmesa->dma.current) { 195 rcommonFlushCmdBuf(rmesa, __FUNCTION__); 196 rmesa->dma.nr_released_bufs = 0; 197 goto again_alloc; 198 } 199 200 rmesa->dma.current_used = 0; 201 rmesa->dma.current_vertexptr = 0; 202 203 bos[0].bo = rmesa->dma.current; 204 bos[0].read_domains = RADEON_GEM_DOMAIN_GTT; 205 bos[0].write_domain =0 ; 206 bos[0].new_accounted = 0; 207 208 ret = radeon_cs_space_check(rmesa->cmdbuf.cs, bos, 1); 209 if (ret == RADEON_CS_SPACE_OP_TO_BIG) { 210 fprintf(stderr,"Got OPEARTION TO BIG ILLEGAL - this cannot happen"); 211 assert(0); 212 } else if (ret == RADEON_CS_SPACE_FLUSH) { 213 rcommonFlushCmdBuf(rmesa, __FUNCTION__); 214 if (flushed) { 215 fprintf(stderr,"flushed but still no space\n"); 216 assert(0); 217 } 218 flushed = 1; 219 goto again_alloc; 220 } 221 radeon_bo_map(rmesa->dma.current, 1); 222} 223 224/* Allocates a region from rmesa->dma.current. 
If there isn't enough 225 * space in current, grab a new buffer (and discard what was left of current) 226 */ 227void radeonAllocDmaRegion(radeonContextPtr rmesa, 228 struct radeon_bo **pbo, int *poffset, 229 int bytes, int alignment) 230{ 231 if (RADEON_DEBUG & DEBUG_IOCTL) 232 fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); 233 234 if (rmesa->dma.flush) 235 rmesa->dma.flush(rmesa->glCtx); 236 237 assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr); 238 239 alignment--; 240 rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment; 241 242 if (!rmesa->dma.current || rmesa->dma.current_used + bytes > rmesa->dma.current->size) 243 radeonRefillCurrentDmaRegion(rmesa, (bytes + 15) & ~15); 244 245 *poffset = rmesa->dma.current_used; 246 *pbo = rmesa->dma.current; 247 radeon_bo_ref(*pbo); 248 249 /* Always align to at least 16 bytes */ 250 rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15; 251 rmesa->dma.current_vertexptr = rmesa->dma.current_used; 252 253 assert(rmesa->dma.current_used <= rmesa->dma.current->size); 254} 255 256void radeonReleaseDmaRegion(radeonContextPtr rmesa) 257{ 258 if (RADEON_DEBUG & DEBUG_IOCTL) 259 fprintf(stderr, "%s %p\n", __FUNCTION__, rmesa->dma.current); 260 if (rmesa->dma.current) { 261 rmesa->dma.nr_released_bufs++; 262 radeon_bo_unmap(rmesa->dma.current); 263 radeon_bo_unref(rmesa->dma.current); 264 } 265 rmesa->dma.current = NULL; 266} 267 268 269/* Flush vertices in the current dma region. 
270 */ 271void rcommon_flush_last_swtcl_prim( GLcontext *ctx ) 272{ 273 radeonContextPtr rmesa = RADEON_CONTEXT(ctx); 274 struct radeon_dma *dma = &rmesa->dma; 275 276 277 if (RADEON_DEBUG & DEBUG_IOCTL) 278 fprintf(stderr, "%s %p\n", __FUNCTION__, dma->current); 279 dma->flush = NULL; 280 281 if (dma->current) { 282 GLuint current_offset = dma->current_used; 283 284 assert (dma->current_used + 285 rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == 286 dma->current_vertexptr); 287 288 if (dma->current_used != dma->current_vertexptr) { 289 dma->current_used = dma->current_vertexptr; 290 291 rmesa->vtbl.swtcl_flush(ctx, current_offset); 292 } 293 rmesa->swtcl.numverts = 0; 294 } 295} 296/* Alloc space in the current dma region. 297 */ 298void * 299rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize ) 300{ 301 GLuint bytes = vsize * nverts; 302 void *head; 303restart: 304 if (!rmesa->dma.current || rmesa->dma.current_vertexptr + bytes > rmesa->dma.current->size) { 305 radeonRefillCurrentDmaRegion(rmesa, bytes); 306 } 307 308 if (!rmesa->dma.flush) { 309 /* make sure we have enough space to use this in cmdbuf */ 310 rcommonEnsureCmdBufSpace(rmesa, 311 rmesa->hw.max_state_size + (12*sizeof(int)), 312 __FUNCTION__); 313 /* if cmdbuf flushed DMA restart */ 314 if (!rmesa->dma.current) 315 goto restart; 316 rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; 317 rmesa->dma.flush = rcommon_flush_last_swtcl_prim; 318 } 319 320 ASSERT( vsize == rmesa->swtcl.vertex_size * 4 ); 321 ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim ); 322 ASSERT( rmesa->dma.current_used + 323 rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == 324 rmesa->dma.current_vertexptr ); 325 326 head = (rmesa->dma.current->ptr + rmesa->dma.current_vertexptr); 327 rmesa->dma.current_vertexptr += bytes; 328 rmesa->swtcl.numverts += nverts; 329 return head; 330} 331