/* nv50_vbo.c -- revision db2df0aa9b49e006de4fcfc4a0bab32ad0a30173 */
1/* 2 * Copyright 2008 Ben Skeggs 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 * SOFTWARE. 
21 */ 22 23#include "pipe/p_context.h" 24#include "pipe/p_state.h" 25#include "util/u_inlines.h" 26#include "util/u_format.h" 27 28#include "nv50_context.h" 29 30static boolean 31nv50_push_elements_u08(struct nv50_context *, uint8_t *, unsigned); 32 33static boolean 34nv50_push_elements_u16(struct nv50_context *, uint16_t *, unsigned); 35 36static boolean 37nv50_push_elements_u32(struct nv50_context *, uint32_t *, unsigned); 38 39static boolean 40nv50_push_arrays(struct nv50_context *, unsigned, unsigned); 41 42#define NV50_USING_LOATHED_EDGEFLAG(ctx) ((ctx)->vertprog->cfg.edgeflag_in < 16) 43 44static INLINE unsigned 45nv50_prim(unsigned mode) 46{ 47 switch (mode) { 48 case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS; 49 case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES; 50 case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP; 51 case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP; 52 case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES; 53 case PIPE_PRIM_TRIANGLE_STRIP: 54 return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP; 55 case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN; 56 case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS; 57 case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP; 58 case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON; 59 case PIPE_PRIM_LINES_ADJACENCY: 60 return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY; 61 case PIPE_PRIM_LINE_STRIP_ADJACENCY: 62 return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY; 63 case PIPE_PRIM_TRIANGLES_ADJACENCY: 64 return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY; 65 case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: 66 return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY; 67 default: 68 break; 69 } 70 71 NOUVEAU_ERR("invalid primitive type %d\n", mode); 72 return NV50TCL_VERTEX_BEGIN_POINTS; 73} 74 75static INLINE uint32_t 76nv50_vbo_type_to_hw(enum pipe_format format) 77{ 78 const struct util_format_description *desc; 79 80 desc = 
util_format_description(format); 81 assert(desc); 82 83 switch (desc->channel[0].type) { 84 case UTIL_FORMAT_TYPE_FLOAT: 85 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT; 86 case UTIL_FORMAT_TYPE_UNSIGNED: 87 if (desc->channel[0].normalized) { 88 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM; 89 } 90 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED; 91 case UTIL_FORMAT_TYPE_SIGNED: 92 if (desc->channel[0].normalized) { 93 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM; 94 } 95 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED; 96 /* 97 case PIPE_FORMAT_TYPE_UINT: 98 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UINT; 99 case PIPE_FORMAT_TYPE_SINT: 100 return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SINT; */ 101 default: 102 return 0; 103 } 104} 105 106static INLINE uint32_t 107nv50_vbo_size_to_hw(unsigned size, unsigned nr_c) 108{ 109 static const uint32_t hw_values[] = { 110 0, 0, 0, 0, 111 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8, 112 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8, 113 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8, 114 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8, 115 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16, 116 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16, 117 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16, 118 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16, 119 0, 0, 0, 0, 120 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32, 121 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32, 122 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32, 123 NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 }; 124 125 /* we'd also have R11G11B10 and R10G10B10A2 */ 126 127 assert(nr_c > 0 && nr_c <= 4); 128 129 if (size > 32) 130 return 0; 131 size >>= (3 - 2); 132 133 return hw_values[size + (nr_c - 1)]; 134} 135 136static INLINE uint32_t 137nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve) 138{ 139 uint32_t hw_type, hw_size; 140 enum pipe_format pf = ve->src_format; 141 const struct util_format_description *desc; 142 unsigned size; 143 144 desc = util_format_description(pf); 145 assert(desc); 146 147 size = 
util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0); 148 149 hw_type = nv50_vbo_type_to_hw(pf); 150 hw_size = nv50_vbo_size_to_hw(size, ve->nr_components); 151 152 if (!hw_type || !hw_size) { 153 NOUVEAU_ERR("unsupported vbo format: %s\n", util_format_name(pf)); 154 abort(); 155 return 0x24e80000; 156 } 157 158 if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_Z) /* BGRA */ 159 hw_size |= (1 << 31); /* no real swizzle bits :-( */ 160 161 return (hw_type | hw_size); 162} 163 164/* For instanced drawing from user buffers, hitting the FIFO repeatedly 165 * with the same vertex data is probably worse than uploading all data. 166 */ 167static boolean 168nv50_upload_vtxbuf(struct nv50_context *nv50, unsigned i) 169{ 170 struct nv50_screen *nscreen = nv50->screen; 171 struct pipe_screen *pscreen = &nscreen->base.base; 172 struct pipe_buffer *buf = nscreen->strm_vbuf[i]; 173 struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i]; 174 uint8_t *src; 175 unsigned size = align(vb->buffer->size, 4096); 176 177 if (buf && buf->size < size) 178 pipe_buffer_reference(&nscreen->strm_vbuf[i], NULL); 179 180 if (!nscreen->strm_vbuf[i]) { 181 nscreen->strm_vbuf[i] = pipe_buffer_create( 182 pscreen, 0, PIPE_BUFFER_USAGE_VERTEX, size); 183 buf = nscreen->strm_vbuf[i]; 184 } 185 186 src = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); 187 if (!src) 188 return FALSE; 189 src += vb->buffer_offset; 190 191 size = (vb->max_index + 1) * vb->stride + 16; /* + 16 is for stride 0 */ 192 if (vb->buffer_offset + size > vb->buffer->size) 193 size = vb->buffer->size - vb->buffer_offset; 194 195 pipe_buffer_write(pscreen, buf, vb->buffer_offset, size, src); 196 pipe_buffer_unmap(pscreen, vb->buffer); 197 198 vb->buffer = buf; /* don't pipe_reference, this is a private copy */ 199 return TRUE; 200} 201 202static void 203nv50_upload_user_vbufs(struct nv50_context *nv50) 204{ 205 unsigned i; 206 207 if (nv50->vbo_fifo) 208 nv50->dirty |= NV50_NEW_ARRAYS; 209 if (!(nv50->dirty & 
NV50_NEW_ARRAYS)) 210 return; 211 212 for (i = 0; i < nv50->vtxbuf_nr; ++i) { 213 if (nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX) 214 continue; 215 nv50_upload_vtxbuf(nv50, i); 216 } 217} 218 219static void 220nv50_set_static_vtxattr(struct nv50_context *nv50, unsigned i, void *data) 221{ 222 struct nouveau_grobj *tesla = nv50->screen->tesla; 223 struct nouveau_channel *chan = tesla->channel; 224 float v[4]; 225 226 util_format_read_4f(nv50->vtxelt[i].src_format, 227 v, 0, data, 0, 0, 0, 1, 1); 228 229 switch (nv50->vtxelt[i].nr_components) { 230 case 4: 231 BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_4F_X(i), 4); 232 OUT_RINGf (chan, v[0]); 233 OUT_RINGf (chan, v[1]); 234 OUT_RINGf (chan, v[2]); 235 OUT_RINGf (chan, v[3]); 236 break; 237 case 3: 238 BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_3F_X(i), 3); 239 OUT_RINGf (chan, v[0]); 240 OUT_RINGf (chan, v[1]); 241 OUT_RINGf (chan, v[2]); 242 break; 243 case 2: 244 BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_2F_X(i), 2); 245 OUT_RINGf (chan, v[0]); 246 OUT_RINGf (chan, v[1]); 247 break; 248 case 1: 249 BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_1F(i), 1); 250 OUT_RINGf (chan, v[0]); 251 break; 252 default: 253 assert(0); 254 break; 255 } 256} 257 258static unsigned 259init_per_instance_arrays_immd(struct nv50_context *nv50, 260 unsigned startInstance, 261 unsigned pos[16], unsigned step[16]) 262{ 263 struct nouveau_bo *bo; 264 unsigned i, b, count = 0; 265 266 for (i = 0; i < nv50->vtxelt_nr; ++i) { 267 if (!nv50->vtxelt[i].instance_divisor) 268 continue; 269 ++count; 270 b = nv50->vtxelt[i].vertex_buffer_index; 271 272 pos[i] = nv50->vtxelt[i].src_offset + 273 nv50->vtxbuf[b].buffer_offset + 274 startInstance * nv50->vtxbuf[b].stride; 275 step[i] = startInstance % nv50->vtxelt[i].instance_divisor; 276 277 bo = nouveau_bo(nv50->vtxbuf[b].buffer); 278 if (!bo->map) 279 nouveau_bo_map(bo, NOUVEAU_BO_RD); 280 281 nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]); 282 } 283 284 return count; 285} 286 
287static unsigned 288init_per_instance_arrays(struct nv50_context *nv50, 289 unsigned startInstance, 290 unsigned pos[16], unsigned step[16]) 291{ 292 struct nouveau_grobj *tesla = nv50->screen->tesla; 293 struct nouveau_channel *chan = tesla->channel; 294 struct nouveau_bo *bo; 295 struct nouveau_stateobj *so; 296 unsigned i, b, count = 0; 297 const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; 298 299 if (nv50->vbo_fifo) 300 return init_per_instance_arrays_immd(nv50, startInstance, 301 pos, step); 302 303 so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2); 304 305 for (i = 0; i < nv50->vtxelt_nr; ++i) { 306 if (!nv50->vtxelt[i].instance_divisor) 307 continue; 308 ++count; 309 b = nv50->vtxelt[i].vertex_buffer_index; 310 311 pos[i] = nv50->vtxelt[i].src_offset + 312 nv50->vtxbuf[b].buffer_offset + 313 startInstance * nv50->vtxbuf[b].stride; 314 315 if (!startInstance) { 316 step[i] = 0; 317 continue; 318 } 319 step[i] = startInstance % nv50->vtxelt[i].instance_divisor; 320 321 bo = nouveau_bo(nv50->vtxbuf[b].buffer); 322 323 so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2); 324 so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0); 325 so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0); 326 } 327 328 if (count && startInstance) { 329 so_ref (so, &nv50->state.instbuf); /* for flush notify */ 330 so_emit(chan, nv50->state.instbuf); 331 } 332 so_ref (NULL, &so); 333 334 return count; 335} 336 337static void 338step_per_instance_arrays_immd(struct nv50_context *nv50, 339 unsigned pos[16], unsigned step[16]) 340{ 341 struct nouveau_bo *bo; 342 unsigned i, b; 343 344 for (i = 0; i < nv50->vtxelt_nr; ++i) { 345 if (!nv50->vtxelt[i].instance_divisor) 346 continue; 347 if (++step[i] != nv50->vtxelt[i].instance_divisor) 348 continue; 349 b = nv50->vtxelt[i].vertex_buffer_index; 350 bo = nouveau_bo(nv50->vtxbuf[b].buffer); 351 352 step[i] = 0; 353 pos[i] += nv50->vtxbuf[b].stride; 354 355 nv50_set_static_vtxattr(nv50, i, 
(uint8_t *)bo->map + pos[i]); 356 } 357} 358 359static void 360step_per_instance_arrays(struct nv50_context *nv50, 361 unsigned pos[16], unsigned step[16]) 362{ 363 struct nouveau_grobj *tesla = nv50->screen->tesla; 364 struct nouveau_channel *chan = tesla->channel; 365 struct nouveau_bo *bo; 366 struct nouveau_stateobj *so; 367 unsigned i, b; 368 const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; 369 370 if (nv50->vbo_fifo) { 371 step_per_instance_arrays_immd(nv50, pos, step); 372 return; 373 } 374 375 so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2); 376 377 for (i = 0; i < nv50->vtxelt_nr; ++i) { 378 if (!nv50->vtxelt[i].instance_divisor) 379 continue; 380 b = nv50->vtxelt[i].vertex_buffer_index; 381 382 if (++step[i] == nv50->vtxelt[i].instance_divisor) { 383 step[i] = 0; 384 pos[i] += nv50->vtxbuf[b].stride; 385 } 386 387 bo = nouveau_bo(nv50->vtxbuf[b].buffer); 388 389 so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2); 390 so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0); 391 so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0); 392 } 393 394 so_ref (so, &nv50->state.instbuf); /* for flush notify */ 395 so_ref (NULL, &so); 396 397 so_emit(chan, nv50->state.instbuf); 398} 399 400static INLINE void 401nv50_unmap_vbufs(struct nv50_context *nv50) 402{ 403 unsigned i; 404 405 for (i = 0; i < nv50->vtxbuf_nr; ++i) 406 if (nouveau_bo(nv50->vtxbuf[i].buffer)->map) 407 nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer)); 408} 409 410void 411nv50_draw_arrays_instanced(struct pipe_context *pipe, 412 unsigned mode, unsigned start, unsigned count, 413 unsigned startInstance, unsigned instanceCount) 414{ 415 struct nv50_context *nv50 = nv50_context(pipe); 416 struct nouveau_channel *chan = nv50->screen->tesla->channel; 417 struct nouveau_grobj *tesla = nv50->screen->tesla; 418 unsigned i, nz_divisors; 419 unsigned step[16], pos[16]; 420 421 if (!NV50_USING_LOATHED_EDGEFLAG(nv50)) 422 nv50_upload_user_vbufs(nv50); 
423 424 nv50_state_validate(nv50); 425 426 nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step); 427 428 BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2); 429 OUT_RING (chan, NV50_CB_AUX | (24 << 8)); 430 OUT_RING (chan, startInstance); 431 432 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); 433 OUT_RING (chan, nv50_prim(mode)); 434 435 if (nv50->vbo_fifo) 436 nv50_push_arrays(nv50, start, count); 437 else { 438 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); 439 OUT_RING (chan, start); 440 OUT_RING (chan, count); 441 } 442 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); 443 OUT_RING (chan, 0); 444 445 for (i = 1; i < instanceCount; i++) { 446 if (nz_divisors) /* any non-zero array divisors ? */ 447 step_per_instance_arrays(nv50, pos, step); 448 449 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); 450 OUT_RING (chan, nv50_prim(mode) | (1 << 28)); 451 452 if (nv50->vbo_fifo) 453 nv50_push_arrays(nv50, start, count); 454 else { 455 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); 456 OUT_RING (chan, start); 457 OUT_RING (chan, count); 458 } 459 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); 460 OUT_RING (chan, 0); 461 } 462 nv50_unmap_vbufs(nv50); 463 464 so_ref(NULL, &nv50->state.instbuf); 465} 466 467void 468nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, 469 unsigned count) 470{ 471 struct nv50_context *nv50 = nv50_context(pipe); 472 struct nouveau_channel *chan = nv50->screen->tesla->channel; 473 struct nouveau_grobj *tesla = nv50->screen->tesla; 474 boolean ret; 475 476 nv50_state_validate(nv50); 477 478 BEGIN_RING(chan, tesla, 0x142c, 1); 479 OUT_RING (chan, 0); 480 BEGIN_RING(chan, tesla, 0x142c, 1); 481 OUT_RING (chan, 0); 482 483 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); 484 OUT_RING (chan, nv50_prim(mode)); 485 486 if (nv50->vbo_fifo) 487 ret = nv50_push_arrays(nv50, start, count); 488 else { 489 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); 490 OUT_RING (chan, start); 491 
OUT_RING (chan, count); 492 ret = TRUE; 493 } 494 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); 495 OUT_RING (chan, 0); 496 497 nv50_unmap_vbufs(nv50); 498 499 /* XXX: not sure what to do if ret != TRUE: flush and retry? 500 */ 501 assert(ret); 502} 503 504static INLINE boolean 505nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, 506 unsigned start, unsigned count) 507{ 508 struct nouveau_channel *chan = nv50->screen->tesla->channel; 509 struct nouveau_grobj *tesla = nv50->screen->tesla; 510 511 map += start; 512 513 if (nv50->vbo_fifo) 514 return nv50_push_elements_u08(nv50, map, count); 515 516 if (count & 1) { 517 BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1); 518 OUT_RING (chan, map[0]); 519 map++; 520 count--; 521 } 522 523 while (count) { 524 unsigned nr = count > 2046 ? 2046 : count; 525 int i; 526 527 BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1); 528 for (i = 0; i < nr; i += 2) 529 OUT_RING (chan, (map[i + 1] << 16) | map[i]); 530 531 count -= nr; 532 map += nr; 533 } 534 return TRUE; 535} 536 537static INLINE boolean 538nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, 539 unsigned start, unsigned count) 540{ 541 struct nouveau_channel *chan = nv50->screen->tesla->channel; 542 struct nouveau_grobj *tesla = nv50->screen->tesla; 543 544 map += start; 545 546 if (nv50->vbo_fifo) 547 return nv50_push_elements_u16(nv50, map, count); 548 549 if (count & 1) { 550 BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1); 551 OUT_RING (chan, map[0]); 552 map++; 553 count--; 554 } 555 556 while (count) { 557 unsigned nr = count > 2046 ? 
2046 : count; 558 int i; 559 560 BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1); 561 for (i = 0; i < nr; i += 2) 562 OUT_RING (chan, (map[i + 1] << 16) | map[i]); 563 564 count -= nr; 565 map += nr; 566 } 567 return TRUE; 568} 569 570static INLINE boolean 571nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, 572 unsigned start, unsigned count) 573{ 574 struct nouveau_channel *chan = nv50->screen->tesla->channel; 575 struct nouveau_grobj *tesla = nv50->screen->tesla; 576 577 map += start; 578 579 if (nv50->vbo_fifo) 580 return nv50_push_elements_u32(nv50, map, count); 581 582 while (count) { 583 unsigned nr = count > 2047 ? 2047 : count; 584 585 BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, nr); 586 OUT_RINGp (chan, map, nr); 587 588 count -= nr; 589 map += nr; 590 } 591 return TRUE; 592} 593 594static INLINE void 595nv50_draw_elements_inline(struct nv50_context *nv50, 596 void *map, unsigned indexSize, 597 unsigned start, unsigned count) 598{ 599 switch (indexSize) { 600 case 1: 601 nv50_draw_elements_inline_u08(nv50, map, start, count); 602 break; 603 case 2: 604 nv50_draw_elements_inline_u16(nv50, map, start, count); 605 break; 606 case 4: 607 nv50_draw_elements_inline_u32(nv50, map, start, count); 608 break; 609 } 610} 611 612void 613nv50_draw_elements_instanced(struct pipe_context *pipe, 614 struct pipe_buffer *indexBuffer, 615 unsigned indexSize, 616 unsigned mode, unsigned start, unsigned count, 617 unsigned startInstance, unsigned instanceCount) 618{ 619 struct nv50_context *nv50 = nv50_context(pipe); 620 struct nouveau_grobj *tesla = nv50->screen->tesla; 621 struct nouveau_channel *chan = tesla->channel; 622 struct pipe_screen *pscreen = pipe->screen; 623 void *map; 624 unsigned i, nz_divisors; 625 unsigned step[16], pos[16]; 626 627 map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); 628 629 if (!NV50_USING_LOATHED_EDGEFLAG(nv50)) 630 nv50_upload_user_vbufs(nv50); 631 632 
nv50_state_validate(nv50); 633 634 nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step); 635 636 BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2); 637 OUT_RING (chan, NV50_CB_AUX | (24 << 8)); 638 OUT_RING (chan, startInstance); 639 640 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); 641 OUT_RING (chan, nv50_prim(mode)); 642 643 nv50_draw_elements_inline(nv50, map, indexSize, start, count); 644 645 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); 646 OUT_RING (chan, 0); 647 648 for (i = 1; i < instanceCount; ++i) { 649 if (nz_divisors) /* any non-zero array divisors ? */ 650 step_per_instance_arrays(nv50, pos, step); 651 652 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); 653 OUT_RING (chan, nv50_prim(mode) | (1 << 28)); 654 655 nv50_draw_elements_inline(nv50, map, indexSize, start, count); 656 657 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); 658 OUT_RING (chan, 0); 659 } 660 nv50_unmap_vbufs(nv50); 661 662 so_ref(NULL, &nv50->state.instbuf); 663} 664 665void 666nv50_draw_elements(struct pipe_context *pipe, 667 struct pipe_buffer *indexBuffer, unsigned indexSize, 668 unsigned mode, unsigned start, unsigned count) 669{ 670 struct nv50_context *nv50 = nv50_context(pipe); 671 struct nouveau_channel *chan = nv50->screen->tesla->channel; 672 struct nouveau_grobj *tesla = nv50->screen->tesla; 673 struct pipe_screen *pscreen = pipe->screen; 674 void *map; 675 676 map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); 677 678 nv50_state_validate(nv50); 679 680 BEGIN_RING(chan, tesla, 0x142c, 1); 681 OUT_RING (chan, 0); 682 BEGIN_RING(chan, tesla, 0x142c, 1); 683 OUT_RING (chan, 0); 684 685 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); 686 OUT_RING (chan, nv50_prim(mode)); 687 688 nv50_draw_elements_inline(nv50, map, indexSize, start, count); 689 690 BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); 691 OUT_RING (chan, 0); 692 693 nv50_unmap_vbufs(nv50); 694 695 pipe_buffer_unmap(pscreen, indexBuffer); 696} 697 698static INLINE boolean 
699nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib, 700 struct nouveau_stateobj **pso, 701 struct pipe_vertex_element *ve, 702 struct pipe_vertex_buffer *vb) 703 704{ 705 struct nouveau_stateobj *so; 706 struct nouveau_grobj *tesla = nv50->screen->tesla; 707 struct nouveau_bo *bo = nouveau_bo(vb->buffer); 708 float v[4]; 709 int ret; 710 711 ret = nouveau_bo_map(bo, NOUVEAU_BO_RD); 712 if (ret) 713 return FALSE; 714 715 util_format_read_4f(ve->src_format, v, 0, (uint8_t *)bo->map + 716 (vb->buffer_offset + ve->src_offset), 0, 717 0, 0, 1, 1); 718 so = *pso; 719 if (!so) 720 *pso = so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 4, 0); 721 722 switch (ve->nr_components) { 723 case 4: 724 so_method(so, tesla, NV50TCL_VTX_ATTR_4F_X(attrib), 4); 725 so_data (so, fui(v[0])); 726 so_data (so, fui(v[1])); 727 so_data (so, fui(v[2])); 728 so_data (so, fui(v[3])); 729 break; 730 case 3: 731 so_method(so, tesla, NV50TCL_VTX_ATTR_3F_X(attrib), 3); 732 so_data (so, fui(v[0])); 733 so_data (so, fui(v[1])); 734 so_data (so, fui(v[2])); 735 break; 736 case 2: 737 so_method(so, tesla, NV50TCL_VTX_ATTR_2F_X(attrib), 2); 738 so_data (so, fui(v[0])); 739 so_data (so, fui(v[1])); 740 break; 741 case 1: 742 if (attrib == nv50->vertprog->cfg.edgeflag_in) { 743 so_method(so, tesla, NV50TCL_EDGEFLAG_ENABLE, 1); 744 so_data (so, v[0] ? 
1 : 0); 745 } 746 so_method(so, tesla, NV50TCL_VTX_ATTR_1F(attrib), 1); 747 so_data (so, fui(v[0])); 748 break; 749 default: 750 nouveau_bo_unmap(bo); 751 return FALSE; 752 } 753 754 nouveau_bo_unmap(bo); 755 return TRUE; 756} 757 758void 759nv50_vbo_validate(struct nv50_context *nv50) 760{ 761 struct nouveau_grobj *tesla = nv50->screen->tesla; 762 struct nouveau_stateobj *vtxbuf, *vtxfmt, *vtxattr; 763 unsigned i, n_ve; 764 765 /* don't validate if Gallium took away our buffers */ 766 if (nv50->vtxbuf_nr == 0) 767 return; 768 nv50->vbo_fifo = 0; 769 770 for (i = 0; i < nv50->vtxbuf_nr; ++i) 771 if (nv50->vtxbuf[i].stride && 772 !(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX)) 773 nv50->vbo_fifo = 0xffff; 774 775 if (NV50_USING_LOATHED_EDGEFLAG(nv50)) 776 nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */ 777 778 n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr); 779 780 vtxattr = NULL; 781 vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt_nr * 4); 782 vtxfmt = so_new(1, n_ve, 0); 783 so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve); 784 785 for (i = 0; i < nv50->vtxelt_nr; i++) { 786 struct pipe_vertex_element *ve = &nv50->vtxelt[i]; 787 struct pipe_vertex_buffer *vb = 788 &nv50->vtxbuf[ve->vertex_buffer_index]; 789 struct nouveau_bo *bo = nouveau_bo(vb->buffer); 790 uint32_t hw = nv50_vbo_vtxelt_to_hw(ve); 791 792 if (!vb->stride && 793 nv50_vbo_static_attrib(nv50, i, &vtxattr, ve, vb)) { 794 so_data(vtxfmt, hw | (1 << 4)); 795 796 so_method(vtxbuf, tesla, 797 NV50TCL_VERTEX_ARRAY_FORMAT(i), 1); 798 so_data (vtxbuf, 0); 799 800 nv50->vbo_fifo &= ~(1 << i); 801 continue; 802 } 803 804 if (nv50->vbo_fifo) { 805 so_data (vtxfmt, hw | 806 (ve->instance_divisor ? 
(1 << 4) : i)); 807 so_method(vtxbuf, tesla, 808 NV50TCL_VERTEX_ARRAY_FORMAT(i), 1); 809 so_data (vtxbuf, 0); 810 continue; 811 } 812 so_data(vtxfmt, hw | i); 813 814 so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3); 815 so_data (vtxbuf, 0x20000000 | 816 (ve->instance_divisor ? 0 : vb->stride)); 817 so_reloc (vtxbuf, bo, vb->buffer_offset + 818 ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | 819 NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); 820 so_reloc (vtxbuf, bo, vb->buffer_offset + 821 ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | 822 NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); 823 824 /* vertex array limits */ 825 so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2); 826 so_reloc (vtxbuf, bo, vb->buffer->size - 1, 827 NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | 828 NOUVEAU_BO_HIGH, 0, 0); 829 so_reloc (vtxbuf, bo, vb->buffer->size - 1, 830 NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | 831 NOUVEAU_BO_LOW, 0, 0); 832 } 833 for (; i < n_ve; ++i) { 834 so_data (vtxfmt, 0x7e080010); 835 836 so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1); 837 so_data (vtxbuf, 0); 838 } 839 nv50->state.vtxelt_nr = nv50->vtxelt_nr; 840 841 so_ref (vtxfmt, &nv50->state.vtxfmt); 842 so_ref (vtxbuf, &nv50->state.vtxbuf); 843 so_ref (vtxattr, &nv50->state.vtxattr); 844 so_ref (NULL, &vtxbuf); 845 so_ref (NULL, &vtxfmt); 846 so_ref (NULL, &vtxattr); 847} 848 849typedef void (*pfn_push)(struct nouveau_channel *, void *); 850 851struct nv50_vbo_emitctx 852{ 853 pfn_push push[16]; 854 uint8_t *map[16]; 855 unsigned stride[16]; 856 unsigned nr_ve; 857 unsigned vtx_dwords; 858 unsigned vtx_max; 859 860 float edgeflag; 861 unsigned ve_edgeflag; 862}; 863 864static INLINE void 865emit_vtx_next(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit) 866{ 867 unsigned i; 868 869 for (i = 0; i < emit->nr_ve; ++i) { 870 emit->push[i](chan, emit->map[i]); 871 emit->map[i] += emit->stride[i]; 872 } 873} 874 875static INLINE void 876emit_vtx(struct 
nouveau_channel *chan, struct nv50_vbo_emitctx *emit, 877 uint32_t vi) 878{ 879 unsigned i; 880 881 for (i = 0; i < emit->nr_ve; ++i) 882 emit->push[i](chan, emit->map[i] + emit->stride[i] * vi); 883} 884 885static INLINE boolean 886nv50_map_vbufs(struct nv50_context *nv50) 887{ 888 int i; 889 890 for (i = 0; i < nv50->vtxbuf_nr; ++i) { 891 struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i]; 892 unsigned size = vb->stride * (vb->max_index + 1) + 16; 893 894 if (nouveau_bo(vb->buffer)->map) 895 continue; 896 897 size = vb->stride * (vb->max_index + 1) + 16; 898 size = MIN2(size, vb->buffer->size); 899 if (!size) 900 size = vb->buffer->size; 901 902 if (nouveau_bo_map_range(nouveau_bo(vb->buffer), 903 0, size, NOUVEAU_BO_RD)) 904 break; 905 } 906 907 if (i == nv50->vtxbuf_nr) 908 return TRUE; 909 for (; i >= 0; --i) 910 nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer)); 911 return FALSE; 912} 913 914static void 915emit_b32_1(struct nouveau_channel *chan, void *data) 916{ 917 uint32_t *v = data; 918 919 OUT_RING(chan, v[0]); 920} 921 922static void 923emit_b32_2(struct nouveau_channel *chan, void *data) 924{ 925 uint32_t *v = data; 926 927 OUT_RING(chan, v[0]); 928 OUT_RING(chan, v[1]); 929} 930 931static void 932emit_b32_3(struct nouveau_channel *chan, void *data) 933{ 934 uint32_t *v = data; 935 936 OUT_RING(chan, v[0]); 937 OUT_RING(chan, v[1]); 938 OUT_RING(chan, v[2]); 939} 940 941static void 942emit_b32_4(struct nouveau_channel *chan, void *data) 943{ 944 uint32_t *v = data; 945 946 OUT_RING(chan, v[0]); 947 OUT_RING(chan, v[1]); 948 OUT_RING(chan, v[2]); 949 OUT_RING(chan, v[3]); 950} 951 952static void 953emit_b16_1(struct nouveau_channel *chan, void *data) 954{ 955 uint16_t *v = data; 956 957 OUT_RING(chan, v[0]); 958} 959 960static void 961emit_b16_3(struct nouveau_channel *chan, void *data) 962{ 963 uint16_t *v = data; 964 965 OUT_RING(chan, (v[1] << 16) | v[0]); 966 OUT_RING(chan, v[2]); 967} 968 969static void 970emit_b08_1(struct nouveau_channel *chan, 
void *data) 971{ 972 uint8_t *v = data; 973 974 OUT_RING(chan, v[0]); 975} 976 977static void 978emit_b08_3(struct nouveau_channel *chan, void *data) 979{ 980 uint8_t *v = data; 981 982 OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]); 983} 984 985static boolean 986emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit, 987 unsigned start) 988{ 989 unsigned i; 990 991 if (nv50_map_vbufs(nv50) == FALSE) 992 return FALSE; 993 994 emit->ve_edgeflag = nv50->vertprog->cfg.edgeflag_in; 995 996 emit->edgeflag = 0.5f; 997 emit->nr_ve = 0; 998 emit->vtx_dwords = 0; 999 1000 for (i = 0; i < nv50->vtxelt_nr; ++i) { 1001 struct pipe_vertex_element *ve; 1002 struct pipe_vertex_buffer *vb; 1003 unsigned n, size; 1004 const struct util_format_description *desc; 1005 1006 ve = &nv50->vtxelt[i]; 1007 vb = &nv50->vtxbuf[ve->vertex_buffer_index]; 1008 if (!(nv50->vbo_fifo & (1 << i)) || ve->instance_divisor) 1009 continue; 1010 n = emit->nr_ve++; 1011 1012 emit->stride[n] = vb->stride; 1013 emit->map[n] = (uint8_t *)nouveau_bo(vb->buffer)->map + 1014 vb->buffer_offset + 1015 (start * vb->stride + ve->src_offset); 1016 1017 desc = util_format_description(ve->src_format); 1018 assert(desc); 1019 1020 size = util_format_get_component_bits( 1021 ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0); 1022 1023 assert(ve->nr_components > 0 && ve->nr_components <= 4); 1024 1025 /* It shouldn't be necessary to push the implicit 1s 1026 * for case 3 and size 8 cases 1, 2, 3. 
1027 */ 1028 switch (size) { 1029 default: 1030 NOUVEAU_ERR("unsupported vtxelt size: %u\n", size); 1031 return FALSE; 1032 case 32: 1033 switch (ve->nr_components) { 1034 case 1: emit->push[n] = emit_b32_1; break; 1035 case 2: emit->push[n] = emit_b32_2; break; 1036 case 3: emit->push[n] = emit_b32_3; break; 1037 case 4: emit->push[n] = emit_b32_4; break; 1038 } 1039 emit->vtx_dwords += ve->nr_components; 1040 break; 1041 case 16: 1042 switch (ve->nr_components) { 1043 case 1: emit->push[n] = emit_b16_1; break; 1044 case 2: emit->push[n] = emit_b32_1; break; 1045 case 3: emit->push[n] = emit_b16_3; break; 1046 case 4: emit->push[n] = emit_b32_2; break; 1047 } 1048 emit->vtx_dwords += (ve->nr_components + 1) >> 1; 1049 break; 1050 case 8: 1051 switch (ve->nr_components) { 1052 case 1: emit->push[n] = emit_b08_1; break; 1053 case 2: emit->push[n] = emit_b16_1; break; 1054 case 3: emit->push[n] = emit_b08_3; break; 1055 case 4: emit->push[n] = emit_b32_1; break; 1056 } 1057 emit->vtx_dwords += 1; 1058 break; 1059 } 1060 } 1061 1062 emit->vtx_max = 512 / emit->vtx_dwords; 1063 if (emit->ve_edgeflag < 16) 1064 emit->vtx_max = 1; 1065 1066 return TRUE; 1067} 1068 1069static INLINE void 1070set_edgeflag(struct nouveau_channel *chan, 1071 struct nouveau_grobj *tesla, 1072 struct nv50_vbo_emitctx *emit, uint32_t index) 1073{ 1074 unsigned i = emit->ve_edgeflag; 1075 1076 if (i < 16) { 1077 float f = *((float *)(emit->map[i] + index * emit->stride[i])); 1078 1079 if (emit->edgeflag != f) { 1080 emit->edgeflag = f; 1081 1082 BEGIN_RING(chan, tesla, 0x15e4, 1); 1083 OUT_RING (chan, f ? 
1 : 0); 1084 } 1085 } 1086} 1087 1088static boolean 1089nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count) 1090{ 1091 struct nouveau_channel *chan = nv50->screen->base.channel; 1092 struct nouveau_grobj *tesla = nv50->screen->tesla; 1093 struct nv50_vbo_emitctx emit; 1094 1095 if (emit_prepare(nv50, &emit, start) == FALSE) 1096 return FALSE; 1097 1098 while (count) { 1099 unsigned i, dw, nr = MIN2(count, emit.vtx_max); 1100 dw = nr * emit.vtx_dwords; 1101 1102 set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */ 1103 1104 BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw); 1105 for (i = 0; i < nr; ++i) 1106 emit_vtx_next(chan, &emit); 1107 1108 count -= nr; 1109 } 1110 1111 return TRUE; 1112} 1113 1114static boolean 1115nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count) 1116{ 1117 struct nouveau_channel *chan = nv50->screen->base.channel; 1118 struct nouveau_grobj *tesla = nv50->screen->tesla; 1119 struct nv50_vbo_emitctx emit; 1120 1121 if (emit_prepare(nv50, &emit, 0) == FALSE) 1122 return FALSE; 1123 1124 while (count) { 1125 unsigned i, dw, nr = MIN2(count, emit.vtx_max); 1126 dw = nr * emit.vtx_dwords; 1127 1128 set_edgeflag(chan, tesla, &emit, *map); 1129 1130 BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw); 1131 for (i = 0; i < nr; ++i) 1132 emit_vtx(chan, &emit, *map++); 1133 1134 count -= nr; 1135 } 1136 1137 return TRUE; 1138} 1139 1140static boolean 1141nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count) 1142{ 1143 struct nouveau_channel *chan = nv50->screen->base.channel; 1144 struct nouveau_grobj *tesla = nv50->screen->tesla; 1145 struct nv50_vbo_emitctx emit; 1146 1147 if (emit_prepare(nv50, &emit, 0) == FALSE) 1148 return FALSE; 1149 1150 while (count) { 1151 unsigned i, dw, nr = MIN2(count, emit.vtx_max); 1152 dw = nr * emit.vtx_dwords; 1153 1154 set_edgeflag(chan, tesla, &emit, *map); 1155 1156 BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw); 1157 for (i 
= 0; i < nr; ++i) 1158 emit_vtx(chan, &emit, *map++); 1159 1160 count -= nr; 1161 } 1162 1163 return TRUE; 1164} 1165 1166static boolean 1167nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count) 1168{ 1169 struct nouveau_channel *chan = nv50->screen->base.channel; 1170 struct nouveau_grobj *tesla = nv50->screen->tesla; 1171 struct nv50_vbo_emitctx emit; 1172 1173 if (emit_prepare(nv50, &emit, 0) == FALSE) 1174 return FALSE; 1175 1176 while (count) { 1177 unsigned i, dw, nr = MIN2(count, emit.vtx_max); 1178 dw = nr * emit.vtx_dwords; 1179 1180 set_edgeflag(chan, tesla, &emit, *map); 1181 1182 BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw); 1183 for (i = 0; i < nr; ++i) 1184 emit_vtx(chan, &emit, *map++); 1185 1186 count -= nr; 1187 } 1188 1189 return TRUE; 1190} 1191