nv50_vbo.c revision db2df0aa9b49e006de4fcfc4a0bab32ad0a30173
1/*
2 * Copyright 2008 Ben Skeggs
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23#include "pipe/p_context.h"
24#include "pipe/p_state.h"
25#include "util/u_inlines.h"
26#include "util/u_format.h"
27
28#include "nv50_context.h"
29
30static boolean
31nv50_push_elements_u08(struct nv50_context *, uint8_t *, unsigned);
32
33static boolean
34nv50_push_elements_u16(struct nv50_context *, uint16_t *, unsigned);
35
36static boolean
37nv50_push_elements_u32(struct nv50_context *, uint32_t *, unsigned);
38
39static boolean
40nv50_push_arrays(struct nv50_context *, unsigned, unsigned);
41
/* True when the vertex program consumes the edge flag as a regular vertex
 * input (attr index < 16).  The vertex program cannot set the edge flag
 * itself (see nv50_vbo_validate), so in that case vertices must be pushed
 * through the FIFO and the edge flag state toggled from the CPU.
 */
#define NV50_USING_LOATHED_EDGEFLAG(ctx) ((ctx)->vertprog->cfg.edgeflag_in < 16)
43
44static INLINE unsigned
45nv50_prim(unsigned mode)
46{
47	switch (mode) {
48	case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS;
49	case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES;
50	case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP;
51	case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP;
52	case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES;
53	case PIPE_PRIM_TRIANGLE_STRIP:
54		return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP;
55	case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN;
56	case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS;
57	case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP;
58	case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON;
59	case PIPE_PRIM_LINES_ADJACENCY:
60		return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY;
61	case PIPE_PRIM_LINE_STRIP_ADJACENCY:
62		return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY;
63	case PIPE_PRIM_TRIANGLES_ADJACENCY:
64		return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY;
65	case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
66		return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY;
67	default:
68		break;
69	}
70
71	NOUVEAU_ERR("invalid primitive type %d\n", mode);
72	return NV50TCL_VERTEX_BEGIN_POINTS;
73}
74
75static INLINE uint32_t
76nv50_vbo_type_to_hw(enum pipe_format format)
77{
78	const struct util_format_description *desc;
79
80	desc = util_format_description(format);
81	assert(desc);
82
83	switch (desc->channel[0].type) {
84	case UTIL_FORMAT_TYPE_FLOAT:
85		return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT;
86	case UTIL_FORMAT_TYPE_UNSIGNED:
87		if (desc->channel[0].normalized) {
88			return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM;
89		}
90		return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED;
91	case UTIL_FORMAT_TYPE_SIGNED:
92		if (desc->channel[0].normalized) {
93			return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM;
94		}
95		return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED;
96	/*
97	case PIPE_FORMAT_TYPE_UINT:
98		return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UINT;
99	case PIPE_FORMAT_TYPE_SINT:
100		return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SINT; */
101	default:
102		return 0;
103	}
104}
105
106static INLINE uint32_t
107nv50_vbo_size_to_hw(unsigned size, unsigned nr_c)
108{
109	static const uint32_t hw_values[] = {
110		0, 0, 0, 0,
111		NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8,
112		NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8,
113		NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8,
114		NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8,
115		NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16,
116		NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16,
117		NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16,
118		NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16,
119		0, 0, 0, 0,
120		NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32,
121		NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32,
122		NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32,
123		NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 };
124
125	/* we'd also have R11G11B10 and R10G10B10A2 */
126
127	assert(nr_c > 0 && nr_c <= 4);
128
129	if (size > 32)
130		return 0;
131	size >>= (3 - 2);
132
133	return hw_values[size + (nr_c - 1)];
134}
135
136static INLINE uint32_t
137nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
138{
139	uint32_t hw_type, hw_size;
140	enum pipe_format pf = ve->src_format;
141	const struct util_format_description *desc;
142	unsigned size;
143
144	desc = util_format_description(pf);
145	assert(desc);
146
147	size = util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0);
148
149	hw_type = nv50_vbo_type_to_hw(pf);
150	hw_size = nv50_vbo_size_to_hw(size, ve->nr_components);
151
152	if (!hw_type || !hw_size) {
153		NOUVEAU_ERR("unsupported vbo format: %s\n", util_format_name(pf));
154		abort();
155		return 0x24e80000;
156	}
157
158	if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_Z) /* BGRA */
159		hw_size |= (1 << 31); /* no real swizzle bits :-( */
160
161	return (hw_type | hw_size);
162}
163
164/* For instanced drawing from user buffers, hitting the FIFO repeatedly
165 * with the same vertex data is probably worse than uploading all data.
166 */
167static boolean
168nv50_upload_vtxbuf(struct nv50_context *nv50, unsigned i)
169{
170	struct nv50_screen *nscreen = nv50->screen;
171	struct pipe_screen *pscreen = &nscreen->base.base;
172	struct pipe_buffer *buf = nscreen->strm_vbuf[i];
173	struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
174	uint8_t *src;
175	unsigned size = align(vb->buffer->size, 4096);
176
177	if (buf && buf->size < size)
178		pipe_buffer_reference(&nscreen->strm_vbuf[i], NULL);
179
180	if (!nscreen->strm_vbuf[i]) {
181		nscreen->strm_vbuf[i] = pipe_buffer_create(
182			pscreen, 0, PIPE_BUFFER_USAGE_VERTEX, size);
183		buf = nscreen->strm_vbuf[i];
184	}
185
186	src = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ);
187	if (!src)
188		return FALSE;
189	src += vb->buffer_offset;
190
191	size = (vb->max_index + 1) * vb->stride + 16; /* + 16 is for stride 0 */
192	if (vb->buffer_offset + size > vb->buffer->size)
193		size = vb->buffer->size - vb->buffer_offset;
194
195	pipe_buffer_write(pscreen, buf, vb->buffer_offset, size, src);
196	pipe_buffer_unmap(pscreen, vb->buffer);
197
198	vb->buffer = buf; /* don't pipe_reference, this is a private copy */
199	return TRUE;
200}
201
202static void
203nv50_upload_user_vbufs(struct nv50_context *nv50)
204{
205	unsigned i;
206
207	if (nv50->vbo_fifo)
208		nv50->dirty |= NV50_NEW_ARRAYS;
209	if (!(nv50->dirty & NV50_NEW_ARRAYS))
210		return;
211
212	for (i = 0; i < nv50->vtxbuf_nr; ++i) {
213		if (nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX)
214			continue;
215		nv50_upload_vtxbuf(nv50, i);
216	}
217}
218
219static void
220nv50_set_static_vtxattr(struct nv50_context *nv50, unsigned i, void *data)
221{
222	struct nouveau_grobj *tesla = nv50->screen->tesla;
223	struct nouveau_channel *chan = tesla->channel;
224	float v[4];
225
226	util_format_read_4f(nv50->vtxelt[i].src_format,
227			    v, 0, data, 0, 0, 0, 1, 1);
228
229	switch (nv50->vtxelt[i].nr_components) {
230	case 4:
231		BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_4F_X(i), 4);
232		OUT_RINGf (chan, v[0]);
233		OUT_RINGf (chan, v[1]);
234		OUT_RINGf (chan, v[2]);
235		OUT_RINGf (chan, v[3]);
236		break;
237	case 3:
238		BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_3F_X(i), 3);
239		OUT_RINGf (chan, v[0]);
240		OUT_RINGf (chan, v[1]);
241		OUT_RINGf (chan, v[2]);
242		break;
243	case 2:
244		BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_2F_X(i), 2);
245		OUT_RINGf (chan, v[0]);
246		OUT_RINGf (chan, v[1]);
247		break;
248	case 1:
249		BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_1F(i), 1);
250		OUT_RINGf (chan, v[0]);
251		break;
252	default:
253		assert(0);
254		break;
255	}
256}
257
258static unsigned
259init_per_instance_arrays_immd(struct nv50_context *nv50,
260			      unsigned startInstance,
261			      unsigned pos[16], unsigned step[16])
262{
263	struct nouveau_bo *bo;
264	unsigned i, b, count = 0;
265
266	for (i = 0; i < nv50->vtxelt_nr; ++i) {
267		if (!nv50->vtxelt[i].instance_divisor)
268			continue;
269		++count;
270		b = nv50->vtxelt[i].vertex_buffer_index;
271
272		pos[i] = nv50->vtxelt[i].src_offset +
273			nv50->vtxbuf[b].buffer_offset +
274			startInstance * nv50->vtxbuf[b].stride;
275		step[i] = startInstance % nv50->vtxelt[i].instance_divisor;
276
277		bo = nouveau_bo(nv50->vtxbuf[b].buffer);
278		if (!bo->map)
279			nouveau_bo_map(bo, NOUVEAU_BO_RD);
280
281		nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]);
282	}
283
284	return count;
285}
286
/* Set up per-instance vertex arrays for the first instance drawn.
 * Records each per-instance element's byte offset in pos[] and its phase
 * within the divisor period in step[]; for startInstance != 0 the array
 * start addresses are re-pointed via relocs.  The stateobj is kept in
 * nv50->state.instbuf so it can be re-emitted on flush (flush notify).
 * Returns the number of elements with a non-zero instance divisor.
 */
static unsigned
init_per_instance_arrays(struct nv50_context *nv50,
			 unsigned startInstance,
			 unsigned pos[16], unsigned step[16])
{
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nouveau_channel *chan = tesla->channel;
	struct nouveau_bo *bo;
	struct nouveau_stateobj *so;
	unsigned i, b, count = 0;
	const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;

	/* FIFO push path emits attribute values directly instead */
	if (nv50->vbo_fifo)
		return init_per_instance_arrays_immd(nv50, startInstance,
						     pos, step);

	so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);

	for (i = 0; i < nv50->vtxelt_nr; ++i) {
		if (!nv50->vtxelt[i].instance_divisor)
			continue;
		++count;
		b = nv50->vtxelt[i].vertex_buffer_index;

		/* byte offset of this element's data for startInstance */
		pos[i] = nv50->vtxelt[i].src_offset +
			nv50->vtxbuf[b].buffer_offset +
			startInstance * nv50->vtxbuf[b].stride;

		/* instance 0 starts at the already-programmed address */
		if (!startInstance) {
			step[i] = 0;
			continue;
		}
		step[i] = startInstance % nv50->vtxelt[i].instance_divisor;

		bo = nouveau_bo(nv50->vtxbuf[b].buffer);

		so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
		so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
		so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
	}

	/* only emit when a start address was actually moved */
	if (count && startInstance) {
		so_ref (so, &nv50->state.instbuf); /* for flush notify */
		so_emit(chan, nv50->state.instbuf);
	}
	so_ref (NULL, &so);

	return count;
}
336
337static void
338step_per_instance_arrays_immd(struct nv50_context *nv50,
339			      unsigned pos[16], unsigned step[16])
340{
341	struct nouveau_bo *bo;
342	unsigned i, b;
343
344	for (i = 0; i < nv50->vtxelt_nr; ++i) {
345		if (!nv50->vtxelt[i].instance_divisor)
346			continue;
347		if (++step[i] != nv50->vtxelt[i].instance_divisor)
348			continue;
349		b = nv50->vtxelt[i].vertex_buffer_index;
350		bo = nouveau_bo(nv50->vtxbuf[b].buffer);
351
352		step[i] = 0;
353		pos[i] += nv50->vtxbuf[b].stride;
354
355		nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]);
356	}
357}
358
/* Advance per-instance vertex arrays by one instance: bump each
 * element's divisor counter and, when it wraps, move the array start
 * address forward by one stride.  The start addresses are re-emitted
 * every instance and the stateobj kept in nv50->state.instbuf so a
 * flush can re-apply it.
 */
static void
step_per_instance_arrays(struct nv50_context *nv50,
			 unsigned pos[16], unsigned step[16])
{
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nouveau_channel *chan = tesla->channel;
	struct nouveau_bo *bo;
	struct nouveau_stateobj *so;
	unsigned i, b;
	const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;

	/* FIFO push path re-emits immediate attributes instead */
	if (nv50->vbo_fifo) {
		step_per_instance_arrays_immd(nv50, pos, step);
		return;
	}

	so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);

	for (i = 0; i < nv50->vtxelt_nr; ++i) {
		if (!nv50->vtxelt[i].instance_divisor)
			continue;
		b = nv50->vtxelt[i].vertex_buffer_index;

		/* divisor period complete: step to the next array entry */
		if (++step[i] == nv50->vtxelt[i].instance_divisor) {
			step[i] = 0;
			pos[i] += nv50->vtxbuf[b].stride;
		}

		bo = nouveau_bo(nv50->vtxbuf[b].buffer);

		so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
		so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
		so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
	}

	so_ref (so, &nv50->state.instbuf); /* for flush notify */
	so_ref (NULL, &so);

	so_emit(chan, nv50->state.instbuf);
}
399
400static INLINE void
401nv50_unmap_vbufs(struct nv50_context *nv50)
402{
403        unsigned i;
404
405        for (i = 0; i < nv50->vtxbuf_nr; ++i)
406                if (nouveau_bo(nv50->vtxbuf[i].buffer)->map)
407                        nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
408}
409
/* Draw non-indexed primitives with instancing.  Each instance is a
 * separate VERTEX_BEGIN/VERTEX_END pair; per-instance arrays are stepped
 * between instances on the CPU.
 */
void
nv50_draw_arrays_instanced(struct pipe_context *pipe,
			   unsigned mode, unsigned start, unsigned count,
			   unsigned startInstance, unsigned instanceCount)
{
	struct nv50_context *nv50 = nv50_context(pipe);
	struct nouveau_channel *chan = nv50->screen->tesla->channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	unsigned i, nz_divisors;
	unsigned step[16], pos[16];

	/* with the edgeflag workaround we read user buffers directly */
	if (!NV50_USING_LOATHED_EDGEFLAG(nv50))
		nv50_upload_user_vbufs(nv50);

	nv50_state_validate(nv50);

	nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step);

	/* write startInstance into the aux constant buffer at offset 24
	 * (NOTE(review): presumably read by the vertex program as the
	 * instance ID base - confirm)
	 */
	BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
	OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
	OUT_RING  (chan, startInstance);

	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
	OUT_RING  (chan, nv50_prim(mode));

	if (nv50->vbo_fifo)
		nv50_push_arrays(nv50, start, count);
	else {
		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
		OUT_RING  (chan, start);
		OUT_RING  (chan, count);
	}
	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
	OUT_RING  (chan, 0);

	for (i = 1; i < instanceCount; i++) {
		if (nz_divisors) /* any non-zero array divisors ? */
			step_per_instance_arrays(nv50, pos, step);

		/* bit 28: NOTE(review): presumably "next instance" -
		 * confirm against the method documentation
		 */
		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
		OUT_RING  (chan, nv50_prim(mode) | (1 << 28));

		if (nv50->vbo_fifo)
			nv50_push_arrays(nv50, start, count);
		else {
			BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
			OUT_RING  (chan, start);
			OUT_RING  (chan, count);
		}
		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
		OUT_RING  (chan, 0);
	}
	nv50_unmap_vbufs(nv50);

	so_ref(NULL, &nv50->state.instbuf);
}
466
/* Draw non-indexed primitives either from the bound vertex arrays or,
 * when vbo_fifo is set, by pushing vertex data through the FIFO.
 */
void
nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
		 unsigned count)
{
	struct nv50_context *nv50 = nv50_context(pipe);
	struct nouveau_channel *chan = nv50->screen->tesla->channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	boolean ret;

	nv50_state_validate(nv50);

	/* undocumented method 0x142c, deliberately written twice
	 * (NOTE(review): purpose unknown - looks like a flush/serialize
	 * of vertex state; confirm against rules-ng method docs)
	 */
	BEGIN_RING(chan, tesla, 0x142c, 1);
	OUT_RING  (chan, 0);
	BEGIN_RING(chan, tesla, 0x142c, 1);
	OUT_RING  (chan, 0);

	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
	OUT_RING  (chan, nv50_prim(mode));

	if (nv50->vbo_fifo)
		ret = nv50_push_arrays(nv50, start, count);
	else {
		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
		OUT_RING  (chan, start);
		OUT_RING  (chan, count);
		ret = TRUE;
	}
	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
	OUT_RING  (chan, 0);

	nv50_unmap_vbufs(nv50);

        /* XXX: not sure what to do if ret != TRUE: flush and retry?
         */
        assert(ret);
}
503
/* Emit 8-bit indices through the FIFO.  A leading index is sent alone
 * via VB_ELEMENT_U32 if count is odd, so the remainder is even; indices
 * are then packed two-per-dword into VB_ELEMENT_U16 batches of at most
 * 2046 indices (1023 dwords per packet).
 */
static INLINE boolean
nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
			      unsigned start, unsigned count)
{
	struct nouveau_channel *chan = nv50->screen->tesla->channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;

	map += start;

	/* FIFO push path sends full vertex data instead of indices */
	if (nv50->vbo_fifo)
		return nv50_push_elements_u08(nv50, map, count);

	if (count & 1) {
		BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
		OUT_RING  (chan, map[0]);
		map++;
		count--;
	}

	while (count) {
		unsigned nr = count > 2046 ? 2046 : count;
		int i;

		/* nr is even here, so nr >> 1 dwords hold nr indices */
		BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1);
		for (i = 0; i < nr; i += 2)
			OUT_RING  (chan, (map[i + 1] << 16) | map[i]);

		count -= nr;
		map += nr;
	}
	return TRUE;
}
536
/* Emit 16-bit indices through the FIFO.  Same scheme as the u08 path:
 * an odd leading index goes out via VB_ELEMENT_U32, the even remainder
 * is packed two-per-dword into VB_ELEMENT_U16 batches of at most 2046.
 */
static INLINE boolean
nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
			      unsigned start, unsigned count)
{
	struct nouveau_channel *chan = nv50->screen->tesla->channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;

	map += start;

	/* FIFO push path sends full vertex data instead of indices */
	if (nv50->vbo_fifo)
		return nv50_push_elements_u16(nv50, map, count);

	if (count & 1) {
		BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
		OUT_RING  (chan, map[0]);
		map++;
		count--;
	}

	while (count) {
		unsigned nr = count > 2046 ? 2046 : count;
		int i;

		/* nr is even here, so nr >> 1 dwords hold nr indices */
		BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1);
		for (i = 0; i < nr; i += 2)
			OUT_RING  (chan, (map[i + 1] << 16) | map[i]);

		count -= nr;
		map += nr;
	}
	return TRUE;
}
569
570static INLINE boolean
571nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
572			      unsigned start, unsigned count)
573{
574	struct nouveau_channel *chan = nv50->screen->tesla->channel;
575	struct nouveau_grobj *tesla = nv50->screen->tesla;
576
577	map += start;
578
579	if (nv50->vbo_fifo)
580		return nv50_push_elements_u32(nv50, map, count);
581
582	while (count) {
583		unsigned nr = count > 2047 ? 2047 : count;
584
585		BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, nr);
586		OUT_RINGp (chan, map, nr);
587
588		count -= nr;
589		map += nr;
590	}
591	return TRUE;
592}
593
594static INLINE void
595nv50_draw_elements_inline(struct nv50_context *nv50,
596			  void *map, unsigned indexSize,
597			  unsigned start, unsigned count)
598{
599	switch (indexSize) {
600	case 1:
601		nv50_draw_elements_inline_u08(nv50, map, start, count);
602		break;
603	case 2:
604		nv50_draw_elements_inline_u16(nv50, map, start, count);
605		break;
606	case 4:
607		nv50_draw_elements_inline_u32(nv50, map, start, count);
608		break;
609	}
610}
611
612void
613nv50_draw_elements_instanced(struct pipe_context *pipe,
614			     struct pipe_buffer *indexBuffer,
615			     unsigned indexSize,
616			     unsigned mode, unsigned start, unsigned count,
617			     unsigned startInstance, unsigned instanceCount)
618{
619	struct nv50_context *nv50 = nv50_context(pipe);
620	struct nouveau_grobj *tesla = nv50->screen->tesla;
621	struct nouveau_channel *chan = tesla->channel;
622	struct pipe_screen *pscreen = pipe->screen;
623	void *map;
624	unsigned i, nz_divisors;
625	unsigned step[16], pos[16];
626
627	map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
628
629	if (!NV50_USING_LOATHED_EDGEFLAG(nv50))
630		nv50_upload_user_vbufs(nv50);
631
632	nv50_state_validate(nv50);
633
634	nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step);
635
636	BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
637	OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
638	OUT_RING  (chan, startInstance);
639
640	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
641	OUT_RING  (chan, nv50_prim(mode));
642
643	nv50_draw_elements_inline(nv50, map, indexSize, start, count);
644
645	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
646	OUT_RING  (chan, 0);
647
648	for (i = 1; i < instanceCount; ++i) {
649		if (nz_divisors) /* any non-zero array divisors ? */
650			step_per_instance_arrays(nv50, pos, step);
651
652		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
653		OUT_RING  (chan, nv50_prim(mode) | (1 << 28));
654
655		nv50_draw_elements_inline(nv50, map, indexSize, start, count);
656
657		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
658		OUT_RING  (chan, 0);
659	}
660	nv50_unmap_vbufs(nv50);
661
662	so_ref(NULL, &nv50->state.instbuf);
663}
664
665void
666nv50_draw_elements(struct pipe_context *pipe,
667		   struct pipe_buffer *indexBuffer, unsigned indexSize,
668		   unsigned mode, unsigned start, unsigned count)
669{
670	struct nv50_context *nv50 = nv50_context(pipe);
671	struct nouveau_channel *chan = nv50->screen->tesla->channel;
672	struct nouveau_grobj *tesla = nv50->screen->tesla;
673	struct pipe_screen *pscreen = pipe->screen;
674	void *map;
675
676	map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
677
678	nv50_state_validate(nv50);
679
680	BEGIN_RING(chan, tesla, 0x142c, 1);
681	OUT_RING  (chan, 0);
682	BEGIN_RING(chan, tesla, 0x142c, 1);
683	OUT_RING  (chan, 0);
684
685	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
686	OUT_RING  (chan, nv50_prim(mode));
687
688	nv50_draw_elements_inline(nv50, map, indexSize, start, count);
689
690	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
691	OUT_RING  (chan, 0);
692
693	nv50_unmap_vbufs(nv50);
694
695	pipe_buffer_unmap(pscreen, indexBuffer);
696}
697
/* Bake a constant (stride-0) vertex attribute into a stateobj.
 * Reads one element from the buffer, converts it to 4 floats and records
 * VTX_ATTR_nF methods in *pso (created on demand).  If the attribute is
 * the vertex program's edge flag input, also records the edge flag state.
 * Returns FALSE if the buffer can't be mapped or the component count is
 * unsupported; the caller then falls back to another path.
 */
static INLINE boolean
nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
		       struct nouveau_stateobj **pso,
		       struct pipe_vertex_element *ve,
		       struct pipe_vertex_buffer *vb)

{
	struct nouveau_stateobj *so;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nouveau_bo *bo = nouveau_bo(vb->buffer);
	float v[4];
	int ret;

	ret = nouveau_bo_map(bo, NOUVEAU_BO_RD);
	if (ret)
		return FALSE;

	util_format_read_4f(ve->src_format, v, 0, (uint8_t *)bo->map +
			    (vb->buffer_offset + ve->src_offset), 0,
			    0, 0, 1, 1);
	so = *pso;
	if (!so)
		*pso = so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 4, 0);

	switch (ve->nr_components) {
	case 4:
		so_method(so, tesla, NV50TCL_VTX_ATTR_4F_X(attrib), 4);
		so_data  (so, fui(v[0]));
		so_data  (so, fui(v[1]));
		so_data  (so, fui(v[2]));
		so_data  (so, fui(v[3]));
		break;
	case 3:
		so_method(so, tesla, NV50TCL_VTX_ATTR_3F_X(attrib), 3);
		so_data  (so, fui(v[0]));
		so_data  (so, fui(v[1]));
		so_data  (so, fui(v[2]));
		break;
	case 2:
		so_method(so, tesla, NV50TCL_VTX_ATTR_2F_X(attrib), 2);
		so_data  (so, fui(v[0]));
		so_data  (so, fui(v[1]));
		break;
	case 1:
		/* a 1-component constant attribute may be the edge flag */
		if (attrib == nv50->vertprog->cfg.edgeflag_in) {
			so_method(so, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
			so_data  (so, v[0] ? 1 : 0);
		}
		so_method(so, tesla, NV50TCL_VTX_ATTR_1F(attrib), 1);
		so_data  (so, fui(v[0]));
		break;
	default:
		nouveau_bo_unmap(bo);
		return FALSE;
	}

	nouveau_bo_unmap(bo);
	return TRUE;
}
757
/* Validate vertex array state: build the VERTEX_ARRAY_ATTRIB format
 * stateobj (vtxfmt), the array address/stride stateobj (vtxbuf) and,
 * for constant attributes, an immediate-value stateobj (vtxattr).
 * Also decides per-element whether data must be pushed through the
 * FIFO (nv50->vbo_fifo bitmask) instead of fetched by the hardware.
 */
void
nv50_vbo_validate(struct nv50_context *nv50)
{
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nouveau_stateobj *vtxbuf, *vtxfmt, *vtxattr;
	unsigned i, n_ve;

	/* don't validate if Gallium took away our buffers */
	if (nv50->vtxbuf_nr == 0)
		return;
	nv50->vbo_fifo = 0;

	/* user (non-VERTEX-usage) buffers with a stride force the push path */
	for (i = 0; i < nv50->vtxbuf_nr; ++i)
		if (nv50->vtxbuf[i].stride &&
		    !(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX))
			nv50->vbo_fifo = 0xffff;

	if (NV50_USING_LOATHED_EDGEFLAG(nv50))
		nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */

	/* also cover elements enabled last time so they get disabled */
	n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr);

	vtxattr = NULL;
	vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt_nr * 4);
	vtxfmt = so_new(1, n_ve, 0);
	so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve);

	for (i = 0; i < nv50->vtxelt_nr; i++) {
		struct pipe_vertex_element *ve = &nv50->vtxelt[i];
		struct pipe_vertex_buffer *vb =
			&nv50->vtxbuf[ve->vertex_buffer_index];
		struct nouveau_bo *bo = nouveau_bo(vb->buffer);
		uint32_t hw = nv50_vbo_vtxelt_to_hw(ve);

		/* stride-0 elements become constant attributes:
		 * bit 4 marks the element const, its array is disabled
		 */
		if (!vb->stride &&
		    nv50_vbo_static_attrib(nv50, i, &vtxattr, ve, vb)) {
			so_data(vtxfmt, hw | (1 << 4));

			so_method(vtxbuf, tesla,
				  NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
			so_data  (vtxbuf, 0);

			nv50->vbo_fifo &= ~(1 << i);
			continue;
		}

		/* push path: no hardware fetch, array disabled */
		if (nv50->vbo_fifo) {
			so_data  (vtxfmt, hw |
				  (ve->instance_divisor ? (1 << 4) : i));
			so_method(vtxbuf, tesla,
				  NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
			so_data  (vtxbuf, 0);
			continue;
		}
		so_data(vtxfmt, hw | i);

		/* enable the array (0x20000000) with its stride; stride 0
		 * for per-instance elements, which are stepped manually
		 */
		so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3);
		so_data  (vtxbuf, 0x20000000 |
			  (ve->instance_divisor ? 0 : vb->stride));
		so_reloc (vtxbuf, bo, vb->buffer_offset +
			  ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
			  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
		so_reloc (vtxbuf, bo, vb->buffer_offset +
			  ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
			  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);

		/* vertex array limits */
		so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2);
		so_reloc (vtxbuf, bo, vb->buffer->size - 1,
			  NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
			  NOUVEAU_BO_HIGH, 0, 0);
		so_reloc (vtxbuf, bo, vb->buffer->size - 1,
			  NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
			  NOUVEAU_BO_LOW, 0, 0);
	}
	/* disable elements that were active in the previous state */
	for (; i < n_ve; ++i) {
		/* NOTE(review): 0x7e080010 is presumably the "disabled"
		 * attrib format - confirm against rules-ng
		 */
		so_data  (vtxfmt, 0x7e080010);

		so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
		so_data  (vtxbuf, 0);
	}
	nv50->state.vtxelt_nr = nv50->vtxelt_nr;

	so_ref (vtxfmt, &nv50->state.vtxfmt);
	so_ref (vtxbuf, &nv50->state.vtxbuf);
	so_ref (vtxattr, &nv50->state.vtxattr);
	so_ref (NULL, &vtxbuf);
	so_ref (NULL, &vtxfmt);
	so_ref (NULL, &vtxattr);
}
848
/* Callback that writes one attribute's raw data into the FIFO. */
typedef void (*pfn_push)(struct nouveau_channel *, void *);

/* State for pushing vertex data through the FIFO (immediate mode). */
struct nv50_vbo_emitctx
{
	pfn_push push[16];	/* per-element emit callback */
	uint8_t *map[16];	/* per-element CPU pointer to current vertex */
	unsigned stride[16];	/* per-element vertex stride in bytes */
	unsigned nr_ve;		/* number of elements being pushed */
	unsigned vtx_dwords;	/* dwords emitted per vertex */
	unsigned vtx_max;	/* max vertices per VERTEX_DATA packet */

	float edgeflag;		/* last edge flag value sent (0.5f = unset) */
	unsigned ve_edgeflag;	/* attrib index of edge flag input, or >= 16 */
};
863
864static INLINE void
865emit_vtx_next(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit)
866{
867	unsigned i;
868
869	for (i = 0; i < emit->nr_ve; ++i) {
870		emit->push[i](chan, emit->map[i]);
871		emit->map[i] += emit->stride[i];
872	}
873}
874
875static INLINE void
876emit_vtx(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit,
877	 uint32_t vi)
878{
879	unsigned i;
880
881	for (i = 0; i < emit->nr_ve; ++i)
882		emit->push[i](chan, emit->map[i] + emit->stride[i] * vi);
883}
884
885static INLINE boolean
886nv50_map_vbufs(struct nv50_context *nv50)
887{
888	int i;
889
890	for (i = 0; i < nv50->vtxbuf_nr; ++i) {
891		struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
892		unsigned size = vb->stride * (vb->max_index + 1) + 16;
893
894		if (nouveau_bo(vb->buffer)->map)
895			continue;
896
897		size = vb->stride * (vb->max_index + 1) + 16;
898		size = MIN2(size, vb->buffer->size);
899		if (!size)
900			size = vb->buffer->size;
901
902		if (nouveau_bo_map_range(nouveau_bo(vb->buffer),
903					 0, size, NOUVEAU_BO_RD))
904			break;
905	}
906
907	if (i == nv50->vtxbuf_nr)
908		return TRUE;
909	for (; i >= 0; --i)
910		nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
911	return FALSE;
912}
913
/* Push one 32-bit word. */
static void
emit_b32_1(struct nouveau_channel *chan, void *data)
{
	uint32_t *dw = data;

	OUT_RING(chan, dw[0]);
}
921
/* Push two 32-bit words. */
static void
emit_b32_2(struct nouveau_channel *chan, void *data)
{
	uint32_t *dw = data;
	int c;

	for (c = 0; c < 2; ++c)
		OUT_RING(chan, dw[c]);
}
930
/* Push three 32-bit words. */
static void
emit_b32_3(struct nouveau_channel *chan, void *data)
{
	uint32_t *dw = data;
	int c;

	for (c = 0; c < 3; ++c)
		OUT_RING(chan, dw[c]);
}
940
/* Push four 32-bit words. */
static void
emit_b32_4(struct nouveau_channel *chan, void *data)
{
	uint32_t *dw = data;
	int c;

	for (c = 0; c < 4; ++c)
		OUT_RING(chan, dw[c]);
}
951
/* Push one 16-bit value (zero-extended into a dword). */
static void
emit_b16_1(struct nouveau_channel *chan, void *data)
{
	uint16_t *w = data;

	OUT_RING(chan, w[0]);
}
959
/* Push three 16-bit values packed into two dwords. */
static void
emit_b16_3(struct nouveau_channel *chan, void *data)
{
	uint16_t *w = data;

	OUT_RING(chan, w[0] | (w[1] << 16));
	OUT_RING(chan, w[2]);
}
968
/* Push one byte (zero-extended into a dword). */
static void
emit_b08_1(struct nouveau_channel *chan, void *data)
{
	uint8_t *b = data;

	OUT_RING(chan, b[0]);
}
976
/* Push three bytes packed into one dword. */
static void
emit_b08_3(struct nouveau_channel *chan, void *data)
{
	uint8_t *b = data;

	OUT_RING(chan, b[0] | (b[1] << 8) | (b[2] << 16));
}
984
985static boolean
986emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
987	     unsigned start)
988{
989	unsigned i;
990
991	if (nv50_map_vbufs(nv50) == FALSE)
992		return FALSE;
993
994	emit->ve_edgeflag = nv50->vertprog->cfg.edgeflag_in;
995
996	emit->edgeflag = 0.5f;
997	emit->nr_ve = 0;
998	emit->vtx_dwords = 0;
999
1000	for (i = 0; i < nv50->vtxelt_nr; ++i) {
1001		struct pipe_vertex_element *ve;
1002		struct pipe_vertex_buffer *vb;
1003		unsigned n, size;
1004		const struct util_format_description *desc;
1005
1006		ve = &nv50->vtxelt[i];
1007		vb = &nv50->vtxbuf[ve->vertex_buffer_index];
1008		if (!(nv50->vbo_fifo & (1 << i)) || ve->instance_divisor)
1009			continue;
1010		n = emit->nr_ve++;
1011
1012		emit->stride[n] = vb->stride;
1013		emit->map[n] = (uint8_t *)nouveau_bo(vb->buffer)->map +
1014			vb->buffer_offset +
1015			(start * vb->stride + ve->src_offset);
1016
1017		desc = util_format_description(ve->src_format);
1018		assert(desc);
1019
1020		size = util_format_get_component_bits(
1021			ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0);
1022
1023		assert(ve->nr_components > 0 && ve->nr_components <= 4);
1024
1025		/* It shouldn't be necessary to push the implicit 1s
1026		 * for case 3 and size 8 cases 1, 2, 3.
1027		 */
1028		switch (size) {
1029		default:
1030			NOUVEAU_ERR("unsupported vtxelt size: %u\n", size);
1031			return FALSE;
1032		case 32:
1033			switch (ve->nr_components) {
1034			case 1: emit->push[n] = emit_b32_1; break;
1035			case 2: emit->push[n] = emit_b32_2; break;
1036			case 3: emit->push[n] = emit_b32_3; break;
1037			case 4: emit->push[n] = emit_b32_4; break;
1038			}
1039			emit->vtx_dwords += ve->nr_components;
1040			break;
1041		case 16:
1042			switch (ve->nr_components) {
1043			case 1: emit->push[n] = emit_b16_1; break;
1044			case 2: emit->push[n] = emit_b32_1; break;
1045			case 3: emit->push[n] = emit_b16_3; break;
1046			case 4: emit->push[n] = emit_b32_2; break;
1047			}
1048			emit->vtx_dwords += (ve->nr_components + 1) >> 1;
1049			break;
1050		case 8:
1051			switch (ve->nr_components) {
1052			case 1: emit->push[n] = emit_b08_1; break;
1053			case 2: emit->push[n] = emit_b16_1; break;
1054			case 3: emit->push[n] = emit_b08_3; break;
1055			case 4: emit->push[n] = emit_b32_1; break;
1056			}
1057			emit->vtx_dwords += 1;
1058			break;
1059		}
1060	}
1061
1062	emit->vtx_max = 512 / emit->vtx_dwords;
1063	if (emit->ve_edgeflag < 16)
1064		emit->vtx_max = 1;
1065
1066	return TRUE;
1067}
1068
/* If the vertex program reads the edge flag as a regular attribute
 * (ve_edgeflag < 16), fetch its float value for the given vertex index
 * and, when it changed, update the hardware edge flag state.
 * NOTE(review): method 0x15e4 is presumably NV50TCL_EDGEFLAG_ENABLE
 * (used by name in nv50_vbo_static_attrib) - confirm.
 */
static INLINE void
set_edgeflag(struct nouveau_channel *chan,
	     struct nouveau_grobj *tesla,
	     struct nv50_vbo_emitctx *emit, uint32_t index)
{
	unsigned i = emit->ve_edgeflag;

	if (i < 16) {
		float f = *((float *)(emit->map[i] + index * emit->stride[i]));

		/* only emit when the flag actually changes */
		if (emit->edgeflag != f) {
			emit->edgeflag = f;

			BEGIN_RING(chan, tesla, 0x15e4, 1);
			OUT_RING  (chan, f ? 1 : 0);
		}
	}
}
1087
/* Push vertices [start, start+count) through the FIFO as immediate
 * VERTEX_DATA, in chunks of at most vtx_max vertices.  When an edge
 * flag attribute exists, vtx_max is forced to 1 by emit_prepare so the
 * edge flag can be toggled per vertex (hence the fixed index 0 below:
 * it is relative to the advancing map pointers).
 */
static boolean
nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count)
{
	struct nouveau_channel *chan = nv50->screen->base.channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nv50_vbo_emitctx emit;

	if (emit_prepare(nv50, &emit, start) == FALSE)
		return FALSE;

	while (count) {
		unsigned i, dw, nr = MIN2(count, emit.vtx_max);
	        dw = nr * emit.vtx_dwords;

		set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */

		BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
		for (i = 0; i < nr; ++i)
			emit_vtx_next(chan, &emit);

		count -= nr;
	}

	return TRUE;
}
1113
/* Push vertex data for a list of 32-bit indices through the FIFO.
 * The edge flag (if any) is sampled from the first index of each chunk;
 * chunks are 1 vertex when an edge flag attribute is present.
 */
static boolean
nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count)
{
	struct nouveau_channel *chan = nv50->screen->base.channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nv50_vbo_emitctx emit;

	if (emit_prepare(nv50, &emit, 0) == FALSE)
		return FALSE;

	while (count) {
		unsigned i, dw, nr = MIN2(count, emit.vtx_max);
	        dw = nr * emit.vtx_dwords;

		set_edgeflag(chan, tesla, &emit, *map);

		BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
		for (i = 0; i < nr; ++i)
			emit_vtx(chan, &emit, *map++);

		count -= nr;
	}

	return TRUE;
}
1139
/* Push vertex data for a list of 16-bit indices through the FIFO.
 * Same scheme as nv50_push_elements_u32.
 */
static boolean
nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count)
{
	struct nouveau_channel *chan = nv50->screen->base.channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nv50_vbo_emitctx emit;

	if (emit_prepare(nv50, &emit, 0) == FALSE)
		return FALSE;

	while (count) {
		unsigned i, dw, nr = MIN2(count, emit.vtx_max);
	        dw = nr * emit.vtx_dwords;

		set_edgeflag(chan, tesla, &emit, *map);

		BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
		for (i = 0; i < nr; ++i)
			emit_vtx(chan, &emit, *map++);

		count -= nr;
	}

	return TRUE;
}
1165
/* Push vertex data for a list of 8-bit indices through the FIFO.
 * Same scheme as nv50_push_elements_u32.
 */
static boolean
nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count)
{
	struct nouveau_channel *chan = nv50->screen->base.channel;
	struct nouveau_grobj *tesla = nv50->screen->tesla;
	struct nv50_vbo_emitctx emit;

	if (emit_prepare(nv50, &emit, 0) == FALSE)
		return FALSE;

	while (count) {
		unsigned i, dw, nr = MIN2(count, emit.vtx_max);
	        dw = nr * emit.vtx_dwords;

		set_edgeflag(chan, tesla, &emit, *map);

		BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
		for (i = 0; i < nr; ++i)
			emit_vtx(chan, &emit, *map++);

		count -= nr;
	}

	return TRUE;
}
1191