nv50_push.c revision f2b80e5679741c39b79102e7ee5057d1af9bce82
1#include "pipe/p_context.h"
2#include "pipe/p_state.h"
3#include "util/u_inlines.h"
4#include "util/u_format.h"
5
6#include "nouveau/nouveau_util.h"
7#include "nv50_context.h"
8#include "nv50_resource.h"
9
/*
 * State shared by the emit_* callbacks while a draw is pushed inline
 * through the command FIFO.
 */
struct push_context {
   struct nv50_context *nv50;   /* owning context */

   unsigned vtx_size;           /* 32-bit words emitted per vertex */

   void *idxbuf;                /* mapped index data, NULL when not indexed */
   int32_t idxbias;             /* added to each index by the *_biased emitters */
   unsigned idxsize;            /* index size as passed by the caller */

   float edgeflag;              /* last edge flag value sent by emit_vertex */
   int edgeflag_attr;           /* attr[] slot of the edge flag; >= 16 disables it */

   /* one entry per vertex element that is pushed through the FIFO */
   struct {
      void *map;                /* CPU pointer to the first element */
      unsigned stride;          /* byte distance between elements */
      unsigned divisor;         /* instance divisor, 0 for per-vertex data */
      unsigned step;            /* instance counter, compared against divisor */
      void (*push)(struct nouveau_channel *, void *); /* writes one element */
   } attr[16];
   unsigned attr_nr;            /* number of attr[] entries in use */
};
31
/* Push a single 32-bit word read from data. */
static void
emit_b32_1(struct nouveau_channel *chan, void *data)
{
   const uint32_t *word = data;

   OUT_RING(chan, *word);
}
39
/* Push two consecutive 32-bit words read from data. */
static void
emit_b32_2(struct nouveau_channel *chan, void *data)
{
   const uint32_t *words = data;
   unsigned k;

   for (k = 0; k < 2; k++)
      OUT_RING(chan, words[k]);
}
48
/* Push three consecutive 32-bit words read from data. */
static void
emit_b32_3(struct nouveau_channel *chan, void *data)
{
   const uint32_t *words = data;
   unsigned k;

   for (k = 0; k < 3; k++)
      OUT_RING(chan, words[k]);
}
58
/* Push four consecutive 32-bit words read from data. */
static void
emit_b32_4(struct nouveau_channel *chan, void *data)
{
   const uint32_t *words = data;
   unsigned k;

   for (k = 0; k < 4; k++)
      OUT_RING(chan, words[k]);
}
69
/* Push one 16-bit value from data, zero-extended to a full word. */
static void
emit_b16_1(struct nouveau_channel *chan, void *data)
{
   const uint16_t *half = data;

   OUT_RING(chan, *half);
}
77
/*
 * Push three 16-bit values from data as two words: the first word holds
 * v[0] in its low half and v[1] in its high half, the second holds v[2].
 */
static void
emit_b16_3(struct nouveau_channel *chan, void *data)
{
   const uint16_t *v = data;

   /* Shift as uint32_t: a uint16_t promotes to signed int, and shifting a
    * value >= 0x8000 left by 16 would overflow it (undefined behaviour).
    */
   OUT_RING(chan, ((uint32_t)v[1] << 16) | v[0]);
   OUT_RING(chan, v[2]);
}
86
/* Push one byte from data, zero-extended to a full word. */
static void
emit_b08_1(struct nouveau_channel *chan, void *data)
{
   const uint8_t *byte = data;

   OUT_RING(chan, *byte);
}
94
/*
 * Push three bytes from data packed into one word: byte 0 in bits 0-7,
 * byte 1 in bits 8-15, byte 2 in bits 16-23.
 */
static void
emit_b08_3(struct nouveau_channel *chan, void *data)
{
   const uint8_t *bytes = data;
   uint32_t packed = bytes[0];

   packed |= bytes[1] << 8;
   packed |= bytes[2] << 16;

   OUT_RING(chan, packed);
}
102
103static INLINE void
104emit_vertex(struct push_context *ctx, unsigned n)
105{
106   struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
107   struct nouveau_channel *chan = tesla->channel;
108   int i;
109
110   if (ctx->edgeflag_attr < 16) {
111      float *edgeflag = (uint8_t *)ctx->attr[ctx->edgeflag_attr].map +
112                        ctx->attr[ctx->edgeflag_attr].stride * n;
113
114      if (*edgeflag != ctx->edgeflag) {
115         BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
116         OUT_RING  (chan, *edgeflag ? 1 : 0);
117         ctx->edgeflag = *edgeflag;
118      }
119   }
120
121   BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, ctx->vtx_size);
122   for (i = 0; i < ctx->attr_nr; i++)
123      ctx->attr[i].push(chan,
124			(uint8_t *)ctx->attr[i].map + ctx->attr[i].stride * n);
125}
126
127static void
128emit_edgeflag(void *priv, boolean enabled)
129{
130   struct push_context *ctx = priv;
131   struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
132   struct nouveau_channel *chan = tesla->channel;
133
134   BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
135   OUT_RING  (chan, enabled ? 1 : 0);
136}
137
138static void
139emit_elt08(void *priv, unsigned start, unsigned count)
140{
141   struct push_context *ctx = priv;
142   uint8_t *idxbuf = ctx->idxbuf;
143
144   while (count--)
145      emit_vertex(ctx, idxbuf[start++]);
146}
147
148static void
149emit_elt08_biased(void *priv, unsigned start, unsigned count)
150{
151   struct push_context *ctx = priv;
152   uint8_t *idxbuf = ctx->idxbuf;
153
154   while (count--)
155      emit_vertex(ctx, idxbuf[start++] + ctx->idxbias);
156}
157
158static void
159emit_elt16(void *priv, unsigned start, unsigned count)
160{
161   struct push_context *ctx = priv;
162   uint16_t *idxbuf = ctx->idxbuf;
163
164   while (count--)
165      emit_vertex(ctx, idxbuf[start++]);
166}
167
168static void
169emit_elt16_biased(void *priv, unsigned start, unsigned count)
170{
171   struct push_context *ctx = priv;
172   uint16_t *idxbuf = ctx->idxbuf;
173
174   while (count--)
175      emit_vertex(ctx, idxbuf[start++] + ctx->idxbias);
176}
177
178static void
179emit_elt32(void *priv, unsigned start, unsigned count)
180{
181   struct push_context *ctx = priv;
182   uint32_t *idxbuf = ctx->idxbuf;
183
184   while (count--)
185      emit_vertex(ctx, idxbuf[start++]);
186}
187
188static void
189emit_elt32_biased(void *priv, unsigned start, unsigned count)
190{
191   struct push_context *ctx = priv;
192   uint32_t *idxbuf = ctx->idxbuf;
193
194   while (count--)
195      emit_vertex(ctx, idxbuf[start++] + ctx->idxbias);
196}
197
/* Emit count consecutive vertices of a non-indexed draw, beginning at start. */
static void
emit_verts(void *priv, unsigned start, unsigned count)
{
   unsigned k;

   for (k = 0; k < count; k++)
      emit_vertex(priv, start + k);
}
204
205void
206nv50_push_elements_instanced(struct pipe_context *pipe,
207                             struct pipe_resource *idxbuf,
208                             unsigned idxsize, int idxbias,
209                             unsigned mode, unsigned start, unsigned count,
210                             unsigned i_start, unsigned i_count)
211{
212   struct nv50_context *nv50 = nv50_context(pipe);
213   struct nouveau_grobj *tesla = nv50->screen->tesla;
214   struct nouveau_channel *chan = tesla->channel;
215   struct push_context ctx;
216   const unsigned p_overhead = 4 + /* begin/end */
217                               4; /* potential edgeflag enable/disable */
218   const unsigned v_overhead = 1 + /* VERTEX_DATA packet header */
219                               2; /* potential edgeflag modification */
220   struct u_split_prim s;
221   unsigned vtx_size;
222   boolean nzi = FALSE;
223   int i;
224
225   ctx.nv50 = nv50;
226   ctx.attr_nr = 0;
227   ctx.idxbuf = NULL;
228   ctx.vtx_size = 0;
229   ctx.edgeflag = 0.5f;
230   ctx.edgeflag_attr = nv50->vertprog->cfg.edgeflag_in;
231
232   /* map vertex buffers, determine vertex size */
233   for (i = 0; i < nv50->vtxelt->num_elements; i++) {
234      struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
235      struct pipe_vertex_buffer *vb = &nv50->vtxbuf[ve->vertex_buffer_index];
236      struct nouveau_bo *bo = nv50_resource(vb->buffer)->bo;
237      unsigned size, nr_components, n;
238
239      if (!(nv50->vbo_fifo & (1 << i)))
240         continue;
241      n = ctx.attr_nr++;
242
243      if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
244         assert(bo->map);
245         return;
246      }
247      ctx.attr[n].map = (uint8_t *)bo->map + vb->buffer_offset + ve->src_offset;
248      nouveau_bo_unmap(bo);
249
250      ctx.attr[n].stride = vb->stride;
251      ctx.attr[n].divisor = ve->instance_divisor;
252      if (ctx.attr[n].divisor) {
253         ctx.attr[n].step = i_start % ve->instance_divisor;
254         ctx.attr[n].map = (uint8_t *)ctx.attr[n].map + i_start * vb->stride;
255      }
256
257      size = util_format_get_component_bits(ve->src_format,
258                                            UTIL_FORMAT_COLORSPACE_RGB, 0);
259      nr_components = util_format_get_nr_components(ve->src_format);
260      switch (size) {
261      case 8:
262         switch (nr_components) {
263         case 1: ctx.attr[n].push = emit_b08_1; break;
264         case 2: ctx.attr[n].push = emit_b16_1; break;
265         case 3: ctx.attr[n].push = emit_b08_3; break;
266         case 4: ctx.attr[n].push = emit_b32_1; break;
267         }
268         ctx.vtx_size++;
269         break;
270      case 16:
271         switch (nr_components) {
272         case 1: ctx.attr[n].push = emit_b16_1; break;
273         case 2: ctx.attr[n].push = emit_b32_1; break;
274         case 3: ctx.attr[n].push = emit_b16_3; break;
275         case 4: ctx.attr[n].push = emit_b32_2; break;
276         }
277         ctx.vtx_size += (nr_components + 1) >> 1;
278         break;
279      case 32:
280         switch (nr_components) {
281         case 1: ctx.attr[n].push = emit_b32_1; break;
282         case 2: ctx.attr[n].push = emit_b32_2; break;
283         case 3: ctx.attr[n].push = emit_b32_3; break;
284         case 4: ctx.attr[n].push = emit_b32_4; break;
285         }
286         ctx.vtx_size += nr_components;
287         break;
288      default:
289         assert(0);
290         return;
291      }
292   }
293   vtx_size = ctx.vtx_size + v_overhead;
294
295   /* map index buffer, if present */
296   if (idxbuf) {
297      struct nouveau_bo *bo = nv50_resource(idxbuf)->bo;
298
299      if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
300         assert(bo->map);
301         return;
302      }
303      ctx.idxbuf = bo->map;
304      ctx.idxbias = idxbias;
305      ctx.idxsize = idxsize;
306      nouveau_bo_unmap(bo);
307   }
308
309   s.priv = &ctx;
310   s.edge = emit_edgeflag;
311   if (idxbuf) {
312      if (idxsize == 1)
313         s.emit = idxbias ? emit_elt08_biased : emit_elt08;
314      else
315      if (idxsize == 2)
316         s.emit = idxbias ? emit_elt16_biased : emit_elt16;
317      else
318         s.emit = idxbias ? emit_elt32_biased : emit_elt32;
319   } else
320      s.emit = emit_verts;
321
322   /* per-instance loop */
323   BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
324   OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
325   OUT_RING  (chan, i_start);
326   while (i_count--) {
327      unsigned max_verts;
328      boolean done;
329
330      for (i = 0; i < ctx.attr_nr; i++) {
331         if (!ctx.attr[i].divisor ||
332              ctx.attr[i].divisor != ++ctx.attr[i].step)
333            continue;
334         ctx.attr[i].step = 0;
335         ctx.attr[i].map = (uint8_t *)ctx.attr[i].map + ctx.attr[i].stride;
336      }
337
338      u_split_prim_init(&s, mode, start, count);
339      do {
340         if (AVAIL_RING(chan) < p_overhead + (6 * vtx_size)) {
341            FIRE_RING(chan);
342            if (!nv50_state_validate(nv50, p_overhead + (6 * vtx_size))) {
343               assert(0);
344               return;
345            }
346         }
347
348         max_verts  = AVAIL_RING(chan);
349         max_verts -= p_overhead;
350         max_verts /= vtx_size;
351
352         BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
353         OUT_RING  (chan, nv50_prim(s.mode) | (nzi ? (1 << 28) : 0));
354         done = u_split_prim_next(&s, max_verts);
355         BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
356         OUT_RING  (chan, 0);
357      } while (!done);
358
359      nzi = TRUE;
360   }
361}
362