u_vbuf.c revision c2cc630f2896175ff0f368d9199acbe24afb7e75
/**************************************************************************
 *
 * Copyright 2011 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "util/u_vbuf.h"

#include "util/u_dump.h"
#include "util/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#include "translate/translate.h"
#include "translate/translate_cache.h"
#include "cso_cache/cso_cache.h"
#include "cso_cache/cso_hash.h"

struct u_vbuf_elements {
   unsigned count;
   struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];

   unsigned src_format_size[PIPE_MAX_ATTRIBS];

   /* If (velem[i].src_format != native_format[i]), the vertex buffer
    * referenced by the vertex element cannot be used for rendering and
    * its vertex data must be translated to native_format[i]. */
   enum pipe_format native_format[PIPE_MAX_ATTRIBS];
   unsigned native_format_size[PIPE_MAX_ATTRIBS];

   /* This may mean one of two things:
    * - src_format != native_format, as discussed above.
    * - src_offset % 4 != 0 (if the caps don't allow such an offset). */
   boolean incompatible_layout;
   /* Per-element flags. */
   boolean incompatible_layout_elem[PIPE_MAX_ATTRIBS];
};

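/* Attrib categories handled separately by the translate fallback:
 * ordinary per-vertex attribs, per-instance attribs, and constant
 * attribs (stride 0). Each category that needs translation gets its
 * own output vertex buffer slot. */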
enum {
   VB_VERTEX = 0,
   VB_INSTANCE = 1,
   VB_CONST = 2,
   VB_NUM = 3
};

struct u_vbuf_priv {
   struct u_vbuf b;
   struct pipe_context *pipe;
   struct translate_cache *translate_cache;
   struct cso_cache *cso_cache;

   /* Vertex element state bound by the state tracker. */
   void *saved_ve;
   /* and its associated helper structure for this module. */
   struct u_vbuf_elements *ve;

   /* Vertex elements used for the translate fallback. */
   struct pipe_vertex_element fallback_velems[PIPE_MAX_ATTRIBS];
   /* If non-NULL, this is a vertex element state used for the translate
    * fallback and therefore used for rendering too. */
   void *fallback_ve;
   /* The vertex buffer slot indices where translated vertices are stored. */
   unsigned fallback_vbs[VB_NUM];
   /* When binding the fallback vertex element state, we don't want to
    * change saved_ve and ve. This is set to TRUE in such cases. */
   boolean ve_binding_lock;

   /* Whether there is any user buffer. */
   boolean any_user_vbs;
   /* Whether there is a buffer with a non-native layout. */
   boolean incompatible_vb_layout;
   /* Per-buffer flags. */
   boolean incompatible_vb[PIPE_MAX_ATTRIBS];
};

static void u_vbuf_init_format_caps(struct u_vbuf_priv *mgr)
{
   struct pipe_screen *screen = mgr->pipe->screen;

   mgr->b.caps.format_fixed32 =
      screen->is_format_supported(screen, PIPE_FORMAT_R32_FIXED, PIPE_BUFFER,
                                  0, PIPE_BIND_VERTEX_BUFFER);

   mgr->b.caps.format_float16 =
      screen->is_format_supported(screen, PIPE_FORMAT_R16_FLOAT, PIPE_BUFFER,
                                  0, PIPE_BIND_VERTEX_BUFFER);

   mgr->b.caps.format_float64 =
      screen->is_format_supported(screen, PIPE_FORMAT_R64_FLOAT, PIPE_BUFFER,
                                  0, PIPE_BIND_VERTEX_BUFFER);

   mgr->b.caps.format_norm32 =
      screen->is_format_supported(screen, PIPE_FORMAT_R32_UNORM, PIPE_BUFFER,
                                  0, PIPE_BIND_VERTEX_BUFFER) &&
      screen->is_format_supported(screen, PIPE_FORMAT_R32_SNORM, PIPE_BUFFER,
                                  0, PIPE_BIND_VERTEX_BUFFER);

   mgr->b.caps.format_scaled32 =
      screen->is_format_supported(screen, PIPE_FORMAT_R32_USCALED, PIPE_BUFFER,
                                  0, PIPE_BIND_VERTEX_BUFFER) &&
      screen->is_format_supported(screen, PIPE_FORMAT_R32_SSCALED, PIPE_BUFFER,
                                  0, PIPE_BIND_VERTEX_BUFFER);
}

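/* A minimal usage sketch for a driver context (illustrative values and
 * names, not requirements; "ctx" is a hypothetical driver context):
 *
 *    ctx->vbuf = u_vbuf_create(pipe, 1024 * 1024, 4,
 *                              PIPE_BIND_VERTEX_BUFFER,
 *                              U_VERTEX_FETCH_BYTE_ALIGNED);
 *
 * Drivers whose hardware cannot fetch vertices from byte-aligned offsets
 * would pass U_VERTEX_FETCH_DWORD_ALIGNED instead. */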
struct u_vbuf *
u_vbuf_create(struct pipe_context *pipe,
              unsigned upload_buffer_size,
              unsigned upload_buffer_alignment,
              unsigned upload_buffer_bind,
              enum u_fetch_alignment fetch_alignment)
{
   struct u_vbuf_priv *mgr = CALLOC_STRUCT(u_vbuf_priv);

   mgr->pipe = pipe;
   mgr->cso_cache = cso_cache_create();
   mgr->translate_cache = translate_cache_create();
   memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs));

   mgr->b.uploader = u_upload_create(pipe, upload_buffer_size,
                                     upload_buffer_alignment,
                                     upload_buffer_bind);

   mgr->b.caps.fetch_dword_unaligned =
         fetch_alignment == U_VERTEX_FETCH_BYTE_ALIGNED;

   u_vbuf_init_format_caps(mgr);

   return &mgr->b;
}

/* XXX I had to fork this off of cso_context. */
static void *
u_vbuf_pipe_set_vertex_elements(struct u_vbuf_priv *mgr,
                                unsigned count,
                                const struct pipe_vertex_element *states)
{
   unsigned key_size, hash_key;
   struct cso_hash_iter iter;
   void *handle;
   struct cso_velems_state velems_state;

   /* Need to include the count in the stored state data too. */
   key_size = sizeof(struct pipe_vertex_element) * count + sizeof(unsigned);
   velems_state.count = count;
   memcpy(velems_state.velems, states,
          sizeof(struct pipe_vertex_element) * count);
   hash_key = cso_construct_key((void*)&velems_state, key_size);
   iter = cso_find_state_template(mgr->cso_cache, hash_key, CSO_VELEMENTS,
                                  (void*)&velems_state, key_size);

   if (cso_hash_iter_is_null(iter)) {
      struct cso_velements *cso = MALLOC_STRUCT(cso_velements);
      memcpy(&cso->state, &velems_state, key_size);
      cso->data =
            mgr->pipe->create_vertex_elements_state(mgr->pipe, count,
                                                    &cso->state.velems[0]);
      cso->delete_state =
            (cso_state_callback)mgr->pipe->delete_vertex_elements_state;
      cso->context = mgr->pipe;

      iter = cso_insert_state(mgr->cso_cache, hash_key, CSO_VELEMENTS, cso);
      handle = cso->data;
   } else {
      handle = ((struct cso_velements *)cso_hash_iter_data(iter))->data;
   }

   mgr->pipe->bind_vertex_elements_state(mgr->pipe, handle);
   return handle;
}

void u_vbuf_destroy(struct u_vbuf *mgrb)
{
   struct u_vbuf_priv *mgr = (struct u_vbuf_priv*)mgrb;
   unsigned i;

   for (i = 0; i < mgr->b.nr_vertex_buffers; i++) {
      pipe_resource_reference(&mgr->b.vertex_buffer[i].buffer, NULL);
   }
   for (i = 0; i < mgr->b.nr_real_vertex_buffers; i++) {
      pipe_resource_reference(&mgr->b.real_vertex_buffer[i].buffer, NULL);
   }

   translate_cache_destroy(mgr->translate_cache);
   u_upload_destroy(mgr->b.uploader);
   cso_cache_delete(mgr->cso_cache);
   FREE(mgr);
}

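/* Translate the vertex buffers selected by vb_mask into the single
 * interleaved vertex buffer slot out_vb, using the given translate key.
 * If unroll_indices is true, the index buffer is read and every index
 * is expanded into a full vertex in the output buffer; otherwise the
 * vertex range [start_vertex, start_vertex + num_vertices) is converted
 * linearly. */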
static void
u_vbuf_translate_buffers(struct u_vbuf_priv *mgr, struct translate_key *key,
                         unsigned vb_mask, unsigned out_vb,
                         int start_vertex, unsigned num_vertices,
                         int start_index, unsigned num_indices, int min_index,
                         bool unroll_indices)
{
   struct translate *tr;
   struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0};
   struct pipe_resource *out_buffer = NULL;
   uint8_t *out_map;
   unsigned i, out_offset;

   /* Get a translate object. */
   tr = translate_cache_find(mgr->translate_cache, key);

   /* Map buffers we want to translate. */
   for (i = 0; i < mgr->b.nr_vertex_buffers; i++) {
      if (vb_mask & (1 << i)) {
         struct pipe_vertex_buffer *vb = &mgr->b.vertex_buffer[i];
         unsigned offset = vb->buffer_offset + vb->stride * start_vertex;
         uint8_t *map;

         if (u_vbuf_resource(vb->buffer)->user_ptr) {
            map = u_vbuf_resource(vb->buffer)->user_ptr + offset;
         } else {
            unsigned size = vb->stride ? num_vertices * vb->stride
                                       : sizeof(double)*4;

            if (offset+size > vb->buffer->width0) {
               size = vb->buffer->width0 - offset;
            }

            map = pipe_buffer_map_range(mgr->pipe, vb->buffer, offset, size,
                                        PIPE_TRANSFER_READ, &vb_transfer[i]);
         }

         /* Subtract min_index so that indexing with the index buffer works. */
         if (unroll_indices) {
            map -= vb->stride * min_index;
         }

         tr->set_buffer(tr, i, map, vb->stride, ~0);
      }
   }

   /* Translate. */
   if (unroll_indices) {
      struct pipe_index_buffer *ib = &mgr->b.index_buffer;
      struct pipe_transfer *transfer = NULL;
      unsigned offset = ib->offset + start_index * ib->index_size;
      uint8_t *map;

      assert(ib->buffer && ib->index_size);

      if (u_vbuf_resource(ib->buffer)->user_ptr) {
         map = u_vbuf_resource(ib->buffer)->user_ptr + offset;
      } else {
         map = pipe_buffer_map_range(mgr->pipe, ib->buffer, offset,
                                     num_indices * ib->index_size,
                                     PIPE_TRANSFER_READ, &transfer);
      }

      /* Create and map the output buffer. */
      u_upload_alloc(mgr->b.uploader, 0,
                     key->output_stride * num_indices,
                     &out_offset, &out_buffer,
                     (void**)&out_map);

      switch (ib->index_size) {
      case 4:
         tr->run_elts(tr, (unsigned*)map, num_indices, 0, out_map);
         break;
      case 2:
         tr->run_elts16(tr, (uint16_t*)map, num_indices, 0, out_map);
         break;
      case 1:
         tr->run_elts8(tr, map, num_indices, 0, out_map);
         break;
      }

      if (transfer) {
         pipe_buffer_unmap(mgr->pipe, transfer);
      }
   } else {
      /* Create and map the output buffer. */
      u_upload_alloc(mgr->b.uploader,
                     key->output_stride * start_vertex,
                     key->output_stride * num_vertices,
                     &out_offset, &out_buffer,
                     (void**)&out_map);

      out_offset -= key->output_stride * start_vertex;

      tr->run(tr, 0, num_vertices, 0, out_map);
   }

   /* Unmap all buffers. */
   for (i = 0; i < mgr->b.nr_vertex_buffers; i++) {
      if (vb_transfer[i]) {
         pipe_buffer_unmap(mgr->pipe, vb_transfer[i]);
      }
   }

   /* Set up the new vertex buffer. */
   mgr->b.real_vertex_buffer[out_vb].buffer_offset = out_offset;
   mgr->b.real_vertex_buffer[out_vb].stride = key->output_stride;

   /* Move the buffer reference. */
   pipe_resource_reference(
      &mgr->b.real_vertex_buffer[out_vb].buffer, NULL);
   mgr->b.real_vertex_buffer[out_vb].buffer = out_buffer;
}

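/* Reserve a free vertex buffer slot for each attrib category present in
 * mask, storing the chosen slots in mgr->fallback_vbs. Returns FALSE if
 * there aren't enough free slots among PIPE_MAX_ATTRIBS. */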
static boolean
u_vbuf_translate_find_free_vb_slots(struct u_vbuf_priv *mgr,
                                    unsigned mask[VB_NUM])
{
   unsigned i, type;
   unsigned nr = mgr->ve->count;
   boolean used_vb[PIPE_MAX_ATTRIBS] = {0};
   unsigned fallback_vbs[VB_NUM];

   memset(fallback_vbs, ~0, sizeof(fallback_vbs));

   /* Mark used vertex buffers as... used. */
   for (i = 0; i < nr; i++) {
      if (!mgr->ve->incompatible_layout_elem[i]) {
         unsigned index = mgr->ve->ve[i].vertex_buffer_index;

         if (!mgr->incompatible_vb[index]) {
            used_vb[index] = TRUE;
         }
      }
   }

   /* Find free slots for each type if needed. */
   i = 0;
   for (type = 0; type < VB_NUM; type++) {
      if (mask[type]) {
         for (; i < PIPE_MAX_ATTRIBS; i++) {
            if (!used_vb[i]) {
               /*printf("found slot=%i for type=%i\n", i, type);*/
               fallback_vbs[type] = i;
               i++;
               if (i > mgr->b.nr_real_vertex_buffers) {
                  mgr->b.nr_real_vertex_buffers = i;
               }
               break;
            }
         }
         if (i == PIPE_MAX_ATTRIBS) {
            /* fail, reset the number to its original value */
            mgr->b.nr_real_vertex_buffers = mgr->b.nr_vertex_buffers;
            return FALSE;
         }
      }
   }

   memcpy(mgr->fallback_vbs, fallback_vbs, sizeof(fallback_vbs));
   return TRUE;
}

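/* Set up the translate fallback: decide which attribs need translation,
 * reserve output slots, build one translate key per attrib category, run
 * the translation, and bind vertex elements that point at the translated
 * buffers. Returns FALSE if no free slots are available. */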
static boolean
u_vbuf_translate_begin(struct u_vbuf_priv *mgr,
                       int start_vertex, unsigned num_vertices,
                       int start_instance, unsigned num_instances,
                       int start_index, unsigned num_indices, int min_index,
                       bool unroll_indices)
{
   unsigned mask[VB_NUM] = {0};
   struct translate_key key[VB_NUM];
   unsigned elem_index[VB_NUM][PIPE_MAX_ATTRIBS]; /* ... into key.elements */
   unsigned i, type;

   int start[VB_NUM] = {
      start_vertex,     /* VERTEX */
      start_instance,   /* INSTANCE */
      0                 /* CONST */
   };

   unsigned num[VB_NUM] = {
      num_vertices,     /* VERTEX */
      num_instances,    /* INSTANCE */
      1                 /* CONST */
   };

   memset(key, 0, sizeof(key));
   memset(elem_index, ~0, sizeof(elem_index));

   /* See if there are vertex attribs of each type to translate and
    * which ones. */
   for (i = 0; i < mgr->ve->count; i++) {
      unsigned vb_index = mgr->ve->ve[i].vertex_buffer_index;

      if (!mgr->b.vertex_buffer[vb_index].stride) {
         if (!mgr->ve->incompatible_layout_elem[i] &&
             !mgr->incompatible_vb[vb_index]) {
            continue;
         }
         mask[VB_CONST] |= 1 << vb_index;
      } else if (mgr->ve->ve[i].instance_divisor) {
         if (!mgr->ve->incompatible_layout_elem[i] &&
             !mgr->incompatible_vb[vb_index]) {
            continue;
         }
         mask[VB_INSTANCE] |= 1 << vb_index;
      } else {
         if (!unroll_indices &&
             !mgr->ve->incompatible_layout_elem[i] &&
             !mgr->incompatible_vb[vb_index]) {
            continue;
         }
         mask[VB_VERTEX] |= 1 << vb_index;
      }
   }

   assert(mask[VB_VERTEX] || mask[VB_INSTANCE] || mask[VB_CONST]);

   /* Find free vertex buffer slots. */
   if (!u_vbuf_translate_find_free_vb_slots(mgr, mask)) {
      return FALSE;
   }

   /* Initialize the translate keys. */
   for (i = 0; i < mgr->ve->count; i++) {
      struct translate_key *k;
      struct translate_element *te;
      unsigned bit, vb_index = mgr->ve->ve[i].vertex_buffer_index;
      bit = 1 << vb_index;

      if (!mgr->ve->incompatible_layout_elem[i] &&
          !mgr->incompatible_vb[vb_index] &&
          (!unroll_indices || !(mask[VB_VERTEX] & bit))) {
         continue;
      }

      /* Set type to the kind of attrib we will translate:
       * vertex, instance, or constant. */
      for (type = 0; type < VB_NUM; type++) {
         if (mask[type] & bit) {
            break;
         }
      }
      assert(type < VB_NUM);
      assert(translate_is_output_format_supported(mgr->ve->native_format[i]));
      /*printf("velem=%i type=%i\n", i, type);*/

      /* Add the vertex element. */
      k = &key[type];
      elem_index[type][i] = k->nr_elements;

      te = &k->element[k->nr_elements];
      te->type = TRANSLATE_ELEMENT_NORMAL;
      te->instance_divisor = 0;
      te->input_buffer = vb_index;
      te->input_format = mgr->ve->ve[i].src_format;
      te->input_offset = mgr->ve->ve[i].src_offset;
      te->output_format = mgr->ve->native_format[i];
      te->output_offset = k->output_stride;

      k->output_stride += mgr->ve->native_format_size[i];
      k->nr_elements++;
   }

   /* Translate buffers. */
   for (type = 0; type < VB_NUM; type++) {
      if (key[type].nr_elements) {
         u_vbuf_translate_buffers(mgr, &key[type], mask[type],
                                  mgr->fallback_vbs[type],
                                  start[type], num[type],
                                  start_index, num_indices, min_index,
                                  unroll_indices && type == VB_VERTEX);

         /* Fixup the stride for constant attribs. */
         if (type == VB_CONST) {
            mgr->b.real_vertex_buffer[mgr->fallback_vbs[VB_CONST]].stride = 0;
         }
      }
   }

   /* Set up new vertex elements. */
   for (i = 0; i < mgr->ve->count; i++) {
      for (type = 0; type < VB_NUM; type++) {
         if (elem_index[type][i] < key[type].nr_elements) {
            struct translate_element *te = &key[type].element[elem_index[type][i]];
            mgr->fallback_velems[i].instance_divisor = mgr->ve->ve[i].instance_divisor;
            mgr->fallback_velems[i].src_format = te->output_format;
            mgr->fallback_velems[i].src_offset = te->output_offset;
            mgr->fallback_velems[i].vertex_buffer_index = mgr->fallback_vbs[type];

            /* elem_index[type][i] can only be set for one type. */
            assert(type > VB_INSTANCE || elem_index[type+1][i] == ~0);
            assert(type > VB_VERTEX   || elem_index[type+2][i] == ~0);
            break;
         }
      }
      /* No translating, just copy the original vertex element over. */
      if (type == VB_NUM) {
         memcpy(&mgr->fallback_velems[i], &mgr->ve->ve[i],
                sizeof(struct pipe_vertex_element));
      }
   }

   /* Preserve saved_ve. */
   mgr->ve_binding_lock = TRUE;
   mgr->fallback_ve = u_vbuf_pipe_set_vertex_elements(mgr, mgr->ve->count,
                                                      mgr->fallback_velems);
   mgr->ve_binding_lock = FALSE;
   return TRUE;
}

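/* Undo the translate fallback: rebind the saved vertex elements and
 * release the temporary output buffers. */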
static void u_vbuf_translate_end(struct u_vbuf_priv *mgr)
{
   unsigned i;

   /* Restore vertex elements. */
   /* Note that saved_ve will be overwritten in bind_vertex_elements_state. */
   mgr->pipe->bind_vertex_elements_state(mgr->pipe, mgr->saved_ve);
   mgr->fallback_ve = NULL;

   /* Unreference the now-unused VBOs. */
   for (i = 0; i < VB_NUM; i++) {
      unsigned vb = mgr->fallback_vbs[i];
      if (vb != ~0) {
         pipe_resource_reference(&mgr->b.real_vertex_buffer[vb].buffer, NULL);
         mgr->fallback_vbs[i] = ~0;
      }
   }
   mgr->b.nr_real_vertex_buffers = mgr->b.nr_vertex_buffers;
}

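/* Map an unsupported vertex format onto one the hardware can fetch,
 * e.g. R16_FLOAT -> R32_FLOAT when format_float16 is unset; the
 * translate fallback converts the data at draw time. */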
#define FORMAT_REPLACE(what, withwhat) \
    case PIPE_FORMAT_##what: format = PIPE_FORMAT_##withwhat; break

struct u_vbuf_elements *
u_vbuf_create_vertex_elements(struct u_vbuf *mgrb,
                              unsigned count,
                              const struct pipe_vertex_element *attribs,
                              struct pipe_vertex_element *native_attribs)
{
   struct u_vbuf_priv *mgr = (struct u_vbuf_priv*)mgrb;
   unsigned i;
   struct u_vbuf_elements *ve = CALLOC_STRUCT(u_vbuf_elements);

   ve->count = count;

   if (!count) {
      return ve;
   }

   memcpy(ve->ve, attribs, sizeof(struct pipe_vertex_element) * count);
   memcpy(native_attribs, attribs, sizeof(struct pipe_vertex_element) * count);

   /* Set the best native format in case the original format is not
    * supported. */
   for (i = 0; i < count; i++) {
      enum pipe_format format = ve->ve[i].src_format;

      ve->src_format_size[i] = util_format_get_blocksize(format);

      /* Choose a native format.
       * For now we don't care about the alignment; that's going to
       * be sorted out later. */
      if (!mgr->b.caps.format_fixed32) {
         switch (format) {
            FORMAT_REPLACE(R32_FIXED,           R32_FLOAT);
            FORMAT_REPLACE(R32G32_FIXED,        R32G32_FLOAT);
            FORMAT_REPLACE(R32G32B32_FIXED,     R32G32B32_FLOAT);
            FORMAT_REPLACE(R32G32B32A32_FIXED,  R32G32B32A32_FLOAT);
            default:;
         }
      }
      if (!mgr->b.caps.format_float16) {
         switch (format) {
            FORMAT_REPLACE(R16_FLOAT,           R32_FLOAT);
            FORMAT_REPLACE(R16G16_FLOAT,        R32G32_FLOAT);
            FORMAT_REPLACE(R16G16B16_FLOAT,     R32G32B32_FLOAT);
            FORMAT_REPLACE(R16G16B16A16_FLOAT,  R32G32B32A32_FLOAT);
            default:;
         }
      }
      if (!mgr->b.caps.format_float64) {
         switch (format) {
            FORMAT_REPLACE(R64_FLOAT,           R32_FLOAT);
            FORMAT_REPLACE(R64G64_FLOAT,        R32G32_FLOAT);
            FORMAT_REPLACE(R64G64B64_FLOAT,     R32G32B32_FLOAT);
            FORMAT_REPLACE(R64G64B64A64_FLOAT,  R32G32B32A32_FLOAT);
            default:;
         }
      }
      if (!mgr->b.caps.format_norm32) {
         switch (format) {
            FORMAT_REPLACE(R32_UNORM,           R32_FLOAT);
            FORMAT_REPLACE(R32G32_UNORM,        R32G32_FLOAT);
            FORMAT_REPLACE(R32G32B32_UNORM,     R32G32B32_FLOAT);
            FORMAT_REPLACE(R32G32B32A32_UNORM,  R32G32B32A32_FLOAT);
            FORMAT_REPLACE(R32_SNORM,           R32_FLOAT);
            FORMAT_REPLACE(R32G32_SNORM,        R32G32_FLOAT);
            FORMAT_REPLACE(R32G32B32_SNORM,     R32G32B32_FLOAT);
            FORMAT_REPLACE(R32G32B32A32_SNORM,  R32G32B32A32_FLOAT);
            default:;
         }
      }
      if (!mgr->b.caps.format_scaled32) {
         switch (format) {
            FORMAT_REPLACE(R32_USCALED,         R32_FLOAT);
            FORMAT_REPLACE(R32G32_USCALED,      R32G32_FLOAT);
            FORMAT_REPLACE(R32G32B32_USCALED,   R32G32B32_FLOAT);
            FORMAT_REPLACE(R32G32B32A32_USCALED,R32G32B32A32_FLOAT);
            FORMAT_REPLACE(R32_SSCALED,         R32_FLOAT);
            FORMAT_REPLACE(R32G32_SSCALED,      R32G32_FLOAT);
            FORMAT_REPLACE(R32G32B32_SSCALED,   R32G32B32_FLOAT);
            FORMAT_REPLACE(R32G32B32A32_SSCALED,R32G32B32A32_FLOAT);
            default:;
         }
      }

      native_attribs[i].src_format = format;
      ve->native_format[i] = format;
      ve->native_format_size[i] =
            util_format_get_blocksize(ve->native_format[i]);

      ve->incompatible_layout_elem[i] =
            ve->ve[i].src_format != ve->native_format[i] ||
            (!mgr->b.caps.fetch_dword_unaligned && ve->ve[i].src_offset % 4 != 0);
      ve->incompatible_layout =
            ve->incompatible_layout ||
            ve->incompatible_layout_elem[i];
   }

   /* Align the formats to the size of a DWORD if needed. */
   if (!mgr->b.caps.fetch_dword_unaligned) {
      for (i = 0; i < count; i++) {
         ve->native_format_size[i] = align(ve->native_format_size[i], 4);
      }
   }

   return ve;
}

void u_vbuf_bind_vertex_elements(struct u_vbuf *mgrb,
                                 void *cso,
                                 struct u_vbuf_elements *ve)
{
   struct u_vbuf_priv *mgr = (struct u_vbuf_priv*)mgrb;

   if (!cso) {
      return;
   }

   if (!mgr->ve_binding_lock) {
      mgr->saved_ve = cso;
      mgr->ve = ve;
   }
}

void u_vbuf_destroy_vertex_elements(struct u_vbuf *mgr,
                                    struct u_vbuf_elements *ve)
{
   FREE(ve);
}

void u_vbuf_set_vertex_buffers(struct u_vbuf *mgrb,
                               unsigned count,
                               const struct pipe_vertex_buffer *bufs)
{
   struct u_vbuf_priv *mgr = (struct u_vbuf_priv*)mgrb;
   unsigned i;

   mgr->any_user_vbs = FALSE;
   mgr->incompatible_vb_layout = FALSE;
   memset(mgr->incompatible_vb, 0, sizeof(mgr->incompatible_vb));

   if (!mgr->b.caps.fetch_dword_unaligned) {
      /* Check if the strides and offsets are aligned to the size of a DWORD. */
      for (i = 0; i < count; i++) {
         if (bufs[i].buffer) {
            if (bufs[i].stride % 4 != 0 ||
                bufs[i].buffer_offset % 4 != 0) {
               mgr->incompatible_vb_layout = TRUE;
               mgr->incompatible_vb[i] = TRUE;
            }
         }
      }
   }

   for (i = 0; i < count; i++) {
      const struct pipe_vertex_buffer *vb = &bufs[i];

      pipe_resource_reference(&mgr->b.vertex_buffer[i].buffer, vb->buffer);

      mgr->b.real_vertex_buffer[i].buffer_offset =
      mgr->b.vertex_buffer[i].buffer_offset = vb->buffer_offset;

      mgr->b.real_vertex_buffer[i].stride =
      mgr->b.vertex_buffer[i].stride = vb->stride;

      if (!vb->buffer ||
          mgr->incompatible_vb[i]) {
         pipe_resource_reference(&mgr->b.real_vertex_buffer[i].buffer, NULL);
         continue;
      }

      if (u_vbuf_resource(vb->buffer)->user_ptr) {
         pipe_resource_reference(&mgr->b.real_vertex_buffer[i].buffer, NULL);
         mgr->any_user_vbs = TRUE;
         continue;
      }

      pipe_resource_reference(&mgr->b.real_vertex_buffer[i].buffer, vb->buffer);
   }

   for (i = count; i < mgr->b.nr_vertex_buffers; i++) {
      pipe_resource_reference(&mgr->b.vertex_buffer[i].buffer, NULL);
   }
   for (i = count; i < mgr->b.nr_real_vertex_buffers; i++) {
      pipe_resource_reference(&mgr->b.real_vertex_buffer[i].buffer, NULL);
   }

   mgr->b.nr_vertex_buffers = count;
   mgr->b.nr_real_vertex_buffers = count;
}

void u_vbuf_set_index_buffer(struct u_vbuf *mgr,
                             const struct pipe_index_buffer *ib)
{
   if (ib && ib->buffer) {
      assert(ib->offset % ib->index_size == 0);
      pipe_resource_reference(&mgr->index_buffer.buffer, ib->buffer);
      mgr->index_buffer.offset = ib->offset;
      mgr->index_buffer.index_size = ib->index_size;
   } else {
      pipe_resource_reference(&mgr->index_buffer.buffer, NULL);
   }
}

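/* Upload the ranges of user (malloc'd) vertex buffers that this draw
 * actually reads and point the corresponding real vertex buffers at the
 * uploaded copies. The range per buffer is the union over all vertex
 * elements that read from it. */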
static void
u_vbuf_upload_buffers(struct u_vbuf_priv *mgr,
                      int start_vertex, unsigned num_vertices,
                      int start_instance, unsigned num_instances)
{
   unsigned i;
   unsigned nr_velems = mgr->ve->count;
   unsigned nr_vbufs = mgr->b.nr_vertex_buffers;
   struct pipe_vertex_element *velems =
         mgr->fallback_ve ? mgr->fallback_velems : mgr->ve->ve;
   unsigned start_offset[PIPE_MAX_ATTRIBS];
   unsigned end_offset[PIPE_MAX_ATTRIBS] = {0};

   /* Determine how much data needs to be uploaded. */
   for (i = 0; i < nr_velems; i++) {
      struct pipe_vertex_element *velem = &velems[i];
      unsigned index = velem->vertex_buffer_index;
      struct pipe_vertex_buffer *vb = &mgr->b.vertex_buffer[index];
      unsigned instance_div, first, size;

      /* Skip the buffers generated by translate. */
      if (index == mgr->fallback_vbs[VB_VERTEX] ||
          index == mgr->fallback_vbs[VB_INSTANCE] ||
          index == mgr->fallback_vbs[VB_CONST]) {
         continue;
      }

      assert(vb->buffer);

      if (!u_vbuf_resource(vb->buffer)->user_ptr) {
         continue;
      }

      instance_div = velem->instance_divisor;
      first = vb->buffer_offset + velem->src_offset;

      if (!vb->stride) {
         /* Constant attrib. */
         size = mgr->ve->src_format_size[i];
      } else if (instance_div) {
         /* Per-instance attrib. */
         unsigned count = (num_instances + instance_div - 1) / instance_div;
         first += vb->stride * start_instance;
         size = vb->stride * (count - 1) + mgr->ve->src_format_size[i];
      } else {
         /* Per-vertex attrib. */
         first += vb->stride * start_vertex;
         size = vb->stride * (num_vertices - 1) + mgr->ve->src_format_size[i];
      }

      /* Update offsets. */
      if (!end_offset[index]) {
         start_offset[index] = first;
         end_offset[index] = first + size;
      } else {
         if (first < start_offset[index])
            start_offset[index] = first;
         if (first + size > end_offset[index])
            end_offset[index] = first + size;
      }
   }

   /* Upload buffers. */
   for (i = 0; i < nr_vbufs; i++) {
      unsigned start, end = end_offset[i];
      struct pipe_vertex_buffer *real_vb;
      uint8_t *ptr;

      if (!end) {
         continue;
      }

      start = start_offset[i];
      assert(start < end);

      real_vb = &mgr->b.real_vertex_buffer[i];
      ptr = u_vbuf_resource(mgr->b.vertex_buffer[i].buffer)->user_ptr;

      u_upload_data(mgr->b.uploader, start, end - start, ptr + start,
                    &real_vb->buffer_offset, &real_vb->buffer);

      real_vb->buffer_offset -= start;
   }
}

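/* Return the maximum number of vertices that can be fetched from the
 * currently bound real vertex buffers without reading past the end of
 * any of them. */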
unsigned u_vbuf_draw_max_vertex_count(struct u_vbuf *mgrb)
{
   struct u_vbuf_priv *mgr = (struct u_vbuf_priv*)mgrb;
   unsigned i, nr = mgr->ve->count;
   struct pipe_vertex_element *velems =
         mgr->fallback_ve ? mgr->fallback_velems : mgr->ve->ve;
   unsigned result = ~0;

   for (i = 0; i < nr; i++) {
      struct pipe_vertex_buffer *vb =
            &mgr->b.real_vertex_buffer[velems[i].vertex_buffer_index];
      unsigned size, max_count, value;

      /* We're not interested in constant and per-instance attribs. */
      if (!vb->buffer ||
          !vb->stride ||
          velems[i].instance_divisor) {
         continue;
      }

      size = vb->buffer->width0;

      /* Subtract buffer_offset. */
      value = vb->buffer_offset;
      if (value >= size) {
         return 0;
      }
      size -= value;

      /* Subtract src_offset. */
      value = velems[i].src_offset;
      if (value >= size) {
         return 0;
      }
      size -= value;

      /* Subtract format_size. */
      value = mgr->ve->native_format_size[i];
      if (value >= size) {
         return 0;
      }
      size -= value;

      /* Compute the max count. */
      max_count = 1 + size / vb->stride;
      result = MIN2(result, max_count);
   }
   return result;
}

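/* Return TRUE if the draw needs index bounds, i.e. if any per-vertex
 * attrib comes from a user buffer or requires translation. */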
static boolean u_vbuf_need_minmax_index(struct u_vbuf_priv *mgr)
{
   unsigned i, nr = mgr->ve->count;

   for (i = 0; i < nr; i++) {
      struct pipe_vertex_buffer *vb;
      unsigned index;

      /* Per-instance attribs don't need min/max_index. */
      if (mgr->ve->ve[i].instance_divisor) {
         continue;
      }

      index = mgr->ve->ve[i].vertex_buffer_index;
      vb = &mgr->b.vertex_buffer[index];

      /* Constant attribs don't need min/max_index. */
      if (!vb->stride) {
         continue;
      }

      /* Per-vertex attribs need min/max_index. */
      if (u_vbuf_resource(vb->buffer)->user_ptr ||
          mgr->ve->incompatible_layout_elem[i] ||
          mgr->incompatible_vb[index]) {
         return TRUE;
      }
   }

   return FALSE;
}

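/* Scan the index buffer for the smallest and largest index used,
 * skipping the restart index when primitive restart is enabled. */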
static void u_vbuf_get_minmax_index(struct pipe_context *pipe,
                                    struct pipe_index_buffer *ib,
                                    const struct pipe_draw_info *info,
                                    int *out_min_index,
                                    int *out_max_index)
{
   struct pipe_transfer *transfer = NULL;
   const void *indices;
   unsigned i;
   unsigned restart_index = info->restart_index;

   if (u_vbuf_resource(ib->buffer)->user_ptr) {
      indices = u_vbuf_resource(ib->buffer)->user_ptr +
                ib->offset + info->start * ib->index_size;
   } else {
      indices = pipe_buffer_map_range(pipe, ib->buffer,
                                      ib->offset + info->start * ib->index_size,
                                      info->count * ib->index_size,
                                      PIPE_TRANSFER_READ, &transfer);
   }

   switch (ib->index_size) {
   case 4: {
      const unsigned *ui_indices = (const unsigned*)indices;
      unsigned max_ui = 0;
      unsigned min_ui = ~0U;
      if (info->primitive_restart) {
         for (i = 0; i < info->count; i++) {
            if (ui_indices[i] != restart_index) {
               if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
               if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
            }
         }
      }
      else {
         for (i = 0; i < info->count; i++) {
            if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
            if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
         }
      }
      *out_min_index = min_ui;
      *out_max_index = max_ui;
      break;
   }
   case 2: {
      const unsigned short *us_indices = (const unsigned short*)indices;
      unsigned max_us = 0;
      unsigned min_us = ~0U;
      if (info->primitive_restart) {
         for (i = 0; i < info->count; i++) {
            if (us_indices[i] != restart_index) {
               if (us_indices[i] > max_us) max_us = us_indices[i];
               if (us_indices[i] < min_us) min_us = us_indices[i];
            }
         }
      }
      else {
         for (i = 0; i < info->count; i++) {
            if (us_indices[i] > max_us) max_us = us_indices[i];
            if (us_indices[i] < min_us) min_us = us_indices[i];
         }
      }
      *out_min_index = min_us;
      *out_max_index = max_us;
      break;
   }
   case 1: {
      const unsigned char *ub_indices = (const unsigned char*)indices;
      unsigned max_ub = 0;
      unsigned min_ub = ~0U;
      if (info->primitive_restart) {
         for (i = 0; i < info->count; i++) {
            if (ub_indices[i] != restart_index) {
               if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
               if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
            }
         }
      }
      else {
         for (i = 0; i < info->count; i++) {
            if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
            if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
         }
      }
      *out_min_index = min_ub;
      *out_max_index = max_ub;
      break;
   }
   default:
      assert(0);
      *out_min_index = 0;
      *out_max_index = 0;
   }

   if (transfer) {
      pipe_buffer_unmap(pipe, transfer);
   }
}

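/* Adjust the draw state for incompatible vertex layouts and user buffers.
 * A sketch of how a driver's draw_vbo hook might use this module
 * ("r_context" and "r_draw_vbo" are hypothetical driver names):
 *
 *    static void r_draw_vbo(struct pipe_context *pipe,
 *                           const struct pipe_draw_info *dinfo)
 *    {
 *       struct r_context *ctx = (struct r_context *)pipe;
 *       struct pipe_draw_info info = *dinfo;
 *
 *       if (u_vbuf_draw_begin(ctx->vbuf, &info) & U_VBUF_BUFFERS_UPDATED) {
 *          // Re-emit vertex buffer state from ctx->vbuf->real_vertex_buffer.
 *       }
 *       // Emit the draw using the possibly rewritten info.
 *       u_vbuf_draw_end(ctx->vbuf);
 *    }
 *
 * Note that u_vbuf_draw_begin may rewrite info, e.g. turning an indexed
 * draw into a non-indexed one when unrolling indices. */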
enum u_vbuf_return_flags
u_vbuf_draw_begin(struct u_vbuf *mgrb,
                  struct pipe_draw_info *info)
{
   struct u_vbuf_priv *mgr = (struct u_vbuf_priv*)mgrb;
   int start_vertex, min_index;
   unsigned num_vertices;
   bool unroll_indices = false;

   if (!mgr->incompatible_vb_layout &&
       !mgr->ve->incompatible_layout &&
       !mgr->any_user_vbs) {
      return 0;
   }

   if (info->indexed) {
      int max_index;
      bool index_bounds_valid = false;

      if (info->max_index != ~0) {
         min_index = info->min_index;
         max_index = info->max_index;
         index_bounds_valid = true;
      } else if (u_vbuf_need_minmax_index(mgr)) {
         u_vbuf_get_minmax_index(mgr->pipe, &mgr->b.index_buffer, info,
                                 &min_index, &max_index);
         index_bounds_valid = true;
      }

      /* If the index bounds are valid, it means some upload or translation
       * of per-vertex attribs will be performed. */
      if (index_bounds_valid) {
         assert(min_index <= max_index);

         start_vertex = min_index + info->index_bias;
         num_vertices = max_index + 1 - min_index;

         /* Primitive restart doesn't work when unrolling indices.
          * We would have to split this draw operation into several. */
         /* Use some heuristic to see if unrolling indices improves
          * performance. */
         if (!info->primitive_restart &&
             num_vertices > info->count*2 &&
             num_vertices-info->count > 32) {
            /*printf("num_vertices=%i count=%i\n", num_vertices, info->count);*/
            unroll_indices = true;
         }
      } else {
         /* Nothing to do for per-vertex attribs. */
         start_vertex = 0;
         num_vertices = 0;
         min_index = 0;
      }
   } else {
      start_vertex = info->start;
      num_vertices = info->count;
      min_index = 0;
   }

   /* Translate vertices with non-native layouts or formats. */
   if (unroll_indices ||
       mgr->incompatible_vb_layout ||
       mgr->ve->incompatible_layout) {
      /* XXX check the return value */
      u_vbuf_translate_begin(mgr, start_vertex, num_vertices,
                             info->start_instance, info->instance_count,
                             info->start, info->count, min_index,
                             unroll_indices);
   }

   /* Upload user buffers. */
   if (mgr->any_user_vbs) {
      u_vbuf_upload_buffers(mgr, start_vertex, num_vertices,
                            info->start_instance, info->instance_count);
   }

   /*
   if (unroll_indices) {
      printf("unrolling indices: start_vertex = %i, num_vertices = %i\n",
             start_vertex, num_vertices);
      util_dump_draw_info(stdout, info);
      printf("\n");
   }

   unsigned i;
   for (i = 0; i < mgr->b.nr_vertex_buffers; i++) {
      printf("input %i: ", i);
      util_dump_vertex_buffer(stdout, mgr->b.vertex_buffer+i);
      printf("\n");
   }
   for (i = 0; i < mgr->b.nr_real_vertex_buffers; i++) {
      printf("real %i: ", i);
      util_dump_vertex_buffer(stdout, mgr->b.real_vertex_buffer+i);
      printf("\n");
   }
   */

   if (unroll_indices) {
      info->indexed = FALSE;
      info->index_bias = 0;
      info->min_index = 0;
      info->max_index = info->count - 1;
      info->start = 0;
   }

   return U_VBUF_BUFFERS_UPDATED;
}

void u_vbuf_draw_end(struct u_vbuf *mgrb)
{
   struct u_vbuf_priv *mgr = (struct u_vbuf_priv*)mgrb;

   if (mgr->fallback_ve) {
      u_vbuf_translate_end(mgr);
   }
}