/**************************************************************************
 *
 * Copyright 2011 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "util/u_vbuf.h"

#include "util/u_dump.h"
#include "util/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#include "translate/translate.h"
#include "translate/translate_cache.h"
#include "cso_cache/cso_cache.h"
#include "cso_cache/cso_hash.h"

struct u_vbuf_elements {
   unsigned count;
   struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];

   unsigned src_format_size[PIPE_MAX_ATTRIBS];

   /* If (velem[i].src_format != native_format[i]), the vertex buffer
    * referenced by the vertex element cannot be used for rendering and
    * its vertex data must be translated to native_format[i]. */
   enum pipe_format native_format[PIPE_MAX_ATTRIBS];
   unsigned native_format_size[PIPE_MAX_ATTRIBS];

   /* This might mean two things:
    * - src_format != native_format, as discussed above.
    * - src_offset % 4 != 0 (if the caps don't allow such an offset). */
   uint32_t incompatible_elem_mask; /* each bit describes a corresp. attrib */
   /* Which buffers are referenced by at least one vertex element that is
    * incompatible with them. */
   uint32_t incompatible_vb_mask_any;
   /* Which buffers are referenced by incompatible vertex elements only. */
   uint32_t incompatible_vb_mask_all;
   /* Which buffers are referenced by at least one vertex element that is
    * compatible with them. */
   uint32_t compatible_vb_mask_any;
   /* Which buffers are referenced by compatible vertex elements only. */
   uint32_t compatible_vb_mask_all;

   /* Which buffers are referenced by at least one non-instanced vertex
    * element. */
   uint32_t noninstance_vb_mask_any;

   void *driver_cso;
};

enum {
   VB_VERTEX = 0,
   VB_INSTANCE = 1,
   VB_CONST = 2,
   VB_NUM = 3
};

struct u_vbuf {
   struct u_vbuf_caps caps;

   struct pipe_context *pipe;
   struct translate_cache *translate_cache;
   struct cso_cache *cso_cache;
   struct u_upload_mgr *uploader;

   /* This is what was set in set_vertex_buffers.
    * May contain user buffers. */
   struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
   unsigned nr_vertex_buffers;

   /* Saved vertex buffers. */
   struct pipe_vertex_buffer vertex_buffer_saved[PIPE_MAX_ATTRIBS];
   unsigned nr_vertex_buffers_saved;

   /* Vertex buffers for the driver.
    * There are no user buffers. */
   struct pipe_vertex_buffer real_vertex_buffer[PIPE_MAX_ATTRIBS];
   unsigned nr_real_vertex_buffers;
   boolean vertex_buffers_dirty;

   /* The index buffer. */
   struct pipe_index_buffer index_buffer;

   /* Vertex elements. */
   struct u_vbuf_elements *ve, *ve_saved;

   /* Vertex elements used for the translate fallback. */
   struct pipe_vertex_element fallback_velems[PIPE_MAX_ATTRIBS];
   /* TRUE if the translate fallback vertex element state is currently
    * bound and therefore used for rendering too. */
   boolean using_translate;
   /* The vertex buffer slots where translated vertices are stored. */
   unsigned fallback_vbs[VB_NUM];

   /* Which buffer is a user buffer. */
   uint32_t user_vb_mask; /* each bit describes a corresp. buffer */
   /* Which buffer is incompatible (unaligned). */
   uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */
   /* Which buffer has a non-zero stride. */
   uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer */
};

static void *
u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
                              const struct pipe_vertex_element *attribs);
static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso);

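/* Query the format and alignment capabilities that determine which vertex
 * layouts the driver can consume directly and which ones u_vbuf has to
 * translate or upload. */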
void u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps)
{
   caps->format_fixed32 =
      screen->is_format_supported(screen, PIPE_FORMAT_R32_FIXED, PIPE_BUFFER,
                                  0, PIPE_BIND_VERTEX_BUFFER);

   caps->format_float16 =
      screen->is_format_supported(screen, PIPE_FORMAT_R16_FLOAT, PIPE_BUFFER,
                                  0, PIPE_BIND_VERTEX_BUFFER);

   caps->format_float64 =
      screen->is_format_supported(screen, PIPE_FORMAT_R64_FLOAT, PIPE_BUFFER,
                                  0, PIPE_BIND_VERTEX_BUFFER);

   caps->format_norm32 =
      screen->is_format_supported(screen, PIPE_FORMAT_R32_UNORM, PIPE_BUFFER,
                                  0, PIPE_BIND_VERTEX_BUFFER) &&
      screen->is_format_supported(screen, PIPE_FORMAT_R32_SNORM, PIPE_BUFFER,
                                  0, PIPE_BIND_VERTEX_BUFFER);

   caps->format_scaled32 =
      screen->is_format_supported(screen, PIPE_FORMAT_R32_USCALED, PIPE_BUFFER,
                                  0, PIPE_BIND_VERTEX_BUFFER) &&
      screen->is_format_supported(screen, PIPE_FORMAT_R32_SSCALED, PIPE_BUFFER,
                                  0, PIPE_BIND_VERTEX_BUFFER);

   caps->buffer_offset_unaligned =
      !screen->get_param(screen,
                         PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY);

   caps->buffer_stride_unaligned =
      !screen->get_param(screen,
                         PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY);

   caps->velem_src_offset_unaligned =
      !screen->get_param(screen,
                         PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY);

   caps->user_vertex_buffers =
      screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS);
}

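/* Create a vertex buffer manager for the given context.
 *
 * Typical driver usage is a sketch along these lines (where the caps and
 * the manager are stored is up to the driver, not part of this file):
 *
 *    struct u_vbuf_caps caps;
 *    u_vbuf_get_caps(pipe->screen, &caps);
 *    mgr = u_vbuf_create(pipe, &caps);
 *    ...
 *    u_vbuf_set_vertex_buffers(mgr, count, buffers);
 *    u_vbuf_draw_vbo(mgr, &info);
 */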
struct u_vbuf *
u_vbuf_create(struct pipe_context *pipe,
              struct u_vbuf_caps *caps)
{
   struct u_vbuf *mgr = CALLOC_STRUCT(u_vbuf);

   mgr->caps = *caps;
   mgr->pipe = pipe;
   mgr->cso_cache = cso_cache_create();
   mgr->translate_cache = translate_cache_create();
   memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs));

   mgr->uploader = u_upload_create(pipe, 1024 * 1024, 4,
                                   PIPE_BIND_VERTEX_BUFFER);

   return mgr;
}

/* u_vbuf uses its own caching for vertex elements, because it needs to keep
 * its own preprocessed state per vertex element CSO. */
static struct u_vbuf_elements *
u_vbuf_set_vertex_elements_internal(struct u_vbuf *mgr, unsigned count,
                                    const struct pipe_vertex_element *states)
{
   struct pipe_context *pipe = mgr->pipe;
   unsigned key_size, hash_key;
   struct cso_hash_iter iter;
   struct u_vbuf_elements *ve;
   struct cso_velems_state velems_state;

   /* need to include the count into the stored state data too. */
   key_size = sizeof(struct pipe_vertex_element) * count + sizeof(unsigned);
   velems_state.count = count;
   memcpy(velems_state.velems, states,
          sizeof(struct pipe_vertex_element) * count);
   hash_key = cso_construct_key((void*)&velems_state, key_size);
   iter = cso_find_state_template(mgr->cso_cache, hash_key, CSO_VELEMENTS,
                                  (void*)&velems_state, key_size);

   if (cso_hash_iter_is_null(iter)) {
      struct cso_velements *cso = MALLOC_STRUCT(cso_velements);
      memcpy(&cso->state, &velems_state, key_size);
      cso->data = u_vbuf_create_vertex_elements(mgr, count, states);
      cso->delete_state = (cso_state_callback)u_vbuf_delete_vertex_elements;
      cso->context = (void*)mgr;

      iter = cso_insert_state(mgr->cso_cache, hash_key, CSO_VELEMENTS, cso);
      ve = cso->data;
   } else {
      ve = ((struct cso_velements *)cso_hash_iter_data(iter))->data;
   }

   assert(ve);

   if (ve != mgr->ve)
      pipe->bind_vertex_elements_state(pipe, ve->driver_cso);
   return ve;
}

void u_vbuf_set_vertex_elements(struct u_vbuf *mgr, unsigned count,
                                const struct pipe_vertex_element *states)
{
   mgr->ve = u_vbuf_set_vertex_elements_internal(mgr, count, states);
}

void u_vbuf_destroy(struct u_vbuf *mgr)
{
   unsigned i;

   mgr->pipe->set_vertex_buffers(mgr->pipe, 0, NULL);

   for (i = 0; i < mgr->nr_vertex_buffers; i++) {
      pipe_resource_reference(&mgr->vertex_buffer[i].buffer, NULL);
   }
   for (i = 0; i < mgr->nr_real_vertex_buffers; i++) {
      pipe_resource_reference(&mgr->real_vertex_buffer[i].buffer, NULL);
   }

   translate_cache_destroy(mgr->translate_cache);
   u_upload_destroy(mgr->uploader);
   cso_cache_delete(mgr->cso_cache);
   FREE(mgr);
}

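/* Run the translate module over the vertex buffers selected by vb_mask and
 * write the converted vertices into an upload buffer, which becomes the new
 * contents of real_vertex_buffer[out_vb]. If unroll_indices is set, the
 * index buffer is read and one output vertex is emitted per index instead. */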
static enum pipe_error
u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
                         unsigned vb_mask, unsigned out_vb,
                         int start_vertex, unsigned num_vertices,
                         int start_index, unsigned num_indices, int min_index,
                         boolean unroll_indices)
{
   struct translate *tr;
   struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0};
   struct pipe_resource *out_buffer = NULL;
   uint8_t *out_map;
   unsigned out_offset, i;
   enum pipe_error err;

   /* Get a translate object. */
   tr = translate_cache_find(mgr->translate_cache, key);

   /* Map buffers we want to translate. */
   for (i = 0; i < mgr->nr_vertex_buffers; i++) {
      if (vb_mask & (1 << i)) {
         struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[i];
         unsigned offset = vb->buffer_offset + vb->stride * start_vertex;
         uint8_t *map;

         if (vb->user_buffer) {
            map = (uint8_t*)vb->user_buffer + offset;
         } else {
            unsigned size = vb->stride ? num_vertices * vb->stride
                                       : sizeof(double)*4;

            if (offset + size > vb->buffer->width0) {
               size = vb->buffer->width0 - offset;
            }

            map = pipe_buffer_map_range(mgr->pipe, vb->buffer, offset, size,
                                        PIPE_TRANSFER_READ, &vb_transfer[i]);
         }

         /* Subtract min_index so that indexing with the index buffer works. */
         if (unroll_indices) {
            map -= vb->stride * min_index;
         }

         tr->set_buffer(tr, i, map, vb->stride, ~0);
      }
   }

   /* Translate. */
   if (unroll_indices) {
      struct pipe_index_buffer *ib = &mgr->index_buffer;
      struct pipe_transfer *transfer = NULL;
      unsigned offset = ib->offset + start_index * ib->index_size;
      uint8_t *map;

      assert((ib->buffer || ib->user_buffer) && ib->index_size);

      /* Create and map the output buffer. */
      err = u_upload_alloc(mgr->uploader, 0,
                           key->output_stride * num_indices,
                           &out_offset, &out_buffer,
                           (void**)&out_map);
      if (err != PIPE_OK)
         return err;

      if (ib->user_buffer) {
         map = (uint8_t*)ib->user_buffer + offset;
      } else {
         map = pipe_buffer_map_range(mgr->pipe, ib->buffer, offset,
                                     num_indices * ib->index_size,
                                     PIPE_TRANSFER_READ, &transfer);
      }

      switch (ib->index_size) {
      case 4:
         tr->run_elts(tr, (unsigned*)map, num_indices, 0, out_map);
         break;
      case 2:
         tr->run_elts16(tr, (uint16_t*)map, num_indices, 0, out_map);
         break;
      case 1:
         tr->run_elts8(tr, map, num_indices, 0, out_map);
         break;
      }

      if (transfer) {
         pipe_buffer_unmap(mgr->pipe, transfer);
      }
   } else {
      /* Create and map the output buffer. */
      err = u_upload_alloc(mgr->uploader,
                           key->output_stride * start_vertex,
                           key->output_stride * num_vertices,
                           &out_offset, &out_buffer,
                           (void**)&out_map);
      if (err != PIPE_OK)
         return err;

      out_offset -= key->output_stride * start_vertex;

      tr->run(tr, 0, num_vertices, 0, out_map);
   }

   /* Unmap all buffers. */
   for (i = 0; i < mgr->nr_vertex_buffers; i++) {
      if (vb_transfer[i]) {
         pipe_buffer_unmap(mgr->pipe, vb_transfer[i]);
      }
   }

   /* Setup the new vertex buffer. */
   mgr->real_vertex_buffer[out_vb].buffer_offset = out_offset;
   mgr->real_vertex_buffer[out_vb].stride = key->output_stride;

   /* Move the buffer reference. */
   pipe_resource_reference(
      &mgr->real_vertex_buffer[out_vb].buffer, NULL);
   mgr->real_vertex_buffer[out_vb].buffer = out_buffer;

   return PIPE_OK;
}

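/* Pick an unused vertex buffer slot for each attrib category that needs the
 * translate fallback. Slots holding unused or incompatible buffers can be
 * taken over; nr_real_vertex_buffers grows if a slot past the end is used. */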
static boolean
u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,
                                    unsigned mask[VB_NUM])
{
   unsigned type;
   unsigned fallback_vbs[VB_NUM];
   /* Set the bit for each buffer which is incompatible, or isn't set. */
   uint32_t unused_vb_mask =
      mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask |
      ~((1 << mgr->nr_vertex_buffers) - 1);

   memset(fallback_vbs, ~0, sizeof(fallback_vbs));

   /* Find free slots for each type if needed. */
   for (type = 0; type < VB_NUM; type++) {
      if (mask[type]) {
         uint32_t index;

         if (!unused_vb_mask) {
            /* fail, reset the number to its original value */
            mgr->nr_real_vertex_buffers = mgr->nr_vertex_buffers;
            return FALSE;
         }

         index = ffs(unused_vb_mask) - 1;
         fallback_vbs[type] = index;
         /* Claim the slot, so that the next type gets a different one. */
         unused_vb_mask &= ~(1 << index);
         if (index >= mgr->nr_real_vertex_buffers) {
            mgr->nr_real_vertex_buffers = index + 1;
         }
         /*printf("found slot=%i for type=%i\n", index, type);*/
      }
   }

   memcpy(mgr->fallback_vbs, fallback_vbs, sizeof(fallback_vbs));
   return TRUE;
}

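/* Set up the translate fallback: decide which attribs need translation
 * (per-vertex, per-instance, or constant), build a translate key per
 * category, convert the affected buffers, and bind temporary vertex
 * elements that read from the translated buffers. */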
static boolean
u_vbuf_translate_begin(struct u_vbuf *mgr,
                       int start_vertex, unsigned num_vertices,
                       int start_instance, unsigned num_instances,
                       int start_index, unsigned num_indices, int min_index,
                       boolean unroll_indices)
{
   unsigned mask[VB_NUM] = {0};
   struct translate_key key[VB_NUM];
   unsigned elem_index[VB_NUM][PIPE_MAX_ATTRIBS]; /* ... into key.element[] */
   unsigned i, type;

   int start[VB_NUM] = {
      start_vertex,     /* VERTEX */
      start_instance,   /* INSTANCE */
      0                 /* CONST */
   };

   unsigned num[VB_NUM] = {
      num_vertices,     /* VERTEX */
      num_instances,    /* INSTANCE */
      1                 /* CONST */
   };

   memset(key, 0, sizeof(key));
   memset(elem_index, ~0, sizeof(elem_index));

   /* See if there are vertex attribs of each type to translate and
    * which ones. */
   for (i = 0; i < mgr->ve->count; i++) {
      unsigned vb_index = mgr->ve->ve[i].vertex_buffer_index;

      if (!mgr->vertex_buffer[vb_index].stride) {
         if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(mgr->incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_CONST] |= 1 << vb_index;
      } else if (mgr->ve->ve[i].instance_divisor) {
         if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(mgr->incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_INSTANCE] |= 1 << vb_index;
      } else {
         if (!unroll_indices &&
             !(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(mgr->incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_VERTEX] |= 1 << vb_index;
      }
   }

   assert(mask[VB_VERTEX] || mask[VB_INSTANCE] || mask[VB_CONST]);

   /* Find free vertex buffer slots. */
   if (!u_vbuf_translate_find_free_vb_slots(mgr, mask)) {
      return FALSE;
   }

   /* Initialize the translate keys. */
   for (i = 0; i < mgr->ve->count; i++) {
      struct translate_key *k;
      struct translate_element *te;
      unsigned bit, vb_index = mgr->ve->ve[i].vertex_buffer_index;
      bit = 1 << vb_index;

      if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
          !(mgr->incompatible_vb_mask & (1 << vb_index)) &&
          (!unroll_indices || !(mask[VB_VERTEX] & bit))) {
         continue;
      }

      /* Set type to what we will translate.
       * Whether vertex, instance, or constant attribs. */
      for (type = 0; type < VB_NUM; type++) {
         if (mask[type] & bit) {
            break;
         }
      }
      assert(type < VB_NUM);
      assert(translate_is_output_format_supported(mgr->ve->native_format[i]));
      /*printf("velem=%i type=%i\n", i, type);*/

      /* Add the vertex element. */
      k = &key[type];
      elem_index[type][i] = k->nr_elements;

      te = &k->element[k->nr_elements];
      te->type = TRANSLATE_ELEMENT_NORMAL;
      te->instance_divisor = 0;
      te->input_buffer = vb_index;
      te->input_format = mgr->ve->ve[i].src_format;
      te->input_offset = mgr->ve->ve[i].src_offset;
      te->output_format = mgr->ve->native_format[i];
      te->output_offset = k->output_stride;

      k->output_stride += mgr->ve->native_format_size[i];
      k->nr_elements++;
   }

   /* Translate buffers. */
   for (type = 0; type < VB_NUM; type++) {
      if (key[type].nr_elements) {
         enum pipe_error err;
         err = u_vbuf_translate_buffers(mgr, &key[type], mask[type],
                                        mgr->fallback_vbs[type],
                                        start[type], num[type],
                                        start_index, num_indices, min_index,
                                        unroll_indices && type == VB_VERTEX);
         if (err != PIPE_OK)
            return FALSE;

         /* Fixup the stride for constant attribs. */
         if (type == VB_CONST) {
            mgr->real_vertex_buffer[mgr->fallback_vbs[VB_CONST]].stride = 0;
         }
      }
   }

   /* Setup new vertex elements. */
   for (i = 0; i < mgr->ve->count; i++) {
      for (type = 0; type < VB_NUM; type++) {
         if (elem_index[type][i] < key[type].nr_elements) {
            struct translate_element *te = &key[type].element[elem_index[type][i]];
            mgr->fallback_velems[i].instance_divisor = mgr->ve->ve[i].instance_divisor;
            mgr->fallback_velems[i].src_format = te->output_format;
            mgr->fallback_velems[i].src_offset = te->output_offset;
            mgr->fallback_velems[i].vertex_buffer_index = mgr->fallback_vbs[type];

            /* elem_index[type][i] can only be set for one type. */
            assert(type > VB_INSTANCE || elem_index[type+1][i] == ~0);
            assert(type > VB_VERTEX   || elem_index[type+2][i] == ~0);
            break;
         }
      }
      /* No translating, just copy the original vertex element over. */
      if (type == VB_NUM) {
         memcpy(&mgr->fallback_velems[i], &mgr->ve->ve[i],
                sizeof(struct pipe_vertex_element));
      }
   }

   u_vbuf_set_vertex_elements_internal(mgr, mgr->ve->count,
                                       mgr->fallback_velems);
   mgr->using_translate = TRUE;
   return TRUE;
}

static void u_vbuf_translate_end(struct u_vbuf *mgr)
{
   unsigned i;

   /* Restore vertex elements. */
   mgr->pipe->bind_vertex_elements_state(mgr->pipe, mgr->ve->driver_cso);
   mgr->using_translate = FALSE;

   /* Unreference the now-unused VBOs. */
   for (i = 0; i < VB_NUM; i++) {
      unsigned vb = mgr->fallback_vbs[i];
      if (vb != ~0) {
         pipe_resource_reference(&mgr->real_vertex_buffer[vb].buffer, NULL);
         mgr->fallback_vbs[i] = ~0;
      }
   }
   mgr->nr_real_vertex_buffers = mgr->nr_vertex_buffers;
}

#define FORMAT_REPLACE(what, withwhat) \
    case PIPE_FORMAT_##what: format = PIPE_FORMAT_##withwhat; break

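/* Preprocess a vertex element CSO: pick a native format for each attrib,
 * compute the compatibility masks used by the draw path, and create the
 * driver CSO with the (possibly replaced) formats. */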
static void *
u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
                              const struct pipe_vertex_element *attribs)
{
   struct pipe_context *pipe = mgr->pipe;
   unsigned i;
   struct pipe_vertex_element driver_attribs[PIPE_MAX_ATTRIBS];
   struct u_vbuf_elements *ve = CALLOC_STRUCT(u_vbuf_elements);
   uint32_t used_buffers = 0;

   ve->count = count;

   memcpy(ve->ve, attribs, sizeof(struct pipe_vertex_element) * count);
   memcpy(driver_attribs, attribs, sizeof(struct pipe_vertex_element) * count);

   /* Set the best native format in case the original format is not
    * supported. */
   for (i = 0; i < count; i++) {
      enum pipe_format format = ve->ve[i].src_format;

      ve->src_format_size[i] = util_format_get_blocksize(format);

      used_buffers |= 1 << ve->ve[i].vertex_buffer_index;

      if (!ve->ve[i].instance_divisor) {
         ve->noninstance_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
      }

      /* Choose a native format.
       * For now we don't care about the alignment, that's going to
       * be sorted out later. */
      if (!mgr->caps.format_fixed32) {
         switch (format) {
            FORMAT_REPLACE(R32_FIXED,           R32_FLOAT);
            FORMAT_REPLACE(R32G32_FIXED,        R32G32_FLOAT);
            FORMAT_REPLACE(R32G32B32_FIXED,     R32G32B32_FLOAT);
            FORMAT_REPLACE(R32G32B32A32_FIXED,  R32G32B32A32_FLOAT);
            default:;
         }
      }
      if (!mgr->caps.format_float16) {
         switch (format) {
            FORMAT_REPLACE(R16_FLOAT,           R32_FLOAT);
            FORMAT_REPLACE(R16G16_FLOAT,        R32G32_FLOAT);
            FORMAT_REPLACE(R16G16B16_FLOAT,     R32G32B32_FLOAT);
            FORMAT_REPLACE(R16G16B16A16_FLOAT,  R32G32B32A32_FLOAT);
            default:;
         }
      }
      if (!mgr->caps.format_float64) {
         switch (format) {
            FORMAT_REPLACE(R64_FLOAT,           R32_FLOAT);
            FORMAT_REPLACE(R64G64_FLOAT,        R32G32_FLOAT);
            FORMAT_REPLACE(R64G64B64_FLOAT,     R32G32B32_FLOAT);
            FORMAT_REPLACE(R64G64B64A64_FLOAT,  R32G32B32A32_FLOAT);
            default:;
         }
      }
      if (!mgr->caps.format_norm32) {
         switch (format) {
            FORMAT_REPLACE(R32_UNORM,           R32_FLOAT);
            FORMAT_REPLACE(R32G32_UNORM,        R32G32_FLOAT);
            FORMAT_REPLACE(R32G32B32_UNORM,     R32G32B32_FLOAT);
            FORMAT_REPLACE(R32G32B32A32_UNORM,  R32G32B32A32_FLOAT);
            FORMAT_REPLACE(R32_SNORM,           R32_FLOAT);
            FORMAT_REPLACE(R32G32_SNORM,        R32G32_FLOAT);
            FORMAT_REPLACE(R32G32B32_SNORM,     R32G32B32_FLOAT);
            FORMAT_REPLACE(R32G32B32A32_SNORM,  R32G32B32A32_FLOAT);
            default:;
         }
      }
      if (!mgr->caps.format_scaled32) {
         switch (format) {
            FORMAT_REPLACE(R32_USCALED,         R32_FLOAT);
            FORMAT_REPLACE(R32G32_USCALED,      R32G32_FLOAT);
            FORMAT_REPLACE(R32G32B32_USCALED,   R32G32B32_FLOAT);
            FORMAT_REPLACE(R32G32B32A32_USCALED, R32G32B32A32_FLOAT);
            FORMAT_REPLACE(R32_SSCALED,         R32_FLOAT);
            FORMAT_REPLACE(R32G32_SSCALED,      R32G32_FLOAT);
            FORMAT_REPLACE(R32G32B32_SSCALED,   R32G32B32_FLOAT);
            FORMAT_REPLACE(R32G32B32A32_SSCALED, R32G32B32A32_FLOAT);
            default:;
         }
      }

      driver_attribs[i].src_format = format;
      ve->native_format[i] = format;
      ve->native_format_size[i] =
            util_format_get_blocksize(ve->native_format[i]);

      if (ve->ve[i].src_format != format ||
          (!mgr->caps.velem_src_offset_unaligned &&
           ve->ve[i].src_offset % 4 != 0)) {
         ve->incompatible_elem_mask |= 1 << i;
         ve->incompatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
      } else {
         ve->compatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
      }
   }

   ve->compatible_vb_mask_all = ~ve->incompatible_vb_mask_any & used_buffers;
   ve->incompatible_vb_mask_all = ~ve->compatible_vb_mask_any & used_buffers;

   /* Align the formats to the size of DWORD if needed. */
   if (!mgr->caps.velem_src_offset_unaligned) {
      for (i = 0; i < count; i++) {
         ve->native_format_size[i] = align(ve->native_format_size[i], 4);
      }
   }

   ve->driver_cso =
      pipe->create_vertex_elements_state(pipe, count, driver_attribs);
   return ve;
}

static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso)
{
   struct pipe_context *pipe = mgr->pipe;
   struct u_vbuf_elements *ve = cso;

   pipe->delete_vertex_elements_state(pipe, ve->driver_cso);
   FREE(ve);
}

void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, unsigned count,
                               const struct pipe_vertex_buffer *bufs)
{
   unsigned i;

   mgr->user_vb_mask = 0;
   mgr->incompatible_vb_mask = 0;
   mgr->nonzero_stride_vb_mask = 0;

   for (i = 0; i < count; i++) {
      const struct pipe_vertex_buffer *vb = &bufs[i];
      struct pipe_vertex_buffer *orig_vb = &mgr->vertex_buffer[i];
      struct pipe_vertex_buffer *real_vb = &mgr->real_vertex_buffer[i];

      pipe_resource_reference(&orig_vb->buffer, vb->buffer);
      orig_vb->user_buffer = vb->user_buffer;

      real_vb->buffer_offset = orig_vb->buffer_offset = vb->buffer_offset;
      real_vb->stride = orig_vb->stride = vb->stride;
      real_vb->user_buffer = NULL;

      if (vb->stride) {
         mgr->nonzero_stride_vb_mask |= 1 << i;
      }

      if (!vb->buffer && !vb->user_buffer) {
         pipe_resource_reference(&real_vb->buffer, NULL);
         continue;
      }

      if ((!mgr->caps.buffer_offset_unaligned && vb->buffer_offset % 4 != 0) ||
          (!mgr->caps.buffer_stride_unaligned && vb->stride % 4 != 0)) {
         mgr->incompatible_vb_mask |= 1 << i;
         pipe_resource_reference(&real_vb->buffer, NULL);
         continue;
      }

      if (!mgr->caps.user_vertex_buffers && vb->user_buffer) {
         mgr->user_vb_mask |= 1 << i;
         pipe_resource_reference(&real_vb->buffer, NULL);
         continue;
      }

      pipe_resource_reference(&real_vb->buffer, vb->buffer);
      real_vb->user_buffer = vb->user_buffer;
   }

   for (i = count; i < mgr->nr_vertex_buffers; i++) {
      pipe_resource_reference(&mgr->vertex_buffer[i].buffer, NULL);
   }
   for (i = count; i < mgr->nr_real_vertex_buffers; i++) {
      pipe_resource_reference(&mgr->real_vertex_buffer[i].buffer, NULL);
   }

   mgr->nr_vertex_buffers = count;
   mgr->nr_real_vertex_buffers = count;
   mgr->vertex_buffers_dirty = TRUE;
}

void u_vbuf_set_index_buffer(struct u_vbuf *mgr,
                             const struct pipe_index_buffer *ib)
{
   struct pipe_context *pipe = mgr->pipe;

   if (ib) {
      assert(ib->offset % ib->index_size == 0);
      pipe_resource_reference(&mgr->index_buffer.buffer, ib->buffer);
      memcpy(&mgr->index_buffer, ib, sizeof(*ib));
   } else {
      pipe_resource_reference(&mgr->index_buffer.buffer, NULL);
   }

   pipe->set_index_buffer(pipe, ib);
}

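/* Compute the byte range each user buffer contributes to the draw (across
 * per-vertex, per-instance, and constant attribs) and upload exactly that
 * range into a hardware buffer through the upload manager. */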
static enum pipe_error
u_vbuf_upload_buffers(struct u_vbuf *mgr,
                      int start_vertex, unsigned num_vertices,
                      int start_instance, unsigned num_instances)
{
   unsigned i;
   unsigned nr_velems = mgr->ve->count;
   unsigned nr_vbufs = mgr->nr_vertex_buffers;
   struct pipe_vertex_element *velems =
         mgr->using_translate ? mgr->fallback_velems : mgr->ve->ve;
   unsigned start_offset[PIPE_MAX_ATTRIBS];
   unsigned end_offset[PIPE_MAX_ATTRIBS] = {0};

   /* Determine how much data needs to be uploaded. */
   for (i = 0; i < nr_velems; i++) {
      struct pipe_vertex_element *velem = &velems[i];
      unsigned index = velem->vertex_buffer_index;
      struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[index];
      unsigned instance_div, first, size;

      /* Skip the buffers generated by translate. */
      if (index == mgr->fallback_vbs[VB_VERTEX] ||
          index == mgr->fallback_vbs[VB_INSTANCE] ||
          index == mgr->fallback_vbs[VB_CONST]) {
         continue;
      }

      if (!vb->user_buffer) {
         continue;
      }

      instance_div = velem->instance_divisor;
      first = vb->buffer_offset + velem->src_offset;

      if (!vb->stride) {
         /* Constant attrib. */
         size = mgr->ve->src_format_size[i];
      } else if (instance_div) {
         /* Per-instance attrib. */
         unsigned count = (num_instances + instance_div - 1) / instance_div;
         first += vb->stride * start_instance;
         size = vb->stride * (count - 1) + mgr->ve->src_format_size[i];
      } else {
         /* Per-vertex attrib. */
         first += vb->stride * start_vertex;
         size = vb->stride * (num_vertices - 1) + mgr->ve->src_format_size[i];
      }

      /* Update offsets. */
      if (!end_offset[index]) {
         start_offset[index] = first;
         end_offset[index] = first + size;
      } else {
         if (first < start_offset[index])
            start_offset[index] = first;
         if (first + size > end_offset[index])
            end_offset[index] = first + size;
      }
   }

   /* Upload buffers. */
   for (i = 0; i < nr_vbufs; i++) {
      unsigned start, end = end_offset[i];
      struct pipe_vertex_buffer *real_vb;
      const uint8_t *ptr;
      enum pipe_error err;

      if (!end) {
         continue;
      }

      start = start_offset[i];
      assert(start < end);

      real_vb = &mgr->real_vertex_buffer[i];
      ptr = mgr->vertex_buffer[i].user_buffer;

      err = u_upload_data(mgr->uploader, start, end - start, ptr + start,
                          &real_vb->buffer_offset, &real_vb->buffer);
      if (err != PIPE_OK)
         return err;

      real_vb->buffer_offset -= start;
   }

   return PIPE_OK;
}

static boolean u_vbuf_need_minmax_index(struct u_vbuf *mgr)
{
   /* See if there are any per-vertex attribs which will be uploaded or
    * translated. Use bitmasks to get the info instead of looping over vertex
    * elements. */
   return ((mgr->user_vb_mask | mgr->incompatible_vb_mask |
            mgr->ve->incompatible_vb_mask_any) &
           mgr->ve->noninstance_vb_mask_any & mgr->nonzero_stride_vb_mask) != 0;
}

static boolean u_vbuf_mapping_vertex_buffer_blocks(struct u_vbuf *mgr)
{
   /* Return TRUE if there are hardware buffers which need neither an upload
    * nor a translation, i.e. buffers the GPU may be using right now, so that
    * mapping them for index unrolling could stall.
    *
    * We could query whether each buffer is busy, but that would
    * be way more costly than this. */
   return (~mgr->user_vb_mask & ~mgr->incompatible_vb_mask &
           mgr->ve->compatible_vb_mask_all & mgr->ve->noninstance_vb_mask_any &
           mgr->nonzero_stride_vb_mask) != 0;
}

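/* Scan the index buffer to find the range of vertices a draw references.
 * When primitive restart is enabled, the restart index is excluded from
 * the min/max computation. */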
static void u_vbuf_get_minmax_index(struct pipe_context *pipe,
                                    struct pipe_index_buffer *ib,
                                    const struct pipe_draw_info *info,
                                    int *out_min_index,
                                    int *out_max_index)
{
   struct pipe_transfer *transfer = NULL;
   const void *indices;
   unsigned i;
   unsigned restart_index = info->restart_index;

   if (ib->user_buffer) {
      indices = (uint8_t*)ib->user_buffer +
                ib->offset + info->start * ib->index_size;
   } else {
      indices = pipe_buffer_map_range(pipe, ib->buffer,
                                      ib->offset + info->start * ib->index_size,
                                      info->count * ib->index_size,
                                      PIPE_TRANSFER_READ, &transfer);
   }

   switch (ib->index_size) {
   case 4: {
      const unsigned *ui_indices = (const unsigned*)indices;
      unsigned max_ui = 0;
      unsigned min_ui = ~0U;
      if (info->primitive_restart) {
         for (i = 0; i < info->count; i++) {
            if (ui_indices[i] != restart_index) {
               if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
               if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
            }
         }
      } else {
         for (i = 0; i < info->count; i++) {
            if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
            if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
         }
      }
      *out_min_index = min_ui;
      *out_max_index = max_ui;
      break;
   }
   case 2: {
      const unsigned short *us_indices = (const unsigned short*)indices;
      unsigned max_us = 0;
      unsigned min_us = ~0U;
      if (info->primitive_restart) {
         for (i = 0; i < info->count; i++) {
            if (us_indices[i] != restart_index) {
               if (us_indices[i] > max_us) max_us = us_indices[i];
               if (us_indices[i] < min_us) min_us = us_indices[i];
            }
         }
      } else {
         for (i = 0; i < info->count; i++) {
            if (us_indices[i] > max_us) max_us = us_indices[i];
            if (us_indices[i] < min_us) min_us = us_indices[i];
         }
      }
      *out_min_index = min_us;
      *out_max_index = max_us;
      break;
   }
   case 1: {
      const unsigned char *ub_indices = (const unsigned char*)indices;
      unsigned max_ub = 0;
      unsigned min_ub = ~0U;
      if (info->primitive_restart) {
         for (i = 0; i < info->count; i++) {
            if (ub_indices[i] != restart_index) {
               if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
               if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
            }
         }
      } else {
         for (i = 0; i < info->count; i++) {
            if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
            if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
         }
      }
      *out_min_index = min_ub;
      *out_max_index = max_ub;
      break;
   }
   default:
      assert(0);
      *out_min_index = 0;
      *out_max_index = 0;
   }

   if (transfer) {
      pipe_buffer_unmap(pipe, transfer);
   }
}

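/* The entry point for drawing. The fast path passes the draw straight
 * through; otherwise vertices are translated and/or user buffers are
 * uploaded first, and indices may be unrolled into plain vertices. */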
void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)
{
   struct pipe_context *pipe = mgr->pipe;
   int start_vertex, min_index;
   unsigned num_vertices;
   boolean unroll_indices = FALSE;
   uint32_t user_vb_mask = mgr->user_vb_mask;

   /* Normal draw. No fallback and no user buffers. */
   if (!mgr->incompatible_vb_mask &&
       !mgr->ve->incompatible_elem_mask &&
       !user_vb_mask) {
      /* Set vertex buffers if needed. */
      if (mgr->vertex_buffers_dirty) {
         pipe->set_vertex_buffers(pipe, mgr->nr_real_vertex_buffers,
                                  mgr->real_vertex_buffer);
         mgr->vertex_buffers_dirty = FALSE;
      }

      pipe->draw_vbo(pipe, info);
      return;
   }

   if (info->indexed) {
      /* See if anything needs to be done for per-vertex attribs. */
      if (u_vbuf_need_minmax_index(mgr)) {
         int max_index;

         if (info->max_index != ~0) {
            min_index = info->min_index;
            max_index = info->max_index;
         } else {
            u_vbuf_get_minmax_index(mgr->pipe, &mgr->index_buffer, info,
                                    &min_index, &max_index);
         }

         assert(min_index <= max_index);

         start_vertex = min_index + info->index_bias;
         num_vertices = max_index + 1 - min_index;

         /* Primitive restart doesn't work when unrolling indices.
          * We would have to break this drawing operation into several ones. */
         /* Use some heuristic to see if unrolling indices improves
          * performance. */
         if (!info->primitive_restart &&
             num_vertices > info->count*2 &&
             num_vertices-info->count > 32 &&
             !u_vbuf_mapping_vertex_buffer_blocks(mgr)) {
            /*printf("num_vertices=%i count=%i\n", num_vertices, info->count);*/
            unroll_indices = TRUE;
            user_vb_mask &= ~(mgr->nonzero_stride_vb_mask &
                              mgr->ve->noninstance_vb_mask_any);
         }
      } else {
         /* Nothing to do for per-vertex attribs. */
         start_vertex = 0;
         num_vertices = 0;
         min_index = 0;
      }
   } else {
      start_vertex = info->start;
      num_vertices = info->count;
      min_index = 0;
   }

   /* Translate vertices with non-native layouts or formats. */
   if (unroll_indices ||
       mgr->incompatible_vb_mask ||
       mgr->ve->incompatible_elem_mask) {
      if (!u_vbuf_translate_begin(mgr, start_vertex, num_vertices,
                                  info->start_instance, info->instance_count,
                                  info->start, info->count, min_index,
                                  unroll_indices)) {
         debug_warn_once("u_vbuf_translate_begin() failed");
         return;
      }

      user_vb_mask &= ~(mgr->incompatible_vb_mask |
                        mgr->ve->incompatible_vb_mask_all);
   }

   /* Upload user buffers. */
   if (user_vb_mask) {
      if (u_vbuf_upload_buffers(mgr, start_vertex, num_vertices,
                                info->start_instance,
                                info->instance_count) != PIPE_OK) {
         debug_warn_once("u_vbuf_upload_buffers() failed");
         return;
      }
   }

   /*
   if (unroll_indices) {
      printf("unrolling indices: start_vertex = %i, num_vertices = %i\n",
             start_vertex, num_vertices);
      util_dump_draw_info(stdout, info);
      printf("\n");
   }

   unsigned i;
   for (i = 0; i < mgr->nr_vertex_buffers; i++) {
      printf("input %i: ", i);
      util_dump_vertex_buffer(stdout, mgr->vertex_buffer+i);
      printf("\n");
   }
   for (i = 0; i < mgr->nr_real_vertex_buffers; i++) {
      printf("real %i: ", i);
      util_dump_vertex_buffer(stdout, mgr->real_vertex_buffer+i);
      printf("\n");
   }
   */

   u_upload_unmap(mgr->uploader);
   pipe->set_vertex_buffers(pipe, mgr->nr_real_vertex_buffers,
                            mgr->real_vertex_buffer);

   if (unlikely(unroll_indices)) {
      struct pipe_draw_info new_info = *info;
      new_info.indexed = FALSE;
      new_info.index_bias = 0;
      new_info.min_index = 0;
      new_info.max_index = info->count - 1;
      new_info.start = 0;

      pipe->draw_vbo(pipe, &new_info);
   } else {
      pipe->draw_vbo(pipe, info);
   }

   if (mgr->using_translate) {
      u_vbuf_translate_end(mgr);
   }
   mgr->vertex_buffers_dirty = TRUE;
}

void u_vbuf_save_vertex_elements(struct u_vbuf *mgr)
{
   assert(!mgr->ve_saved);
   mgr->ve_saved = mgr->ve;
}

void u_vbuf_restore_vertex_elements(struct u_vbuf *mgr)
{
   if (mgr->ve != mgr->ve_saved) {
      struct pipe_context *pipe = mgr->pipe;

      mgr->ve = mgr->ve_saved;
      pipe->bind_vertex_elements_state(pipe,
                                       mgr->ve ? mgr->ve->driver_cso : NULL);
   }
   mgr->ve_saved = NULL;
}

void u_vbuf_save_vertex_buffers(struct u_vbuf *mgr)
{
   util_copy_vertex_buffers(mgr->vertex_buffer_saved,
                            &mgr->nr_vertex_buffers_saved,
                            mgr->vertex_buffer,
                            mgr->nr_vertex_buffers);
}

void u_vbuf_restore_vertex_buffers(struct u_vbuf *mgr)
{
   unsigned i;

   u_vbuf_set_vertex_buffers(mgr, mgr->nr_vertex_buffers_saved,
                             mgr->vertex_buffer_saved);
   for (i = 0; i < mgr->nr_vertex_buffers_saved; i++) {
      pipe_resource_reference(&mgr->vertex_buffer_saved[i].buffer, NULL);
   }
   mgr->nr_vertex_buffers_saved = 0;
}