translate_sse.c revision 68e74f1b0110348a44f589739c6edf3fe8e2b368
1/*
2 * Copyright 2003 Tungsten Graphics, inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
19 * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 *    Keith Whitwell <keithw@tungstengraphics.com>
26 */
27
28
29#include "pipe/p_config.h"
30#include "pipe/p_compiler.h"
31#include "util/u_memory.h"
32#include "util/u_math.h"
33
34#include "translate.h"
35
36
37#if defined(PIPE_ARCH_X86)
38
39#include "rtasm/rtasm_cpu.h"
40#include "rtasm/rtasm_x86sse.h"
41
42
/* Vector component indices (x, y, z, w).  In this file they are passed
 * to SHUF() to build the selectors handed to sse_shufps().
 */
#define X    0
#define Y    1
#define Z    2
#define W    3
47
48
/* One source vertex buffer, as supplied through
 * translate_sse_set_buffer().
 */
struct translate_buffer {
   const void *base_ptr;   /* start of the buffer data */
   unsigned stride;        /* multiplied by the index to reach an entry */
   unsigned max_index;     /* stored by set_buffer; not yet honoured by
                            * the generated code (see TODO in init_inputs) */
};
54
/* A (buffer, instance divisor) pair.  Elements that share both values
 * share one variant and therefore one running source pointer.
 */
struct translate_buffer_varient {
   unsigned buffer_index;       /* index into translate_sse::buffer[] */
   unsigned instance_divisor;   /* 0 means the data is fetched per vertex */
   void *ptr;                   /* updated either per vertex or per instance */
};
60
61
/* Marker stored in translate_sse::element_to_buffer_varient[] for
 * elements of type TRANSLATE_ELEMENT_INSTANCE_ID.  get_buffer_ptr()
 * maps it to the instance_id field instead of a buffer variant.
 * NOTE(review): presumably chosen to be larger than any valid variant
 * index (PIPE_MAX_ATTRIBS) -- confirm.
 */
#define ELEMENT_BUFFER_INSTANCE_ID  1001
63
64
65struct translate_sse {
66   struct translate translate;
67
68   struct x86_function linear_func;
69   struct x86_function elt_func;
70   struct x86_function *func;
71
72   boolean loaded_identity;
73   boolean loaded_255;
74   boolean loaded_inv_255;
75
76   float identity[4];
77   float float_255[4];
78   float inv_255[4];
79
80   struct translate_buffer buffer[PIPE_MAX_ATTRIBS];
81   unsigned nr_buffers;
82
83   /* Multiple buffer varients can map to a single buffer. */
84   struct translate_buffer_varient buffer_varient[PIPE_MAX_ATTRIBS];
85   unsigned nr_buffer_varients;
86
87   /* Multiple elements can map to a single buffer varient. */
88   unsigned element_to_buffer_varient[PIPE_MAX_ATTRIBS];
89
90   boolean use_instancing;
91   unsigned instance_id;
92
93   /* these are actually known values, but putting them in a struct
94    * like this is helpful to keep them in sync across the file.
95    */
96   struct x86_reg tmp_EAX;
97   struct x86_reg idx_EBX;     /* either start+i or &elt[i] */
98   struct x86_reg outbuf_ECX;
99   struct x86_reg machine_EDX;
100   struct x86_reg count_ESI;    /* decrements to zero */
101};
102
/* Distance in bytes from a to b.
 *
 * Used in this file to turn the address of a translate_sse member into
 * a displacement relative to the start of the struct.
 */
static int get_offset( const void *a, const void *b )
{
   const char *from = (const char *)a;
   const char *to = (const char *)b;

   return to - from;
}
107
108
109
110static struct x86_reg get_identity( struct translate_sse *p )
111{
112   struct x86_reg reg = x86_make_reg(file_XMM, 6);
113
114   if (!p->loaded_identity) {
115      p->loaded_identity = TRUE;
116      p->identity[0] = 0;
117      p->identity[1] = 0;
118      p->identity[2] = 0;
119      p->identity[3] = 1;
120
121      sse_movups(p->func, reg,
122		 x86_make_disp(p->machine_EDX,
123			       get_offset(p, &p->identity[0])));
124   }
125
126   return reg;
127}
128
129static struct x86_reg get_255( struct translate_sse *p )
130{
131   struct x86_reg reg = x86_make_reg(file_XMM, 7);
132
133   if (!p->loaded_255) {
134      p->loaded_255 = TRUE;
135      p->float_255[0] =
136	 p->float_255[1] =
137	 p->float_255[2] =
138	 p->float_255[3] = 255.0f;
139
140      sse_movups(p->func, reg,
141		 x86_make_disp(p->machine_EDX,
142			       get_offset(p, &p->float_255[0])));
143   }
144
145   return reg;
146}
147
148static struct x86_reg get_inv_255( struct translate_sse *p )
149{
150   struct x86_reg reg = x86_make_reg(file_XMM, 5);
151
152   if (!p->loaded_inv_255) {
153      p->loaded_inv_255 = TRUE;
154      p->inv_255[0] =
155	 p->inv_255[1] =
156	 p->inv_255[2] =
157	 p->inv_255[3] = 1.0f / 255.0f;
158
159      sse_movups(p->func, reg,
160		 x86_make_disp(p->machine_EDX,
161			       get_offset(p, &p->inv_255[0])));
162   }
163
164   return reg;
165}
166
167
168static void emit_load_R32G32B32A32( struct translate_sse *p,
169				    struct x86_reg data,
170				    struct x86_reg arg0 )
171{
172   sse_movups(p->func, data, arg0);
173}
174
175static void emit_load_R32G32B32( struct translate_sse *p,
176				 struct x86_reg data,
177				 struct x86_reg arg0 )
178{
179   /* Have to jump through some hoops:
180    *
181    * c 0 0 0
182    * c 0 0 1
183    * 0 0 c 1
184    * a b c 1
185    */
186   sse_movss(p->func, data, x86_make_disp(arg0, 8));
187   sse_shufps(p->func, data, get_identity(p), SHUF(X,Y,Z,W) );
188   sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) );
189   sse_movlps(p->func, data, arg0);
190}
191
192static void emit_load_R32G32( struct translate_sse *p,
193			   struct x86_reg data,
194			   struct x86_reg arg0 )
195{
196   /* 0 0 0 1
197    * a b 0 1
198    */
199   sse_movups(p->func, data, get_identity(p) );
200   sse_movlps(p->func, data, arg0);
201}
202
203
204static void emit_load_R32( struct translate_sse *p,
205			   struct x86_reg data,
206			   struct x86_reg arg0 )
207{
208   /* a 0 0 0
209    * a 0 0 1
210    */
211   sse_movss(p->func, data, arg0);
212   sse_orps(p->func, data, get_identity(p) );
213}
214
215
216static void emit_load_R8G8B8A8_UNORM( struct translate_sse *p,
217				       struct x86_reg data,
218				       struct x86_reg src )
219{
220
221   /* Load and unpack twice:
222    */
223   sse_movss(p->func, data, src);
224   sse2_punpcklbw(p->func, data, get_identity(p));
225   sse2_punpcklbw(p->func, data, get_identity(p));
226
227   /* Convert to float:
228    */
229   sse2_cvtdq2ps(p->func, data, data);
230
231
232   /* Scale by 1/255.0
233    */
234   sse_mulps(p->func, data, get_inv_255(p));
235}
236
237
238
239
240static void emit_store_R32G32B32A32( struct translate_sse *p,
241				     struct x86_reg dest,
242				     struct x86_reg dataXMM )
243{
244   sse_movups(p->func, dest, dataXMM);
245}
246
247static void emit_store_R32G32B32( struct translate_sse *p,
248				  struct x86_reg dest,
249				  struct x86_reg dataXMM )
250{
251   /* Emit two, shuffle, emit one.
252    */
253   sse_movlps(p->func, dest, dataXMM);
254   sse_shufps(p->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
255   sse_movss(p->func, x86_make_disp(dest,8), dataXMM);
256}
257
258static void emit_store_R32G32( struct translate_sse *p,
259			       struct x86_reg dest,
260			       struct x86_reg dataXMM )
261{
262   sse_movlps(p->func, dest, dataXMM);
263}
264
265static void emit_store_R32( struct translate_sse *p,
266			    struct x86_reg dest,
267			    struct x86_reg dataXMM )
268{
269   sse_movss(p->func, dest, dataXMM);
270}
271
272
273
274static void emit_store_R8G8B8A8_UNORM( struct translate_sse *p,
275				       struct x86_reg dest,
276				       struct x86_reg dataXMM )
277{
278   /* Scale by 255.0
279    */
280   sse_mulps(p->func, dataXMM, get_255(p));
281
282   /* Pack and emit:
283    */
284   sse2_cvtps2dq(p->func, dataXMM, dataXMM);
285   sse2_packssdw(p->func, dataXMM, dataXMM);
286   sse2_packuswb(p->func, dataXMM, dataXMM);
287   sse_movss(p->func, dest, dataXMM);
288}
289
290
291
292
293
294/* Extended swizzles?  Maybe later.
295 */
296static void emit_swizzle( struct translate_sse *p,
297			  struct x86_reg dest,
298			  struct x86_reg src,
299			  unsigned char shuffle )
300{
301   sse_shufps(p->func, dest, src, shuffle);
302}
303
304
305static boolean translate_attr( struct translate_sse *p,
306			       const struct translate_element *a,
307			       struct x86_reg srcECX,
308			       struct x86_reg dstEAX)
309{
310   struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
311
312   switch (a->input_format) {
313   case PIPE_FORMAT_R32_FLOAT:
314      emit_load_R32(p, dataXMM, srcECX);
315      break;
316   case PIPE_FORMAT_R32G32_FLOAT:
317      emit_load_R32G32(p, dataXMM, srcECX);
318      break;
319   case PIPE_FORMAT_R32G32B32_FLOAT:
320      emit_load_R32G32B32(p, dataXMM, srcECX);
321      break;
322   case PIPE_FORMAT_R32G32B32A32_FLOAT:
323      emit_load_R32G32B32A32(p, dataXMM, srcECX);
324      break;
325   case PIPE_FORMAT_B8G8R8A8_UNORM:
326      emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
327      emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
328      break;
329   case PIPE_FORMAT_R8G8B8A8_UNORM:
330      emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
331      break;
332   default:
333      return FALSE;
334   }
335
336   switch (a->output_format) {
337   case PIPE_FORMAT_R32_FLOAT:
338      emit_store_R32(p, dstEAX, dataXMM);
339      break;
340   case PIPE_FORMAT_R32G32_FLOAT:
341      emit_store_R32G32(p, dstEAX, dataXMM);
342      break;
343   case PIPE_FORMAT_R32G32B32_FLOAT:
344      emit_store_R32G32B32(p, dstEAX, dataXMM);
345      break;
346   case PIPE_FORMAT_R32G32B32A32_FLOAT:
347      emit_store_R32G32B32A32(p, dstEAX, dataXMM);
348      break;
349   case PIPE_FORMAT_B8G8R8A8_UNORM:
350      emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
351      emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
352      break;
353   case PIPE_FORMAT_R8G8B8A8_UNORM:
354      emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
355      break;
356   default:
357      return FALSE;
358   }
359
360   return TRUE;
361}
362
363
364static boolean init_inputs( struct translate_sse *p,
365                            boolean linear )
366{
367   unsigned i;
368   struct x86_reg instance_id = x86_make_disp(p->machine_EDX,
369                                              get_offset(p, &p->instance_id));
370
371   for (i = 0; i < p->nr_buffer_varients; i++) {
372      struct translate_buffer_varient *varient = &p->buffer_varient[i];
373      struct translate_buffer *buffer = &p->buffer[varient->buffer_index];
374
375      if (linear || varient->instance_divisor) {
376         struct x86_reg buf_stride   = x86_make_disp(p->machine_EDX,
377                                                     get_offset(p, &buffer->stride));
378         struct x86_reg buf_ptr      = x86_make_disp(p->machine_EDX,
379                                                     get_offset(p, &varient->ptr));
380         struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX,
381                                                     get_offset(p, &buffer->base_ptr));
382         struct x86_reg elt = p->idx_EBX;
383         struct x86_reg tmp_EAX = p->tmp_EAX;
384
385         /* Calculate pointer to first attrib:
386          *   base_ptr + stride * index, where index depends on instance divisor
387          */
388         if (varient->instance_divisor) {
389            /* Our index is instance ID divided by instance divisor.
390             */
391            x86_mov(p->func, tmp_EAX, instance_id);
392
393            if (varient->instance_divisor != 1) {
394               struct x86_reg tmp_EDX = p->machine_EDX;
395               struct x86_reg tmp_ECX = p->outbuf_ECX;
396
397               /* TODO: Add x86_shr() to rtasm and use it whenever
398                *       instance divisor is power of two.
399                */
400
401               x86_push(p->func, tmp_EDX);
402               x86_push(p->func, tmp_ECX);
403               x86_xor(p->func, tmp_EDX, tmp_EDX);
404               x86_mov_reg_imm(p->func, tmp_ECX, varient->instance_divisor);
405               x86_div(p->func, tmp_ECX);    /* EAX = EDX:EAX / ECX */
406               x86_pop(p->func, tmp_ECX);
407               x86_pop(p->func, tmp_EDX);
408            }
409         } else {
410            x86_mov(p->func, tmp_EAX, elt);
411         }
412
413         /*
414          * TODO: Respect translate_buffer::max_index.
415          */
416
417         x86_imul(p->func, tmp_EAX, buf_stride);
418         x86_add(p->func, tmp_EAX, buf_base_ptr);
419
420
421         /* In the linear case, keep the buffer pointer instead of the
422          * index number.
423          */
424         if (linear && p->nr_buffer_varients == 1)
425            x86_mov(p->func, elt, tmp_EAX);
426         else
427            x86_mov(p->func, buf_ptr, tmp_EAX);
428      }
429   }
430
431   return TRUE;
432}
433
434
435static struct x86_reg get_buffer_ptr( struct translate_sse *p,
436                                      boolean linear,
437                                      unsigned var_idx,
438                                      struct x86_reg elt )
439{
440   if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) {
441      return x86_make_disp(p->machine_EDX,
442                           get_offset(p, &p->instance_id));
443   }
444   if (linear && p->nr_buffer_varients == 1) {
445      return p->idx_EBX;
446   }
447   else if (linear || p->buffer_varient[var_idx].instance_divisor) {
448      struct x86_reg ptr = p->tmp_EAX;
449      struct x86_reg buf_ptr =
450         x86_make_disp(p->machine_EDX,
451                       get_offset(p, &p->buffer_varient[var_idx].ptr));
452
453      x86_mov(p->func, ptr, buf_ptr);
454      return ptr;
455   }
456   else {
457      struct x86_reg ptr = p->tmp_EAX;
458      const struct translate_buffer_varient *varient = &p->buffer_varient[var_idx];
459
460      struct x86_reg buf_stride =
461         x86_make_disp(p->machine_EDX,
462                       get_offset(p, &p->buffer[varient->buffer_index].stride));
463
464      struct x86_reg buf_base_ptr =
465         x86_make_disp(p->machine_EDX,
466                       get_offset(p, &p->buffer[varient->buffer_index].base_ptr));
467
468
469
470      /* Calculate pointer to current attrib:
471       */
472      x86_mov(p->func, ptr, buf_stride);
473      x86_imul(p->func, ptr, elt);
474      x86_add(p->func, ptr, buf_base_ptr);
475      return ptr;
476   }
477}
478
479
480
481static boolean incr_inputs( struct translate_sse *p,
482                            boolean linear )
483{
484   if (linear && p->nr_buffer_varients == 1) {
485      struct x86_reg stride = x86_make_disp(p->machine_EDX,
486                                            get_offset(p, &p->buffer[0].stride));
487
488      if (p->buffer_varient[0].instance_divisor == 0) {
489         x86_add(p->func, p->idx_EBX, stride);
490         sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192));
491      }
492   }
493   else if (linear) {
494      unsigned i;
495
496      /* Is this worthwhile??
497       */
498      for (i = 0; i < p->nr_buffer_varients; i++) {
499         struct translate_buffer_varient *varient = &p->buffer_varient[i];
500         struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX,
501                                                get_offset(p, &varient->ptr));
502         struct x86_reg buf_stride = x86_make_disp(p->machine_EDX,
503                                                   get_offset(p, &p->buffer[varient->buffer_index].stride));
504
505         if (varient->instance_divisor == 0) {
506            x86_mov(p->func, p->tmp_EAX, buf_ptr);
507            x86_add(p->func, p->tmp_EAX, buf_stride);
508            if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192));
509            x86_mov(p->func, buf_ptr, p->tmp_EAX);
510         }
511      }
512   }
513   else {
514      x86_lea(p->func, p->idx_EBX, x86_make_disp(p->idx_EBX, 4));
515   }
516
517   return TRUE;
518}
519
520
521/* Build run( struct translate *machine,
522 *            unsigned start,
523 *            unsigned count,
524 *            void *output_buffer )
525 * or
526 *  run_elts( struct translate *machine,
527 *            unsigned *elts,
528 *            unsigned count,
529 *            void *output_buffer )
530 *
531 *  Lots of hardcoding
532 *
533 * EAX -- pointer to current output vertex
534 * ECX -- pointer to current attribute
535 *
536 */
537static boolean build_vertex_emit( struct translate_sse *p,
538				  struct x86_function *func,
539				  boolean linear )
540{
541   int fixup, label;
542   unsigned j;
543
544   p->tmp_EAX       = x86_make_reg(file_REG32, reg_AX);
545   p->idx_EBX       = x86_make_reg(file_REG32, reg_BX);
546   p->outbuf_ECX    = x86_make_reg(file_REG32, reg_CX);
547   p->machine_EDX   = x86_make_reg(file_REG32, reg_DX);
548   p->count_ESI     = x86_make_reg(file_REG32, reg_SI);
549
550   p->func = func;
551   p->loaded_inv_255 = FALSE;
552   p->loaded_255 = FALSE;
553   p->loaded_identity = FALSE;
554
555   x86_init_func(p->func);
556
557   /* Push a few regs?
558    */
559   x86_push(p->func, p->idx_EBX);
560   x86_push(p->func, p->count_ESI);
561
562   /* Load arguments into regs:
563    */
564   x86_mov(p->func, p->machine_EDX, x86_fn_arg(p->func, 1));
565   x86_mov(p->func, p->idx_EBX, x86_fn_arg(p->func, 2));
566   x86_mov(p->func, p->count_ESI, x86_fn_arg(p->func, 3));
567   x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 5));
568
569   /* Load instance ID.
570    */
571   if (p->use_instancing) {
572      x86_mov(p->func,
573              p->tmp_EAX,
574              x86_fn_arg(p->func, 4));
575      x86_mov(p->func,
576              x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id)),
577              p->tmp_EAX);
578   }
579
580   /* Get vertex count, compare to zero
581    */
582   x86_xor(p->func, p->tmp_EAX, p->tmp_EAX);
583   x86_cmp(p->func, p->count_ESI, p->tmp_EAX);
584   fixup = x86_jcc_forward(p->func, cc_E);
585
586   /* always load, needed or not:
587    */
588   init_inputs(p, linear);
589
590   /* Note address for loop jump
591    */
592   label = x86_get_label(p->func);
593   {
594      struct x86_reg elt = linear ? p->idx_EBX : x86_deref(p->idx_EBX);
595      int last_varient = -1;
596      struct x86_reg vb;
597
598      for (j = 0; j < p->translate.key.nr_elements; j++) {
599         const struct translate_element *a = &p->translate.key.element[j];
600         unsigned varient = p->element_to_buffer_varient[j];
601
602         /* Figure out source pointer address:
603          */
604         if (varient != last_varient) {
605            last_varient = varient;
606            vb = get_buffer_ptr(p, linear, varient, elt);
607         }
608
609         if (!translate_attr( p, a,
610                              x86_make_disp(vb, a->input_offset),
611                              x86_make_disp(p->outbuf_ECX, a->output_offset)))
612            return FALSE;
613      }
614
615      /* Next output vertex:
616       */
617      x86_lea(p->func,
618              p->outbuf_ECX,
619              x86_make_disp(p->outbuf_ECX,
620                            p->translate.key.output_stride));
621
622      /* Incr index
623       */
624      incr_inputs( p, linear );
625   }
626
627   /* decr count, loop if not zero
628    */
629   x86_dec(p->func, p->count_ESI);
630   x86_jcc(p->func, cc_NZ, label);
631
632   /* Exit mmx state?
633    */
634   if (p->func->need_emms)
635      mmx_emms(p->func);
636
637   /* Land forward jump here:
638    */
639   x86_fixup_fwd_jump(p->func, fixup);
640
641   /* Pop regs and return
642    */
643
644   x86_pop(p->func, p->count_ESI);
645   x86_pop(p->func, p->idx_EBX);
646   x86_ret(p->func);
647
648   return TRUE;
649}
650
651
652
653
654
655
656
657static void translate_sse_set_buffer( struct translate *translate,
658				unsigned buf,
659				const void *ptr,
660				unsigned stride,
661				unsigned max_index )
662{
663   struct translate_sse *p = (struct translate_sse *)translate;
664
665   if (buf < p->nr_buffers) {
666      p->buffer[buf].base_ptr = (char *)ptr;
667      p->buffer[buf].stride = stride;
668      p->buffer[buf].max_index = max_index;
669   }
670
671   if (0) debug_printf("%s %d/%d: %p %d\n",
672                       __FUNCTION__, buf,
673                       p->nr_buffers,
674                       ptr, stride);
675}
676
677
678static void translate_sse_release( struct translate *translate )
679{
680   struct translate_sse *p = (struct translate_sse *)translate;
681
682   x86_release_func( &p->linear_func );
683   x86_release_func( &p->elt_func );
684
685   FREE(p);
686}
687
688
689struct translate *translate_sse2_create( const struct translate_key *key )
690{
691   struct translate_sse *p = NULL;
692   unsigned i;
693
694   if (!rtasm_cpu_has_sse() || !rtasm_cpu_has_sse2())
695      goto fail;
696
697   p = CALLOC_STRUCT( translate_sse );
698   if (p == NULL)
699      goto fail;
700
701   p->translate.key = *key;
702   p->translate.release = translate_sse_release;
703   p->translate.set_buffer = translate_sse_set_buffer;
704
705   for (i = 0; i < key->nr_elements; i++) {
706      if (key->element[i].type == TRANSLATE_ELEMENT_NORMAL) {
707         unsigned j;
708
709         p->nr_buffers = MAX2(p->nr_buffers, key->element[i].input_buffer + 1);
710
711         if (key->element[i].instance_divisor) {
712            p->use_instancing = TRUE;
713         }
714
715         /*
716          * Map vertex element to vertex buffer varient.
717          */
718         for (j = 0; j < p->nr_buffer_varients; j++) {
719            if (p->buffer_varient[j].buffer_index == key->element[i].input_buffer &&
720                p->buffer_varient[j].instance_divisor == key->element[i].instance_divisor) {
721               break;
722            }
723         }
724         if (j == p->nr_buffer_varients) {
725            p->buffer_varient[j].buffer_index = key->element[i].input_buffer;
726            p->buffer_varient[j].instance_divisor = key->element[i].instance_divisor;
727            p->nr_buffer_varients++;
728         }
729         p->element_to_buffer_varient[i] = j;
730      } else {
731         assert(key->element[i].type == TRANSLATE_ELEMENT_INSTANCE_ID);
732
733         p->element_to_buffer_varient[i] = ELEMENT_BUFFER_INSTANCE_ID;
734      }
735   }
736
737   if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers);
738
739   if (!build_vertex_emit(p, &p->linear_func, TRUE))
740      goto fail;
741
742   if (!build_vertex_emit(p, &p->elt_func, FALSE))
743      goto fail;
744
745   p->translate.run = (void*)x86_get_func(&p->linear_func);
746   if (p->translate.run == NULL)
747      goto fail;
748
749   p->translate.run_elts = (void*)x86_get_func(&p->elt_func);
750   if (p->translate.run_elts == NULL)
751      goto fail;
752
753   return &p->translate;
754
755 fail:
756   if (p)
757      translate_sse_release( &p->translate );
758
759   return NULL;
760}
761
762
763
764#else
765
766struct translate *translate_sse2_create( const struct translate_key *key )
767{
768   return NULL;
769}
770
771#endif
772