draw_llvm.c revision 51729b5119df450f2f80c4e648b99fa35b9ec32d
1#include "draw_llvm.h"
2
3#include "draw_context.h"
4#include "draw_vs.h"
5
6#include "gallivm/lp_bld_arit.h"
7#include "gallivm/lp_bld_struct.h"
8#include "gallivm/lp_bld_type.h"
9#include "gallivm/lp_bld_flow.h"
10#include "gallivm/lp_bld_debug.h"
11#include "gallivm/lp_bld_tgsi.h"
12#include "gallivm/lp_bld_printf.h"
13
14#include "tgsi/tgsi_exec.h"
15#include "tgsi/tgsi_dump.h"
16
17#include "util/u_cpu_detect.h"
18#include "util/u_string.h"
19#include "util/u_pointer.h"
20
21#include <llvm-c/Transforms/Scalar.h>
22
/* Set to 1 to compile in the lp_build_printf tracing of the store path. */
#define DEBUG_STORE 0


/*
 * Forward declarations of the two generators of the draw jit functions;
 * both are defined further down in this file.
 */
static void draw_llvm_generate(struct draw_llvm *llvm,
                               struct draw_llvm_variant *var);

static void draw_llvm_generate_elts(struct draw_llvm *llvm,
                                    struct draw_llvm_variant *var);
31
32static void
33init_globals(struct draw_llvm *llvm)
34{
35   LLVMTypeRef texture_type;
36
37   /* struct draw_jit_texture */
38   {
39      LLVMTypeRef elem_types[4];
40
41      elem_types[DRAW_JIT_TEXTURE_WIDTH]  = LLVMInt32Type();
42      elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type();
43      elem_types[DRAW_JIT_TEXTURE_STRIDE] = LLVMInt32Type();
44      elem_types[DRAW_JIT_TEXTURE_DATA]   = LLVMPointerType(LLVMInt8Type(), 0);
45
46      texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);
47
48      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width,
49                             llvm->target, texture_type,
50                             DRAW_JIT_TEXTURE_WIDTH);
51      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height,
52                             llvm->target, texture_type,
53                             DRAW_JIT_TEXTURE_HEIGHT);
54      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, stride,
55                             llvm->target, texture_type,
56                             DRAW_JIT_TEXTURE_STRIDE);
57      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data,
58                             llvm->target, texture_type,
59                             DRAW_JIT_TEXTURE_DATA);
60      LP_CHECK_STRUCT_SIZE(struct draw_jit_texture,
61                           llvm->target, texture_type);
62
63      LLVMAddTypeName(llvm->module, "texture", texture_type);
64   }
65
66
67   /* struct draw_jit_context */
68   {
69      LLVMTypeRef elem_types[3];
70      LLVMTypeRef context_type;
71
72      elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
73      elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
74      elem_types[2] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */
75
76      context_type = LLVMStructType(elem_types, Elements(elem_types), 0);
77
78      LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
79                             llvm->target, context_type, 0);
80      LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants,
81                             llvm->target, context_type, 1);
82      LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,
83                             llvm->target, context_type,
84                             DRAW_JIT_CONTEXT_TEXTURES_INDEX);
85      LP_CHECK_STRUCT_SIZE(struct draw_jit_context,
86                           llvm->target, context_type);
87
88      LLVMAddTypeName(llvm->module, "draw_jit_context", context_type);
89
90      llvm->context_ptr_type = LLVMPointerType(context_type, 0);
91   }
92   {
93      LLVMTypeRef buffer_ptr = LLVMPointerType(LLVMIntType(8), 0);
94      llvm->buffer_ptr_type = LLVMPointerType(buffer_ptr, 0);
95   }
96   /* struct pipe_vertex_buffer */
97   {
98      LLVMTypeRef elem_types[4];
99      LLVMTypeRef vb_type;
100
101      elem_types[0] = LLVMInt32Type();
102      elem_types[1] = LLVMInt32Type();
103      elem_types[2] = LLVMInt32Type();
104      elem_types[3] = LLVMPointerType(LLVMOpaqueType(), 0); /* vs_constants */
105
106      vb_type = LLVMStructType(elem_types, Elements(elem_types), 0);
107
108      LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride,
109                             llvm->target, vb_type, 0);
110      LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset,
111                             llvm->target, vb_type, 2);
112      LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer,
113                           llvm->target, vb_type);
114
115      LLVMAddTypeName(llvm->module, "pipe_vertex_buffer", vb_type);
116
117      llvm->vb_ptr_type = LLVMPointerType(vb_type, 0);
118   }
119}
120
121static LLVMTypeRef
122create_vertex_header(struct draw_llvm *llvm, int data_elems)
123{
124   /* struct vertex_header */
125   LLVMTypeRef elem_types[3];
126   LLVMTypeRef vertex_header;
127   char struct_name[24];
128
129   util_snprintf(struct_name, 23, "vertex_header%d", data_elems);
130
131   elem_types[0]  = LLVMIntType(32);
132   elem_types[1]  = LLVMArrayType(LLVMFloatType(), 4);
133   elem_types[2]  = LLVMArrayType(elem_types[1], data_elems);
134
135   vertex_header = LLVMStructType(elem_types, Elements(elem_types), 0);
136
137   /* these are bit-fields and we can't take address of them
138      LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
139      llvm->target, vertex_header,
140      DRAW_JIT_VERTEX_CLIPMASK);
141      LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag,
142      llvm->target, vertex_header,
143      DRAW_JIT_VERTEX_EDGEFLAG);
144      LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad,
145      llvm->target, vertex_header,
146      DRAW_JIT_VERTEX_PAD);
147      LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id,
148      llvm->target, vertex_header,
149      DRAW_JIT_VERTEX_VERTEX_ID);
150   */
151   LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip,
152                          llvm->target, vertex_header,
153                          DRAW_JIT_VERTEX_CLIP);
154   LP_CHECK_MEMBER_OFFSET(struct vertex_header, data,
155                          llvm->target, vertex_header,
156                          DRAW_JIT_VERTEX_DATA);
157
158   LLVMAddTypeName(llvm->module, struct_name, vertex_header);
159
160   return LLVMPointerType(vertex_header, 0);
161}
162
163struct draw_llvm *
164draw_llvm_create(struct draw_context *draw)
165{
166   struct draw_llvm *llvm;
167
168#ifdef PIPE_ARCH_X86
169   util_cpu_detect();
170   /* require SSE2 due to LLVM PR6960. */
171   if (!util_cpu_caps.has_sse2)
172       return NULL;
173#endif
174
175   llvm = CALLOC_STRUCT( draw_llvm );
176   if (!llvm)
177      return NULL;
178
179   llvm->draw = draw;
180   llvm->engine = draw->engine;
181
182   debug_assert(llvm->engine);
183
184   llvm->module = LLVMModuleCreateWithName("draw_llvm");
185   llvm->provider = LLVMCreateModuleProviderForExistingModule(llvm->module);
186
187   LLVMAddModuleProvider(llvm->engine, llvm->provider);
188
189   llvm->target = LLVMGetExecutionEngineTargetData(llvm->engine);
190
191   llvm->pass = LLVMCreateFunctionPassManager(llvm->provider);
192   LLVMAddTargetData(llvm->target, llvm->pass);
193
194   if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) {
195      /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
196       * but there are more on SVN. */
197      /* TODO: Add more passes */
198      LLVMAddCFGSimplificationPass(llvm->pass);
199      LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
200      LLVMAddConstantPropagationPass(llvm->pass);
201      if(util_cpu_caps.has_sse4_1) {
202         /* FIXME: There is a bug in this pass, whereby the combination of fptosi
203          * and sitofp (necessary for trunc/floor/ceil/round implementation)
204          * somehow becomes invalid code.
205          */
206         LLVMAddInstructionCombiningPass(llvm->pass);
207      }
208      LLVMAddGVNPass(llvm->pass);
209   } else {
210      /* We need at least this pass to prevent the backends to fail in
211       * unexpected ways.
212       */
213      LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
214   }
215
216   init_globals(llvm);
217
218   if (gallivm_debug & GALLIVM_DEBUG_IR) {
219      LLVMDumpModule(llvm->module);
220   }
221
222   return llvm;
223}
224
225void
226draw_llvm_destroy(struct draw_llvm *llvm)
227{
228   LLVMDisposePassManager(llvm->pass);
229
230   FREE(llvm);
231}
232
233struct draw_llvm_variant *
234draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs)
235{
236   struct draw_llvm_variant *variant = MALLOC(sizeof(struct draw_llvm_variant));
237
238   draw_llvm_make_variant_key(llvm, &variant->key);
239
240   llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs);
241
242   draw_llvm_generate(llvm, variant);
243   draw_llvm_generate_elts(llvm, variant);
244
245   return variant;
246}
247
248static void
249generate_vs(struct draw_llvm *llvm,
250            LLVMBuilderRef builder,
251            LLVMValueRef (*outputs)[NUM_CHANNELS],
252            const LLVMValueRef (*inputs)[NUM_CHANNELS],
253            LLVMValueRef context_ptr)
254{
255   const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
256   struct lp_type vs_type;
257   LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr);
258
259   memset(&vs_type, 0, sizeof vs_type);
260   vs_type.floating = TRUE; /* floating point values */
261   vs_type.sign = TRUE;     /* values are signed */
262   vs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
263   vs_type.width = 32;      /* 32-bit float */
264   vs_type.length = 4;      /* 4 elements per vector */
265#if 0
266   num_vs = 4;              /* number of vertices per block */
267#endif
268
269   if (gallivm_debug & GALLIVM_DEBUG_IR) {
270      tgsi_dump(tokens, 0);
271   }
272
273   lp_build_tgsi_soa(builder,
274                     tokens,
275                     vs_type,
276                     NULL /*struct lp_build_mask_context *mask*/,
277                     consts_ptr,
278                     NULL /*pos*/,
279                     inputs,
280                     outputs,
281                     NULL/*sampler*/,
282                     &llvm->draw->vs.vertex_shader->info);
283}
284
#if DEBUG_STORE
/* Debug helper: emit a printf of the four lanes of the vector 'vec'. */
static void print_vectorf(LLVMBuilderRef builder,
                         LLVMValueRef vec)
{
   LLVMValueRef lanes[4];
   unsigned i;

   for (i = 0; i < 4; ++i) {
      lanes[i] = LLVMBuildExtractElement(builder, vec,
                                         LLVMConstInt(LLVMInt32Type(), i, 0),
                                         "");
   }

   lp_build_printf(builder, "vector = [%f, %f, %f, %f]\n",
                   lanes[0], lanes[1], lanes[2], lanes[3]);
}
#endif
302
303static void
304generate_fetch(LLVMBuilderRef builder,
305               LLVMValueRef vbuffers_ptr,
306               LLVMValueRef *res,
307               struct pipe_vertex_element *velem,
308               LLVMValueRef vbuf,
309               LLVMValueRef index)
310{
311   LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0);
312   LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr,
313                                           &indices, 1, "");
314   LLVMValueRef vb_stride = draw_jit_vbuffer_stride(builder, vbuf);
315   LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(builder, vbuf);
316   LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(builder, vbuf);
317   LLVMValueRef cond;
318   LLVMValueRef stride;
319
320   cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, "");
321
322   index = LLVMBuildSelect(builder, cond, index, vb_max_index, "");
323
324   stride = LLVMBuildMul(builder, vb_stride, index, "");
325
326   vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer");
327
328   stride = LLVMBuildAdd(builder, stride,
329                         vb_buffer_offset,
330                         "");
331   stride = LLVMBuildAdd(builder, stride,
332                         LLVMConstInt(LLVMInt32Type(), velem->src_offset, 0),
333                         "");
334
335   /*lp_build_printf(builder, "vbuf index = %d, stride is %d\n", indices, stride);*/
336   vbuffer_ptr = LLVMBuildGEP(builder, vbuffer_ptr, &stride, 1, "");
337
338   *res = draw_llvm_translate_from(builder, vbuffer_ptr, velem->src_format);
339}
340
341static LLVMValueRef
342aos_to_soa(LLVMBuilderRef builder,
343           LLVMValueRef val0,
344           LLVMValueRef val1,
345           LLVMValueRef val2,
346           LLVMValueRef val3,
347           LLVMValueRef channel)
348{
349   LLVMValueRef ex, res;
350
351   ex = LLVMBuildExtractElement(builder, val0,
352                                channel, "");
353   res = LLVMBuildInsertElement(builder,
354                                LLVMConstNull(LLVMTypeOf(val0)),
355                                ex,
356                                LLVMConstInt(LLVMInt32Type(), 0, 0),
357                                "");
358
359   ex = LLVMBuildExtractElement(builder, val1,
360                                channel, "");
361   res = LLVMBuildInsertElement(builder,
362                                res, ex,
363                                LLVMConstInt(LLVMInt32Type(), 1, 0),
364                                "");
365
366   ex = LLVMBuildExtractElement(builder, val2,
367                                channel, "");
368   res = LLVMBuildInsertElement(builder,
369                                res, ex,
370                                LLVMConstInt(LLVMInt32Type(), 2, 0),
371                                "");
372
373   ex = LLVMBuildExtractElement(builder, val3,
374                                channel, "");
375   res = LLVMBuildInsertElement(builder,
376                                res, ex,
377                                LLVMConstInt(LLVMInt32Type(), 3, 0),
378                                "");
379
380   return res;
381}
382
383static void
384soa_to_aos(LLVMBuilderRef builder,
385           LLVMValueRef soa[NUM_CHANNELS],
386           LLVMValueRef aos[NUM_CHANNELS])
387{
388   LLVMValueRef comp;
389   int i = 0;
390
391   debug_assert(NUM_CHANNELS == 4);
392
393   aos[0] = LLVMConstNull(LLVMTypeOf(soa[0]));
394   aos[1] = aos[2] = aos[3] = aos[0];
395
396   for (i = 0; i < NUM_CHANNELS; ++i) {
397      LLVMValueRef channel = LLVMConstInt(LLVMInt32Type(), i, 0);
398
399      comp = LLVMBuildExtractElement(builder, soa[i],
400                                     LLVMConstInt(LLVMInt32Type(), 0, 0), "");
401      aos[0] = LLVMBuildInsertElement(builder, aos[0], comp, channel, "");
402
403      comp = LLVMBuildExtractElement(builder, soa[i],
404                                     LLVMConstInt(LLVMInt32Type(), 1, 0), "");
405      aos[1] = LLVMBuildInsertElement(builder, aos[1], comp, channel, "");
406
407      comp = LLVMBuildExtractElement(builder, soa[i],
408                                     LLVMConstInt(LLVMInt32Type(), 2, 0), "");
409      aos[2] = LLVMBuildInsertElement(builder, aos[2], comp, channel, "");
410
411      comp = LLVMBuildExtractElement(builder, soa[i],
412                                     LLVMConstInt(LLVMInt32Type(), 3, 0), "");
413      aos[3] = LLVMBuildInsertElement(builder, aos[3], comp, channel, "");
414
415   }
416}
417
418static void
419convert_to_soa(LLVMBuilderRef builder,
420               LLVMValueRef (*aos)[NUM_CHANNELS],
421               LLVMValueRef (*soa)[NUM_CHANNELS],
422               int num_attribs)
423{
424   int i;
425
426   debug_assert(NUM_CHANNELS == 4);
427
428   for (i = 0; i < num_attribs; ++i) {
429      LLVMValueRef val0 = aos[i][0];
430      LLVMValueRef val1 = aos[i][1];
431      LLVMValueRef val2 = aos[i][2];
432      LLVMValueRef val3 = aos[i][3];
433
434      soa[i][0] = aos_to_soa(builder, val0, val1, val2, val3,
435                             LLVMConstInt(LLVMInt32Type(), 0, 0));
436      soa[i][1] = aos_to_soa(builder, val0, val1, val2, val3,
437                             LLVMConstInt(LLVMInt32Type(), 1, 0));
438      soa[i][2] = aos_to_soa(builder, val0, val1, val2, val3,
439                             LLVMConstInt(LLVMInt32Type(), 2, 0));
440      soa[i][3] = aos_to_soa(builder, val0, val1, val2, val3,
441                             LLVMConstInt(LLVMInt32Type(), 3, 0));
442   }
443}
444
445static void
446store_aos(LLVMBuilderRef builder,
447          LLVMValueRef io_ptr,
448          LLVMValueRef index,
449          LLVMValueRef value)
450{
451   LLVMValueRef id_ptr = draw_jit_header_id(builder, io_ptr);
452   LLVMValueRef data_ptr = draw_jit_header_data(builder, io_ptr);
453   LLVMValueRef indices[3];
454
455   indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
456   indices[1] = index;
457   indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0);
458
459   /* undefined vertex */
460   LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(),
461                                        0xffff, 0), id_ptr);
462
463#if DEBUG_STORE
464   lp_build_printf(builder, "    ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr);
465#endif
466#if 0
467   /*lp_build_printf(builder, " ---- %p storing at %d (%p)  ", io_ptr, index, data_ptr);
468     print_vectorf(builder, value);*/
469   data_ptr = LLVMBuildBitCast(builder, data_ptr,
470                               LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatType(), 4), 0), 0),
471                               "datavec");
472   data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 2, "");
473
474   LLVMBuildStore(builder, value, data_ptr);
475#else
476   {
477      LLVMValueRef x, y, z, w;
478      LLVMValueRef idx0, idx1, idx2, idx3;
479      LLVMValueRef gep0, gep1, gep2, gep3;
480      data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, "");
481
482      idx0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
483      idx1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
484      idx2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
485      idx3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
486
487      x = LLVMBuildExtractElement(builder, value,
488                                  idx0, "");
489      y = LLVMBuildExtractElement(builder, value,
490                                  idx1, "");
491      z = LLVMBuildExtractElement(builder, value,
492                                  idx2, "");
493      w = LLVMBuildExtractElement(builder, value,
494                                  idx3, "");
495
496      gep0 = LLVMBuildGEP(builder, data_ptr, &idx0, 1, "");
497      gep1 = LLVMBuildGEP(builder, data_ptr, &idx1, 1, "");
498      gep2 = LLVMBuildGEP(builder, data_ptr, &idx2, 1, "");
499      gep3 = LLVMBuildGEP(builder, data_ptr, &idx3, 1, "");
500
501      /*lp_build_printf(builder, "##### x = %f (%p), y = %f (%p), z = %f (%p), w = %f (%p)\n",
502        x, gep0, y, gep1, z, gep2, w, gep3);*/
503      LLVMBuildStore(builder, x, gep0);
504      LLVMBuildStore(builder, y, gep1);
505      LLVMBuildStore(builder, z, gep2);
506      LLVMBuildStore(builder, w, gep3);
507   }
508#endif
509}
510
511static void
512store_aos_array(LLVMBuilderRef builder,
513                LLVMValueRef io_ptr,
514                LLVMValueRef aos[NUM_CHANNELS],
515                int attrib,
516                int num_outputs)
517{
518   LLVMValueRef attr_index = LLVMConstInt(LLVMInt32Type(), attrib, 0);
519   LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
520   LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
521   LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
522   LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
523   LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr;
524
525   debug_assert(NUM_CHANNELS == 4);
526
527   io0_ptr = LLVMBuildGEP(builder, io_ptr,
528                          &ind0, 1, "");
529   io1_ptr = LLVMBuildGEP(builder, io_ptr,
530                          &ind1, 1, "");
531   io2_ptr = LLVMBuildGEP(builder, io_ptr,
532                          &ind2, 1, "");
533   io3_ptr = LLVMBuildGEP(builder, io_ptr,
534                          &ind3, 1, "");
535
536#if DEBUG_STORE
537   lp_build_printf(builder, "   io = %p, indexes[%d, %d, %d, %d]\n",
538                   io_ptr, ind0, ind1, ind2, ind3);
539#endif
540
541   store_aos(builder, io0_ptr, attr_index, aos[0]);
542   store_aos(builder, io1_ptr, attr_index, aos[1]);
543   store_aos(builder, io2_ptr, attr_index, aos[2]);
544   store_aos(builder, io3_ptr, attr_index, aos[3]);
545}
546
547static void
548convert_to_aos(LLVMBuilderRef builder,
549               LLVMValueRef io,
550               LLVMValueRef (*outputs)[NUM_CHANNELS],
551               int num_outputs,
552               int max_vertices)
553{
554   unsigned chan, attrib;
555
556#if DEBUG_STORE
557   lp_build_printf(builder, "   # storing begin\n");
558#endif
559   for (attrib = 0; attrib < num_outputs; ++attrib) {
560      LLVMValueRef soa[4];
561      LLVMValueRef aos[4];
562      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
563         if(outputs[attrib][chan]) {
564            LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
565            lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);
566            /*lp_build_printf(builder, "output %d : %d ",
567                            LLVMConstInt(LLVMInt32Type(), attrib, 0),
568                            LLVMConstInt(LLVMInt32Type(), chan, 0));
569              print_vectorf(builder, out);*/
570            soa[chan] = out;
571         } else
572            soa[chan] = 0;
573      }
574      soa_to_aos(builder, soa, aos);
575      store_aos_array(builder,
576                      io,
577                      aos,
578                      attrib,
579                      num_outputs);
580   }
581#if DEBUG_STORE
582   lp_build_printf(builder, "   # storing end\n");
583#endif
584}
585
586static void
587draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
588{
589   LLVMTypeRef arg_types[7];
590   LLVMTypeRef func_type;
591   LLVMValueRef context_ptr;
592   LLVMBasicBlockRef block;
593   LLVMBuilderRef builder;
594   LLVMValueRef start, end, count, stride, step, io_itr;
595   LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
596   struct draw_context *draw = llvm->draw;
597   unsigned i, j;
598   struct lp_build_context bld;
599   struct lp_build_loop_state lp_loop;
600   struct lp_type vs_type = lp_type_float_vec(32);
601   const int max_vertices = 4;
602   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
603   void *code;
604
605   arg_types[0] = llvm->context_ptr_type;           /* context */
606   arg_types[1] = llvm->vertex_header_ptr_type;     /* vertex_header */
607   arg_types[2] = llvm->buffer_ptr_type;            /* vbuffers */
608   arg_types[3] = LLVMInt32Type();                  /* start */
609   arg_types[4] = LLVMInt32Type();                  /* count */
610   arg_types[5] = LLVMInt32Type();                  /* stride */
611   arg_types[6] = llvm->vb_ptr_type;                /* pipe_vertex_buffer's */
612
613   func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
614
615   variant->function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type);
616   LLVMSetFunctionCallConv(variant->function, LLVMCCallConv);
617   for(i = 0; i < Elements(arg_types); ++i)
618      if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
619         LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute);
620
621   context_ptr  = LLVMGetParam(variant->function, 0);
622   io_ptr       = LLVMGetParam(variant->function, 1);
623   vbuffers_ptr = LLVMGetParam(variant->function, 2);
624   start        = LLVMGetParam(variant->function, 3);
625   count        = LLVMGetParam(variant->function, 4);
626   stride       = LLVMGetParam(variant->function, 5);
627   vb_ptr       = LLVMGetParam(variant->function, 6);
628
629   lp_build_name(context_ptr, "context");
630   lp_build_name(io_ptr, "io");
631   lp_build_name(vbuffers_ptr, "vbuffers");
632   lp_build_name(start, "start");
633   lp_build_name(count, "count");
634   lp_build_name(stride, "stride");
635   lp_build_name(vb_ptr, "vb");
636
637   /*
638    * Function body
639    */
640
641   block = LLVMAppendBasicBlock(variant->function, "entry");
642   builder = LLVMCreateBuilder();
643   LLVMPositionBuilderAtEnd(builder, block);
644
645   lp_build_context_init(&bld, builder, vs_type);
646
647   end = lp_build_add(&bld, start, count);
648
649   step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
650
651#if DEBUG_STORE
652   lp_build_printf(builder, "start = %d, end = %d, step = %d\n",
653                   start, end, step);
654#endif
655   lp_build_loop_begin(builder, start, &lp_loop);
656   {
657      LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
658      LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
659      LLVMValueRef io;
660      const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
661
662      io_itr = LLVMBuildSub(builder, lp_loop.counter, start, "");
663      io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
664#if DEBUG_STORE
665      lp_build_printf(builder, " --- io %d = %p, loop counter %d\n",
666                      io_itr, io, lp_loop.counter);
667#endif
668      for (i = 0; i < NUM_CHANNELS; ++i) {
669         LLVMValueRef true_index = LLVMBuildAdd(
670            builder,
671            lp_loop.counter,
672            LLVMConstInt(LLVMInt32Type(), i, 0), "");
673         for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
674            struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
675            LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(),
676                                                 velem->vertex_buffer_index,
677                                                 0);
678            LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
679                                           &vb_index, 1, "");
680            generate_fetch(builder, vbuffers_ptr,
681                           &aos_attribs[j][i], velem, vb, true_index);
682         }
683      }
684      convert_to_soa(builder, aos_attribs, inputs,
685                     draw->pt.nr_vertex_elements);
686
687      ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs;
688      generate_vs(llvm,
689                  builder,
690                  outputs,
691                  ptr_aos,
692                  context_ptr);
693
694      convert_to_aos(builder, io, outputs,
695                     draw->vs.vertex_shader->info.num_outputs,
696                     max_vertices);
697   }
698   lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop);
699
700   LLVMBuildRetVoid(builder);
701
702   LLVMDisposeBuilder(builder);
703
704   /*
705    * Translate the LLVM IR into machine code.
706    */
707#ifdef DEBUG
708   if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
709      lp_debug_dump_value(variant->function);
710      assert(0);
711   }
712#endif
713
714   LLVMRunFunctionPassManager(llvm->pass, variant->function);
715
716   if (gallivm_debug & GALLIVM_DEBUG_IR) {
717      lp_debug_dump_value(variant->function);
718      debug_printf("\n");
719   }
720
721   code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function);
722   variant->jit_func = (draw_jit_vert_func)pointer_to_func(code);
723
724   if (gallivm_debug & GALLIVM_DEBUG_ASM) {
725      lp_disassemble(code);
726   }
727}
728
729
730static void
731draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
732{
733   LLVMTypeRef arg_types[7];
734   LLVMTypeRef func_type;
735   LLVMValueRef context_ptr;
736   LLVMBasicBlockRef block;
737   LLVMBuilderRef builder;
738   LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr;
739   LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
740   struct draw_context *draw = llvm->draw;
741   unsigned i, j;
742   struct lp_build_context bld;
743   struct lp_build_context bld_int;
744   struct lp_build_loop_state lp_loop;
745   struct lp_type vs_type = lp_type_float_vec(32);
746   const int max_vertices = 4;
747   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
748   LLVMValueRef fetch_max;
749   void *code;
750
751   arg_types[0] = llvm->context_ptr_type;               /* context */
752   arg_types[1] = llvm->vertex_header_ptr_type;         /* vertex_header */
753   arg_types[2] = llvm->buffer_ptr_type;                /* vbuffers */
754   arg_types[3] = LLVMPointerType(LLVMInt32Type(), 0);  /* fetch_elts * */
755   arg_types[4] = LLVMInt32Type();                      /* fetch_count */
756   arg_types[5] = LLVMInt32Type();                      /* stride */
757   arg_types[6] = llvm->vb_ptr_type;                    /* pipe_vertex_buffer's */
758
759   func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
760
761   variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type);
762   LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv);
763   for(i = 0; i < Elements(arg_types); ++i)
764      if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
765         LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), LLVMNoAliasAttribute);
766
767   context_ptr  = LLVMGetParam(variant->function_elts, 0);
768   io_ptr       = LLVMGetParam(variant->function_elts, 1);
769   vbuffers_ptr = LLVMGetParam(variant->function_elts, 2);
770   fetch_elts   = LLVMGetParam(variant->function_elts, 3);
771   fetch_count  = LLVMGetParam(variant->function_elts, 4);
772   stride       = LLVMGetParam(variant->function_elts, 5);
773   vb_ptr       = LLVMGetParam(variant->function_elts, 6);
774
775   lp_build_name(context_ptr, "context");
776   lp_build_name(io_ptr, "io");
777   lp_build_name(vbuffers_ptr, "vbuffers");
778   lp_build_name(fetch_elts, "fetch_elts");
779   lp_build_name(fetch_count, "fetch_count");
780   lp_build_name(stride, "stride");
781   lp_build_name(vb_ptr, "vb");
782
783   /*
784    * Function body
785    */
786
787   block = LLVMAppendBasicBlock(variant->function_elts, "entry");
788   builder = LLVMCreateBuilder();
789   LLVMPositionBuilderAtEnd(builder, block);
790
791   lp_build_context_init(&bld, builder, vs_type);
792   lp_build_context_init(&bld_int, builder, lp_type_int(32));
793
794   step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
795
796   fetch_max = LLVMBuildSub(builder, fetch_count,
797                            LLVMConstInt(LLVMInt32Type(), 1, 0),
798                            "fetch_max");
799
800   lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop);
801   {
802      LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
803      LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
804      LLVMValueRef io;
805      const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
806
807      io_itr = lp_loop.counter;
808      io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
809#if DEBUG_STORE
810      lp_build_printf(builder, " --- io %d = %p, loop counter %d\n",
811                      io_itr, io, lp_loop.counter);
812#endif
813      for (i = 0; i < NUM_CHANNELS; ++i) {
814         LLVMValueRef true_index = LLVMBuildAdd(
815            builder,
816            lp_loop.counter,
817            LLVMConstInt(LLVMInt32Type(), i, 0), "");
818         LLVMValueRef fetch_ptr;
819
820         /* make sure we're not out of bounds which can happen
821          * if fetch_count % 4 != 0, because on the last iteration
822          * a few of the 4 vertex fetches will be out of bounds */
823         true_index = lp_build_min(&bld_int, true_index, fetch_max);
824
825         fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
826                                  &true_index, 1, "");
827         true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt");
828         for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
829            struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
830            LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(),
831                                                 velem->vertex_buffer_index,
832                                                 0);
833            LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
834                                           &vb_index, 1, "");
835            generate_fetch(builder, vbuffers_ptr,
836                           &aos_attribs[j][i], velem, vb, true_index);
837         }
838      }
839      convert_to_soa(builder, aos_attribs, inputs,
840                     draw->pt.nr_vertex_elements);
841
842      ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs;
843      generate_vs(llvm,
844                  builder,
845                  outputs,
846                  ptr_aos,
847                  context_ptr);
848
849      convert_to_aos(builder, io, outputs,
850                     draw->vs.vertex_shader->info.num_outputs,
851                     max_vertices);
852   }
853   lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop);
854
855   LLVMBuildRetVoid(builder);
856
857   LLVMDisposeBuilder(builder);
858
859   /*
860    * Translate the LLVM IR into machine code.
861    */
862#ifdef DEBUG
863   if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) {
864      lp_debug_dump_value(variant->function_elts);
865      assert(0);
866   }
867#endif
868
869   LLVMRunFunctionPassManager(llvm->pass, variant->function_elts);
870
871   if (gallivm_debug & GALLIVM_DEBUG_IR) {
872      lp_debug_dump_value(variant->function_elts);
873      debug_printf("\n");
874   }
875
876   code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function_elts);
877   variant->jit_func_elts = (draw_jit_vert_func_elts)pointer_to_func(code);
878
879   if (gallivm_debug & GALLIVM_DEBUG_ASM) {
880      lp_disassemble(code);
881   }
882}
883
884void
885draw_llvm_make_variant_key(struct draw_llvm *llvm,
886                           struct draw_llvm_variant_key *key)
887{
888   memset(key, 0, sizeof(struct draw_llvm_variant_key));
889
890   key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements;
891
892   memcpy(key->vertex_element,
893          llvm->draw->pt.vertex_element,
894          sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
895
896   memcpy(&key->vs,
897          &llvm->draw->vs.vertex_shader->state,
898          sizeof(struct pipe_shader_state));
899}
900