draw_llvm.c revision 08f890d4c3b8376d1840f90474f7c56329432d95
1/**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include "draw_llvm.h"
29
30#include "draw_context.h"
31#include "draw_vs.h"
32
33#include "gallivm/lp_bld_arit.h"
34#include "gallivm/lp_bld_logic.h"
35#include "gallivm/lp_bld_const.h"
36#include "gallivm/lp_bld_swizzle.h"
37#include "gallivm/lp_bld_struct.h"
38#include "gallivm/lp_bld_type.h"
39#include "gallivm/lp_bld_flow.h"
40#include "gallivm/lp_bld_debug.h"
41#include "gallivm/lp_bld_tgsi.h"
42#include "gallivm/lp_bld_printf.h"
43#include "gallivm/lp_bld_intr.h"
44#include "gallivm/lp_bld_init.h"
45
46#include "tgsi/tgsi_exec.h"
47#include "tgsi/tgsi_dump.h"
48
49#include "util/u_cpu_detect.h"
50#include "util/u_pointer.h"
51#include "util/u_string.h"
52
53#include <llvm-c/Transforms/Scalar.h>
54
55#define DEBUG_STORE 0
56
/*
 * Forward declarations of the file-local JIT generators that
 * draw_llvm_create_variant() invokes for each new variant.
 */

/* generates the draw jit function */
static void draw_llvm_generate(struct draw_llvm *llvm,
                               struct draw_llvm_variant *var);

/* NOTE(review): presumably the indexed (elts) flavour of the same function
 * -- confirm against the definition. */
static void draw_llvm_generate_elts(struct draw_llvm *llvm,
                                    struct draw_llvm_variant *var);
62
63static void
64init_globals(struct draw_llvm *llvm)
65{
66   LLVMTypeRef texture_type;
67
68   /* struct draw_jit_texture */
69   {
70      LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS];
71
72      elem_types[DRAW_JIT_TEXTURE_WIDTH]  = LLVMInt32Type();
73      elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type();
74      elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type();
75      elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type();
76      elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] =
77         LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS);
78      elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] =
79         LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS);
80      elem_types[DRAW_JIT_TEXTURE_DATA] =
81         LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0),
82                       DRAW_MAX_TEXTURE_LEVELS);
83      elem_types[DRAW_JIT_TEXTURE_MIN_LOD] = LLVMFloatType();
84      elem_types[DRAW_JIT_TEXTURE_MAX_LOD] = LLVMFloatType();
85      elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType();
86      elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] =
87         LLVMArrayType(LLVMFloatType(), 4);
88
89      texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);
90
91      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width,
92                             llvm->target, texture_type,
93                             DRAW_JIT_TEXTURE_WIDTH);
94      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height,
95                             llvm->target, texture_type,
96                             DRAW_JIT_TEXTURE_HEIGHT);
97      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth,
98                             llvm->target, texture_type,
99                             DRAW_JIT_TEXTURE_DEPTH);
100      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level,
101                             llvm->target, texture_type,
102                             DRAW_JIT_TEXTURE_LAST_LEVEL);
103      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride,
104                             llvm->target, texture_type,
105                             DRAW_JIT_TEXTURE_ROW_STRIDE);
106      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride,
107                             llvm->target, texture_type,
108                             DRAW_JIT_TEXTURE_IMG_STRIDE);
109      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data,
110                             llvm->target, texture_type,
111                             DRAW_JIT_TEXTURE_DATA);
112      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, min_lod,
113                             llvm->target, texture_type,
114                             DRAW_JIT_TEXTURE_MIN_LOD);
115      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, max_lod,
116                             llvm->target, texture_type,
117                             DRAW_JIT_TEXTURE_MAX_LOD);
118      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, lod_bias,
119                             llvm->target, texture_type,
120                             DRAW_JIT_TEXTURE_LOD_BIAS);
121      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, border_color,
122                             llvm->target, texture_type,
123                             DRAW_JIT_TEXTURE_BORDER_COLOR);
124      LP_CHECK_STRUCT_SIZE(struct draw_jit_texture,
125                           llvm->target, texture_type);
126
127      LLVMAddTypeName(llvm->module, "texture", texture_type);
128   }
129
130
131   /* struct draw_jit_context */
132   {
133      LLVMTypeRef elem_types[4];
134      LLVMTypeRef context_type;
135
136      elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
137      elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* gs_constants */
138      elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(LLVMFloatType(), 4), 12), 0); /* planes */
139      elem_types[3] = LLVMArrayType(texture_type,
140                                    PIPE_MAX_VERTEX_SAMPLERS); /* textures */
141
142      context_type = LLVMStructType(elem_types, Elements(elem_types), 0);
143
144      LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
145                             llvm->target, context_type, 0);
146      LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants,
147                             llvm->target, context_type, 1);
148      LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes,
149                             llvm->target, context_type, 2);
150      LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,
151                             llvm->target, context_type,
152                             DRAW_JIT_CTX_TEXTURES);
153      LP_CHECK_STRUCT_SIZE(struct draw_jit_context,
154                           llvm->target, context_type);
155
156      LLVMAddTypeName(llvm->module, "draw_jit_context", context_type);
157
158      llvm->context_ptr_type = LLVMPointerType(context_type, 0);
159   }
160   {
161      LLVMTypeRef buffer_ptr = LLVMPointerType(LLVMIntType(8), 0);
162      llvm->buffer_ptr_type = LLVMPointerType(buffer_ptr, 0);
163   }
164   /* struct pipe_vertex_buffer */
165   {
166      LLVMTypeRef elem_types[4];
167      LLVMTypeRef vb_type;
168
169      elem_types[0] = LLVMInt32Type();
170      elem_types[1] = LLVMInt32Type();
171      elem_types[2] = LLVMInt32Type();
172      elem_types[3] = LLVMPointerType(LLVMOpaqueType(), 0); /* vs_constants */
173
174      vb_type = LLVMStructType(elem_types, Elements(elem_types), 0);
175
176      LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride,
177                             llvm->target, vb_type, 0);
178      LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset,
179                             llvm->target, vb_type, 2);
180      LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer,
181                           llvm->target, vb_type);
182
183      LLVMAddTypeName(llvm->module, "pipe_vertex_buffer", vb_type);
184
185      llvm->vb_ptr_type = LLVMPointerType(vb_type, 0);
186   }
187}
188
189static LLVMTypeRef
190create_vertex_header(struct draw_llvm *llvm, int data_elems)
191{
192   /* struct vertex_header */
193   LLVMTypeRef elem_types[3];
194   LLVMTypeRef vertex_header;
195   char struct_name[24];
196
197   util_snprintf(struct_name, 23, "vertex_header%d", data_elems);
198
199   elem_types[0]  = LLVMIntType(32);
200   elem_types[1]  = LLVMArrayType(LLVMFloatType(), 4);
201   elem_types[2]  = LLVMArrayType(elem_types[1], data_elems);
202
203   vertex_header = LLVMStructType(elem_types, Elements(elem_types), 0);
204
205   /* these are bit-fields and we can't take address of them
206      LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
207      llvm->target, vertex_header,
208      DRAW_JIT_VERTEX_CLIPMASK);
209      LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag,
210      llvm->target, vertex_header,
211      DRAW_JIT_VERTEX_EDGEFLAG);
212      LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad,
213      llvm->target, vertex_header,
214      DRAW_JIT_VERTEX_PAD);
215      LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id,
216      llvm->target, vertex_header,
217      DRAW_JIT_VERTEX_VERTEX_ID);
218   */
219   LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip,
220                          llvm->target, vertex_header,
221                          DRAW_JIT_VERTEX_CLIP);
222   LP_CHECK_MEMBER_OFFSET(struct vertex_header, data,
223                          llvm->target, vertex_header,
224                          DRAW_JIT_VERTEX_DATA);
225
226   LLVMAddTypeName(llvm->module, struct_name, vertex_header);
227
228   return LLVMPointerType(vertex_header, 0);
229}
230
231struct draw_llvm *
232draw_llvm_create(struct draw_context *draw)
233{
234   struct draw_llvm *llvm;
235
236   llvm = CALLOC_STRUCT( draw_llvm );
237   if (!llvm)
238      return NULL;
239
240   llvm->draw = draw;
241   llvm->engine = draw->engine;
242
243   debug_assert(llvm->engine);
244
245   llvm->module = LLVMModuleCreateWithName("draw_llvm");
246   llvm->provider = LLVMCreateModuleProviderForExistingModule(llvm->module);
247
248   LLVMAddModuleProvider(llvm->engine, llvm->provider);
249
250   llvm->target = LLVMGetExecutionEngineTargetData(llvm->engine);
251
252   llvm->pass = LLVMCreateFunctionPassManager(llvm->provider);
253   LLVMAddTargetData(llvm->target, llvm->pass);
254
255   if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) {
256      /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
257       * but there are more on SVN. */
258      /* TODO: Add more passes */
259
260      LLVMAddCFGSimplificationPass(llvm->pass);
261
262      if (HAVE_LLVM >= 0x207 && sizeof(void*) == 4) {
263         /* For LLVM >= 2.7 and 32-bit build, use this order of passes to
264          * avoid generating bad code.
265          * Test with piglit glsl-vs-sqrt-zero test.
266          */
267         LLVMAddConstantPropagationPass(llvm->pass);
268         LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
269      }
270      else {
271         LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
272         LLVMAddConstantPropagationPass(llvm->pass);
273      }
274
275      if(util_cpu_caps.has_sse4_1) {
276         /* FIXME: There is a bug in this pass, whereby the combination of fptosi
277          * and sitofp (necessary for trunc/floor/ceil/round implementation)
278          * somehow becomes invalid code.
279          */
280         LLVMAddInstructionCombiningPass(llvm->pass);
281      }
282      LLVMAddGVNPass(llvm->pass);
283   } else {
284      /* We need at least this pass to prevent the backends to fail in
285       * unexpected ways.
286       */
287      LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
288   }
289
290   init_globals(llvm);
291
292   if (gallivm_debug & GALLIVM_DEBUG_IR) {
293      LLVMDumpModule(llvm->module);
294   }
295
296   llvm->nr_variants = 0;
297   make_empty_list(&llvm->vs_variants_list);
298
299   return llvm;
300}
301
302void
303draw_llvm_destroy(struct draw_llvm *llvm)
304{
305   LLVMDisposePassManager(llvm->pass);
306
307   FREE(llvm);
308}
309
310struct draw_llvm_variant *
311draw_llvm_create_variant(struct draw_llvm *llvm,
312			 unsigned num_inputs,
313			 const struct draw_llvm_variant_key *key)
314{
315   struct draw_llvm_variant *variant;
316   struct llvm_vertex_shader *shader =
317      llvm_vertex_shader(llvm->draw->vs.vertex_shader);
318
319   variant = MALLOC(sizeof *variant +
320		    shader->variant_key_size -
321		    sizeof variant->key);
322   if (variant == NULL)
323      return NULL;
324
325   variant->llvm = llvm;
326
327   memcpy(&variant->key, key, shader->variant_key_size);
328
329   llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs);
330
331   draw_llvm_generate(llvm, variant);
332   draw_llvm_generate_elts(llvm, variant);
333
334   variant->shader = shader;
335   variant->list_item_global.base = variant;
336   variant->list_item_local.base = variant;
337   /*variant->no = */shader->variants_created++;
338   variant->list_item_global.base = variant;
339
340   return variant;
341}
342
343static void
344generate_vs(struct draw_llvm *llvm,
345            LLVMBuilderRef builder,
346            LLVMValueRef (*outputs)[NUM_CHANNELS],
347            const LLVMValueRef (*inputs)[NUM_CHANNELS],
348            LLVMValueRef context_ptr,
349            struct lp_build_sampler_soa *draw_sampler)
350{
351   const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
352   struct lp_type vs_type;
353   LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr);
354   struct lp_build_sampler_soa *sampler = 0;
355
356   memset(&vs_type, 0, sizeof vs_type);
357   vs_type.floating = TRUE; /* floating point values */
358   vs_type.sign = TRUE;     /* values are signed */
359   vs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
360   vs_type.width = 32;      /* 32-bit float */
361   vs_type.length = 4;      /* 4 elements per vector */
362#if 0
363   num_vs = 4;              /* number of vertices per block */
364#endif
365
366   if (gallivm_debug & GALLIVM_DEBUG_IR) {
367      tgsi_dump(tokens, 0);
368   }
369
370   if (llvm->draw->num_sampler_views &&
371       llvm->draw->num_samplers)
372      sampler = draw_sampler;
373
374   lp_build_tgsi_soa(builder,
375                     tokens,
376                     vs_type,
377                     NULL /*struct lp_build_mask_context *mask*/,
378                     consts_ptr,
379                     NULL /*pos*/,
380                     inputs,
381                     outputs,
382                     sampler,
383                     &llvm->draw->vs.vertex_shader->info);
384}
385
#if DEBUG_STORE
/* Debug helper: emit a printf of the four elements of a float vector. */
static void print_vectorf(LLVMBuilderRef builder,
                         LLVMValueRef vec)
{
   LLVMValueRef elem[4];
   unsigned i;

   for (i = 0; i < 4; ++i) {
      elem[i] = LLVMBuildExtractElement(builder, vec,
                                        LLVMConstInt(LLVMInt32Type(), i, 0),
                                        "");
   }

   lp_build_printf(builder, "vector = [%f, %f, %f, %f]\n",
                   elem[0], elem[1], elem[2], elem[3]);
}
#endif
403
404static void
405generate_fetch(LLVMBuilderRef builder,
406               LLVMValueRef vbuffers_ptr,
407               LLVMValueRef *res,
408               struct pipe_vertex_element *velem,
409               LLVMValueRef vbuf,
410               LLVMValueRef index,
411               LLVMValueRef instance_id)
412{
413   LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0);
414   LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr,
415                                           &indices, 1, "");
416   LLVMValueRef vb_stride = draw_jit_vbuffer_stride(builder, vbuf);
417   LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(builder, vbuf);
418   LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(builder, vbuf);
419   LLVMValueRef cond;
420   LLVMValueRef stride;
421
422   if (velem->instance_divisor) {
423      /* array index = instance_id / instance_divisor */
424      index = LLVMBuildUDiv(builder, instance_id,
425                            LLVMConstInt(LLVMInt32Type(), velem->instance_divisor, 0),
426                            "instance_divisor");
427   }
428
429   /* limit index to min(inex, vb_max_index) */
430   cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, "");
431   index = LLVMBuildSelect(builder, cond, index, vb_max_index, "");
432
433   stride = LLVMBuildMul(builder, vb_stride, index, "");
434
435   vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer");
436
437   stride = LLVMBuildAdd(builder, stride,
438                         vb_buffer_offset,
439                         "");
440   stride = LLVMBuildAdd(builder, stride,
441                         LLVMConstInt(LLVMInt32Type(), velem->src_offset, 0),
442                         "");
443
444   /*lp_build_printf(builder, "vbuf index = %d, stride is %d\n", indices, stride);*/
445   vbuffer_ptr = LLVMBuildGEP(builder, vbuffer_ptr, &stride, 1, "");
446
447   *res = draw_llvm_translate_from(builder, vbuffer_ptr, velem->src_format);
448}
449
450static LLVMValueRef
451aos_to_soa(LLVMBuilderRef builder,
452           LLVMValueRef val0,
453           LLVMValueRef val1,
454           LLVMValueRef val2,
455           LLVMValueRef val3,
456           LLVMValueRef channel)
457{
458   LLVMValueRef ex, res;
459
460   ex = LLVMBuildExtractElement(builder, val0,
461                                channel, "");
462   res = LLVMBuildInsertElement(builder,
463                                LLVMConstNull(LLVMTypeOf(val0)),
464                                ex,
465                                LLVMConstInt(LLVMInt32Type(), 0, 0),
466                                "");
467
468   ex = LLVMBuildExtractElement(builder, val1,
469                                channel, "");
470   res = LLVMBuildInsertElement(builder,
471                                res, ex,
472                                LLVMConstInt(LLVMInt32Type(), 1, 0),
473                                "");
474
475   ex = LLVMBuildExtractElement(builder, val2,
476                                channel, "");
477   res = LLVMBuildInsertElement(builder,
478                                res, ex,
479                                LLVMConstInt(LLVMInt32Type(), 2, 0),
480                                "");
481
482   ex = LLVMBuildExtractElement(builder, val3,
483                                channel, "");
484   res = LLVMBuildInsertElement(builder,
485                                res, ex,
486                                LLVMConstInt(LLVMInt32Type(), 3, 0),
487                                "");
488
489   return res;
490}
491
492static void
493soa_to_aos(LLVMBuilderRef builder,
494           LLVMValueRef soa[NUM_CHANNELS],
495           LLVMValueRef aos[NUM_CHANNELS])
496{
497   LLVMValueRef comp;
498   int i = 0;
499
500   debug_assert(NUM_CHANNELS == 4);
501
502   aos[0] = LLVMConstNull(LLVMTypeOf(soa[0]));
503   aos[1] = aos[2] = aos[3] = aos[0];
504
505   for (i = 0; i < NUM_CHANNELS; ++i) {
506      LLVMValueRef channel = LLVMConstInt(LLVMInt32Type(), i, 0);
507
508      comp = LLVMBuildExtractElement(builder, soa[i],
509                                     LLVMConstInt(LLVMInt32Type(), 0, 0), "");
510      aos[0] = LLVMBuildInsertElement(builder, aos[0], comp, channel, "");
511
512      comp = LLVMBuildExtractElement(builder, soa[i],
513                                     LLVMConstInt(LLVMInt32Type(), 1, 0), "");
514      aos[1] = LLVMBuildInsertElement(builder, aos[1], comp, channel, "");
515
516      comp = LLVMBuildExtractElement(builder, soa[i],
517                                     LLVMConstInt(LLVMInt32Type(), 2, 0), "");
518      aos[2] = LLVMBuildInsertElement(builder, aos[2], comp, channel, "");
519
520      comp = LLVMBuildExtractElement(builder, soa[i],
521                                     LLVMConstInt(LLVMInt32Type(), 3, 0), "");
522      aos[3] = LLVMBuildInsertElement(builder, aos[3], comp, channel, "");
523
524   }
525}
526
527static void
528convert_to_soa(LLVMBuilderRef builder,
529               LLVMValueRef (*aos)[NUM_CHANNELS],
530               LLVMValueRef (*soa)[NUM_CHANNELS],
531               int num_attribs)
532{
533   int i;
534
535   debug_assert(NUM_CHANNELS == 4);
536
537   for (i = 0; i < num_attribs; ++i) {
538      LLVMValueRef val0 = aos[i][0];
539      LLVMValueRef val1 = aos[i][1];
540      LLVMValueRef val2 = aos[i][2];
541      LLVMValueRef val3 = aos[i][3];
542
543      soa[i][0] = aos_to_soa(builder, val0, val1, val2, val3,
544                             LLVMConstInt(LLVMInt32Type(), 0, 0));
545      soa[i][1] = aos_to_soa(builder, val0, val1, val2, val3,
546                             LLVMConstInt(LLVMInt32Type(), 1, 0));
547      soa[i][2] = aos_to_soa(builder, val0, val1, val2, val3,
548                             LLVMConstInt(LLVMInt32Type(), 2, 0));
549      soa[i][3] = aos_to_soa(builder, val0, val1, val2, val3,
550                             LLVMConstInt(LLVMInt32Type(), 3, 0));
551   }
552}
553
554static void
555store_aos(LLVMBuilderRef builder,
556          LLVMValueRef io_ptr,
557          LLVMValueRef index,
558          LLVMValueRef value,
559          LLVMValueRef clipmask)
560{
561   LLVMValueRef id_ptr = draw_jit_header_id(builder, io_ptr);
562   LLVMValueRef data_ptr = draw_jit_header_data(builder, io_ptr);
563   LLVMValueRef indices[3];
564   LLVMValueRef val, shift;
565
566   indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
567   indices[1] = index;
568   indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0);
569
570   /* initialize vertex id:16 = 0xffff, pad:3 = 0, edgeflag:1 = 1 */
571   val = LLVMConstInt(LLVMInt32Type(), 0xffff1, 0);
572   shift  = LLVMConstInt(LLVMInt32Type(), 12, 0);
573   val = LLVMBuildShl(builder, val, shift, "");
574   /* add clipmask:12 */
575   val = LLVMBuildOr(builder, val, clipmask, "");
576
577   /* store vertex header */
578   LLVMBuildStore(builder, val, id_ptr);
579
580
581#if DEBUG_STORE
582   lp_build_printf(builder, "    ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr);
583#endif
584#if 0
585   /*lp_build_printf(builder, " ---- %p storing at %d (%p)  ", io_ptr, index, data_ptr);
586     print_vectorf(builder, value);*/
587   data_ptr = LLVMBuildBitCast(builder, data_ptr,
588                               LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatType(), 4), 0), 0),
589                               "datavec");
590   data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 2, "");
591
592   LLVMBuildStore(builder, value, data_ptr);
593#else
594   {
595      LLVMValueRef x, y, z, w;
596      LLVMValueRef idx0, idx1, idx2, idx3;
597      LLVMValueRef gep0, gep1, gep2, gep3;
598      data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, "");
599
600      idx0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
601      idx1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
602      idx2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
603      idx3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
604
605      x = LLVMBuildExtractElement(builder, value,
606                                  idx0, "");
607      y = LLVMBuildExtractElement(builder, value,
608                                  idx1, "");
609      z = LLVMBuildExtractElement(builder, value,
610                                  idx2, "");
611      w = LLVMBuildExtractElement(builder, value,
612                                  idx3, "");
613
614      gep0 = LLVMBuildGEP(builder, data_ptr, &idx0, 1, "");
615      gep1 = LLVMBuildGEP(builder, data_ptr, &idx1, 1, "");
616      gep2 = LLVMBuildGEP(builder, data_ptr, &idx2, 1, "");
617      gep3 = LLVMBuildGEP(builder, data_ptr, &idx3, 1, "");
618
619      /*lp_build_printf(builder, "##### x = %f (%p), y = %f (%p), z = %f (%p), w = %f (%p)\n",
620        x, gep0, y, gep1, z, gep2, w, gep3);*/
621      LLVMBuildStore(builder, x, gep0);
622      LLVMBuildStore(builder, y, gep1);
623      LLVMBuildStore(builder, z, gep2);
624      LLVMBuildStore(builder, w, gep3);
625   }
626#endif
627}
628
629static void
630store_aos_array(LLVMBuilderRef builder,
631                LLVMValueRef io_ptr,
632                LLVMValueRef aos[NUM_CHANNELS],
633                int attrib,
634                int num_outputs,
635                LLVMValueRef clipmask)
636{
637   LLVMValueRef attr_index = LLVMConstInt(LLVMInt32Type(), attrib, 0);
638   LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
639   LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
640   LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
641   LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
642   LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr;
643   LLVMValueRef clipmask0, clipmask1, clipmask2, clipmask3;
644
645   debug_assert(NUM_CHANNELS == 4);
646
647   io0_ptr = LLVMBuildGEP(builder, io_ptr,
648                          &ind0, 1, "");
649   io1_ptr = LLVMBuildGEP(builder, io_ptr,
650                          &ind1, 1, "");
651   io2_ptr = LLVMBuildGEP(builder, io_ptr,
652                          &ind2, 1, "");
653   io3_ptr = LLVMBuildGEP(builder, io_ptr,
654                          &ind3, 1, "");
655
656   clipmask0 = LLVMBuildExtractElement(builder, clipmask,
657                                       ind0, "");
658   clipmask1 = LLVMBuildExtractElement(builder, clipmask,
659                                       ind1, "");
660   clipmask2 = LLVMBuildExtractElement(builder, clipmask,
661                                       ind2, "");
662   clipmask3 = LLVMBuildExtractElement(builder, clipmask,
663                                       ind3, "");
664
665#if DEBUG_STORE
666   lp_build_printf(builder, "io = %p, indexes[%d, %d, %d, %d]\n, clipmask0 = %x, clipmask1 = %x, clipmask2 = %x, clipmask3 = %x\n",
667                   io_ptr, ind0, ind1, ind2, ind3, clipmask0, clipmask1, clipmask2, clipmask3);
668#endif
669   /* store for each of the 4 vertices */
670   store_aos(builder, io0_ptr, attr_index, aos[0], clipmask0);
671   store_aos(builder, io1_ptr, attr_index, aos[1], clipmask1);
672   store_aos(builder, io2_ptr, attr_index, aos[2], clipmask2);
673   store_aos(builder, io3_ptr, attr_index, aos[3], clipmask3);
674}
675
676static void
677convert_to_aos(LLVMBuilderRef builder,
678               LLVMValueRef io,
679               LLVMValueRef (*outputs)[NUM_CHANNELS],
680               LLVMValueRef clipmask,
681               int num_outputs,
682               int max_vertices)
683{
684   unsigned chan, attrib;
685
686#if DEBUG_STORE
687   lp_build_printf(builder, "   # storing begin\n");
688#endif
689   for (attrib = 0; attrib < num_outputs; ++attrib) {
690      LLVMValueRef soa[4];
691      LLVMValueRef aos[4];
692      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
693         if(outputs[attrib][chan]) {
694            LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
695            lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);
696            /*lp_build_printf(builder, "output %d : %d ",
697                            LLVMConstInt(LLVMInt32Type(), attrib, 0),
698                            LLVMConstInt(LLVMInt32Type(), chan, 0));
699              print_vectorf(builder, out);*/
700            soa[chan] = out;
701         } else
702            soa[chan] = 0;
703      }
704      soa_to_aos(builder, soa, aos);
705      store_aos_array(builder,
706                      io,
707                      aos,
708                      attrib,
709                      num_outputs,
710                      clipmask);
711   }
712#if DEBUG_STORE
713   lp_build_printf(builder, "   # storing end\n");
714#endif
715}
716
717/*
718 * Stores original vertex positions in clip coordinates
719 * There is probably a more efficient way to do this, 4 floats at once
720 * rather than extracting each element one by one.
721 */
722static void
723store_clip(LLVMBuilderRef builder,
724           LLVMValueRef io_ptr,
725           LLVMValueRef (*outputs)[NUM_CHANNELS])
726{
727   LLVMValueRef out[4];
728   LLVMValueRef indices[2];
729   LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr;
730   LLVMValueRef clip_ptr0, clip_ptr1, clip_ptr2, clip_ptr3;
731   LLVMValueRef clip0_ptr, clip1_ptr, clip2_ptr, clip3_ptr;
732   LLVMValueRef out0elem, out1elem, out2elem, out3elem;
733
734   LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
735   LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
736   LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
737   LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
738
739   indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
740   indices[1] = LLVMConstInt(LLVMInt32Type(), 0, 0);
741
742   out[0] = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/
743   out[1] = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/
744   out[2] = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/
745   out[3] = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/
746
747   io0_ptr = LLVMBuildGEP(builder, io_ptr, &ind0, 1, "");
748   io1_ptr = LLVMBuildGEP(builder, io_ptr, &ind1, 1, "");
749   io2_ptr = LLVMBuildGEP(builder, io_ptr, &ind2, 1, "");
750   io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, "");
751
752   clip_ptr0 = draw_jit_header_clip(builder, io0_ptr);
753   clip_ptr1 = draw_jit_header_clip(builder, io1_ptr);
754   clip_ptr2 = draw_jit_header_clip(builder, io2_ptr);
755   clip_ptr3 = draw_jit_header_clip(builder, io3_ptr);
756
757   for (int i = 0; i<4; i++){
758      clip0_ptr = LLVMBuildGEP(builder, clip_ptr0,
759                               indices, 2, ""); //x0
760      clip1_ptr = LLVMBuildGEP(builder, clip_ptr1,
761                               indices, 2, ""); //x1
762      clip2_ptr = LLVMBuildGEP(builder, clip_ptr2,
763                               indices, 2, ""); //x2
764      clip3_ptr = LLVMBuildGEP(builder, clip_ptr3,
765                               indices, 2, ""); //x3
766
767      out0elem = LLVMBuildExtractElement(builder, out[i],
768                                         ind0, ""); //x0
769      out1elem = LLVMBuildExtractElement(builder, out[i],
770                                         ind1, ""); //x1
771      out2elem = LLVMBuildExtractElement(builder, out[i],
772                                         ind2, ""); //x2
773      out3elem = LLVMBuildExtractElement(builder, out[i],
774                                         ind3, ""); //x3
775
776      LLVMBuildStore(builder, out0elem, clip0_ptr);
777      LLVMBuildStore(builder, out1elem, clip1_ptr);
778      LLVMBuildStore(builder, out2elem, clip2_ptr);
779      LLVMBuildStore(builder, out3elem, clip3_ptr);
780
781      indices[1]= LLVMBuildAdd(builder, indices[1], ind1, "");
782   }
783
784}
785
786/*
787 * Transforms the outputs for viewport mapping
788 */
789static void
790generate_viewport(struct draw_llvm *llvm,
791                  LLVMBuilderRef builder,
792                  LLVMValueRef (*outputs)[NUM_CHANNELS])
793{
794   int i;
795   const float *scaleA = llvm->draw->viewport.scale;
796   const float *transA = llvm->draw->viewport.translate;
797   struct lp_type f32_type = lp_type_float_vec(32);
798   LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/
799   LLVMValueRef const1 = lp_build_const_vec(f32_type, 1.0);       /*1.0 1.0 1.0 1.0*/
800
801   /* for 1/w convention*/
802   out3 = LLVMBuildFDiv(builder, const1, out3, "");
803
804   /* Viewport Mapping */
805   for (i=0; i<4; i++){
806      LLVMValueRef out = LLVMBuildLoad(builder, outputs[0][i], ""); /*x0 x1 x2 x3*/
807      LLVMValueRef scale = lp_build_const_vec(f32_type, scaleA[i]); /*sx sx sx sx*/
808      LLVMValueRef trans = lp_build_const_vec(f32_type, transA[i]); /*tx tx tx tx*/
809
810      /* divide by w */
811      out = LLVMBuildMul(builder, out, out3, "");
812      /* mult by scale */
813      out = LLVMBuildMul(builder, out, scale, "");
814      /* add translation */
815      out = LLVMBuildAdd(builder, out, trans, "");
816
817      /* store transformed outputs */
818      LLVMBuildStore(builder, out, outputs[0][i]);
819   }
820
821}
822
823/* Equivalent of _mm_set1_ps(a)
824 */
825static LLVMValueRef vec4f_from_scalar(LLVMBuilderRef bld,
826				      LLVMValueRef a,
827				      const char *name)
828{
829   LLVMValueRef res = LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4));
830   int i;
831
832   for(i = 0; i < 4; ++i) {
833      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
834      res = LLVMBuildInsertElement(bld, res, a, index, i == 3 ? name : "");
835   }
836
837   return res;
838}
839
840/*
841 * Returns clipmask as 4xi32 bitmask for the 4 vertices
842 */
843static LLVMValueRef
844generate_clipmask(LLVMBuilderRef builder,
845                  LLVMValueRef (*outputs)[NUM_CHANNELS],
846                  boolean clip_xy,
847                  boolean clip_z,
848                  boolean clip_user,
849                  boolean enable_d3dclipping,
850                  unsigned nr,
851                  LLVMValueRef context_ptr)
852{
853   LLVMValueRef mask; /* stores the <4xi32> clipmasks */
854   LLVMValueRef test, temp;
855   LLVMValueRef zero, shift;
856   LLVMValueRef pos_x, pos_y, pos_z, pos_w;
857   LLVMValueRef plane1, planes, plane_ptr, sum;
858
859   unsigned i;
860
861   struct lp_type f32_type = lp_type_float_vec(32);
862
863   zero = lp_build_const_vec(f32_type, 0);                    /* 0.0f 0.0f 0.0f 0.0f */
864   shift = lp_build_const_int_vec(lp_type_int_vec(32), 1);    /* 1 1 1 1 */
865
866   /* Assuming position stored at output[0] */
867   pos_x = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/
868   pos_y = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/
869   pos_z = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/
870   pos_w = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/
871
872   /* Cliptest, for hardwired planes */
873   if (clip_xy){
874      /* plane 1 */
875      test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w);
876      temp = shift;
877      test = LLVMBuildAnd(builder, test, temp, "");
878      mask = test;
879
880      /* plane 2 */
881      test = LLVMBuildFAdd(builder, pos_x, pos_w, "");
882      test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test);
883      temp = LLVMBuildShl(builder, temp, shift, "");
884      test = LLVMBuildAnd(builder, test, temp, "");
885      mask = LLVMBuildOr(builder, mask, test, "");
886
887      /* plane 3 */
888      test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w);
889      temp = LLVMBuildShl(builder, temp, shift, "");
890      test = LLVMBuildAnd(builder, test, temp, "");
891      mask = LLVMBuildOr(builder, mask, test, "");
892
893      /* plane 4 */
894      test = LLVMBuildFAdd(builder, pos_y, pos_w, "");
895      test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test);
896      temp = LLVMBuildShl(builder, temp, shift, "");
897      test = LLVMBuildAnd(builder, test, temp, "");
898      mask = LLVMBuildOr(builder, mask, test, "");
899   }
900
901   if (clip_z){
902      if (enable_d3dclipping){
903         /* plane 5 */
904         test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, pos_z);
905         temp = LLVMBuildShl(builder, temp, shift, "");
906         test = LLVMBuildAnd(builder, test, temp, "");
907         mask = LLVMBuildOr(builder, mask, test, "");
908      }
909      else{
910         /* plane 5 */
911         test = LLVMBuildFAdd(builder, pos_z, pos_w, "");
912         test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test);
913         temp = LLVMBuildShl(builder, temp, shift, "");
914         test = LLVMBuildAnd(builder, test, temp, "");
915         mask = LLVMBuildOr(builder, mask, test, "");
916      }
917      /* plane 6 */
918      test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w);
919      temp = LLVMBuildShl(builder, temp, shift, "");
920      test = LLVMBuildAnd(builder, test, temp, "");
921      mask = LLVMBuildOr(builder, mask, test, "");
922   }
923
924   if (clip_user){
925      LLVMValueRef planes_ptr = draw_jit_context_planes(builder, context_ptr);
926      LLVMValueRef indices[3];
927
928      /* userclip planes */
929      for (i = 6; i < nr; i++) {
930         indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
931         indices[1] = LLVMConstInt(LLVMInt32Type(), i, 0);
932
933         indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0);
934         plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
935         plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x");
936         planes = vec4f_from_scalar(builder, plane1, "plane4_x");
937         sum = LLVMBuildMul(builder, planes, pos_x, "");
938
939         indices[2] = LLVMConstInt(LLVMInt32Type(), 1, 0);
940         plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
941         plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y");
942         planes = vec4f_from_scalar(builder, plane1, "plane4_y");
943         test = LLVMBuildMul(builder, planes, pos_y, "");
944         sum = LLVMBuildFAdd(builder, sum, test, "");
945
946         indices[2] = LLVMConstInt(LLVMInt32Type(), 2, 0);
947         plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
948         plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z");
949         planes = vec4f_from_scalar(builder, plane1, "plane4_z");
950         test = LLVMBuildMul(builder, planes, pos_z, "");
951         sum = LLVMBuildFAdd(builder, sum, test, "");
952
953         indices[2] = LLVMConstInt(LLVMInt32Type(), 3, 0);
954         plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
955         plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w");
956         planes = vec4f_from_scalar(builder, plane1, "plane4_w");
957         test = LLVMBuildMul(builder, planes, pos_w, "");
958         sum = LLVMBuildFAdd(builder, sum, test, "");
959
960         test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, sum);
961         temp = LLVMBuildShl(builder, temp, shift, "");
962         test = LLVMBuildAnd(builder, test, temp, "");
963         mask = LLVMBuildOr(builder, mask, test, "");
964      }
965   }
966   return mask;
967}
968
969/*
970 * Returns boolean if any clipping has occurred
971 * Used zero/non-zero i32 value to represent boolean
972 */
973static void
974clipmask_bool(LLVMBuilderRef builder,
975              LLVMValueRef clipmask,
976              LLVMValueRef ret_ptr)
977{
978   LLVMValueRef ret = LLVMBuildLoad(builder, ret_ptr, "");
979   LLVMValueRef temp;
980   int i;
981
982   LLVMDumpValue(clipmask);
983
984   for (i=0; i<4; i++){
985      temp = LLVMBuildExtractElement(builder, clipmask,
986                                     LLVMConstInt(LLVMInt32Type(), i, 0) , "");
987      ret = LLVMBuildOr(builder, ret, temp, "");
988      LLVMDumpValue(ret);
989   }
990
991   LLVMBuildStore(builder, ret, ret_ptr);
992   LLVMDumpValue(ret_ptr);
993
994}
995
996static void
997draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
998{
999   LLVMTypeRef arg_types[8];
1000   LLVMTypeRef func_type;
1001   LLVMValueRef context_ptr;
1002   LLVMBasicBlockRef block;
1003   LLVMBuilderRef builder;
1004   LLVMValueRef start, end, count, stride, step, io_itr;
1005   LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
1006   LLVMValueRef instance_id;
1007   struct draw_context *draw = llvm->draw;
1008   unsigned i, j;
1009   struct lp_build_context bld;
1010   struct lp_build_loop_state lp_loop;
1011   const int max_vertices = 4;
1012   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
1013   void *code;
1014   struct lp_build_sampler_soa *sampler = 0;
1015   LLVMValueRef ret, ret_ptr;
1016   boolean bypass_viewport = variant->key.bypass_viewport;
1017   boolean enable_cliptest = variant->key.clip_xy ||
1018                             variant->key.clip_z  ||
1019                             variant->key.clip_user;
1020
1021   arg_types[0] = llvm->context_ptr_type;           /* context */
1022   arg_types[1] = llvm->vertex_header_ptr_type;     /* vertex_header */
1023   arg_types[2] = llvm->buffer_ptr_type;            /* vbuffers */
1024   arg_types[3] = LLVMInt32Type();                  /* start */
1025   arg_types[4] = LLVMInt32Type();                  /* count */
1026   arg_types[5] = LLVMInt32Type();                  /* stride */
1027   arg_types[6] = llvm->vb_ptr_type;                /* pipe_vertex_buffer's */
1028   arg_types[7] = LLVMInt32Type();                  /* instance_id */
1029
1030   func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0);
1031
1032   variant->function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type);
1033   LLVMSetFunctionCallConv(variant->function, LLVMCCallConv);
1034   for(i = 0; i < Elements(arg_types); ++i)
1035      if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
1036         LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute);
1037
1038   context_ptr  = LLVMGetParam(variant->function, 0);
1039   io_ptr       = LLVMGetParam(variant->function, 1);
1040   vbuffers_ptr = LLVMGetParam(variant->function, 2);
1041   start        = LLVMGetParam(variant->function, 3);
1042   count        = LLVMGetParam(variant->function, 4);
1043   stride       = LLVMGetParam(variant->function, 5);
1044   vb_ptr       = LLVMGetParam(variant->function, 6);
1045   instance_id  = LLVMGetParam(variant->function, 7);
1046
1047   lp_build_name(context_ptr, "context");
1048   lp_build_name(io_ptr, "io");
1049   lp_build_name(vbuffers_ptr, "vbuffers");
1050   lp_build_name(start, "start");
1051   lp_build_name(count, "count");
1052   lp_build_name(stride, "stride");
1053   lp_build_name(vb_ptr, "vb");
1054   lp_build_name(instance_id, "instance_id");
1055
1056   /*
1057    * Function body
1058    */
1059
1060   block = LLVMAppendBasicBlock(variant->function, "entry");
1061   builder = LLVMCreateBuilder();
1062   LLVMPositionBuilderAtEnd(builder, block);
1063
1064   lp_build_context_init(&bld, builder, lp_type_int(32));
1065
1066   end = lp_build_add(&bld, start, count);
1067
1068   step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
1069
1070   /* function will return non-zero i32 value if any clipped vertices */
1071   ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), "");
1072   LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr);
1073
1074   /* code generated texture sampling */
1075   sampler = draw_llvm_sampler_soa_create(
1076      draw_llvm_variant_key_samplers(&variant->key),
1077      context_ptr);
1078
1079#if DEBUG_STORE
1080   lp_build_printf(builder, "start = %d, end = %d, step = %d\n",
1081                   start, end, step);
1082#endif
1083   lp_build_loop_begin(builder, start, &lp_loop);
1084   {
1085      LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
1086      LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
1087      LLVMValueRef io;
1088      LLVMValueRef clipmask;   /* holds the clipmask value */
1089      const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
1090
1091      io_itr = LLVMBuildSub(builder, lp_loop.counter, start, "");
1092      io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
1093#if DEBUG_STORE
1094      lp_build_printf(builder, " --- io %d = %p, loop counter %d\n",
1095                      io_itr, io, lp_loop.counter);
1096#endif
1097      for (i = 0; i < NUM_CHANNELS; ++i) {
1098         LLVMValueRef true_index = LLVMBuildAdd(
1099            builder,
1100            lp_loop.counter,
1101            LLVMConstInt(LLVMInt32Type(), i, 0), "");
1102         for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
1103            struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
1104            LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(),
1105                                                 velem->vertex_buffer_index,
1106                                                 0);
1107            LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
1108                                           &vb_index, 1, "");
1109            generate_fetch(builder, vbuffers_ptr,
1110                           &aos_attribs[j][i], velem, vb, true_index,
1111                           instance_id);
1112         }
1113      }
1114      convert_to_soa(builder, aos_attribs, inputs,
1115                     draw->pt.nr_vertex_elements);
1116
1117      ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs;
1118      generate_vs(llvm,
1119                  builder,
1120                  outputs,
1121                  ptr_aos,
1122                  context_ptr,
1123                  sampler);
1124
1125      /* store original positions in clip before further manipulation */
1126      store_clip(builder, io, outputs);
1127
1128      /* do cliptest */
1129      if (enable_cliptest){
1130         /* allocate clipmask, assign it integer type */
1131         clipmask = generate_clipmask(builder, outputs,
1132                                      variant->key.clip_xy,
1133                                      variant->key.clip_z,
1134                                      variant->key.clip_user,
1135                                      variant->key.enable_d3dclipping,
1136                                      variant->key.nr_planes,
1137                                      context_ptr);
1138         /* return clipping boolean value for function */
1139         clipmask_bool(builder, clipmask, ret_ptr);
1140      }
1141      else{
1142         clipmask = lp_build_const_int_vec(lp_type_int_vec(32), 0);
1143      }
1144
1145      /* do viewport mapping */
1146      if (!bypass_viewport){
1147         generate_viewport(llvm, builder, outputs);
1148      }
1149
1150      /* store clipmask in vertex header and positions in data */
1151      convert_to_aos(builder, io, outputs, clipmask,
1152                     draw->vs.vertex_shader->info.num_outputs,
1153                     max_vertices);
1154   }
1155
1156   lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop);
1157
1158   sampler->destroy(sampler);
1159
1160#ifdef PIPE_ARCH_X86
1161   /* Avoid corrupting the FPU stack on 32bit OSes. */
1162   lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
1163#endif
1164
1165   ret = LLVMBuildLoad(builder, ret_ptr,"");
1166   LLVMBuildRet(builder, ret);
1167
1168   LLVMDisposeBuilder(builder);
1169
1170   /*
1171    * Translate the LLVM IR into machine code.
1172    */
1173#ifdef DEBUG
1174   if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
1175      lp_debug_dump_value(variant->function);
1176      assert(0);
1177   }
1178#endif
1179
1180   LLVMRunFunctionPassManager(llvm->pass, variant->function);
1181
1182   if (gallivm_debug & GALLIVM_DEBUG_IR) {
1183      lp_debug_dump_value(variant->function);
1184      debug_printf("\n");
1185   }
1186
1187   code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function);
1188   variant->jit_func = (draw_jit_vert_func)pointer_to_func(code);
1189
1190   if (gallivm_debug & GALLIVM_DEBUG_ASM) {
1191      lp_disassemble(code);
1192   }
1193   lp_func_delete_body(variant->function);
1194}
1195
1196
1197static void
1198draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
1199{
1200   LLVMTypeRef arg_types[8];
1201   LLVMTypeRef func_type;
1202   LLVMValueRef context_ptr;
1203   LLVMBasicBlockRef block;
1204   LLVMBuilderRef builder;
1205   LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr;
1206   LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
1207   LLVMValueRef instance_id;
1208   struct draw_context *draw = llvm->draw;
1209   unsigned i, j;
1210   struct lp_build_context bld;
1211   struct lp_build_loop_state lp_loop;
1212   const int max_vertices = 4;
1213   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
1214   LLVMValueRef fetch_max;
1215   void *code;
1216   struct lp_build_sampler_soa *sampler = 0;
1217   LLVMValueRef ret, ret_ptr;
1218   boolean bypass_viewport = variant->key.bypass_viewport;
1219   boolean enable_cliptest = variant->key.clip_xy ||
1220                             variant->key.clip_z  ||
1221                             variant->key.clip_user;
1222
1223   arg_types[0] = llvm->context_ptr_type;               /* context */
1224   arg_types[1] = llvm->vertex_header_ptr_type;         /* vertex_header */
1225   arg_types[2] = llvm->buffer_ptr_type;                /* vbuffers */
1226   arg_types[3] = LLVMPointerType(LLVMInt32Type(), 0);  /* fetch_elts * */
1227   arg_types[4] = LLVMInt32Type();                      /* fetch_count */
1228   arg_types[5] = LLVMInt32Type();                      /* stride */
1229   arg_types[6] = llvm->vb_ptr_type;                    /* pipe_vertex_buffer's */
1230   arg_types[7] = LLVMInt32Type();                      /* instance_id */
1231
1232   func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0);
1233
1234   variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type);
1235   LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv);
1236   for(i = 0; i < Elements(arg_types); ++i)
1237      if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
1238         LLVMAddAttribute(LLVMGetParam(variant->function_elts, i),
1239                          LLVMNoAliasAttribute);
1240
1241   context_ptr  = LLVMGetParam(variant->function_elts, 0);
1242   io_ptr       = LLVMGetParam(variant->function_elts, 1);
1243   vbuffers_ptr = LLVMGetParam(variant->function_elts, 2);
1244   fetch_elts   = LLVMGetParam(variant->function_elts, 3);
1245   fetch_count  = LLVMGetParam(variant->function_elts, 4);
1246   stride       = LLVMGetParam(variant->function_elts, 5);
1247   vb_ptr       = LLVMGetParam(variant->function_elts, 6);
1248   instance_id  = LLVMGetParam(variant->function_elts, 7);
1249
1250   lp_build_name(context_ptr, "context");
1251   lp_build_name(io_ptr, "io");
1252   lp_build_name(vbuffers_ptr, "vbuffers");
1253   lp_build_name(fetch_elts, "fetch_elts");
1254   lp_build_name(fetch_count, "fetch_count");
1255   lp_build_name(stride, "stride");
1256   lp_build_name(vb_ptr, "vb");
1257   lp_build_name(instance_id, "instance_id");
1258
1259   /*
1260    * Function body
1261    */
1262
1263   block = LLVMAppendBasicBlock(variant->function_elts, "entry");
1264   builder = LLVMCreateBuilder();
1265   LLVMPositionBuilderAtEnd(builder, block);
1266
1267   lp_build_context_init(&bld, builder, lp_type_int(32));
1268
1269   step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
1270
1271   /* code generated texture sampling */
1272   sampler = draw_llvm_sampler_soa_create(
1273      draw_llvm_variant_key_samplers(&variant->key),
1274      context_ptr);
1275
1276   fetch_max = LLVMBuildSub(builder, fetch_count,
1277                            LLVMConstInt(LLVMInt32Type(), 1, 0),
1278                            "fetch_max");
1279
1280   /* function returns non-zero i32 value if any clipped vertices */
1281   ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), "");
1282   LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr);
1283
1284   lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop);
1285   {
1286      LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
1287      LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
1288      LLVMValueRef io;
1289      LLVMValueRef clipmask;   /* holds the clipmask value */
1290      const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
1291
1292      io_itr = lp_loop.counter;
1293      io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
1294#if DEBUG_STORE
1295      lp_build_printf(builder, " --- io %d = %p, loop counter %d\n",
1296                      io_itr, io, lp_loop.counter);
1297#endif
1298      for (i = 0; i < NUM_CHANNELS; ++i) {
1299         LLVMValueRef true_index = LLVMBuildAdd(
1300            builder,
1301            lp_loop.counter,
1302            LLVMConstInt(LLVMInt32Type(), i, 0), "");
1303         LLVMValueRef fetch_ptr;
1304
1305         /* make sure we're not out of bounds which can happen
1306          * if fetch_count % 4 != 0, because on the last iteration
1307          * a few of the 4 vertex fetches will be out of bounds */
1308         true_index = lp_build_min(&bld, true_index, fetch_max);
1309
1310         fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
1311                                  &true_index, 1, "");
1312         true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt");
1313         for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
1314            struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
1315            LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(),
1316                                                 velem->vertex_buffer_index,
1317                                                 0);
1318            LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
1319                                           &vb_index, 1, "");
1320            generate_fetch(builder, vbuffers_ptr,
1321                           &aos_attribs[j][i], velem, vb, true_index,
1322                           instance_id);
1323         }
1324      }
1325      convert_to_soa(builder, aos_attribs, inputs,
1326                     draw->pt.nr_vertex_elements);
1327
1328      ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs;
1329      generate_vs(llvm,
1330                  builder,
1331                  outputs,
1332                  ptr_aos,
1333                  context_ptr,
1334                  sampler);
1335
1336      /* store original positions in clip before further manipulation */
1337      store_clip(builder, io, outputs);
1338
1339      /* do cliptest */
1340      if (enable_cliptest){
1341         /* allocate clipmask, assign it integer type */
1342         clipmask = generate_clipmask(builder, outputs,
1343                                      variant->key.clip_xy,
1344                                      variant->key.clip_z,
1345                                      variant->key.clip_user,
1346                                      variant->key.enable_d3dclipping,
1347                                      variant->key.nr_planes,
1348                                      context_ptr);
1349         /* return clipping boolean value for function */
1350         clipmask_bool(builder, clipmask, ret_ptr);
1351      }
1352      else{
1353         clipmask = lp_build_const_int_vec(lp_type_int_vec(32), 0);
1354      }
1355
1356      /* do viewport mapping */
1357      if (!bypass_viewport){
1358         generate_viewport(llvm, builder, outputs);
1359      }
1360
1361      /* store clipmask in vertex header,
1362       * original positions in clip
1363       * and transformed positions in data
1364       */
1365      convert_to_aos(builder, io, outputs, clipmask,
1366                     draw->vs.vertex_shader->info.num_outputs,
1367                     max_vertices);
1368   }
1369
1370   lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop);
1371
1372   sampler->destroy(sampler);
1373
1374#ifdef PIPE_ARCH_X86
1375   /* Avoid corrupting the FPU stack on 32bit OSes. */
1376   lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
1377#endif
1378
1379   ret = LLVMBuildLoad(builder, ret_ptr,"");
1380   LLVMBuildRet(builder, ret);
1381
1382   LLVMDisposeBuilder(builder);
1383
1384   /*
1385    * Translate the LLVM IR into machine code.
1386    */
1387#ifdef DEBUG
1388   if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) {
1389      lp_debug_dump_value(variant->function_elts);
1390      assert(0);
1391   }
1392#endif
1393
1394   LLVMRunFunctionPassManager(llvm->pass, variant->function_elts);
1395
1396   if (gallivm_debug & GALLIVM_DEBUG_IR) {
1397      lp_debug_dump_value(variant->function_elts);
1398      debug_printf("\n");
1399   }
1400
1401   code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function_elts);
1402   variant->jit_func_elts = (draw_jit_vert_func_elts)pointer_to_func(code);
1403
1404   if (gallivm_debug & GALLIVM_DEBUG_ASM) {
1405      lp_disassemble(code);
1406   }
1407   lp_func_delete_body(variant->function_elts);
1408}
1409
1410
1411struct draw_llvm_variant_key *
1412draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
1413{
1414   unsigned i;
1415   struct draw_llvm_variant_key *key;
1416   struct lp_sampler_static_state *sampler;
1417
1418   key = (struct draw_llvm_variant_key *)store;
1419
1420   /* Presumably all variants of the shader should have the same
1421    * number of vertex elements - ie the number of shader inputs.
1422    */
1423   key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements;
1424
1425   /* will have to rig this up properly later */
1426   key->clip_xy = llvm->draw->clip_xy;
1427   key->clip_z = llvm->draw->clip_z;
1428   key->clip_user = llvm->draw->clip_user;
1429   key->bypass_viewport = llvm->draw->identity_viewport;
1430   key->enable_d3dclipping = (boolean)!llvm->draw->rasterizer->gl_rasterization_rules;
1431   key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
1432   key->nr_planes = llvm->draw->nr_planes;
1433
1434   /* All variants of this shader will have the same value for
1435    * nr_samplers.  Not yet trying to compact away holes in the
1436    * sampler array.
1437    */
1438   key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
1439
1440   sampler = draw_llvm_variant_key_samplers(key);
1441
1442   memcpy(key->vertex_element,
1443          llvm->draw->pt.vertex_element,
1444          sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
1445
1446   memset(sampler, 0, key->nr_samplers * sizeof *sampler);
1447
1448   for (i = 0 ; i < key->nr_samplers; i++) {
1449      lp_sampler_static_state(&sampler[i],
1450			      llvm->draw->sampler_views[i],
1451			      llvm->draw->samplers[i]);
1452   }
1453
1454   return key;
1455}
1456
1457void
1458draw_llvm_set_mapped_texture(struct draw_context *draw,
1459                             unsigned sampler_idx,
1460                             uint32_t width, uint32_t height, uint32_t depth,
1461                             uint32_t last_level,
1462                             uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
1463                             uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
1464                             const void *data[DRAW_MAX_TEXTURE_LEVELS])
1465{
1466   unsigned j;
1467   struct draw_jit_texture *jit_tex;
1468
1469   assert(sampler_idx < PIPE_MAX_VERTEX_SAMPLERS);
1470
1471
1472   jit_tex = &draw->llvm->jit_context.textures[sampler_idx];
1473
1474   jit_tex->width = width;
1475   jit_tex->height = height;
1476   jit_tex->depth = depth;
1477   jit_tex->last_level = last_level;
1478
1479   for (j = 0; j <= last_level; j++) {
1480      jit_tex->data[j] = data[j];
1481      jit_tex->row_stride[j] = row_stride[j];
1482      jit_tex->img_stride[j] = img_stride[j];
1483   }
1484}
1485
1486void
1487draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
1488{
1489   struct draw_llvm *llvm = variant->llvm;
1490   struct draw_context *draw = llvm->draw;
1491
1492   if (variant->function_elts) {
1493      if (variant->function_elts)
1494         LLVMFreeMachineCodeForFunction(draw->engine,
1495                                        variant->function_elts);
1496      LLVMDeleteFunction(variant->function_elts);
1497   }
1498
1499   if (variant->function) {
1500      if (variant->function)
1501         LLVMFreeMachineCodeForFunction(draw->engine,
1502                                        variant->function);
1503      LLVMDeleteFunction(variant->function);
1504   }
1505
1506   remove_from_list(&variant->list_item_local);
1507   variant->shader->variants_cached--;
1508   remove_from_list(&variant->list_item_global);
1509   llvm->nr_variants--;
1510   FREE(variant);
1511}
1512