draw_llvm.c revision 1d6f3543a063ab9e740fd0c149dcce26c282d773
1/**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include "draw_llvm.h"
29
30#include "draw_context.h"
31#include "draw_vs.h"
32
33#include "gallivm/lp_bld_arit.h"
34#include "gallivm/lp_bld_logic.h"
35#include "gallivm/lp_bld_const.h"
36#include "gallivm/lp_bld_swizzle.h"
37#include "gallivm/lp_bld_struct.h"
38#include "gallivm/lp_bld_type.h"
39#include "gallivm/lp_bld_flow.h"
40#include "gallivm/lp_bld_debug.h"
41#include "gallivm/lp_bld_tgsi.h"
42#include "gallivm/lp_bld_printf.h"
43#include "gallivm/lp_bld_intr.h"
44#include "gallivm/lp_bld_init.h"
45
46#include "tgsi/tgsi_exec.h"
47#include "tgsi/tgsi_dump.h"
48
49#include "util/u_math.h"
50#include "util/u_pointer.h"
51#include "util/u_string.h"
52
53#include <llvm-c/Transforms/Scalar.h>
54
55#define DEBUG_STORE 0
56
/*
 * Forward declarations of the two JIT code generators.  Both are static,
 * so their definitions live elsewhere in this translation unit; they are
 * declared here because draw_llvm_create_variant() calls them.
 */
static void draw_llvm_generate(struct draw_llvm *llvm,
                               struct draw_llvm_variant *var);
static void draw_llvm_generate_elts(struct draw_llvm *llvm,
                                    struct draw_llvm_variant *var);
62
63static void
64init_globals(struct draw_llvm *llvm)
65{
66   LLVMTypeRef texture_type;
67
68   /* struct draw_jit_texture */
69   {
70      LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS];
71
72      elem_types[DRAW_JIT_TEXTURE_WIDTH]  = LLVMInt32Type();
73      elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type();
74      elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type();
75      elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type();
76      elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] =
77         LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS);
78      elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] =
79         LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS);
80      elem_types[DRAW_JIT_TEXTURE_DATA] =
81         LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0),
82                       PIPE_MAX_TEXTURE_LEVELS);
83      elem_types[DRAW_JIT_TEXTURE_MIN_LOD] = LLVMFloatType();
84      elem_types[DRAW_JIT_TEXTURE_MAX_LOD] = LLVMFloatType();
85      elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType();
86      elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] =
87         LLVMArrayType(LLVMFloatType(), 4);
88
89      texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);
90
91      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width,
92                             llvm->target, texture_type,
93                             DRAW_JIT_TEXTURE_WIDTH);
94      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height,
95                             llvm->target, texture_type,
96                             DRAW_JIT_TEXTURE_HEIGHT);
97      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth,
98                             llvm->target, texture_type,
99                             DRAW_JIT_TEXTURE_DEPTH);
100      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level,
101                             llvm->target, texture_type,
102                             DRAW_JIT_TEXTURE_LAST_LEVEL);
103      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride,
104                             llvm->target, texture_type,
105                             DRAW_JIT_TEXTURE_ROW_STRIDE);
106      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride,
107                             llvm->target, texture_type,
108                             DRAW_JIT_TEXTURE_IMG_STRIDE);
109      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data,
110                             llvm->target, texture_type,
111                             DRAW_JIT_TEXTURE_DATA);
112      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, min_lod,
113                             llvm->target, texture_type,
114                             DRAW_JIT_TEXTURE_MIN_LOD);
115      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, max_lod,
116                             llvm->target, texture_type,
117                             DRAW_JIT_TEXTURE_MAX_LOD);
118      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, lod_bias,
119                             llvm->target, texture_type,
120                             DRAW_JIT_TEXTURE_LOD_BIAS);
121      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, border_color,
122                             llvm->target, texture_type,
123                             DRAW_JIT_TEXTURE_BORDER_COLOR);
124      LP_CHECK_STRUCT_SIZE(struct draw_jit_texture,
125                           llvm->target, texture_type);
126
127      LLVMAddTypeName(llvm->module, "texture", texture_type);
128   }
129
130
131   /* struct draw_jit_context */
132   {
133      LLVMTypeRef elem_types[5];
134      LLVMTypeRef context_type;
135
136      elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
137      elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* gs_constants */
138      elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(LLVMFloatType(), 4), 12), 0); /* planes */
139      elem_types[3] = LLVMPointerType(LLVMFloatType(), 0); /* viewport */
140      elem_types[4] = LLVMArrayType(texture_type,
141                                    PIPE_MAX_VERTEX_SAMPLERS); /* textures */
142
143      context_type = LLVMStructType(elem_types, Elements(elem_types), 0);
144
145      LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
146                             llvm->target, context_type, 0);
147      LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants,
148                             llvm->target, context_type, 1);
149      LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes,
150                             llvm->target, context_type, 2);
151      LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,
152                             llvm->target, context_type,
153                             DRAW_JIT_CTX_TEXTURES);
154      LP_CHECK_STRUCT_SIZE(struct draw_jit_context,
155                           llvm->target, context_type);
156
157      LLVMAddTypeName(llvm->module, "draw_jit_context", context_type);
158
159      llvm->context_ptr_type = LLVMPointerType(context_type, 0);
160   }
161   {
162      LLVMTypeRef buffer_ptr = LLVMPointerType(LLVMIntType(8), 0);
163      llvm->buffer_ptr_type = LLVMPointerType(buffer_ptr, 0);
164   }
165   /* struct pipe_vertex_buffer */
166   {
167      LLVMTypeRef elem_types[4];
168      LLVMTypeRef vb_type;
169
170      elem_types[0] = LLVMInt32Type();
171      elem_types[1] = LLVMInt32Type();
172      elem_types[2] = LLVMInt32Type();
173      elem_types[3] = LLVMPointerType(LLVMOpaqueType(), 0); /* vs_constants */
174
175      vb_type = LLVMStructType(elem_types, Elements(elem_types), 0);
176
177      LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride,
178                             llvm->target, vb_type, 0);
179      LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset,
180                             llvm->target, vb_type, 2);
181      LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer,
182                           llvm->target, vb_type);
183
184      LLVMAddTypeName(llvm->module, "pipe_vertex_buffer", vb_type);
185
186      llvm->vb_ptr_type = LLVMPointerType(vb_type, 0);
187   }
188}
189
190static LLVMTypeRef
191create_vertex_header(struct draw_llvm *llvm, int data_elems)
192{
193   /* struct vertex_header */
194   LLVMTypeRef elem_types[3];
195   LLVMTypeRef vertex_header;
196   char struct_name[24];
197
198   util_snprintf(struct_name, 23, "vertex_header%d", data_elems);
199
200   elem_types[0]  = LLVMIntType(32);
201   elem_types[1]  = LLVMArrayType(LLVMFloatType(), 4);
202   elem_types[2]  = LLVMArrayType(elem_types[1], data_elems);
203
204   vertex_header = LLVMStructType(elem_types, Elements(elem_types), 0);
205
206   /* these are bit-fields and we can't take address of them
207      LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
208      llvm->target, vertex_header,
209      DRAW_JIT_VERTEX_CLIPMASK);
210      LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag,
211      llvm->target, vertex_header,
212      DRAW_JIT_VERTEX_EDGEFLAG);
213      LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad,
214      llvm->target, vertex_header,
215      DRAW_JIT_VERTEX_PAD);
216      LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id,
217      llvm->target, vertex_header,
218      DRAW_JIT_VERTEX_VERTEX_ID);
219   */
220   LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip,
221                          llvm->target, vertex_header,
222                          DRAW_JIT_VERTEX_CLIP);
223   LP_CHECK_MEMBER_OFFSET(struct vertex_header, data,
224                          llvm->target, vertex_header,
225                          DRAW_JIT_VERTEX_DATA);
226
227   LLVMAddTypeName(llvm->module, struct_name, vertex_header);
228
229   return LLVMPointerType(vertex_header, 0);
230}
231
232struct draw_llvm *
233draw_llvm_create(struct draw_context *draw)
234{
235   struct draw_llvm *llvm;
236
237   llvm = CALLOC_STRUCT( draw_llvm );
238   if (!llvm)
239      return NULL;
240
241   llvm->draw = draw;
242   llvm->engine = draw->engine;
243
244   debug_assert(llvm->engine);
245
246   llvm->module = LLVMModuleCreateWithName("draw_llvm");
247   llvm->provider = LLVMCreateModuleProviderForExistingModule(llvm->module);
248
249   LLVMAddModuleProvider(llvm->engine, llvm->provider);
250
251   llvm->target = LLVMGetExecutionEngineTargetData(llvm->engine);
252
253   llvm->pass = LLVMCreateFunctionPassManager(llvm->provider);
254   LLVMAddTargetData(llvm->target, llvm->pass);
255
256   if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) {
257      /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
258       * but there are more on SVN. */
259      /* TODO: Add more passes */
260
261      LLVMAddCFGSimplificationPass(llvm->pass);
262
263      if (HAVE_LLVM >= 0x207 && sizeof(void*) == 4) {
264         /* For LLVM >= 2.7 and 32-bit build, use this order of passes to
265          * avoid generating bad code.
266          * Test with piglit glsl-vs-sqrt-zero test.
267          */
268         LLVMAddConstantPropagationPass(llvm->pass);
269         LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
270      }
271      else {
272         LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
273         LLVMAddConstantPropagationPass(llvm->pass);
274      }
275
276      LLVMAddInstructionCombiningPass(llvm->pass);
277      LLVMAddGVNPass(llvm->pass);
278   } else {
279      /* We need at least this pass to prevent the backends to fail in
280       * unexpected ways.
281       */
282      LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
283   }
284
285   init_globals(llvm);
286
287   if (gallivm_debug & GALLIVM_DEBUG_IR) {
288      LLVMDumpModule(llvm->module);
289   }
290
291   llvm->nr_variants = 0;
292   make_empty_list(&llvm->vs_variants_list);
293
294   return llvm;
295}
296
297void
298draw_llvm_destroy(struct draw_llvm *llvm)
299{
300   LLVMDisposePassManager(llvm->pass);
301
302   FREE(llvm);
303}
304
305struct draw_llvm_variant *
306draw_llvm_create_variant(struct draw_llvm *llvm,
307			 unsigned num_inputs,
308			 const struct draw_llvm_variant_key *key)
309{
310   struct draw_llvm_variant *variant;
311   struct llvm_vertex_shader *shader =
312      llvm_vertex_shader(llvm->draw->vs.vertex_shader);
313
314   variant = MALLOC(sizeof *variant +
315		    shader->variant_key_size -
316		    sizeof variant->key);
317   if (variant == NULL)
318      return NULL;
319
320   variant->llvm = llvm;
321
322   memcpy(&variant->key, key, shader->variant_key_size);
323
324   llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs);
325
326   draw_llvm_generate(llvm, variant);
327   draw_llvm_generate_elts(llvm, variant);
328
329   variant->shader = shader;
330   variant->list_item_global.base = variant;
331   variant->list_item_local.base = variant;
332   /*variant->no = */shader->variants_created++;
333   variant->list_item_global.base = variant;
334
335   return variant;
336}
337
338static void
339generate_vs(struct draw_llvm *llvm,
340            LLVMBuilderRef builder,
341            LLVMValueRef (*outputs)[NUM_CHANNELS],
342            const LLVMValueRef (*inputs)[NUM_CHANNELS],
343            LLVMValueRef system_values_array,
344            LLVMValueRef context_ptr,
345            struct lp_build_sampler_soa *draw_sampler)
346{
347   const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
348   struct lp_type vs_type;
349   LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr);
350   struct lp_build_sampler_soa *sampler = 0;
351
352   memset(&vs_type, 0, sizeof vs_type);
353   vs_type.floating = TRUE; /* floating point values */
354   vs_type.sign = TRUE;     /* values are signed */
355   vs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
356   vs_type.width = 32;      /* 32-bit float */
357   vs_type.length = 4;      /* 4 elements per vector */
358#if 0
359   num_vs = 4;              /* number of vertices per block */
360#endif
361
362   if (gallivm_debug & GALLIVM_DEBUG_IR) {
363      tgsi_dump(tokens, 0);
364   }
365
366   if (llvm->draw->num_sampler_views &&
367       llvm->draw->num_samplers)
368      sampler = draw_sampler;
369
370   lp_build_tgsi_soa(builder,
371                     tokens,
372                     vs_type,
373                     NULL /*struct lp_build_mask_context *mask*/,
374                     consts_ptr,
375                     system_values_array,
376                     NULL /*pos*/,
377                     inputs,
378                     outputs,
379                     sampler,
380                     &llvm->draw->vs.vertex_shader->info);
381}
382
#if DEBUG_STORE
/*
 * Debug helper: emit a printf call that prints the four elements of vec.
 * Only compiled when DEBUG_STORE is enabled.
 */
static void print_vectorf(LLVMBuilderRef builder,
                         LLVMValueRef vec)
{
   LLVMValueRef elems[4];
   unsigned i;

   for (i = 0; i < 4; ++i) {
      elems[i] = LLVMBuildExtractElement(builder, vec,
                                         LLVMConstInt(LLVMInt32Type(), i, 0),
                                         "");
   }

   lp_build_printf(builder, "vector = [%f, %f, %f, %f]\n",
                   elems[0], elems[1], elems[2], elems[3]);
}
#endif
400
401static void
402generate_fetch(LLVMBuilderRef builder,
403               LLVMValueRef vbuffers_ptr,
404               LLVMValueRef *res,
405               struct pipe_vertex_element *velem,
406               LLVMValueRef vbuf,
407               LLVMValueRef index,
408               LLVMValueRef instance_id)
409{
410   LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0);
411   LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr,
412                                           &indices, 1, "");
413   LLVMValueRef vb_stride = draw_jit_vbuffer_stride(builder, vbuf);
414   LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(builder, vbuf);
415   LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(builder, vbuf);
416   LLVMValueRef cond;
417   LLVMValueRef stride;
418
419   if (velem->instance_divisor) {
420      /* array index = instance_id / instance_divisor */
421      index = LLVMBuildUDiv(builder, instance_id,
422                            LLVMConstInt(LLVMInt32Type(), velem->instance_divisor, 0),
423                            "instance_divisor");
424   }
425
426   /* limit index to min(index, vb_max_index) */
427   cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, "");
428   index = LLVMBuildSelect(builder, cond, index, vb_max_index, "");
429
430   stride = LLVMBuildMul(builder, vb_stride, index, "");
431
432   vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer");
433
434   stride = LLVMBuildAdd(builder, stride,
435                         vb_buffer_offset,
436                         "");
437   stride = LLVMBuildAdd(builder, stride,
438                         LLVMConstInt(LLVMInt32Type(), velem->src_offset, 0),
439                         "");
440
441   /*lp_build_printf(builder, "vbuf index = %d, stride is %d\n", indices, stride);*/
442   vbuffer_ptr = LLVMBuildGEP(builder, vbuffer_ptr, &stride, 1, "");
443
444   *res = draw_llvm_translate_from(builder, vbuffer_ptr, velem->src_format);
445}
446
447static LLVMValueRef
448aos_to_soa(LLVMBuilderRef builder,
449           LLVMValueRef val0,
450           LLVMValueRef val1,
451           LLVMValueRef val2,
452           LLVMValueRef val3,
453           LLVMValueRef channel)
454{
455   LLVMValueRef ex, res;
456
457   ex = LLVMBuildExtractElement(builder, val0,
458                                channel, "");
459   res = LLVMBuildInsertElement(builder,
460                                LLVMConstNull(LLVMTypeOf(val0)),
461                                ex,
462                                LLVMConstInt(LLVMInt32Type(), 0, 0),
463                                "");
464
465   ex = LLVMBuildExtractElement(builder, val1,
466                                channel, "");
467   res = LLVMBuildInsertElement(builder,
468                                res, ex,
469                                LLVMConstInt(LLVMInt32Type(), 1, 0),
470                                "");
471
472   ex = LLVMBuildExtractElement(builder, val2,
473                                channel, "");
474   res = LLVMBuildInsertElement(builder,
475                                res, ex,
476                                LLVMConstInt(LLVMInt32Type(), 2, 0),
477                                "");
478
479   ex = LLVMBuildExtractElement(builder, val3,
480                                channel, "");
481   res = LLVMBuildInsertElement(builder,
482                                res, ex,
483                                LLVMConstInt(LLVMInt32Type(), 3, 0),
484                                "");
485
486   return res;
487}
488
489static void
490soa_to_aos(LLVMBuilderRef builder,
491           LLVMValueRef soa[NUM_CHANNELS],
492           LLVMValueRef aos[NUM_CHANNELS])
493{
494   LLVMValueRef comp;
495   int i = 0;
496
497   debug_assert(NUM_CHANNELS == 4);
498
499   aos[0] = LLVMConstNull(LLVMTypeOf(soa[0]));
500   aos[1] = aos[2] = aos[3] = aos[0];
501
502   for (i = 0; i < NUM_CHANNELS; ++i) {
503      LLVMValueRef channel = LLVMConstInt(LLVMInt32Type(), i, 0);
504
505      comp = LLVMBuildExtractElement(builder, soa[i],
506                                     LLVMConstInt(LLVMInt32Type(), 0, 0), "");
507      aos[0] = LLVMBuildInsertElement(builder, aos[0], comp, channel, "");
508
509      comp = LLVMBuildExtractElement(builder, soa[i],
510                                     LLVMConstInt(LLVMInt32Type(), 1, 0), "");
511      aos[1] = LLVMBuildInsertElement(builder, aos[1], comp, channel, "");
512
513      comp = LLVMBuildExtractElement(builder, soa[i],
514                                     LLVMConstInt(LLVMInt32Type(), 2, 0), "");
515      aos[2] = LLVMBuildInsertElement(builder, aos[2], comp, channel, "");
516
517      comp = LLVMBuildExtractElement(builder, soa[i],
518                                     LLVMConstInt(LLVMInt32Type(), 3, 0), "");
519      aos[3] = LLVMBuildInsertElement(builder, aos[3], comp, channel, "");
520
521   }
522}
523
524static void
525convert_to_soa(LLVMBuilderRef builder,
526               LLVMValueRef (*aos)[NUM_CHANNELS],
527               LLVMValueRef (*soa)[NUM_CHANNELS],
528               int num_attribs)
529{
530   int i;
531
532   debug_assert(NUM_CHANNELS == 4);
533
534   for (i = 0; i < num_attribs; ++i) {
535      LLVMValueRef val0 = aos[i][0];
536      LLVMValueRef val1 = aos[i][1];
537      LLVMValueRef val2 = aos[i][2];
538      LLVMValueRef val3 = aos[i][3];
539
540      soa[i][0] = aos_to_soa(builder, val0, val1, val2, val3,
541                             LLVMConstInt(LLVMInt32Type(), 0, 0));
542      soa[i][1] = aos_to_soa(builder, val0, val1, val2, val3,
543                             LLVMConstInt(LLVMInt32Type(), 1, 0));
544      soa[i][2] = aos_to_soa(builder, val0, val1, val2, val3,
545                             LLVMConstInt(LLVMInt32Type(), 2, 0));
546      soa[i][3] = aos_to_soa(builder, val0, val1, val2, val3,
547                             LLVMConstInt(LLVMInt32Type(), 3, 0));
548   }
549}
550
551static void
552store_aos(LLVMBuilderRef builder,
553          LLVMValueRef io_ptr,
554          LLVMValueRef index,
555          LLVMValueRef value,
556          LLVMValueRef clipmask)
557{
558   LLVMValueRef id_ptr = draw_jit_header_id(builder, io_ptr);
559   LLVMValueRef data_ptr = draw_jit_header_data(builder, io_ptr);
560   LLVMValueRef indices[3];
561   LLVMValueRef val, shift;
562
563   indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
564   indices[1] = index;
565   indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0);
566
567   /* initialize vertex id:16 = 0xffff, pad:3 = 0, edgeflag:1 = 1 */
568   val = LLVMConstInt(LLVMInt32Type(), 0xffff1, 0);
569   shift  = LLVMConstInt(LLVMInt32Type(), 12, 0);
570   val = LLVMBuildShl(builder, val, shift, "");
571   /* add clipmask:12 */
572   val = LLVMBuildOr(builder, val, clipmask, "");
573
574   /* store vertex header */
575   LLVMBuildStore(builder, val, id_ptr);
576
577
578#if DEBUG_STORE
579   lp_build_printf(builder, "    ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr);
580#endif
581#if 0
582   /*lp_build_printf(builder, " ---- %p storing at %d (%p)  ", io_ptr, index, data_ptr);
583     print_vectorf(builder, value);*/
584   data_ptr = LLVMBuildBitCast(builder, data_ptr,
585                               LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatType(), 4), 0), 0),
586                               "datavec");
587   data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 2, "");
588
589   LLVMBuildStore(builder, value, data_ptr);
590#else
591   {
592      LLVMValueRef x, y, z, w;
593      LLVMValueRef idx0, idx1, idx2, idx3;
594      LLVMValueRef gep0, gep1, gep2, gep3;
595      data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, "");
596
597      idx0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
598      idx1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
599      idx2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
600      idx3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
601
602      x = LLVMBuildExtractElement(builder, value,
603                                  idx0, "");
604      y = LLVMBuildExtractElement(builder, value,
605                                  idx1, "");
606      z = LLVMBuildExtractElement(builder, value,
607                                  idx2, "");
608      w = LLVMBuildExtractElement(builder, value,
609                                  idx3, "");
610
611      gep0 = LLVMBuildGEP(builder, data_ptr, &idx0, 1, "");
612      gep1 = LLVMBuildGEP(builder, data_ptr, &idx1, 1, "");
613      gep2 = LLVMBuildGEP(builder, data_ptr, &idx2, 1, "");
614      gep3 = LLVMBuildGEP(builder, data_ptr, &idx3, 1, "");
615
616      /*lp_build_printf(builder, "##### x = %f (%p), y = %f (%p), z = %f (%p), w = %f (%p)\n",
617        x, gep0, y, gep1, z, gep2, w, gep3);*/
618      LLVMBuildStore(builder, x, gep0);
619      LLVMBuildStore(builder, y, gep1);
620      LLVMBuildStore(builder, z, gep2);
621      LLVMBuildStore(builder, w, gep3);
622   }
623#endif
624}
625
626static void
627store_aos_array(LLVMBuilderRef builder,
628                LLVMValueRef io_ptr,
629                LLVMValueRef aos[NUM_CHANNELS],
630                int attrib,
631                int num_outputs,
632                LLVMValueRef clipmask)
633{
634   LLVMValueRef attr_index = LLVMConstInt(LLVMInt32Type(), attrib, 0);
635   LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
636   LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
637   LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
638   LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
639   LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr;
640   LLVMValueRef clipmask0, clipmask1, clipmask2, clipmask3;
641
642   debug_assert(NUM_CHANNELS == 4);
643
644   io0_ptr = LLVMBuildGEP(builder, io_ptr,
645                          &ind0, 1, "");
646   io1_ptr = LLVMBuildGEP(builder, io_ptr,
647                          &ind1, 1, "");
648   io2_ptr = LLVMBuildGEP(builder, io_ptr,
649                          &ind2, 1, "");
650   io3_ptr = LLVMBuildGEP(builder, io_ptr,
651                          &ind3, 1, "");
652
653   clipmask0 = LLVMBuildExtractElement(builder, clipmask,
654                                       ind0, "");
655   clipmask1 = LLVMBuildExtractElement(builder, clipmask,
656                                       ind1, "");
657   clipmask2 = LLVMBuildExtractElement(builder, clipmask,
658                                       ind2, "");
659   clipmask3 = LLVMBuildExtractElement(builder, clipmask,
660                                       ind3, "");
661
662#if DEBUG_STORE
663   lp_build_printf(builder, "io = %p, indexes[%d, %d, %d, %d]\n, clipmask0 = %x, clipmask1 = %x, clipmask2 = %x, clipmask3 = %x\n",
664                   io_ptr, ind0, ind1, ind2, ind3, clipmask0, clipmask1, clipmask2, clipmask3);
665#endif
666   /* store for each of the 4 vertices */
667   store_aos(builder, io0_ptr, attr_index, aos[0], clipmask0);
668   store_aos(builder, io1_ptr, attr_index, aos[1], clipmask1);
669   store_aos(builder, io2_ptr, attr_index, aos[2], clipmask2);
670   store_aos(builder, io3_ptr, attr_index, aos[3], clipmask3);
671}
672
673static void
674convert_to_aos(LLVMBuilderRef builder,
675               LLVMValueRef io,
676               LLVMValueRef (*outputs)[NUM_CHANNELS],
677               LLVMValueRef clipmask,
678               int num_outputs,
679               int max_vertices)
680{
681   unsigned chan, attrib;
682
683#if DEBUG_STORE
684   lp_build_printf(builder, "   # storing begin\n");
685#endif
686   for (attrib = 0; attrib < num_outputs; ++attrib) {
687      LLVMValueRef soa[4];
688      LLVMValueRef aos[4];
689      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
690         if(outputs[attrib][chan]) {
691            LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
692            lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);
693            /*lp_build_printf(builder, "output %d : %d ",
694                            LLVMConstInt(LLVMInt32Type(), attrib, 0),
695                            LLVMConstInt(LLVMInt32Type(), chan, 0));
696              print_vectorf(builder, out);*/
697            soa[chan] = out;
698         } else
699            soa[chan] = 0;
700      }
701      soa_to_aos(builder, soa, aos);
702      store_aos_array(builder,
703                      io,
704                      aos,
705                      attrib,
706                      num_outputs,
707                      clipmask);
708   }
709#if DEBUG_STORE
710   lp_build_printf(builder, "   # storing end\n");
711#endif
712}
713
714/*
715 * Stores original vertex positions in clip coordinates
716 * There is probably a more efficient way to do this, 4 floats at once
717 * rather than extracting each element one by one.
718 */
719static void
720store_clip(LLVMBuilderRef builder,
721           LLVMValueRef io_ptr,
722           LLVMValueRef (*outputs)[NUM_CHANNELS])
723{
724   LLVMValueRef out[4];
725   LLVMValueRef indices[2];
726   LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr;
727   LLVMValueRef clip_ptr0, clip_ptr1, clip_ptr2, clip_ptr3;
728   LLVMValueRef clip0_ptr, clip1_ptr, clip2_ptr, clip3_ptr;
729   LLVMValueRef out0elem, out1elem, out2elem, out3elem;
730   int i;
731
732   LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
733   LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
734   LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
735   LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
736
737   indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
738   indices[1] = LLVMConstInt(LLVMInt32Type(), 0, 0);
739
740   out[0] = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/
741   out[1] = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/
742   out[2] = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/
743   out[3] = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/
744
745   io0_ptr = LLVMBuildGEP(builder, io_ptr, &ind0, 1, "");
746   io1_ptr = LLVMBuildGEP(builder, io_ptr, &ind1, 1, "");
747   io2_ptr = LLVMBuildGEP(builder, io_ptr, &ind2, 1, "");
748   io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, "");
749
750   clip_ptr0 = draw_jit_header_clip(builder, io0_ptr);
751   clip_ptr1 = draw_jit_header_clip(builder, io1_ptr);
752   clip_ptr2 = draw_jit_header_clip(builder, io2_ptr);
753   clip_ptr3 = draw_jit_header_clip(builder, io3_ptr);
754
755   for (i = 0; i<4; i++){
756      clip0_ptr = LLVMBuildGEP(builder, clip_ptr0,
757                               indices, 2, ""); //x0
758      clip1_ptr = LLVMBuildGEP(builder, clip_ptr1,
759                               indices, 2, ""); //x1
760      clip2_ptr = LLVMBuildGEP(builder, clip_ptr2,
761                               indices, 2, ""); //x2
762      clip3_ptr = LLVMBuildGEP(builder, clip_ptr3,
763                               indices, 2, ""); //x3
764
765      out0elem = LLVMBuildExtractElement(builder, out[i],
766                                         ind0, ""); //x0
767      out1elem = LLVMBuildExtractElement(builder, out[i],
768                                         ind1, ""); //x1
769      out2elem = LLVMBuildExtractElement(builder, out[i],
770                                         ind2, ""); //x2
771      out3elem = LLVMBuildExtractElement(builder, out[i],
772                                         ind3, ""); //x3
773
774      LLVMBuildStore(builder, out0elem, clip0_ptr);
775      LLVMBuildStore(builder, out1elem, clip1_ptr);
776      LLVMBuildStore(builder, out2elem, clip2_ptr);
777      LLVMBuildStore(builder, out3elem, clip3_ptr);
778
779      indices[1]= LLVMBuildAdd(builder, indices[1], ind1, "");
780   }
781
782}
783
784/* Equivalent of _mm_set1_ps(a)
785 */
786static LLVMValueRef vec4f_from_scalar(LLVMBuilderRef bld,
787				      LLVMValueRef a,
788				      const char *name)
789{
790   LLVMValueRef res = LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4));
791   int i;
792
793   for(i = 0; i < 4; ++i) {
794      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
795      res = LLVMBuildInsertElement(bld, res, a, index, i == 3 ? name : "");
796   }
797
798   return res;
799}
800
801/*
802 * Transforms the outputs for viewport mapping
803 */
804static void
805generate_viewport(struct draw_llvm *llvm,
806                  LLVMBuilderRef builder,
807                  LLVMValueRef (*outputs)[NUM_CHANNELS],
808                  LLVMValueRef context_ptr)
809{
810   int i;
811   struct lp_type f32_type = lp_type_float_vec(32);
812   LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/
813   LLVMValueRef const1 = lp_build_const_vec(f32_type, 1.0);       /*1.0 1.0 1.0 1.0*/
814   LLVMValueRef vp_ptr = draw_jit_context_viewport(builder, context_ptr);
815
816   /* for 1/w convention*/
817   out3 = LLVMBuildFDiv(builder, const1, out3, "");
818   LLVMBuildStore(builder, out3, outputs[0][3]);
819
820   /* Viewport Mapping */
821   for (i=0; i<3; i++){
822      LLVMValueRef out = LLVMBuildLoad(builder, outputs[0][i], ""); /*x0 x1 x2 x3*/
823      LLVMValueRef scale;
824      LLVMValueRef trans;
825      LLVMValueRef scale_i;
826      LLVMValueRef trans_i;
827      LLVMValueRef index;
828
829      index = LLVMConstInt(LLVMInt32Type(), i, 0);
830      scale_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");
831
832      index = LLVMConstInt(LLVMInt32Type(), i+4, 0);
833      trans_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");
834
835      scale = vec4f_from_scalar(builder, LLVMBuildLoad(builder, scale_i, ""), "scale");
836      trans = vec4f_from_scalar(builder, LLVMBuildLoad(builder, trans_i, ""), "trans");
837
838      /* divide by w */
839      out = LLVMBuildFMul(builder, out, out3, "");
840      /* mult by scale */
841      out = LLVMBuildFMul(builder, out, scale, "");
842      /* add translation */
843      out = LLVMBuildFAdd(builder, out, trans, "");
844
845      /* store transformed outputs */
846      LLVMBuildStore(builder, out, outputs[0][i]);
847   }
848
849}
850
851
852/*
853 * Returns clipmask as 4xi32 bitmask for the 4 vertices
854 */
855static LLVMValueRef
856generate_clipmask(LLVMBuilderRef builder,
857                  LLVMValueRef (*outputs)[NUM_CHANNELS],
858                  boolean clip_xy,
859                  boolean clip_z,
860                  boolean clip_user,
861                  boolean clip_halfz,
862                  unsigned nr,
863                  LLVMValueRef context_ptr)
864{
865   LLVMValueRef mask; /* stores the <4xi32> clipmasks */
866   LLVMValueRef test, temp;
867   LLVMValueRef zero, shift;
868   LLVMValueRef pos_x, pos_y, pos_z, pos_w;
869   LLVMValueRef plane1, planes, plane_ptr, sum;
870
871   unsigned i;
872
873   struct lp_type f32_type = lp_type_float_vec(32);
874
875   mask = lp_build_const_int_vec(lp_type_int_vec(32), 0);
876   temp = lp_build_const_int_vec(lp_type_int_vec(32), 0);
877   zero = lp_build_const_vec(f32_type, 0);                    /* 0.0f 0.0f 0.0f 0.0f */
878   shift = lp_build_const_int_vec(lp_type_int_vec(32), 1);    /* 1 1 1 1 */
879
880   /* Assuming position stored at output[0] */
881   pos_x = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/
882   pos_y = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/
883   pos_z = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/
884   pos_w = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/
885
886   /* Cliptest, for hardwired planes */
887   if (clip_xy){
888      /* plane 1 */
889      test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w);
890      temp = shift;
891      test = LLVMBuildAnd(builder, test, temp, "");
892      mask = test;
893
894      /* plane 2 */
895      test = LLVMBuildFAdd(builder, pos_x, pos_w, "");
896      test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test);
897      temp = LLVMBuildShl(builder, temp, shift, "");
898      test = LLVMBuildAnd(builder, test, temp, "");
899      mask = LLVMBuildOr(builder, mask, test, "");
900
901      /* plane 3 */
902      test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w);
903      temp = LLVMBuildShl(builder, temp, shift, "");
904      test = LLVMBuildAnd(builder, test, temp, "");
905      mask = LLVMBuildOr(builder, mask, test, "");
906
907      /* plane 4 */
908      test = LLVMBuildFAdd(builder, pos_y, pos_w, "");
909      test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test);
910      temp = LLVMBuildShl(builder, temp, shift, "");
911      test = LLVMBuildAnd(builder, test, temp, "");
912      mask = LLVMBuildOr(builder, mask, test, "");
913   }
914
915   if (clip_z){
916      temp = lp_build_const_int_vec(lp_type_int_vec(32), 16);
917      if (clip_halfz){
918         /* plane 5 */
919         test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, pos_z);
920         test = LLVMBuildAnd(builder, test, temp, "");
921         mask = LLVMBuildOr(builder, mask, test, "");
922      }
923      else{
924         /* plane 5 */
925         test = LLVMBuildFAdd(builder, pos_z, pos_w, "");
926         test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test);
927         test = LLVMBuildAnd(builder, test, temp, "");
928         mask = LLVMBuildOr(builder, mask, test, "");
929      }
930      /* plane 6 */
931      test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w);
932      temp = LLVMBuildShl(builder, temp, shift, "");
933      test = LLVMBuildAnd(builder, test, temp, "");
934      mask = LLVMBuildOr(builder, mask, test, "");
935   }
936
937   if (clip_user){
938      LLVMValueRef planes_ptr = draw_jit_context_planes(builder, context_ptr);
939      LLVMValueRef indices[3];
940      temp = lp_build_const_int_vec(lp_type_int_vec(32), 32);
941
942      /* userclip planes */
943      for (i = 6; i < nr; i++) {
944         indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
945         indices[1] = LLVMConstInt(LLVMInt32Type(), i, 0);
946
947         indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0);
948         plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
949         plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x");
950         planes = vec4f_from_scalar(builder, plane1, "plane4_x");
951         sum = LLVMBuildFMul(builder, planes, pos_x, "");
952
953         indices[2] = LLVMConstInt(LLVMInt32Type(), 1, 0);
954         plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
955         plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y");
956         planes = vec4f_from_scalar(builder, plane1, "plane4_y");
957         test = LLVMBuildFMul(builder, planes, pos_y, "");
958         sum = LLVMBuildFAdd(builder, sum, test, "");
959
960         indices[2] = LLVMConstInt(LLVMInt32Type(), 2, 0);
961         plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
962         plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z");
963         planes = vec4f_from_scalar(builder, plane1, "plane4_z");
964         test = LLVMBuildFMul(builder, planes, pos_z, "");
965         sum = LLVMBuildFAdd(builder, sum, test, "");
966
967         indices[2] = LLVMConstInt(LLVMInt32Type(), 3, 0);
968         plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
969         plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w");
970         planes = vec4f_from_scalar(builder, plane1, "plane4_w");
971         test = LLVMBuildFMul(builder, planes, pos_w, "");
972         sum = LLVMBuildFAdd(builder, sum, test, "");
973
974         test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, sum);
975         temp = LLVMBuildShl(builder, temp, shift, "");
976         test = LLVMBuildAnd(builder, test, temp, "");
977         mask = LLVMBuildOr(builder, mask, test, "");
978      }
979   }
980   return mask;
981}
982
983/*
984 * Returns boolean if any clipping has occurred
985 * Used zero/non-zero i32 value to represent boolean
986 */
987static void
988clipmask_bool(LLVMBuilderRef builder,
989              LLVMValueRef clipmask,
990              LLVMValueRef ret_ptr)
991{
992   LLVMValueRef ret = LLVMBuildLoad(builder, ret_ptr, "");
993   LLVMValueRef temp;
994   int i;
995
996   for (i=0; i<4; i++){
997      temp = LLVMBuildExtractElement(builder, clipmask,
998                                     LLVMConstInt(LLVMInt32Type(), i, 0) , "");
999      ret = LLVMBuildOr(builder, ret, temp, "");
1000   }
1001
1002   LLVMBuildStore(builder, ret, ret_ptr);
1003}
1004
1005static void
1006draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
1007{
1008   LLVMTypeRef arg_types[8];
1009   LLVMTypeRef func_type;
1010   LLVMValueRef context_ptr;
1011   LLVMBasicBlockRef block;
1012   LLVMBuilderRef builder;
1013   LLVMValueRef start, end, count, stride, step, io_itr;
1014   LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
1015   LLVMValueRef instance_id;
1016   LLVMValueRef system_values_array;
1017   struct draw_context *draw = llvm->draw;
1018   const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info;
1019   unsigned i, j;
1020   struct lp_build_context bld;
1021   struct lp_build_loop_state lp_loop;
1022   const int max_vertices = 4;
1023   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
1024   void *code;
1025   struct lp_build_sampler_soa *sampler = 0;
1026   LLVMValueRef ret, ret_ptr;
1027   boolean bypass_viewport = variant->key.bypass_viewport;
1028   boolean enable_cliptest = variant->key.clip_xy ||
1029                             variant->key.clip_z  ||
1030                             variant->key.clip_user;
1031
1032   arg_types[0] = llvm->context_ptr_type;           /* context */
1033   arg_types[1] = llvm->vertex_header_ptr_type;     /* vertex_header */
1034   arg_types[2] = llvm->buffer_ptr_type;            /* vbuffers */
1035   arg_types[3] = LLVMInt32Type();                  /* start */
1036   arg_types[4] = LLVMInt32Type();                  /* count */
1037   arg_types[5] = LLVMInt32Type();                  /* stride */
1038   arg_types[6] = llvm->vb_ptr_type;                /* pipe_vertex_buffer's */
1039   arg_types[7] = LLVMInt32Type();                  /* instance_id */
1040
1041   func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0);
1042
1043   variant->function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type);
1044   LLVMSetFunctionCallConv(variant->function, LLVMCCallConv);
1045   for(i = 0; i < Elements(arg_types); ++i)
1046      if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
1047         LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute);
1048
1049   context_ptr  = LLVMGetParam(variant->function, 0);
1050   io_ptr       = LLVMGetParam(variant->function, 1);
1051   vbuffers_ptr = LLVMGetParam(variant->function, 2);
1052   start        = LLVMGetParam(variant->function, 3);
1053   count        = LLVMGetParam(variant->function, 4);
1054   stride       = LLVMGetParam(variant->function, 5);
1055   vb_ptr       = LLVMGetParam(variant->function, 6);
1056   instance_id  = LLVMGetParam(variant->function, 7);
1057
1058   lp_build_name(context_ptr, "context");
1059   lp_build_name(io_ptr, "io");
1060   lp_build_name(vbuffers_ptr, "vbuffers");
1061   lp_build_name(start, "start");
1062   lp_build_name(count, "count");
1063   lp_build_name(stride, "stride");
1064   lp_build_name(vb_ptr, "vb");
1065   lp_build_name(instance_id, "instance_id");
1066
1067   /*
1068    * Function body
1069    */
1070
1071   block = LLVMAppendBasicBlock(variant->function, "entry");
1072   builder = LLVMCreateBuilder();
1073   LLVMPositionBuilderAtEnd(builder, block);
1074
1075   lp_build_context_init(&bld, builder, lp_type_int(32));
1076
1077   system_values_array = lp_build_system_values_array(builder, vs_info,
1078                                                      instance_id, NULL);
1079
1080   end = lp_build_add(&bld, start, count);
1081
1082   step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
1083
1084   /* function will return non-zero i32 value if any clipped vertices */
1085   ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), "");
1086   LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr);
1087
1088   /* code generated texture sampling */
1089   sampler = draw_llvm_sampler_soa_create(
1090      draw_llvm_variant_key_samplers(&variant->key),
1091      context_ptr);
1092
1093#if DEBUG_STORE
1094   lp_build_printf(builder, "start = %d, end = %d, step = %d\n",
1095                   start, end, step);
1096#endif
1097   lp_build_loop_begin(builder, start, &lp_loop);
1098   {
1099      LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
1100      LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
1101      LLVMValueRef io;
1102      LLVMValueRef clipmask;   /* holds the clipmask value */
1103      const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
1104
1105      io_itr = LLVMBuildSub(builder, lp_loop.counter, start, "");
1106      io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
1107#if DEBUG_STORE
1108      lp_build_printf(builder, " --- io %d = %p, loop counter %d\n",
1109                      io_itr, io, lp_loop.counter);
1110#endif
1111      for (i = 0; i < NUM_CHANNELS; ++i) {
1112         LLVMValueRef true_index = LLVMBuildAdd(
1113            builder,
1114            lp_loop.counter,
1115            LLVMConstInt(LLVMInt32Type(), i, 0), "");
1116         for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
1117            struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
1118            LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(),
1119                                                 velem->vertex_buffer_index,
1120                                                 0);
1121            LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
1122                                           &vb_index, 1, "");
1123            generate_fetch(builder, vbuffers_ptr,
1124                           &aos_attribs[j][i], velem, vb, true_index,
1125                           instance_id);
1126         }
1127      }
1128      convert_to_soa(builder, aos_attribs, inputs,
1129                     draw->pt.nr_vertex_elements);
1130
1131      ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs;
1132      generate_vs(llvm,
1133                  builder,
1134                  outputs,
1135                  ptr_aos,
1136                  system_values_array,
1137                  context_ptr,
1138                  sampler);
1139
1140      /* store original positions in clip before further manipulation */
1141      store_clip(builder, io, outputs);
1142
1143      /* do cliptest */
1144      if (enable_cliptest){
1145         /* allocate clipmask, assign it integer type */
1146         clipmask = generate_clipmask(builder, outputs,
1147                                      variant->key.clip_xy,
1148                                      variant->key.clip_z,
1149                                      variant->key.clip_user,
1150                                      variant->key.clip_halfz,
1151                                      variant->key.nr_planes,
1152                                      context_ptr);
1153         /* return clipping boolean value for function */
1154         clipmask_bool(builder, clipmask, ret_ptr);
1155      }
1156      else{
1157         clipmask = lp_build_const_int_vec(lp_type_int_vec(32), 0);
1158      }
1159
1160      /* do viewport mapping */
1161      if (!bypass_viewport){
1162         generate_viewport(llvm, builder, outputs, context_ptr);
1163      }
1164
1165      /* store clipmask in vertex header and positions in data */
1166      convert_to_aos(builder, io, outputs, clipmask,
1167                     vs_info->num_outputs, max_vertices);
1168   }
1169
1170   lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop);
1171
1172   sampler->destroy(sampler);
1173
1174   ret = LLVMBuildLoad(builder, ret_ptr,"");
1175   LLVMBuildRet(builder, ret);
1176
1177   LLVMDisposeBuilder(builder);
1178
1179   /*
1180    * Translate the LLVM IR into machine code.
1181    */
1182#ifdef DEBUG
1183   if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
1184      lp_debug_dump_value(variant->function);
1185      assert(0);
1186   }
1187#endif
1188
1189   LLVMRunFunctionPassManager(llvm->pass, variant->function);
1190
1191   if (gallivm_debug & GALLIVM_DEBUG_IR) {
1192      lp_debug_dump_value(variant->function);
1193      debug_printf("\n");
1194   }
1195
1196   code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function);
1197   variant->jit_func = (draw_jit_vert_func)pointer_to_func(code);
1198
1199   if (gallivm_debug & GALLIVM_DEBUG_ASM) {
1200      lp_disassemble(code);
1201   }
1202   lp_func_delete_body(variant->function);
1203}
1204
1205
1206static void
1207draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
1208{
1209   LLVMTypeRef arg_types[8];
1210   LLVMTypeRef func_type;
1211   LLVMValueRef context_ptr;
1212   LLVMBasicBlockRef block;
1213   LLVMBuilderRef builder;
1214   LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr;
1215   LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
1216   LLVMValueRef instance_id;
1217   LLVMValueRef system_values_array;
1218   struct draw_context *draw = llvm->draw;
1219   const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info;
1220   unsigned i, j;
1221   struct lp_build_context bld;
1222   struct lp_build_loop_state lp_loop;
1223   const int max_vertices = 4;
1224   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
1225   LLVMValueRef fetch_max;
1226   void *code;
1227   struct lp_build_sampler_soa *sampler = 0;
1228   LLVMValueRef ret, ret_ptr;
1229   boolean bypass_viewport = variant->key.bypass_viewport;
1230   boolean enable_cliptest = variant->key.clip_xy ||
1231                             variant->key.clip_z  ||
1232                             variant->key.clip_user;
1233
1234   arg_types[0] = llvm->context_ptr_type;               /* context */
1235   arg_types[1] = llvm->vertex_header_ptr_type;         /* vertex_header */
1236   arg_types[2] = llvm->buffer_ptr_type;                /* vbuffers */
1237   arg_types[3] = LLVMPointerType(LLVMInt32Type(), 0);  /* fetch_elts * */
1238   arg_types[4] = LLVMInt32Type();                      /* fetch_count */
1239   arg_types[5] = LLVMInt32Type();                      /* stride */
1240   arg_types[6] = llvm->vb_ptr_type;                    /* pipe_vertex_buffer's */
1241   arg_types[7] = LLVMInt32Type();                      /* instance_id */
1242
1243   func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0);
1244
1245   variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type);
1246   LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv);
1247   for(i = 0; i < Elements(arg_types); ++i)
1248      if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
1249         LLVMAddAttribute(LLVMGetParam(variant->function_elts, i),
1250                          LLVMNoAliasAttribute);
1251
1252   context_ptr  = LLVMGetParam(variant->function_elts, 0);
1253   io_ptr       = LLVMGetParam(variant->function_elts, 1);
1254   vbuffers_ptr = LLVMGetParam(variant->function_elts, 2);
1255   fetch_elts   = LLVMGetParam(variant->function_elts, 3);
1256   fetch_count  = LLVMGetParam(variant->function_elts, 4);
1257   stride       = LLVMGetParam(variant->function_elts, 5);
1258   vb_ptr       = LLVMGetParam(variant->function_elts, 6);
1259   instance_id  = LLVMGetParam(variant->function_elts, 7);
1260
1261   lp_build_name(context_ptr, "context");
1262   lp_build_name(io_ptr, "io");
1263   lp_build_name(vbuffers_ptr, "vbuffers");
1264   lp_build_name(fetch_elts, "fetch_elts");
1265   lp_build_name(fetch_count, "fetch_count");
1266   lp_build_name(stride, "stride");
1267   lp_build_name(vb_ptr, "vb");
1268   lp_build_name(instance_id, "instance_id");
1269
1270   /*
1271    * Function body
1272    */
1273
1274   block = LLVMAppendBasicBlock(variant->function_elts, "entry");
1275   builder = LLVMCreateBuilder();
1276   LLVMPositionBuilderAtEnd(builder, block);
1277
1278   lp_build_context_init(&bld, builder, lp_type_int(32));
1279
1280   system_values_array = lp_build_system_values_array(builder, vs_info,
1281                                                      instance_id, NULL);
1282
1283
1284   step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
1285
1286   /* code generated texture sampling */
1287   sampler = draw_llvm_sampler_soa_create(
1288      draw_llvm_variant_key_samplers(&variant->key),
1289      context_ptr);
1290
1291   fetch_max = LLVMBuildSub(builder, fetch_count,
1292                            LLVMConstInt(LLVMInt32Type(), 1, 0),
1293                            "fetch_max");
1294
1295   /* function returns non-zero i32 value if any clipped vertices */
1296   ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), "");
1297   LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr);
1298
1299   lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop);
1300   {
1301      LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
1302      LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
1303      LLVMValueRef io;
1304      LLVMValueRef clipmask;   /* holds the clipmask value */
1305      const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
1306
1307      io_itr = lp_loop.counter;
1308      io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
1309#if DEBUG_STORE
1310      lp_build_printf(builder, " --- io %d = %p, loop counter %d\n",
1311                      io_itr, io, lp_loop.counter);
1312#endif
1313      for (i = 0; i < NUM_CHANNELS; ++i) {
1314         LLVMValueRef true_index = LLVMBuildAdd(
1315            builder,
1316            lp_loop.counter,
1317            LLVMConstInt(LLVMInt32Type(), i, 0), "");
1318         LLVMValueRef fetch_ptr;
1319
1320         /* make sure we're not out of bounds which can happen
1321          * if fetch_count % 4 != 0, because on the last iteration
1322          * a few of the 4 vertex fetches will be out of bounds */
1323         true_index = lp_build_min(&bld, true_index, fetch_max);
1324
1325         fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
1326                                  &true_index, 1, "");
1327         true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt");
1328         for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
1329            struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
1330            LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(),
1331                                                 velem->vertex_buffer_index,
1332                                                 0);
1333            LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
1334                                           &vb_index, 1, "");
1335            generate_fetch(builder, vbuffers_ptr,
1336                           &aos_attribs[j][i], velem, vb, true_index,
1337                           instance_id);
1338         }
1339      }
1340      convert_to_soa(builder, aos_attribs, inputs,
1341                     draw->pt.nr_vertex_elements);
1342
1343      ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs;
1344      generate_vs(llvm,
1345                  builder,
1346                  outputs,
1347                  ptr_aos,
1348                  system_values_array,
1349                  context_ptr,
1350                  sampler);
1351
1352      /* store original positions in clip before further manipulation */
1353      store_clip(builder, io, outputs);
1354
1355      /* do cliptest */
1356      if (enable_cliptest){
1357         /* allocate clipmask, assign it integer type */
1358         clipmask = generate_clipmask(builder, outputs,
1359                                      variant->key.clip_xy,
1360                                      variant->key.clip_z,
1361                                      variant->key.clip_user,
1362                                      variant->key.clip_halfz,
1363                                      variant->key.nr_planes,
1364                                      context_ptr);
1365         /* return clipping boolean value for function */
1366         clipmask_bool(builder, clipmask, ret_ptr);
1367      }
1368      else{
1369         clipmask = lp_build_const_int_vec(lp_type_int_vec(32), 0);
1370      }
1371
1372      /* do viewport mapping */
1373      if (!bypass_viewport){
1374         generate_viewport(llvm, builder, outputs, context_ptr);
1375      }
1376
1377      /* store clipmask in vertex header,
1378       * original positions in clip
1379       * and transformed positions in data
1380       */
1381      convert_to_aos(builder, io, outputs, clipmask,
1382                     vs_info->num_outputs, max_vertices);
1383   }
1384
1385   lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop);
1386
1387   sampler->destroy(sampler);
1388
1389   ret = LLVMBuildLoad(builder, ret_ptr,"");
1390   LLVMBuildRet(builder, ret);
1391
1392   LLVMDisposeBuilder(builder);
1393
1394   /*
1395    * Translate the LLVM IR into machine code.
1396    */
1397#ifdef DEBUG
1398   if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) {
1399      lp_debug_dump_value(variant->function_elts);
1400      assert(0);
1401   }
1402#endif
1403
1404   LLVMRunFunctionPassManager(llvm->pass, variant->function_elts);
1405
1406   if (gallivm_debug & GALLIVM_DEBUG_IR) {
1407      lp_debug_dump_value(variant->function_elts);
1408      debug_printf("\n");
1409   }
1410
1411   code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function_elts);
1412   variant->jit_func_elts = (draw_jit_vert_func_elts)pointer_to_func(code);
1413
1414   if (gallivm_debug & GALLIVM_DEBUG_ASM) {
1415      lp_disassemble(code);
1416   }
1417   lp_func_delete_body(variant->function_elts);
1418}
1419
1420
1421struct draw_llvm_variant_key *
1422draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
1423{
1424   unsigned i;
1425   struct draw_llvm_variant_key *key;
1426   struct lp_sampler_static_state *sampler;
1427
1428   key = (struct draw_llvm_variant_key *)store;
1429
1430   /* Presumably all variants of the shader should have the same
1431    * number of vertex elements - ie the number of shader inputs.
1432    */
1433   key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements;
1434
1435   /* will have to rig this up properly later */
1436   key->clip_xy = llvm->draw->clip_xy;
1437   key->clip_z = llvm->draw->clip_z;
1438   key->clip_user = llvm->draw->clip_user;
1439   key->bypass_viewport = llvm->draw->identity_viewport;
1440   key->clip_halfz = !llvm->draw->rasterizer->gl_rasterization_rules;
1441   key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
1442   key->nr_planes = llvm->draw->nr_planes;
1443   key->pad = 0;
1444
1445   /* All variants of this shader will have the same value for
1446    * nr_samplers.  Not yet trying to compact away holes in the
1447    * sampler array.
1448    */
1449   key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
1450
1451   sampler = draw_llvm_variant_key_samplers(key);
1452
1453   memcpy(key->vertex_element,
1454          llvm->draw->pt.vertex_element,
1455          sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
1456
1457   memset(sampler, 0, key->nr_samplers * sizeof *sampler);
1458
1459   for (i = 0 ; i < key->nr_samplers; i++) {
1460      lp_sampler_static_state(&sampler[i],
1461			      llvm->draw->sampler_views[i],
1462			      llvm->draw->samplers[i]);
1463   }
1464
1465   return key;
1466}
1467
1468void
1469draw_llvm_set_mapped_texture(struct draw_context *draw,
1470                             unsigned sampler_idx,
1471                             uint32_t width, uint32_t height, uint32_t depth,
1472                             uint32_t last_level,
1473                             uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
1474                             uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
1475                             const void *data[PIPE_MAX_TEXTURE_LEVELS])
1476{
1477   unsigned j;
1478   struct draw_jit_texture *jit_tex;
1479
1480   assert(sampler_idx < PIPE_MAX_VERTEX_SAMPLERS);
1481
1482
1483   jit_tex = &draw->llvm->jit_context.textures[sampler_idx];
1484
1485   jit_tex->width = width;
1486   jit_tex->height = height;
1487   jit_tex->depth = depth;
1488   jit_tex->last_level = last_level;
1489
1490   for (j = 0; j <= last_level; j++) {
1491      jit_tex->data[j] = data[j];
1492      jit_tex->row_stride[j] = row_stride[j];
1493      jit_tex->img_stride[j] = img_stride[j];
1494   }
1495}
1496
1497
1498void
1499draw_llvm_set_sampler_state(struct draw_context *draw)
1500{
1501   unsigned i;
1502
1503   for (i = 0; i < draw->num_samplers; i++) {
1504      struct draw_jit_texture *jit_tex = &draw->llvm->jit_context.textures[i];
1505
1506      if (draw->samplers[i]) {
1507         jit_tex->min_lod = draw->samplers[i]->min_lod;
1508         jit_tex->max_lod = draw->samplers[i]->max_lod;
1509         jit_tex->lod_bias = draw->samplers[i]->lod_bias;
1510         COPY_4V(jit_tex->border_color, draw->samplers[i]->border_color);
1511      }
1512   }
1513}
1514
1515
1516void
1517draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
1518{
1519   struct draw_llvm *llvm = variant->llvm;
1520   struct draw_context *draw = llvm->draw;
1521
1522   if (variant->function_elts) {
1523      if (variant->function_elts)
1524         LLVMFreeMachineCodeForFunction(draw->engine,
1525                                        variant->function_elts);
1526      LLVMDeleteFunction(variant->function_elts);
1527   }
1528
1529   if (variant->function) {
1530      if (variant->function)
1531         LLVMFreeMachineCodeForFunction(draw->engine,
1532                                        variant->function);
1533      LLVMDeleteFunction(variant->function);
1534   }
1535
1536   remove_from_list(&variant->list_item_local);
1537   variant->shader->variants_cached--;
1538   remove_from_list(&variant->list_item_global);
1539   llvm->nr_variants--;
1540   FREE(variant);
1541}
1542