draw_llvm.c revision 154d91cad907ba5643fb3e39717a8f7c5a76049a
1/**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include "draw_llvm.h"
29
30#include "draw_context.h"
31#include "draw_vs.h"
32
33#include "gallivm/lp_bld_arit.h"
34#include "gallivm/lp_bld_logic.h"
35#include "gallivm/lp_bld_const.h"
36#include "gallivm/lp_bld_swizzle.h"
37#include "gallivm/lp_bld_struct.h"
38#include "gallivm/lp_bld_type.h"
39#include "gallivm/lp_bld_flow.h"
40#include "gallivm/lp_bld_debug.h"
41#include "gallivm/lp_bld_tgsi.h"
42#include "gallivm/lp_bld_printf.h"
43#include "gallivm/lp_bld_intr.h"
44#include "gallivm/lp_bld_init.h"
45
46#include "tgsi/tgsi_exec.h"
47#include "tgsi/tgsi_dump.h"
48
49#include "util/u_math.h"
50#include "util/u_pointer.h"
51#include "util/u_string.h"
52
53#include <llvm-c/Transforms/Scalar.h>
54
/* Set to 1 to compile in print_vectorf() and the lp_build_printf() tracing
 * calls that sit behind "#if DEBUG_STORE" in the store helpers of this file.
 */
#define DEBUG_STORE 0

/* generates the draw jit function */
static void
draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var);
/* Forward declaration; invoked by draw_llvm_create_variant() immediately
 * after draw_llvm_generate() for the same variant.
 */
static void
draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *var);
62
63static void
64init_globals(struct draw_llvm *llvm)
65{
66   LLVMTypeRef texture_type;
67
68   /* struct draw_jit_texture */
69   {
70      LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS];
71
72      elem_types[DRAW_JIT_TEXTURE_WIDTH]  = LLVMInt32Type();
73      elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type();
74      elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type();
75      elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type();
76      elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] =
77         LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS);
78      elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] =
79         LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS);
80      elem_types[DRAW_JIT_TEXTURE_DATA] =
81         LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0),
82                       PIPE_MAX_TEXTURE_LEVELS);
83      elem_types[DRAW_JIT_TEXTURE_MIN_LOD] = LLVMFloatType();
84      elem_types[DRAW_JIT_TEXTURE_MAX_LOD] = LLVMFloatType();
85      elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType();
86      elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] =
87         LLVMArrayType(LLVMFloatType(), 4);
88
89      texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);
90
91      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width,
92                             llvm->target, texture_type,
93                             DRAW_JIT_TEXTURE_WIDTH);
94      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height,
95                             llvm->target, texture_type,
96                             DRAW_JIT_TEXTURE_HEIGHT);
97      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth,
98                             llvm->target, texture_type,
99                             DRAW_JIT_TEXTURE_DEPTH);
100      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level,
101                             llvm->target, texture_type,
102                             DRAW_JIT_TEXTURE_LAST_LEVEL);
103      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride,
104                             llvm->target, texture_type,
105                             DRAW_JIT_TEXTURE_ROW_STRIDE);
106      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride,
107                             llvm->target, texture_type,
108                             DRAW_JIT_TEXTURE_IMG_STRIDE);
109      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data,
110                             llvm->target, texture_type,
111                             DRAW_JIT_TEXTURE_DATA);
112      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, min_lod,
113                             llvm->target, texture_type,
114                             DRAW_JIT_TEXTURE_MIN_LOD);
115      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, max_lod,
116                             llvm->target, texture_type,
117                             DRAW_JIT_TEXTURE_MAX_LOD);
118      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, lod_bias,
119                             llvm->target, texture_type,
120                             DRAW_JIT_TEXTURE_LOD_BIAS);
121      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, border_color,
122                             llvm->target, texture_type,
123                             DRAW_JIT_TEXTURE_BORDER_COLOR);
124      LP_CHECK_STRUCT_SIZE(struct draw_jit_texture,
125                           llvm->target, texture_type);
126
127      LLVMAddTypeName(llvm->module, "texture", texture_type);
128   }
129
130
131   /* struct draw_jit_context */
132   {
133      LLVMTypeRef elem_types[5];
134      LLVMTypeRef context_type;
135
136      elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
137      elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* gs_constants */
138      elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(LLVMFloatType(), 4), 12), 0); /* planes */
139      elem_types[3] = LLVMPointerType(LLVMFloatType(), 0); /* viewport */
140      elem_types[4] = LLVMArrayType(texture_type,
141                                    PIPE_MAX_VERTEX_SAMPLERS); /* textures */
142
143      context_type = LLVMStructType(elem_types, Elements(elem_types), 0);
144
145      LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
146                             llvm->target, context_type, 0);
147      LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants,
148                             llvm->target, context_type, 1);
149      LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes,
150                             llvm->target, context_type, 2);
151      LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,
152                             llvm->target, context_type,
153                             DRAW_JIT_CTX_TEXTURES);
154      LP_CHECK_STRUCT_SIZE(struct draw_jit_context,
155                           llvm->target, context_type);
156
157      LLVMAddTypeName(llvm->module, "draw_jit_context", context_type);
158
159      llvm->context_ptr_type = LLVMPointerType(context_type, 0);
160   }
161   {
162      LLVMTypeRef buffer_ptr = LLVMPointerType(LLVMIntType(8), 0);
163      llvm->buffer_ptr_type = LLVMPointerType(buffer_ptr, 0);
164   }
165   /* struct pipe_vertex_buffer */
166   {
167      LLVMTypeRef elem_types[4];
168      LLVMTypeRef vb_type;
169
170      elem_types[0] = LLVMInt32Type();
171      elem_types[1] = LLVMInt32Type();
172      elem_types[2] = LLVMInt32Type();
173      elem_types[3] = LLVMPointerType(LLVMOpaqueType(), 0); /* vs_constants */
174
175      vb_type = LLVMStructType(elem_types, Elements(elem_types), 0);
176
177      LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride,
178                             llvm->target, vb_type, 0);
179      LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset,
180                             llvm->target, vb_type, 2);
181      LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer,
182                           llvm->target, vb_type);
183
184      LLVMAddTypeName(llvm->module, "pipe_vertex_buffer", vb_type);
185
186      llvm->vb_ptr_type = LLVMPointerType(vb_type, 0);
187   }
188}
189
190static LLVMTypeRef
191create_vertex_header(struct draw_llvm *llvm, int data_elems)
192{
193   /* struct vertex_header */
194   LLVMTypeRef elem_types[3];
195   LLVMTypeRef vertex_header;
196   char struct_name[24];
197
198   util_snprintf(struct_name, 23, "vertex_header%d", data_elems);
199
200   elem_types[0]  = LLVMIntType(32);
201   elem_types[1]  = LLVMArrayType(LLVMFloatType(), 4);
202   elem_types[2]  = LLVMArrayType(elem_types[1], data_elems);
203
204   vertex_header = LLVMStructType(elem_types, Elements(elem_types), 0);
205
206   /* these are bit-fields and we can't take address of them
207      LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
208      llvm->target, vertex_header,
209      DRAW_JIT_VERTEX_CLIPMASK);
210      LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag,
211      llvm->target, vertex_header,
212      DRAW_JIT_VERTEX_EDGEFLAG);
213      LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad,
214      llvm->target, vertex_header,
215      DRAW_JIT_VERTEX_PAD);
216      LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id,
217      llvm->target, vertex_header,
218      DRAW_JIT_VERTEX_VERTEX_ID);
219   */
220   LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip,
221                          llvm->target, vertex_header,
222                          DRAW_JIT_VERTEX_CLIP);
223   LP_CHECK_MEMBER_OFFSET(struct vertex_header, data,
224                          llvm->target, vertex_header,
225                          DRAW_JIT_VERTEX_DATA);
226
227   LLVMAddTypeName(llvm->module, struct_name, vertex_header);
228
229   return LLVMPointerType(vertex_header, 0);
230}
231
232struct draw_llvm *
233draw_llvm_create(struct draw_context *draw)
234{
235   struct draw_llvm *llvm;
236
237   llvm = CALLOC_STRUCT( draw_llvm );
238   if (!llvm)
239      return NULL;
240
241   llvm->draw = draw;
242   llvm->engine = draw->engine;
243
244   debug_assert(llvm->engine);
245
246   llvm->module = LLVMModuleCreateWithName("draw_llvm");
247   llvm->provider = LLVMCreateModuleProviderForExistingModule(llvm->module);
248
249   LLVMAddModuleProvider(llvm->engine, llvm->provider);
250
251   llvm->target = LLVMGetExecutionEngineTargetData(llvm->engine);
252
253   llvm->pass = LLVMCreateFunctionPassManager(llvm->provider);
254   LLVMAddTargetData(llvm->target, llvm->pass);
255
256   if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) {
257      /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
258       * but there are more on SVN. */
259      /* TODO: Add more passes */
260
261      LLVMAddCFGSimplificationPass(llvm->pass);
262
263      if (HAVE_LLVM >= 0x207 && sizeof(void*) == 4) {
264         /* For LLVM >= 2.7 and 32-bit build, use this order of passes to
265          * avoid generating bad code.
266          * Test with piglit glsl-vs-sqrt-zero test.
267          */
268         LLVMAddConstantPropagationPass(llvm->pass);
269         LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
270      }
271      else {
272         LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
273         LLVMAddConstantPropagationPass(llvm->pass);
274      }
275
276      LLVMAddInstructionCombiningPass(llvm->pass);
277      LLVMAddGVNPass(llvm->pass);
278   } else {
279      /* We need at least this pass to prevent the backends to fail in
280       * unexpected ways.
281       */
282      LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
283   }
284
285   init_globals(llvm);
286
287   if (gallivm_debug & GALLIVM_DEBUG_IR) {
288      LLVMDumpModule(llvm->module);
289   }
290
291   llvm->nr_variants = 0;
292   make_empty_list(&llvm->vs_variants_list);
293
294   return llvm;
295}
296
297void
298draw_llvm_destroy(struct draw_llvm *llvm)
299{
300   LLVMDisposePassManager(llvm->pass);
301
302   FREE(llvm);
303}
304
305struct draw_llvm_variant *
306draw_llvm_create_variant(struct draw_llvm *llvm,
307			 unsigned num_inputs,
308			 const struct draw_llvm_variant_key *key)
309{
310   struct draw_llvm_variant *variant;
311   struct llvm_vertex_shader *shader =
312      llvm_vertex_shader(llvm->draw->vs.vertex_shader);
313
314   variant = MALLOC(sizeof *variant +
315		    shader->variant_key_size -
316		    sizeof variant->key);
317   if (variant == NULL)
318      return NULL;
319
320   variant->llvm = llvm;
321
322   memcpy(&variant->key, key, shader->variant_key_size);
323
324   llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs);
325
326   draw_llvm_generate(llvm, variant);
327   draw_llvm_generate_elts(llvm, variant);
328
329   variant->shader = shader;
330   variant->list_item_global.base = variant;
331   variant->list_item_local.base = variant;
332   /*variant->no = */shader->variants_created++;
333   variant->list_item_global.base = variant;
334
335   return variant;
336}
337
338static void
339generate_vs(struct draw_llvm *llvm,
340            LLVMBuilderRef builder,
341            LLVMValueRef (*outputs)[NUM_CHANNELS],
342            const LLVMValueRef (*inputs)[NUM_CHANNELS],
343            LLVMValueRef context_ptr,
344            struct lp_build_sampler_soa *draw_sampler)
345{
346   const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
347   struct lp_type vs_type;
348   LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr);
349   struct lp_build_sampler_soa *sampler = 0;
350
351   memset(&vs_type, 0, sizeof vs_type);
352   vs_type.floating = TRUE; /* floating point values */
353   vs_type.sign = TRUE;     /* values are signed */
354   vs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
355   vs_type.width = 32;      /* 32-bit float */
356   vs_type.length = 4;      /* 4 elements per vector */
357#if 0
358   num_vs = 4;              /* number of vertices per block */
359#endif
360
361   if (gallivm_debug & GALLIVM_DEBUG_IR) {
362      tgsi_dump(tokens, 0);
363   }
364
365   if (llvm->draw->num_sampler_views &&
366       llvm->draw->num_samplers)
367      sampler = draw_sampler;
368
369   lp_build_tgsi_soa(builder,
370                     tokens,
371                     vs_type,
372                     NULL /*struct lp_build_mask_context *mask*/,
373                     consts_ptr,
374                     NULL /*pos*/,
375                     inputs,
376                     outputs,
377                     sampler,
378                     &llvm->draw->vs.vertex_shader->info);
379}
380
#if DEBUG_STORE
/* Debug aid: emit a printf call that prints the four elements of vec. */
static void print_vectorf(LLVMBuilderRef builder,
                         LLVMValueRef vec)
{
   LLVMValueRef lane[4];
   unsigned i;

   for (i = 0; i < 4; ++i) {
      lane[i] = LLVMBuildExtractElement(builder, vec,
                                        LLVMConstInt(LLVMInt32Type(), i, 0),
                                        "");
   }

   lp_build_printf(builder, "vector = [%f, %f, %f, %f]\n",
                   lane[0], lane[1], lane[2], lane[3]);
}
#endif
398
399static void
400generate_fetch(LLVMBuilderRef builder,
401               LLVMValueRef vbuffers_ptr,
402               LLVMValueRef *res,
403               struct pipe_vertex_element *velem,
404               LLVMValueRef vbuf,
405               LLVMValueRef index,
406               LLVMValueRef instance_id)
407{
408   LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0);
409   LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr,
410                                           &indices, 1, "");
411   LLVMValueRef vb_stride = draw_jit_vbuffer_stride(builder, vbuf);
412   LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(builder, vbuf);
413   LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(builder, vbuf);
414   LLVMValueRef cond;
415   LLVMValueRef stride;
416
417   if (velem->instance_divisor) {
418      /* array index = instance_id / instance_divisor */
419      index = LLVMBuildUDiv(builder, instance_id,
420                            LLVMConstInt(LLVMInt32Type(), velem->instance_divisor, 0),
421                            "instance_divisor");
422   }
423
424   /* limit index to min(index, vb_max_index) */
425   cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, "");
426   index = LLVMBuildSelect(builder, cond, index, vb_max_index, "");
427
428   stride = LLVMBuildMul(builder, vb_stride, index, "");
429
430   vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer");
431
432   stride = LLVMBuildAdd(builder, stride,
433                         vb_buffer_offset,
434                         "");
435   stride = LLVMBuildAdd(builder, stride,
436                         LLVMConstInt(LLVMInt32Type(), velem->src_offset, 0),
437                         "");
438
439   /*lp_build_printf(builder, "vbuf index = %d, stride is %d\n", indices, stride);*/
440   vbuffer_ptr = LLVMBuildGEP(builder, vbuffer_ptr, &stride, 1, "");
441
442   *res = draw_llvm_translate_from(builder, vbuffer_ptr, velem->src_format);
443}
444
445static LLVMValueRef
446aos_to_soa(LLVMBuilderRef builder,
447           LLVMValueRef val0,
448           LLVMValueRef val1,
449           LLVMValueRef val2,
450           LLVMValueRef val3,
451           LLVMValueRef channel)
452{
453   LLVMValueRef ex, res;
454
455   ex = LLVMBuildExtractElement(builder, val0,
456                                channel, "");
457   res = LLVMBuildInsertElement(builder,
458                                LLVMConstNull(LLVMTypeOf(val0)),
459                                ex,
460                                LLVMConstInt(LLVMInt32Type(), 0, 0),
461                                "");
462
463   ex = LLVMBuildExtractElement(builder, val1,
464                                channel, "");
465   res = LLVMBuildInsertElement(builder,
466                                res, ex,
467                                LLVMConstInt(LLVMInt32Type(), 1, 0),
468                                "");
469
470   ex = LLVMBuildExtractElement(builder, val2,
471                                channel, "");
472   res = LLVMBuildInsertElement(builder,
473                                res, ex,
474                                LLVMConstInt(LLVMInt32Type(), 2, 0),
475                                "");
476
477   ex = LLVMBuildExtractElement(builder, val3,
478                                channel, "");
479   res = LLVMBuildInsertElement(builder,
480                                res, ex,
481                                LLVMConstInt(LLVMInt32Type(), 3, 0),
482                                "");
483
484   return res;
485}
486
487static void
488soa_to_aos(LLVMBuilderRef builder,
489           LLVMValueRef soa[NUM_CHANNELS],
490           LLVMValueRef aos[NUM_CHANNELS])
491{
492   LLVMValueRef comp;
493   int i = 0;
494
495   debug_assert(NUM_CHANNELS == 4);
496
497   aos[0] = LLVMConstNull(LLVMTypeOf(soa[0]));
498   aos[1] = aos[2] = aos[3] = aos[0];
499
500   for (i = 0; i < NUM_CHANNELS; ++i) {
501      LLVMValueRef channel = LLVMConstInt(LLVMInt32Type(), i, 0);
502
503      comp = LLVMBuildExtractElement(builder, soa[i],
504                                     LLVMConstInt(LLVMInt32Type(), 0, 0), "");
505      aos[0] = LLVMBuildInsertElement(builder, aos[0], comp, channel, "");
506
507      comp = LLVMBuildExtractElement(builder, soa[i],
508                                     LLVMConstInt(LLVMInt32Type(), 1, 0), "");
509      aos[1] = LLVMBuildInsertElement(builder, aos[1], comp, channel, "");
510
511      comp = LLVMBuildExtractElement(builder, soa[i],
512                                     LLVMConstInt(LLVMInt32Type(), 2, 0), "");
513      aos[2] = LLVMBuildInsertElement(builder, aos[2], comp, channel, "");
514
515      comp = LLVMBuildExtractElement(builder, soa[i],
516                                     LLVMConstInt(LLVMInt32Type(), 3, 0), "");
517      aos[3] = LLVMBuildInsertElement(builder, aos[3], comp, channel, "");
518
519   }
520}
521
522static void
523convert_to_soa(LLVMBuilderRef builder,
524               LLVMValueRef (*aos)[NUM_CHANNELS],
525               LLVMValueRef (*soa)[NUM_CHANNELS],
526               int num_attribs)
527{
528   int i;
529
530   debug_assert(NUM_CHANNELS == 4);
531
532   for (i = 0; i < num_attribs; ++i) {
533      LLVMValueRef val0 = aos[i][0];
534      LLVMValueRef val1 = aos[i][1];
535      LLVMValueRef val2 = aos[i][2];
536      LLVMValueRef val3 = aos[i][3];
537
538      soa[i][0] = aos_to_soa(builder, val0, val1, val2, val3,
539                             LLVMConstInt(LLVMInt32Type(), 0, 0));
540      soa[i][1] = aos_to_soa(builder, val0, val1, val2, val3,
541                             LLVMConstInt(LLVMInt32Type(), 1, 0));
542      soa[i][2] = aos_to_soa(builder, val0, val1, val2, val3,
543                             LLVMConstInt(LLVMInt32Type(), 2, 0));
544      soa[i][3] = aos_to_soa(builder, val0, val1, val2, val3,
545                             LLVMConstInt(LLVMInt32Type(), 3, 0));
546   }
547}
548
549static void
550store_aos(LLVMBuilderRef builder,
551          LLVMValueRef io_ptr,
552          LLVMValueRef index,
553          LLVMValueRef value,
554          LLVMValueRef clipmask)
555{
556   LLVMValueRef id_ptr = draw_jit_header_id(builder, io_ptr);
557   LLVMValueRef data_ptr = draw_jit_header_data(builder, io_ptr);
558   LLVMValueRef indices[3];
559   LLVMValueRef val, shift;
560
561   indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
562   indices[1] = index;
563   indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0);
564
565   /* initialize vertex id:16 = 0xffff, pad:3 = 0, edgeflag:1 = 1 */
566   val = LLVMConstInt(LLVMInt32Type(), 0xffff1, 0);
567   shift  = LLVMConstInt(LLVMInt32Type(), 12, 0);
568   val = LLVMBuildShl(builder, val, shift, "");
569   /* add clipmask:12 */
570   val = LLVMBuildOr(builder, val, clipmask, "");
571
572   /* store vertex header */
573   LLVMBuildStore(builder, val, id_ptr);
574
575
576#if DEBUG_STORE
577   lp_build_printf(builder, "    ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr);
578#endif
579#if 0
580   /*lp_build_printf(builder, " ---- %p storing at %d (%p)  ", io_ptr, index, data_ptr);
581     print_vectorf(builder, value);*/
582   data_ptr = LLVMBuildBitCast(builder, data_ptr,
583                               LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatType(), 4), 0), 0),
584                               "datavec");
585   data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 2, "");
586
587   LLVMBuildStore(builder, value, data_ptr);
588#else
589   {
590      LLVMValueRef x, y, z, w;
591      LLVMValueRef idx0, idx1, idx2, idx3;
592      LLVMValueRef gep0, gep1, gep2, gep3;
593      data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, "");
594
595      idx0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
596      idx1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
597      idx2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
598      idx3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
599
600      x = LLVMBuildExtractElement(builder, value,
601                                  idx0, "");
602      y = LLVMBuildExtractElement(builder, value,
603                                  idx1, "");
604      z = LLVMBuildExtractElement(builder, value,
605                                  idx2, "");
606      w = LLVMBuildExtractElement(builder, value,
607                                  idx3, "");
608
609      gep0 = LLVMBuildGEP(builder, data_ptr, &idx0, 1, "");
610      gep1 = LLVMBuildGEP(builder, data_ptr, &idx1, 1, "");
611      gep2 = LLVMBuildGEP(builder, data_ptr, &idx2, 1, "");
612      gep3 = LLVMBuildGEP(builder, data_ptr, &idx3, 1, "");
613
614      /*lp_build_printf(builder, "##### x = %f (%p), y = %f (%p), z = %f (%p), w = %f (%p)\n",
615        x, gep0, y, gep1, z, gep2, w, gep3);*/
616      LLVMBuildStore(builder, x, gep0);
617      LLVMBuildStore(builder, y, gep1);
618      LLVMBuildStore(builder, z, gep2);
619      LLVMBuildStore(builder, w, gep3);
620   }
621#endif
622}
623
624static void
625store_aos_array(LLVMBuilderRef builder,
626                LLVMValueRef io_ptr,
627                LLVMValueRef aos[NUM_CHANNELS],
628                int attrib,
629                int num_outputs,
630                LLVMValueRef clipmask)
631{
632   LLVMValueRef attr_index = LLVMConstInt(LLVMInt32Type(), attrib, 0);
633   LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
634   LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
635   LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
636   LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
637   LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr;
638   LLVMValueRef clipmask0, clipmask1, clipmask2, clipmask3;
639
640   debug_assert(NUM_CHANNELS == 4);
641
642   io0_ptr = LLVMBuildGEP(builder, io_ptr,
643                          &ind0, 1, "");
644   io1_ptr = LLVMBuildGEP(builder, io_ptr,
645                          &ind1, 1, "");
646   io2_ptr = LLVMBuildGEP(builder, io_ptr,
647                          &ind2, 1, "");
648   io3_ptr = LLVMBuildGEP(builder, io_ptr,
649                          &ind3, 1, "");
650
651   clipmask0 = LLVMBuildExtractElement(builder, clipmask,
652                                       ind0, "");
653   clipmask1 = LLVMBuildExtractElement(builder, clipmask,
654                                       ind1, "");
655   clipmask2 = LLVMBuildExtractElement(builder, clipmask,
656                                       ind2, "");
657   clipmask3 = LLVMBuildExtractElement(builder, clipmask,
658                                       ind3, "");
659
660#if DEBUG_STORE
661   lp_build_printf(builder, "io = %p, indexes[%d, %d, %d, %d]\n, clipmask0 = %x, clipmask1 = %x, clipmask2 = %x, clipmask3 = %x\n",
662                   io_ptr, ind0, ind1, ind2, ind3, clipmask0, clipmask1, clipmask2, clipmask3);
663#endif
664   /* store for each of the 4 vertices */
665   store_aos(builder, io0_ptr, attr_index, aos[0], clipmask0);
666   store_aos(builder, io1_ptr, attr_index, aos[1], clipmask1);
667   store_aos(builder, io2_ptr, attr_index, aos[2], clipmask2);
668   store_aos(builder, io3_ptr, attr_index, aos[3], clipmask3);
669}
670
671static void
672convert_to_aos(LLVMBuilderRef builder,
673               LLVMValueRef io,
674               LLVMValueRef (*outputs)[NUM_CHANNELS],
675               LLVMValueRef clipmask,
676               int num_outputs,
677               int max_vertices)
678{
679   unsigned chan, attrib;
680
681#if DEBUG_STORE
682   lp_build_printf(builder, "   # storing begin\n");
683#endif
684   for (attrib = 0; attrib < num_outputs; ++attrib) {
685      LLVMValueRef soa[4];
686      LLVMValueRef aos[4];
687      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
688         if(outputs[attrib][chan]) {
689            LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
690            lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);
691            /*lp_build_printf(builder, "output %d : %d ",
692                            LLVMConstInt(LLVMInt32Type(), attrib, 0),
693                            LLVMConstInt(LLVMInt32Type(), chan, 0));
694              print_vectorf(builder, out);*/
695            soa[chan] = out;
696         } else
697            soa[chan] = 0;
698      }
699      soa_to_aos(builder, soa, aos);
700      store_aos_array(builder,
701                      io,
702                      aos,
703                      attrib,
704                      num_outputs,
705                      clipmask);
706   }
707#if DEBUG_STORE
708   lp_build_printf(builder, "   # storing end\n");
709#endif
710}
711
712/*
713 * Stores original vertex positions in clip coordinates
714 * There is probably a more efficient way to do this, 4 floats at once
715 * rather than extracting each element one by one.
716 */
717static void
718store_clip(LLVMBuilderRef builder,
719           LLVMValueRef io_ptr,
720           LLVMValueRef (*outputs)[NUM_CHANNELS])
721{
722   LLVMValueRef out[4];
723   LLVMValueRef indices[2];
724   LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr;
725   LLVMValueRef clip_ptr0, clip_ptr1, clip_ptr2, clip_ptr3;
726   LLVMValueRef clip0_ptr, clip1_ptr, clip2_ptr, clip3_ptr;
727   LLVMValueRef out0elem, out1elem, out2elem, out3elem;
728   int i;
729
730   LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
731   LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
732   LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
733   LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
734
735   indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
736   indices[1] = LLVMConstInt(LLVMInt32Type(), 0, 0);
737
738   out[0] = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/
739   out[1] = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/
740   out[2] = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/
741   out[3] = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/
742
743   io0_ptr = LLVMBuildGEP(builder, io_ptr, &ind0, 1, "");
744   io1_ptr = LLVMBuildGEP(builder, io_ptr, &ind1, 1, "");
745   io2_ptr = LLVMBuildGEP(builder, io_ptr, &ind2, 1, "");
746   io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, "");
747
748   clip_ptr0 = draw_jit_header_clip(builder, io0_ptr);
749   clip_ptr1 = draw_jit_header_clip(builder, io1_ptr);
750   clip_ptr2 = draw_jit_header_clip(builder, io2_ptr);
751   clip_ptr3 = draw_jit_header_clip(builder, io3_ptr);
752
753   for (i = 0; i<4; i++){
754      clip0_ptr = LLVMBuildGEP(builder, clip_ptr0,
755                               indices, 2, ""); //x0
756      clip1_ptr = LLVMBuildGEP(builder, clip_ptr1,
757                               indices, 2, ""); //x1
758      clip2_ptr = LLVMBuildGEP(builder, clip_ptr2,
759                               indices, 2, ""); //x2
760      clip3_ptr = LLVMBuildGEP(builder, clip_ptr3,
761                               indices, 2, ""); //x3
762
763      out0elem = LLVMBuildExtractElement(builder, out[i],
764                                         ind0, ""); //x0
765      out1elem = LLVMBuildExtractElement(builder, out[i],
766                                         ind1, ""); //x1
767      out2elem = LLVMBuildExtractElement(builder, out[i],
768                                         ind2, ""); //x2
769      out3elem = LLVMBuildExtractElement(builder, out[i],
770                                         ind3, ""); //x3
771
772      LLVMBuildStore(builder, out0elem, clip0_ptr);
773      LLVMBuildStore(builder, out1elem, clip1_ptr);
774      LLVMBuildStore(builder, out2elem, clip2_ptr);
775      LLVMBuildStore(builder, out3elem, clip3_ptr);
776
777      indices[1]= LLVMBuildAdd(builder, indices[1], ind1, "");
778   }
779
780}
781
782/* Equivalent of _mm_set1_ps(a)
783 */
784static LLVMValueRef vec4f_from_scalar(LLVMBuilderRef bld,
785				      LLVMValueRef a,
786				      const char *name)
787{
788   LLVMValueRef res = LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4));
789   int i;
790
791   for(i = 0; i < 4; ++i) {
792      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
793      res = LLVMBuildInsertElement(bld, res, a, index, i == 3 ? name : "");
794   }
795
796   return res;
797}
798
799/*
800 * Transforms the outputs for viewport mapping
801 */
802static void
803generate_viewport(struct draw_llvm *llvm,
804                  LLVMBuilderRef builder,
805                  LLVMValueRef (*outputs)[NUM_CHANNELS],
806                  LLVMValueRef context_ptr)
807{
808   int i;
809   struct lp_type f32_type = lp_type_float_vec(32);
810   LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/
811   LLVMValueRef const1 = lp_build_const_vec(f32_type, 1.0);       /*1.0 1.0 1.0 1.0*/
812   LLVMValueRef vp_ptr = draw_jit_context_viewport(builder, context_ptr);
813
814   /* for 1/w convention*/
815   out3 = LLVMBuildFDiv(builder, const1, out3, "");
816   LLVMBuildStore(builder, out3, outputs[0][3]);
817
818   /* Viewport Mapping */
819   for (i=0; i<3; i++){
820      LLVMValueRef out = LLVMBuildLoad(builder, outputs[0][i], ""); /*x0 x1 x2 x3*/
821      LLVMValueRef scale;
822      LLVMValueRef trans;
823      LLVMValueRef scale_i;
824      LLVMValueRef trans_i;
825      LLVMValueRef index;
826
827      index = LLVMConstInt(LLVMInt32Type(), i, 0);
828      scale_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");
829
830      index = LLVMConstInt(LLVMInt32Type(), i+4, 0);
831      trans_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");
832
833      scale = vec4f_from_scalar(builder, LLVMBuildLoad(builder, scale_i, ""), "scale");
834      trans = vec4f_from_scalar(builder, LLVMBuildLoad(builder, trans_i, ""), "trans");
835
836      /* divide by w */
837      out = LLVMBuildFMul(builder, out, out3, "");
838      /* mult by scale */
839      out = LLVMBuildFMul(builder, out, scale, "");
840      /* add translation */
841      out = LLVMBuildFAdd(builder, out, trans, "");
842
843      /* store transformed outputs */
844      LLVMBuildStore(builder, out, outputs[0][i]);
845   }
846
847}
848
849
850/*
851 * Returns clipmask as 4xi32 bitmask for the 4 vertices
852 */
853static LLVMValueRef
854generate_clipmask(LLVMBuilderRef builder,
855                  LLVMValueRef (*outputs)[NUM_CHANNELS],
856                  boolean clip_xy,
857                  boolean clip_z,
858                  boolean clip_user,
859                  boolean clip_halfz,
860                  unsigned nr,
861                  LLVMValueRef context_ptr)
862{
863   LLVMValueRef mask; /* stores the <4xi32> clipmasks */
864   LLVMValueRef test, temp;
865   LLVMValueRef zero, shift;
866   LLVMValueRef pos_x, pos_y, pos_z, pos_w;
867   LLVMValueRef plane1, planes, plane_ptr, sum;
868
869   unsigned i;
870
871   struct lp_type f32_type = lp_type_float_vec(32);
872
873   mask = lp_build_const_int_vec(lp_type_int_vec(32), 0);
874   temp = lp_build_const_int_vec(lp_type_int_vec(32), 0);
875   zero = lp_build_const_vec(f32_type, 0);                    /* 0.0f 0.0f 0.0f 0.0f */
876   shift = lp_build_const_int_vec(lp_type_int_vec(32), 1);    /* 1 1 1 1 */
877
878   /* Assuming position stored at output[0] */
879   pos_x = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/
880   pos_y = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/
881   pos_z = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/
882   pos_w = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/
883
884   /* Cliptest, for hardwired planes */
885   if (clip_xy){
886      /* plane 1 */
887      test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w);
888      temp = shift;
889      test = LLVMBuildAnd(builder, test, temp, "");
890      mask = test;
891
892      /* plane 2 */
893      test = LLVMBuildFAdd(builder, pos_x, pos_w, "");
894      test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test);
895      temp = LLVMBuildShl(builder, temp, shift, "");
896      test = LLVMBuildAnd(builder, test, temp, "");
897      mask = LLVMBuildOr(builder, mask, test, "");
898
899      /* plane 3 */
900      test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w);
901      temp = LLVMBuildShl(builder, temp, shift, "");
902      test = LLVMBuildAnd(builder, test, temp, "");
903      mask = LLVMBuildOr(builder, mask, test, "");
904
905      /* plane 4 */
906      test = LLVMBuildFAdd(builder, pos_y, pos_w, "");
907      test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test);
908      temp = LLVMBuildShl(builder, temp, shift, "");
909      test = LLVMBuildAnd(builder, test, temp, "");
910      mask = LLVMBuildOr(builder, mask, test, "");
911   }
912
913   if (clip_z){
914      temp = lp_build_const_int_vec(lp_type_int_vec(32), 16);
915      if (clip_halfz){
916         /* plane 5 */
917         test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, pos_z);
918         test = LLVMBuildAnd(builder, test, temp, "");
919         mask = LLVMBuildOr(builder, mask, test, "");
920      }
921      else{
922         /* plane 5 */
923         test = LLVMBuildFAdd(builder, pos_z, pos_w, "");
924         test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test);
925         test = LLVMBuildAnd(builder, test, temp, "");
926         mask = LLVMBuildOr(builder, mask, test, "");
927      }
928      /* plane 6 */
929      test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w);
930      temp = LLVMBuildShl(builder, temp, shift, "");
931      test = LLVMBuildAnd(builder, test, temp, "");
932      mask = LLVMBuildOr(builder, mask, test, "");
933   }
934
935   if (clip_user){
936      LLVMValueRef planes_ptr = draw_jit_context_planes(builder, context_ptr);
937      LLVMValueRef indices[3];
938      temp = lp_build_const_int_vec(lp_type_int_vec(32), 32);
939
940      /* userclip planes */
941      for (i = 6; i < nr; i++) {
942         indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
943         indices[1] = LLVMConstInt(LLVMInt32Type(), i, 0);
944
945         indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0);
946         plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
947         plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x");
948         planes = vec4f_from_scalar(builder, plane1, "plane4_x");
949         sum = LLVMBuildFMul(builder, planes, pos_x, "");
950
951         indices[2] = LLVMConstInt(LLVMInt32Type(), 1, 0);
952         plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
953         plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y");
954         planes = vec4f_from_scalar(builder, plane1, "plane4_y");
955         test = LLVMBuildFMul(builder, planes, pos_y, "");
956         sum = LLVMBuildFAdd(builder, sum, test, "");
957
958         indices[2] = LLVMConstInt(LLVMInt32Type(), 2, 0);
959         plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
960         plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z");
961         planes = vec4f_from_scalar(builder, plane1, "plane4_z");
962         test = LLVMBuildFMul(builder, planes, pos_z, "");
963         sum = LLVMBuildFAdd(builder, sum, test, "");
964
965         indices[2] = LLVMConstInt(LLVMInt32Type(), 3, 0);
966         plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
967         plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w");
968         planes = vec4f_from_scalar(builder, plane1, "plane4_w");
969         test = LLVMBuildFMul(builder, planes, pos_w, "");
970         sum = LLVMBuildFAdd(builder, sum, test, "");
971
972         test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, sum);
973         temp = LLVMBuildShl(builder, temp, shift, "");
974         test = LLVMBuildAnd(builder, test, temp, "");
975         mask = LLVMBuildOr(builder, mask, test, "");
976      }
977   }
978   return mask;
979}
980
981/*
982 * Returns boolean if any clipping has occurred
983 * Used zero/non-zero i32 value to represent boolean
984 */
985static void
986clipmask_bool(LLVMBuilderRef builder,
987              LLVMValueRef clipmask,
988              LLVMValueRef ret_ptr)
989{
990   LLVMValueRef ret = LLVMBuildLoad(builder, ret_ptr, "");
991   LLVMValueRef temp;
992   int i;
993
994   for (i=0; i<4; i++){
995      temp = LLVMBuildExtractElement(builder, clipmask,
996                                     LLVMConstInt(LLVMInt32Type(), i, 0) , "");
997      ret = LLVMBuildOr(builder, ret, temp, "");
998   }
999
1000   LLVMBuildStore(builder, ret, ret_ptr);
1001}
1002
1003static void
1004draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
1005{
1006   LLVMTypeRef arg_types[8];
1007   LLVMTypeRef func_type;
1008   LLVMValueRef context_ptr;
1009   LLVMBasicBlockRef block;
1010   LLVMBuilderRef builder;
1011   LLVMValueRef start, end, count, stride, step, io_itr;
1012   LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
1013   LLVMValueRef instance_id;
1014   struct draw_context *draw = llvm->draw;
1015   unsigned i, j;
1016   struct lp_build_context bld;
1017   struct lp_build_loop_state lp_loop;
1018   const int max_vertices = 4;
1019   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
1020   void *code;
1021   struct lp_build_sampler_soa *sampler = 0;
1022   LLVMValueRef ret, ret_ptr;
1023   boolean bypass_viewport = variant->key.bypass_viewport;
1024   boolean enable_cliptest = variant->key.clip_xy ||
1025                             variant->key.clip_z  ||
1026                             variant->key.clip_user;
1027
1028   arg_types[0] = llvm->context_ptr_type;           /* context */
1029   arg_types[1] = llvm->vertex_header_ptr_type;     /* vertex_header */
1030   arg_types[2] = llvm->buffer_ptr_type;            /* vbuffers */
1031   arg_types[3] = LLVMInt32Type();                  /* start */
1032   arg_types[4] = LLVMInt32Type();                  /* count */
1033   arg_types[5] = LLVMInt32Type();                  /* stride */
1034   arg_types[6] = llvm->vb_ptr_type;                /* pipe_vertex_buffer's */
1035   arg_types[7] = LLVMInt32Type();                  /* instance_id */
1036
1037   func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0);
1038
1039   variant->function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type);
1040   LLVMSetFunctionCallConv(variant->function, LLVMCCallConv);
1041   for(i = 0; i < Elements(arg_types); ++i)
1042      if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
1043         LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute);
1044
1045   context_ptr  = LLVMGetParam(variant->function, 0);
1046   io_ptr       = LLVMGetParam(variant->function, 1);
1047   vbuffers_ptr = LLVMGetParam(variant->function, 2);
1048   start        = LLVMGetParam(variant->function, 3);
1049   count        = LLVMGetParam(variant->function, 4);
1050   stride       = LLVMGetParam(variant->function, 5);
1051   vb_ptr       = LLVMGetParam(variant->function, 6);
1052   instance_id  = LLVMGetParam(variant->function, 7);
1053
1054   lp_build_name(context_ptr, "context");
1055   lp_build_name(io_ptr, "io");
1056   lp_build_name(vbuffers_ptr, "vbuffers");
1057   lp_build_name(start, "start");
1058   lp_build_name(count, "count");
1059   lp_build_name(stride, "stride");
1060   lp_build_name(vb_ptr, "vb");
1061   lp_build_name(instance_id, "instance_id");
1062
1063   /*
1064    * Function body
1065    */
1066
1067   block = LLVMAppendBasicBlock(variant->function, "entry");
1068   builder = LLVMCreateBuilder();
1069   LLVMPositionBuilderAtEnd(builder, block);
1070
1071   lp_build_context_init(&bld, builder, lp_type_int(32));
1072
1073   end = lp_build_add(&bld, start, count);
1074
1075   step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
1076
1077   /* function will return non-zero i32 value if any clipped vertices */
1078   ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), "");
1079   LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr);
1080
1081   /* code generated texture sampling */
1082   sampler = draw_llvm_sampler_soa_create(
1083      draw_llvm_variant_key_samplers(&variant->key),
1084      context_ptr);
1085
1086#if DEBUG_STORE
1087   lp_build_printf(builder, "start = %d, end = %d, step = %d\n",
1088                   start, end, step);
1089#endif
1090   lp_build_loop_begin(builder, start, &lp_loop);
1091   {
1092      LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
1093      LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
1094      LLVMValueRef io;
1095      LLVMValueRef clipmask;   /* holds the clipmask value */
1096      const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
1097
1098      io_itr = LLVMBuildSub(builder, lp_loop.counter, start, "");
1099      io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
1100#if DEBUG_STORE
1101      lp_build_printf(builder, " --- io %d = %p, loop counter %d\n",
1102                      io_itr, io, lp_loop.counter);
1103#endif
1104      for (i = 0; i < NUM_CHANNELS; ++i) {
1105         LLVMValueRef true_index = LLVMBuildAdd(
1106            builder,
1107            lp_loop.counter,
1108            LLVMConstInt(LLVMInt32Type(), i, 0), "");
1109         for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
1110            struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
1111            LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(),
1112                                                 velem->vertex_buffer_index,
1113                                                 0);
1114            LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
1115                                           &vb_index, 1, "");
1116            generate_fetch(builder, vbuffers_ptr,
1117                           &aos_attribs[j][i], velem, vb, true_index,
1118                           instance_id);
1119         }
1120      }
1121      convert_to_soa(builder, aos_attribs, inputs,
1122                     draw->pt.nr_vertex_elements);
1123
1124      ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs;
1125      generate_vs(llvm,
1126                  builder,
1127                  outputs,
1128                  ptr_aos,
1129                  context_ptr,
1130                  sampler);
1131
1132      /* store original positions in clip before further manipulation */
1133      store_clip(builder, io, outputs);
1134
1135      /* do cliptest */
1136      if (enable_cliptest){
1137         /* allocate clipmask, assign it integer type */
1138         clipmask = generate_clipmask(builder, outputs,
1139                                      variant->key.clip_xy,
1140                                      variant->key.clip_z,
1141                                      variant->key.clip_user,
1142                                      variant->key.clip_halfz,
1143                                      variant->key.nr_planes,
1144                                      context_ptr);
1145         /* return clipping boolean value for function */
1146         clipmask_bool(builder, clipmask, ret_ptr);
1147      }
1148      else{
1149         clipmask = lp_build_const_int_vec(lp_type_int_vec(32), 0);
1150      }
1151
1152      /* do viewport mapping */
1153      if (!bypass_viewport){
1154         generate_viewport(llvm, builder, outputs, context_ptr);
1155      }
1156
1157      /* store clipmask in vertex header and positions in data */
1158      convert_to_aos(builder, io, outputs, clipmask,
1159                     draw->vs.vertex_shader->info.num_outputs,
1160                     max_vertices);
1161   }
1162
1163   lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop);
1164
1165   sampler->destroy(sampler);
1166
1167#ifdef PIPE_ARCH_X86
1168   /* Avoid corrupting the FPU stack on 32bit OSes. */
1169   lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
1170#endif
1171
1172   ret = LLVMBuildLoad(builder, ret_ptr,"");
1173   LLVMBuildRet(builder, ret);
1174
1175   LLVMDisposeBuilder(builder);
1176
1177   /*
1178    * Translate the LLVM IR into machine code.
1179    */
1180#ifdef DEBUG
1181   if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
1182      lp_debug_dump_value(variant->function);
1183      assert(0);
1184   }
1185#endif
1186
1187   LLVMRunFunctionPassManager(llvm->pass, variant->function);
1188
1189   if (gallivm_debug & GALLIVM_DEBUG_IR) {
1190      lp_debug_dump_value(variant->function);
1191      debug_printf("\n");
1192   }
1193
1194   code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function);
1195   variant->jit_func = (draw_jit_vert_func)pointer_to_func(code);
1196
1197   if (gallivm_debug & GALLIVM_DEBUG_ASM) {
1198      lp_disassemble(code);
1199   }
1200   lp_func_delete_body(variant->function);
1201}
1202
1203
1204static void
1205draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
1206{
1207   LLVMTypeRef arg_types[8];
1208   LLVMTypeRef func_type;
1209   LLVMValueRef context_ptr;
1210   LLVMBasicBlockRef block;
1211   LLVMBuilderRef builder;
1212   LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr;
1213   LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
1214   LLVMValueRef instance_id;
1215   struct draw_context *draw = llvm->draw;
1216   unsigned i, j;
1217   struct lp_build_context bld;
1218   struct lp_build_loop_state lp_loop;
1219   const int max_vertices = 4;
1220   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
1221   LLVMValueRef fetch_max;
1222   void *code;
1223   struct lp_build_sampler_soa *sampler = 0;
1224   LLVMValueRef ret, ret_ptr;
1225   boolean bypass_viewport = variant->key.bypass_viewport;
1226   boolean enable_cliptest = variant->key.clip_xy ||
1227                             variant->key.clip_z  ||
1228                             variant->key.clip_user;
1229
1230   arg_types[0] = llvm->context_ptr_type;               /* context */
1231   arg_types[1] = llvm->vertex_header_ptr_type;         /* vertex_header */
1232   arg_types[2] = llvm->buffer_ptr_type;                /* vbuffers */
1233   arg_types[3] = LLVMPointerType(LLVMInt32Type(), 0);  /* fetch_elts * */
1234   arg_types[4] = LLVMInt32Type();                      /* fetch_count */
1235   arg_types[5] = LLVMInt32Type();                      /* stride */
1236   arg_types[6] = llvm->vb_ptr_type;                    /* pipe_vertex_buffer's */
1237   arg_types[7] = LLVMInt32Type();                      /* instance_id */
1238
1239   func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0);
1240
1241   variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type);
1242   LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv);
1243   for(i = 0; i < Elements(arg_types); ++i)
1244      if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
1245         LLVMAddAttribute(LLVMGetParam(variant->function_elts, i),
1246                          LLVMNoAliasAttribute);
1247
1248   context_ptr  = LLVMGetParam(variant->function_elts, 0);
1249   io_ptr       = LLVMGetParam(variant->function_elts, 1);
1250   vbuffers_ptr = LLVMGetParam(variant->function_elts, 2);
1251   fetch_elts   = LLVMGetParam(variant->function_elts, 3);
1252   fetch_count  = LLVMGetParam(variant->function_elts, 4);
1253   stride       = LLVMGetParam(variant->function_elts, 5);
1254   vb_ptr       = LLVMGetParam(variant->function_elts, 6);
1255   instance_id  = LLVMGetParam(variant->function_elts, 7);
1256
1257   lp_build_name(context_ptr, "context");
1258   lp_build_name(io_ptr, "io");
1259   lp_build_name(vbuffers_ptr, "vbuffers");
1260   lp_build_name(fetch_elts, "fetch_elts");
1261   lp_build_name(fetch_count, "fetch_count");
1262   lp_build_name(stride, "stride");
1263   lp_build_name(vb_ptr, "vb");
1264   lp_build_name(instance_id, "instance_id");
1265
1266   /*
1267    * Function body
1268    */
1269
1270   block = LLVMAppendBasicBlock(variant->function_elts, "entry");
1271   builder = LLVMCreateBuilder();
1272   LLVMPositionBuilderAtEnd(builder, block);
1273
1274   lp_build_context_init(&bld, builder, lp_type_int(32));
1275
1276   step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
1277
1278   /* code generated texture sampling */
1279   sampler = draw_llvm_sampler_soa_create(
1280      draw_llvm_variant_key_samplers(&variant->key),
1281      context_ptr);
1282
1283   fetch_max = LLVMBuildSub(builder, fetch_count,
1284                            LLVMConstInt(LLVMInt32Type(), 1, 0),
1285                            "fetch_max");
1286
1287   /* function returns non-zero i32 value if any clipped vertices */
1288   ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), "");
1289   LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr);
1290
1291   lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop);
1292   {
1293      LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
1294      LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
1295      LLVMValueRef io;
1296      LLVMValueRef clipmask;   /* holds the clipmask value */
1297      const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
1298
1299      io_itr = lp_loop.counter;
1300      io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
1301#if DEBUG_STORE
1302      lp_build_printf(builder, " --- io %d = %p, loop counter %d\n",
1303                      io_itr, io, lp_loop.counter);
1304#endif
1305      for (i = 0; i < NUM_CHANNELS; ++i) {
1306         LLVMValueRef true_index = LLVMBuildAdd(
1307            builder,
1308            lp_loop.counter,
1309            LLVMConstInt(LLVMInt32Type(), i, 0), "");
1310         LLVMValueRef fetch_ptr;
1311
1312         /* make sure we're not out of bounds which can happen
1313          * if fetch_count % 4 != 0, because on the last iteration
1314          * a few of the 4 vertex fetches will be out of bounds */
1315         true_index = lp_build_min(&bld, true_index, fetch_max);
1316
1317         fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
1318                                  &true_index, 1, "");
1319         true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt");
1320         for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
1321            struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
1322            LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(),
1323                                                 velem->vertex_buffer_index,
1324                                                 0);
1325            LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
1326                                           &vb_index, 1, "");
1327            generate_fetch(builder, vbuffers_ptr,
1328                           &aos_attribs[j][i], velem, vb, true_index,
1329                           instance_id);
1330         }
1331      }
1332      convert_to_soa(builder, aos_attribs, inputs,
1333                     draw->pt.nr_vertex_elements);
1334
1335      ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs;
1336      generate_vs(llvm,
1337                  builder,
1338                  outputs,
1339                  ptr_aos,
1340                  context_ptr,
1341                  sampler);
1342
1343      /* store original positions in clip before further manipulation */
1344      store_clip(builder, io, outputs);
1345
1346      /* do cliptest */
1347      if (enable_cliptest){
1348         /* allocate clipmask, assign it integer type */
1349         clipmask = generate_clipmask(builder, outputs,
1350                                      variant->key.clip_xy,
1351                                      variant->key.clip_z,
1352                                      variant->key.clip_user,
1353                                      variant->key.clip_halfz,
1354                                      variant->key.nr_planes,
1355                                      context_ptr);
1356         /* return clipping boolean value for function */
1357         clipmask_bool(builder, clipmask, ret_ptr);
1358      }
1359      else{
1360         clipmask = lp_build_const_int_vec(lp_type_int_vec(32), 0);
1361      }
1362
1363      /* do viewport mapping */
1364      if (!bypass_viewport){
1365         generate_viewport(llvm, builder, outputs, context_ptr);
1366      }
1367
1368      /* store clipmask in vertex header,
1369       * original positions in clip
1370       * and transformed positions in data
1371       */
1372      convert_to_aos(builder, io, outputs, clipmask,
1373                     draw->vs.vertex_shader->info.num_outputs,
1374                     max_vertices);
1375   }
1376
1377   lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop);
1378
1379   sampler->destroy(sampler);
1380
1381#ifdef PIPE_ARCH_X86
1382   /* Avoid corrupting the FPU stack on 32bit OSes. */
1383   lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
1384#endif
1385
1386   ret = LLVMBuildLoad(builder, ret_ptr,"");
1387   LLVMBuildRet(builder, ret);
1388
1389   LLVMDisposeBuilder(builder);
1390
1391   /*
1392    * Translate the LLVM IR into machine code.
1393    */
1394#ifdef DEBUG
1395   if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) {
1396      lp_debug_dump_value(variant->function_elts);
1397      assert(0);
1398   }
1399#endif
1400
1401   LLVMRunFunctionPassManager(llvm->pass, variant->function_elts);
1402
1403   if (gallivm_debug & GALLIVM_DEBUG_IR) {
1404      lp_debug_dump_value(variant->function_elts);
1405      debug_printf("\n");
1406   }
1407
1408   code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function_elts);
1409   variant->jit_func_elts = (draw_jit_vert_func_elts)pointer_to_func(code);
1410
1411   if (gallivm_debug & GALLIVM_DEBUG_ASM) {
1412      lp_disassemble(code);
1413   }
1414   lp_func_delete_body(variant->function_elts);
1415}
1416
1417
1418struct draw_llvm_variant_key *
1419draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
1420{
1421   unsigned i;
1422   struct draw_llvm_variant_key *key;
1423   struct lp_sampler_static_state *sampler;
1424
1425   key = (struct draw_llvm_variant_key *)store;
1426
1427   /* Presumably all variants of the shader should have the same
1428    * number of vertex elements - ie the number of shader inputs.
1429    */
1430   key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements;
1431
1432   /* will have to rig this up properly later */
1433   key->clip_xy = llvm->draw->clip_xy;
1434   key->clip_z = llvm->draw->clip_z;
1435   key->clip_user = llvm->draw->clip_user;
1436   key->bypass_viewport = llvm->draw->identity_viewport;
1437   key->clip_halfz = !llvm->draw->rasterizer->gl_rasterization_rules;
1438   key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
1439   key->nr_planes = llvm->draw->nr_planes;
1440   key->pad = 0;
1441
1442   /* All variants of this shader will have the same value for
1443    * nr_samplers.  Not yet trying to compact away holes in the
1444    * sampler array.
1445    */
1446   key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
1447
1448   sampler = draw_llvm_variant_key_samplers(key);
1449
1450   memcpy(key->vertex_element,
1451          llvm->draw->pt.vertex_element,
1452          sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
1453
1454   memset(sampler, 0, key->nr_samplers * sizeof *sampler);
1455
1456   for (i = 0 ; i < key->nr_samplers; i++) {
1457      lp_sampler_static_state(&sampler[i],
1458			      llvm->draw->sampler_views[i],
1459			      llvm->draw->samplers[i]);
1460   }
1461
1462   return key;
1463}
1464
1465void
1466draw_llvm_set_mapped_texture(struct draw_context *draw,
1467                             unsigned sampler_idx,
1468                             uint32_t width, uint32_t height, uint32_t depth,
1469                             uint32_t last_level,
1470                             uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
1471                             uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
1472                             const void *data[PIPE_MAX_TEXTURE_LEVELS])
1473{
1474   unsigned j;
1475   struct draw_jit_texture *jit_tex;
1476
1477   assert(sampler_idx < PIPE_MAX_VERTEX_SAMPLERS);
1478
1479
1480   jit_tex = &draw->llvm->jit_context.textures[sampler_idx];
1481
1482   jit_tex->width = width;
1483   jit_tex->height = height;
1484   jit_tex->depth = depth;
1485   jit_tex->last_level = last_level;
1486
1487   for (j = 0; j <= last_level; j++) {
1488      jit_tex->data[j] = data[j];
1489      jit_tex->row_stride[j] = row_stride[j];
1490      jit_tex->img_stride[j] = img_stride[j];
1491   }
1492}
1493
1494
1495void
1496draw_llvm_set_sampler_state(struct draw_context *draw)
1497{
1498   unsigned i;
1499
1500   for (i = 0; i < draw->num_samplers; i++) {
1501      struct draw_jit_texture *jit_tex = &draw->llvm->jit_context.textures[i];
1502
1503      if (draw->samplers[i]) {
1504         jit_tex->min_lod = draw->samplers[i]->min_lod;
1505         jit_tex->max_lod = draw->samplers[i]->max_lod;
1506         jit_tex->lod_bias = draw->samplers[i]->lod_bias;
1507         COPY_4V(jit_tex->border_color, draw->samplers[i]->border_color);
1508      }
1509   }
1510}
1511
1512
1513void
1514draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
1515{
1516   struct draw_llvm *llvm = variant->llvm;
1517   struct draw_context *draw = llvm->draw;
1518
1519   if (variant->function_elts) {
1520      if (variant->function_elts)
1521         LLVMFreeMachineCodeForFunction(draw->engine,
1522                                        variant->function_elts);
1523      LLVMDeleteFunction(variant->function_elts);
1524   }
1525
1526   if (variant->function) {
1527      if (variant->function)
1528         LLVMFreeMachineCodeForFunction(draw->engine,
1529                                        variant->function);
1530      LLVMDeleteFunction(variant->function);
1531   }
1532
1533   remove_from_list(&variant->list_item_local);
1534   variant->shader->variants_cached--;
1535   remove_from_list(&variant->list_item_global);
1536   llvm->nr_variants--;
1537   FREE(variant);
1538}
1539