draw_llvm.c revision 46931ecf480e1d231bb6c2236d91b5390f2465ac
1/**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include "draw_llvm.h"
29
30#include "draw_context.h"
31#include "draw_vs.h"
32
33#include "gallivm/lp_bld_arit.h"
34#include "gallivm/lp_bld_logic.h"
35#include "gallivm/lp_bld_const.h"
36#include "gallivm/lp_bld_swizzle.h"
37#include "gallivm/lp_bld_struct.h"
38#include "gallivm/lp_bld_type.h"
39#include "gallivm/lp_bld_flow.h"
40#include "gallivm/lp_bld_debug.h"
41#include "gallivm/lp_bld_tgsi.h"
42#include "gallivm/lp_bld_printf.h"
43#include "gallivm/lp_bld_intr.h"
44#include "gallivm/lp_bld_init.h"
45#include "gallivm/lp_bld_type.h"
46
47#include "tgsi/tgsi_exec.h"
48#include "tgsi/tgsi_dump.h"
49
50#include "util/u_math.h"
51#include "util/u_pointer.h"
52#include "util/u_string.h"
53#include "util/u_simple_list.h"
54
55
56#define DEBUG_STORE 0
57
58
59/**
60 * This function is called by the gallivm "garbage collector" when
61 * the LLVM global data structures are freed.  We must free all LLVM-related
62 * data.  Specifically, all JIT'd shader variants.
63 */
64static void
65draw_llvm_garbage_collect_callback(void *cb_data)
66{
67   struct draw_llvm *llvm = (struct draw_llvm *) cb_data;
68   struct draw_context *draw = llvm->draw;
69   struct draw_llvm_variant_list_item *li;
70
71   /* Ensure prepare will be run and shaders recompiled */
72   assert(!draw->suspend_flushing);
73   draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
74
75   /* free all shader variants */
76   li = first_elem(&llvm->vs_variants_list);
77   while (!at_end(&llvm->vs_variants_list, li)) {
78      struct draw_llvm_variant_list_item *next = next_elem(li);
79      draw_llvm_destroy_variant(li->base);
80      li = next;
81   }
82
83   /* Null-out these pointers so they get remade next time they're needed.
84    * See the accessor functions below.
85    */
86   llvm->context_ptr_type = NULL;
87   llvm->buffer_ptr_type = NULL;
88   llvm->vb_ptr_type = NULL;
89   llvm->vertex_header_ptr_type = NULL;
90}
91
92
/*
 * Forward declaration: draw_llvm_create_variant() calls this before the
 * definition appears.  It is invoked twice per variant, once with
 * elts == FALSE (linear) and once with elts == TRUE (elts).
 */
static void
draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var,
                   boolean elts);
96
97
98/**
99 * Create LLVM type for struct draw_jit_texture
100 */
101static LLVMTypeRef
102create_jit_texture_type(struct gallivm_state *gallivm, const char *struct_name)
103{
104   LLVMTargetDataRef target = gallivm->target;
105   LLVMTypeRef texture_type;
106   LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS];
107   LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
108
109   elem_types[DRAW_JIT_TEXTURE_WIDTH]  =
110   elem_types[DRAW_JIT_TEXTURE_HEIGHT] =
111   elem_types[DRAW_JIT_TEXTURE_DEPTH] =
112   elem_types[DRAW_JIT_TEXTURE_FIRST_LEVEL] =
113   elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = int32_type;
114   elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] =
115   elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] =
116      LLVMArrayType(int32_type, PIPE_MAX_TEXTURE_LEVELS);
117   elem_types[DRAW_JIT_TEXTURE_DATA] =
118      LLVMArrayType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0),
119                    PIPE_MAX_TEXTURE_LEVELS);
120   elem_types[DRAW_JIT_TEXTURE_MIN_LOD] =
121   elem_types[DRAW_JIT_TEXTURE_MAX_LOD] =
122   elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatTypeInContext(gallivm->context);
123   elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] =
124      LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
125
126   texture_type = LLVMStructTypeInContext(gallivm->context, elem_types,
127                                          Elements(elem_types), 0);
128
129#if HAVE_LLVM < 0x0300
130   LLVMAddTypeName(gallivm->module, struct_name, texture_type);
131
132   /* Make sure the target's struct layout cache doesn't return
133    * stale/invalid data.
134    */
135   LLVMInvalidateStructLayout(gallivm->target, texture_type);
136#endif
137
138   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width,
139                          target, texture_type,
140                          DRAW_JIT_TEXTURE_WIDTH);
141   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height,
142                          target, texture_type,
143                          DRAW_JIT_TEXTURE_HEIGHT);
144   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth,
145                          target, texture_type,
146                          DRAW_JIT_TEXTURE_DEPTH);
147   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, first_level,
148                          target, texture_type,
149                          DRAW_JIT_TEXTURE_FIRST_LEVEL);
150   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level,
151                          target, texture_type,
152                          DRAW_JIT_TEXTURE_LAST_LEVEL);
153   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride,
154                          target, texture_type,
155                          DRAW_JIT_TEXTURE_ROW_STRIDE);
156   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride,
157                          target, texture_type,
158                          DRAW_JIT_TEXTURE_IMG_STRIDE);
159   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data,
160                          target, texture_type,
161                          DRAW_JIT_TEXTURE_DATA);
162   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, min_lod,
163                          target, texture_type,
164                          DRAW_JIT_TEXTURE_MIN_LOD);
165   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, max_lod,
166                          target, texture_type,
167                          DRAW_JIT_TEXTURE_MAX_LOD);
168   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, lod_bias,
169                          target, texture_type,
170                          DRAW_JIT_TEXTURE_LOD_BIAS);
171   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, border_color,
172                          target, texture_type,
173                          DRAW_JIT_TEXTURE_BORDER_COLOR);
174
175   LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, target, texture_type);
176
177   return texture_type;
178}
179
180
181/**
182 * Create LLVM type for struct draw_jit_texture
183 */
184static LLVMTypeRef
185create_jit_context_type(struct gallivm_state *gallivm,
186                        LLVMTypeRef texture_type, const char *struct_name)
187{
188   LLVMTargetDataRef target = gallivm->target;
189   LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
190   LLVMTypeRef elem_types[5];
191   LLVMTypeRef context_type;
192
193   elem_types[0] = LLVMPointerType(float_type, 0); /* vs_constants */
194   elem_types[1] = LLVMPointerType(float_type, 0); /* gs_constants */
195   elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
196                                                 DRAW_TOTAL_CLIP_PLANES), 0);
197   elem_types[3] = LLVMPointerType(float_type, 0); /* viewport */
198   elem_types[4] = LLVMArrayType(texture_type,
199                                 PIPE_MAX_VERTEX_SAMPLERS); /* textures */
200   context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
201                                          Elements(elem_types), 0);
202#if HAVE_LLVM < 0x0300
203   LLVMAddTypeName(gallivm->module, struct_name, context_type);
204
205   LLVMInvalidateStructLayout(gallivm->target, context_type);
206#endif
207
208   LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
209                          target, context_type, 0);
210   LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants,
211                          target, context_type, 1);
212   LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes,
213                          target, context_type, 2);
214   LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,
215                          target, context_type,
216                          DRAW_JIT_CTX_TEXTURES);
217   LP_CHECK_STRUCT_SIZE(struct draw_jit_context,
218                        target, context_type);
219
220   return context_type;
221}
222
223
224/**
225 * Create LLVM type for struct pipe_vertex_buffer
226 */
227static LLVMTypeRef
228create_jit_vertex_buffer_type(struct gallivm_state *gallivm, const char *struct_name)
229{
230   LLVMTargetDataRef target = gallivm->target;
231   LLVMTypeRef elem_types[4];
232   LLVMTypeRef vb_type;
233
234   elem_types[0] =
235   elem_types[1] = LLVMInt32TypeInContext(gallivm->context);
236   elem_types[2] =
237   elem_types[3] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* vs_constants */
238
239   vb_type = LLVMStructTypeInContext(gallivm->context, elem_types,
240                                     Elements(elem_types), 0);
241#if HAVE_LLVM < 0x0300
242   LLVMAddTypeName(gallivm->module, struct_name, vb_type);
243
244   LLVMInvalidateStructLayout(gallivm->target, vb_type);
245#endif
246
247   LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride,
248                          target, vb_type, 0);
249   LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset,
250                          target, vb_type, 1);
251
252   LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type);
253
254   return vb_type;
255}
256
257
258/**
259 * Create LLVM type for struct vertex_header;
260 */
261static LLVMTypeRef
262create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems)
263{
264   LLVMTargetDataRef target = gallivm->target;
265   LLVMTypeRef elem_types[4];
266   LLVMTypeRef vertex_header;
267   char struct_name[24];
268
269   util_snprintf(struct_name, 23, "vertex_header%d", data_elems);
270
271   elem_types[DRAW_JIT_VERTEX_VERTEX_ID]  = LLVMIntTypeInContext(gallivm->context, 32);
272   elem_types[DRAW_JIT_VERTEX_CLIP]  = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
273   elem_types[DRAW_JIT_VERTEX_PRE_CLIP_POS]  = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
274   elem_types[DRAW_JIT_VERTEX_DATA]  = LLVMArrayType(elem_types[1], data_elems);
275
276   vertex_header = LLVMStructTypeInContext(gallivm->context, elem_types,
277                                           Elements(elem_types), 0);
278#if HAVE_LLVM < 0x0300
279   LLVMAddTypeName(gallivm->module, struct_name, vertex_header);
280
281   LLVMInvalidateStructLayout(gallivm->target, vertex_header);
282#endif
283
284   /* these are bit-fields and we can't take address of them
285      LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
286      target, vertex_header,
287      DRAW_JIT_VERTEX_CLIPMASK);
288      LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag,
289      target, vertex_header,
290      DRAW_JIT_VERTEX_EDGEFLAG);
291      LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad,
292      target, vertex_header,
293      DRAW_JIT_VERTEX_PAD);
294      LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id,
295      target, vertex_header,
296      DRAW_JIT_VERTEX_VERTEX_ID);
297   */
298   LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip,
299                          target, vertex_header,
300                          DRAW_JIT_VERTEX_CLIP);
301   LP_CHECK_MEMBER_OFFSET(struct vertex_header, pre_clip_pos,
302                          target, vertex_header,
303                          DRAW_JIT_VERTEX_PRE_CLIP_POS);
304   LP_CHECK_MEMBER_OFFSET(struct vertex_header, data,
305                          target, vertex_header,
306                          DRAW_JIT_VERTEX_DATA);
307
308   assert(LLVMABISizeOfType(target, vertex_header) ==
309          offsetof(struct vertex_header, data[data_elems]));
310
311   return vertex_header;
312}
313
314
315/**
316 * Create LLVM types for various structures.
317 */
318static void
319create_jit_types(struct draw_llvm *llvm)
320{
321   struct gallivm_state *gallivm = llvm->gallivm;
322   LLVMTypeRef texture_type, context_type, buffer_type, vb_type;
323
324   texture_type = create_jit_texture_type(gallivm, "texture");
325
326   context_type = create_jit_context_type(gallivm, texture_type, "draw_jit_context");
327   llvm->context_ptr_type = LLVMPointerType(context_type, 0);
328
329   buffer_type = LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 8), 0);
330   llvm->buffer_ptr_type = LLVMPointerType(buffer_type, 0);
331
332   vb_type = create_jit_vertex_buffer_type(gallivm, "pipe_vertex_buffer");
333   llvm->vb_ptr_type = LLVMPointerType(vb_type, 0);
334}
335
336
337static LLVMTypeRef
338get_context_ptr_type(struct draw_llvm *llvm)
339{
340   if (!llvm->context_ptr_type)
341      create_jit_types(llvm);
342   return llvm->context_ptr_type;
343}
344
345
346static LLVMTypeRef
347get_buffer_ptr_type(struct draw_llvm *llvm)
348{
349   if (!llvm->buffer_ptr_type)
350      create_jit_types(llvm);
351   return llvm->buffer_ptr_type;
352}
353
354
355static LLVMTypeRef
356get_vb_ptr_type(struct draw_llvm *llvm)
357{
358   if (!llvm->vb_ptr_type)
359      create_jit_types(llvm);
360   return llvm->vb_ptr_type;
361}
362
363static LLVMTypeRef
364get_vertex_header_ptr_type(struct draw_llvm *llvm)
365{
366   if (!llvm->vertex_header_ptr_type)
367      create_jit_types(llvm);
368   return llvm->vertex_header_ptr_type;
369}
370
371
372/**
373 * Create per-context LLVM info.
374 */
375struct draw_llvm *
376draw_llvm_create(struct draw_context *draw, struct gallivm_state *gallivm)
377{
378   struct draw_llvm *llvm;
379
380   llvm = CALLOC_STRUCT( draw_llvm );
381   if (!llvm)
382      return NULL;
383
384   lp_build_init();
385
386   llvm->draw = draw;
387   llvm->gallivm = gallivm;
388
389   if (gallivm_debug & GALLIVM_DEBUG_IR) {
390      LLVMDumpModule(llvm->gallivm->module);
391   }
392
393   llvm->nr_variants = 0;
394   make_empty_list(&llvm->vs_variants_list);
395
396   gallivm_register_garbage_collector_callback(
397                              draw_llvm_garbage_collect_callback, llvm);
398
399   return llvm;
400}
401
402
403/**
404 * Free per-context LLVM info.
405 */
406void
407draw_llvm_destroy(struct draw_llvm *llvm)
408{
409   gallivm_remove_garbage_collector_callback(
410                              draw_llvm_garbage_collect_callback, llvm);
411
412   /* XXX free other draw_llvm data? */
413   FREE(llvm);
414}
415
416
417/**
418 * Create LLVM-generated code for a vertex shader.
419 */
420struct draw_llvm_variant *
421draw_llvm_create_variant(struct draw_llvm *llvm,
422			 unsigned num_inputs,
423			 const struct draw_llvm_variant_key *key)
424{
425   struct draw_llvm_variant *variant;
426   struct llvm_vertex_shader *shader =
427      llvm_vertex_shader(llvm->draw->vs.vertex_shader);
428   LLVMTypeRef vertex_header;
429
430   variant = MALLOC(sizeof *variant +
431		    shader->variant_key_size -
432		    sizeof variant->key);
433   if (variant == NULL)
434      return NULL;
435
436   variant->llvm = llvm;
437
438   memcpy(&variant->key, key, shader->variant_key_size);
439
440   vertex_header = create_jit_vertex_header(llvm->gallivm, num_inputs);
441
442   llvm->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);
443
444   draw_llvm_generate(llvm, variant, FALSE);  /* linear */
445   draw_llvm_generate(llvm, variant, TRUE);   /* elts */
446
447   variant->shader = shader;
448   variant->list_item_global.base = variant;
449   variant->list_item_local.base = variant;
450   /*variant->no = */shader->variants_created++;
451   variant->list_item_global.base = variant;
452
453   return variant;
454}
455
456
457static void
458generate_vs(struct draw_llvm *llvm,
459            LLVMBuilderRef builder,
460            LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
461            const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
462            LLVMValueRef instance_id,
463            LLVMValueRef context_ptr,
464            struct lp_build_sampler_soa *draw_sampler,
465            boolean clamp_vertex_color)
466{
467   const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
468   struct lp_type vs_type;
469   LLVMValueRef consts_ptr = draw_jit_context_vs_constants(llvm->gallivm, context_ptr);
470   struct lp_build_sampler_soa *sampler = 0;
471
472   memset(&vs_type, 0, sizeof vs_type);
473   vs_type.floating = TRUE; /* floating point values */
474   vs_type.sign = TRUE;     /* values are signed */
475   vs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
476   vs_type.width = 32;      /* 32-bit float */
477   vs_type.length = 4;      /* 4 elements per vector */
478#if 0
479   num_vs = 4;              /* number of vertices per block */
480#endif
481
482   if (gallivm_debug & GALLIVM_DEBUG_IR) {
483      tgsi_dump(tokens, 0);
484   }
485
486   if (llvm->draw->num_sampler_views && llvm->draw->num_samplers)
487      sampler = draw_sampler;
488
489   lp_build_tgsi_soa(llvm->gallivm,
490                     tokens,
491                     vs_type,
492                     NULL /*struct lp_build_mask_context *mask*/,
493                     consts_ptr,
494                     instance_id,
495                     NULL /*pos*/,
496                     inputs,
497                     outputs,
498                     sampler,
499                     &llvm->draw->vs.vertex_shader->info);
500
501   {
502      LLVMValueRef out;
503      unsigned chan, attrib;
504      struct lp_build_context bld;
505      struct tgsi_shader_info* info = &llvm->draw->vs.vertex_shader->info;
506      lp_build_context_init(&bld, llvm->gallivm, vs_type);
507
508      for (attrib = 0; attrib < info->num_outputs; ++attrib) {
509         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
510            if (outputs[attrib][chan]) {
511               switch (info->output_semantic_name[attrib]) {
512               case TGSI_SEMANTIC_COLOR:
513               case TGSI_SEMANTIC_BCOLOR:
514                  if (clamp_vertex_color) {
515                     out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
516                     out = lp_build_clamp(&bld, out, bld.zero, bld.one);
517                     LLVMBuildStore(builder, out, outputs[attrib][chan]);
518                  }
519                  break;
520               case TGSI_SEMANTIC_FOG:
521                  if (chan == 1 || chan == 2)
522                     LLVMBuildStore(builder, bld.zero, outputs[attrib][chan]);
523                  else if (chan == 3)
524                     LLVMBuildStore(builder, bld.one, outputs[attrib][chan]);
525                  break;
526               }
527            }
528         }
529      }
530   }
531}
532
533
#if DEBUG_STORE
/**
 * Debug helper (only built when DEBUG_STORE is non-zero): emit code that
 * prints the four elements of the vector \p vec.
 *
 * The 32-bit index constants are created in the LLVM context that owns
 * the vector's type, so no gallivm state is needed here.
 *
 * NOTE(review): lp_build_printf() is called with the builder as its first
 * argument, like the other debug calls in this file -- confirm this
 * matches its current prototype.
 */
static void print_vectorf(LLVMBuilderRef builder,
                         LLVMValueRef vec)
{
   LLVMTypeRef int32_type =
      LLVMInt32TypeInContext(LLVMGetTypeContext(LLVMTypeOf(vec)));
   LLVMValueRef val[4];
   unsigned i;

   for (i = 0; i < 4; ++i) {
      val[i] = LLVMBuildExtractElement(builder, vec,
                                       LLVMConstInt(int32_type, i, 0), "");
   }
   lp_build_printf(builder, "vector = [%f, %f, %f, %f]\n",
                   val[0], val[1], val[2], val[3]);
}
#endif
551
552
553static void
554generate_fetch(struct gallivm_state *gallivm,
555               LLVMValueRef vbuffers_ptr,
556               LLVMValueRef *res,
557               struct pipe_vertex_element *velem,
558               LLVMValueRef vbuf,
559               LLVMValueRef index,
560               LLVMValueRef instance_id)
561{
562   LLVMBuilderRef builder = gallivm->builder;
563   LLVMValueRef indices =
564      LLVMConstInt(LLVMInt64TypeInContext(gallivm->context),
565                   velem->vertex_buffer_index, 0);
566   LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr,
567                                           &indices, 1, "");
568   LLVMValueRef vb_stride = draw_jit_vbuffer_stride(gallivm, vbuf);
569   LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, vbuf);
570   LLVMValueRef stride;
571
572   if (velem->instance_divisor) {
573      /* array index = instance_id / instance_divisor */
574      index = LLVMBuildUDiv(builder, instance_id,
575                            lp_build_const_int32(gallivm, velem->instance_divisor),
576                            "instance_divisor");
577   }
578
579   stride = LLVMBuildMul(builder, vb_stride, index, "");
580
581   vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer");
582
583   stride = LLVMBuildAdd(builder, stride,
584                         vb_buffer_offset,
585                         "");
586   stride = LLVMBuildAdd(builder, stride,
587                         lp_build_const_int32(gallivm, velem->src_offset),
588                         "");
589
590   /*lp_build_printf(builder, "vbuf index = %d, stride is %d\n", indices, stride);*/
591   vbuffer_ptr = LLVMBuildGEP(builder, vbuffer_ptr, &stride, 1, "");
592
593   *res = draw_llvm_translate_from(gallivm, vbuffer_ptr, velem->src_format);
594}
595
596
597static LLVMValueRef
598aos_to_soa(struct gallivm_state *gallivm,
599           LLVMValueRef val0,
600           LLVMValueRef val1,
601           LLVMValueRef val2,
602           LLVMValueRef val3,
603           LLVMValueRef channel)
604{
605   LLVMBuilderRef builder = gallivm->builder;
606   LLVMValueRef ex, res;
607
608   ex = LLVMBuildExtractElement(builder, val0,
609                                channel, "");
610   res = LLVMBuildInsertElement(builder,
611                                LLVMConstNull(LLVMTypeOf(val0)),
612                                ex,
613                                lp_build_const_int32(gallivm, 0),
614                                "");
615
616   ex = LLVMBuildExtractElement(builder, val1,
617                                channel, "");
618   res = LLVMBuildInsertElement(builder,
619                                res, ex,
620                                lp_build_const_int32(gallivm, 1),
621                                "");
622
623   ex = LLVMBuildExtractElement(builder, val2,
624                                channel, "");
625   res = LLVMBuildInsertElement(builder,
626                                res, ex,
627                                lp_build_const_int32(gallivm, 2),
628                                "");
629
630   ex = LLVMBuildExtractElement(builder, val3,
631                                channel, "");
632   res = LLVMBuildInsertElement(builder,
633                                res, ex,
634                                lp_build_const_int32(gallivm, 3),
635                                "");
636
637   return res;
638}
639
640
641static void
642soa_to_aos(struct gallivm_state *gallivm,
643           LLVMValueRef soa[TGSI_NUM_CHANNELS],
644           LLVMValueRef aos[TGSI_NUM_CHANNELS])
645{
646   LLVMBuilderRef builder = gallivm->builder;
647   LLVMValueRef comp;
648   int i = 0;
649
650   debug_assert(TGSI_NUM_CHANNELS == 4);
651
652   aos[0] = LLVMConstNull(LLVMTypeOf(soa[0]));
653   aos[1] = aos[2] = aos[3] = aos[0];
654
655   for (i = 0; i < TGSI_NUM_CHANNELS; ++i) {
656      LLVMValueRef channel = lp_build_const_int32(gallivm, i);
657
658      comp = LLVMBuildExtractElement(builder, soa[i],
659                                     lp_build_const_int32(gallivm, 0), "");
660      aos[0] = LLVMBuildInsertElement(builder, aos[0], comp, channel, "");
661
662      comp = LLVMBuildExtractElement(builder, soa[i],
663                                     lp_build_const_int32(gallivm, 1), "");
664      aos[1] = LLVMBuildInsertElement(builder, aos[1], comp, channel, "");
665
666      comp = LLVMBuildExtractElement(builder, soa[i],
667                                     lp_build_const_int32(gallivm, 2), "");
668      aos[2] = LLVMBuildInsertElement(builder, aos[2], comp, channel, "");
669
670      comp = LLVMBuildExtractElement(builder, soa[i],
671                                     lp_build_const_int32(gallivm, 3), "");
672      aos[3] = LLVMBuildInsertElement(builder, aos[3], comp, channel, "");
673
674   }
675}
676
677
678static void
679convert_to_soa(struct gallivm_state *gallivm,
680               LLVMValueRef (*aos)[TGSI_NUM_CHANNELS],
681               LLVMValueRef (*soa)[TGSI_NUM_CHANNELS],
682               int num_attribs)
683{
684   int i;
685
686   debug_assert(TGSI_NUM_CHANNELS == 4);
687
688   for (i = 0; i < num_attribs; ++i) {
689      LLVMValueRef val0 = aos[i][0];
690      LLVMValueRef val1 = aos[i][1];
691      LLVMValueRef val2 = aos[i][2];
692      LLVMValueRef val3 = aos[i][3];
693
694      soa[i][0] = aos_to_soa(gallivm, val0, val1, val2, val3,
695                             lp_build_const_int32(gallivm, 0));
696      soa[i][1] = aos_to_soa(gallivm, val0, val1, val2, val3,
697                             lp_build_const_int32(gallivm, 1));
698      soa[i][2] = aos_to_soa(gallivm, val0, val1, val2, val3,
699                             lp_build_const_int32(gallivm, 2));
700      soa[i][3] = aos_to_soa(gallivm, val0, val1, val2, val3,
701                             lp_build_const_int32(gallivm, 3));
702   }
703}
704
705
706static void
707store_aos(struct gallivm_state *gallivm,
708          LLVMValueRef io_ptr,
709          LLVMValueRef index,
710          LLVMValueRef value,
711          LLVMValueRef clipmask, boolean have_clipdist)
712{
713   LLVMBuilderRef builder = gallivm->builder;
714   LLVMValueRef id_ptr = draw_jit_header_id(gallivm, io_ptr);
715   LLVMValueRef data_ptr = draw_jit_header_data(gallivm, io_ptr);
716   LLVMValueRef indices[3];
717   LLVMValueRef val;
718   int vertex_id_pad_edgeflag;
719
720   indices[0] = lp_build_const_int32(gallivm, 0);
721   indices[1] = index;
722   indices[2] = lp_build_const_int32(gallivm, 0);
723
724   /* If this assertion fails, it means we need to update the bit twidding
725    * code here.  See struct vertex_header in draw_private.h.
726    */
727   assert(DRAW_TOTAL_CLIP_PLANES==14);
728   /* initialize vertex id:16 = 0xffff, have_clipdist:1 = 0, edgeflag:1 = 1 */
729   vertex_id_pad_edgeflag = (0xffff << 16) | (1 << DRAW_TOTAL_CLIP_PLANES);
730   if (have_clipdist)
731      vertex_id_pad_edgeflag |= 1 << (DRAW_TOTAL_CLIP_PLANES+1);
732   val = lp_build_const_int32(gallivm, vertex_id_pad_edgeflag);
733   /* OR with the clipmask */
734   val = LLVMBuildOr(builder, val, clipmask, "");
735
736   /* store vertex header */
737   LLVMBuildStore(builder, val, id_ptr);
738
739
740#if DEBUG_STORE
741   lp_build_printf(builder, "    ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr);
742#endif
743#if 0
744   /*lp_build_printf(builder, " ---- %p storing at %d (%p)  ", io_ptr, index, data_ptr);
745     print_vectorf(builder, value);*/
746   data_ptr = LLVMBuildBitCast(builder, data_ptr,
747                               LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), 0), 0),
748                               "datavec");
749   data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 2, "");
750
751   LLVMBuildStore(builder, value, data_ptr);
752#else
753   {
754      LLVMValueRef x, y, z, w;
755      LLVMValueRef idx0, idx1, idx2, idx3;
756      LLVMValueRef gep0, gep1, gep2, gep3;
757      data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, "");
758
759      idx0 = lp_build_const_int32(gallivm, 0);
760      idx1 = lp_build_const_int32(gallivm, 1);
761      idx2 = lp_build_const_int32(gallivm, 2);
762      idx3 = lp_build_const_int32(gallivm, 3);
763
764      x = LLVMBuildExtractElement(builder, value,
765                                  idx0, "");
766      y = LLVMBuildExtractElement(builder, value,
767                                  idx1, "");
768      z = LLVMBuildExtractElement(builder, value,
769                                  idx2, "");
770      w = LLVMBuildExtractElement(builder, value,
771                                  idx3, "");
772
773      gep0 = LLVMBuildGEP(builder, data_ptr, &idx0, 1, "");
774      gep1 = LLVMBuildGEP(builder, data_ptr, &idx1, 1, "");
775      gep2 = LLVMBuildGEP(builder, data_ptr, &idx2, 1, "");
776      gep3 = LLVMBuildGEP(builder, data_ptr, &idx3, 1, "");
777
778      /*lp_build_printf(builder, "##### x = %f (%p), y = %f (%p), z = %f (%p), w = %f (%p)\n",
779        x, gep0, y, gep1, z, gep2, w, gep3);*/
780      LLVMBuildStore(builder, x, gep0);
781      LLVMBuildStore(builder, y, gep1);
782      LLVMBuildStore(builder, z, gep2);
783      LLVMBuildStore(builder, w, gep3);
784   }
785#endif
786}
787
788
789static void
790store_aos_array(struct gallivm_state *gallivm,
791                LLVMValueRef io_ptr,
792                LLVMValueRef aos[TGSI_NUM_CHANNELS],
793                int attrib,
794                int num_outputs,
795                LLVMValueRef clipmask,
796                boolean have_clipdist)
797{
798   LLVMBuilderRef builder = gallivm->builder;
799   LLVMValueRef attr_index = lp_build_const_int32(gallivm, attrib);
800   LLVMValueRef ind0 = lp_build_const_int32(gallivm, 0);
801   LLVMValueRef ind1 = lp_build_const_int32(gallivm, 1);
802   LLVMValueRef ind2 = lp_build_const_int32(gallivm, 2);
803   LLVMValueRef ind3 = lp_build_const_int32(gallivm, 3);
804   LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr;
805   LLVMValueRef clipmask0, clipmask1, clipmask2, clipmask3;
806
807   debug_assert(TGSI_NUM_CHANNELS == 4);
808
809   io0_ptr = LLVMBuildGEP(builder, io_ptr,
810                          &ind0, 1, "");
811   io1_ptr = LLVMBuildGEP(builder, io_ptr,
812                          &ind1, 1, "");
813   io2_ptr = LLVMBuildGEP(builder, io_ptr,
814                          &ind2, 1, "");
815   io3_ptr = LLVMBuildGEP(builder, io_ptr,
816                          &ind3, 1, "");
817
818   clipmask0 = LLVMBuildExtractElement(builder, clipmask,
819                                       ind0, "");
820   clipmask1 = LLVMBuildExtractElement(builder, clipmask,
821                                       ind1, "");
822   clipmask2 = LLVMBuildExtractElement(builder, clipmask,
823                                       ind2, "");
824   clipmask3 = LLVMBuildExtractElement(builder, clipmask,
825                                       ind3, "");
826
827#if DEBUG_STORE
828   lp_build_printf(builder, "io = %p, indexes[%d, %d, %d, %d]\n, clipmask0 = %x, clipmask1 = %x, clipmask2 = %x, clipmask3 = %x\n",
829                   io_ptr, ind0, ind1, ind2, ind3, clipmask0, clipmask1, clipmask2, clipmask3);
830#endif
831   /* store for each of the 4 vertices */
832   store_aos(gallivm, io0_ptr, attr_index, aos[0], clipmask0, have_clipdist);
833   store_aos(gallivm, io1_ptr, attr_index, aos[1], clipmask1, have_clipdist);
834   store_aos(gallivm, io2_ptr, attr_index, aos[2], clipmask2, have_clipdist);
835   store_aos(gallivm, io3_ptr, attr_index, aos[3], clipmask3, have_clipdist);
836}
837
838
839static void
840convert_to_aos(struct gallivm_state *gallivm,
841               LLVMValueRef io,
842               LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
843               LLVMValueRef clipmask,
844               int num_outputs,
845               int max_vertices, boolean have_clipdist)
846{
847   LLVMBuilderRef builder = gallivm->builder;
848   unsigned chan, attrib;
849
850#if DEBUG_STORE
851   lp_build_printf(builder, "   # storing begin\n");
852#endif
853   for (attrib = 0; attrib < num_outputs; ++attrib) {
854      LLVMValueRef soa[4];
855      LLVMValueRef aos[4];
856      for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
857         if (outputs[attrib][chan]) {
858            LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
859            lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);
860            /*lp_build_printf(builder, "output %d : %d ",
861                            LLVMConstInt(LLVMInt32Type(), attrib, 0),
862                            LLVMConstInt(LLVMInt32Type(), chan, 0));
863              print_vectorf(builder, out);*/
864            soa[chan] = out;
865         }
866         else {
867            soa[chan] = 0;
868         }
869      }
870      soa_to_aos(gallivm, soa, aos);
871      store_aos_array(gallivm,
872                      io,
873                      aos,
874                      attrib,
875                      num_outputs,
876                      clipmask, have_clipdist);
877   }
878#if DEBUG_STORE
879   lp_build_printf(builder, "   # storing end\n");
880#endif
881}
882
883
884/**
885 * Stores original vertex positions in clip coordinates
886 * There is probably a more efficient way to do this, 4 floats at once
887 * rather than extracting each element one by one.
888 * idx is the output to store things too, if pre_clip_pos is set
889 * we store the pos to the idx, if not we store the clipvertex to it.
890 */
891static void
892store_clip(struct gallivm_state *gallivm,
893           LLVMValueRef io_ptr,
894           LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
895           boolean pre_clip_pos, int idx)
896{
897   LLVMBuilderRef builder = gallivm->builder;
898   LLVMValueRef out[4];
899   LLVMValueRef indices[2];
900   LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr;
901   LLVMValueRef clip_ptr0, clip_ptr1, clip_ptr2, clip_ptr3;
902   LLVMValueRef clip0_ptr, clip1_ptr, clip2_ptr, clip3_ptr;
903   LLVMValueRef out0elem, out1elem, out2elem, out3elem;
904   int i;
905
906   LLVMValueRef ind0 = lp_build_const_int32(gallivm, 0);
907   LLVMValueRef ind1 = lp_build_const_int32(gallivm, 1);
908   LLVMValueRef ind2 = lp_build_const_int32(gallivm, 2);
909   LLVMValueRef ind3 = lp_build_const_int32(gallivm, 3);
910
911   indices[0] =
912   indices[1] = lp_build_const_int32(gallivm, 0);
913
914   out[0] = LLVMBuildLoad(builder, outputs[idx][0], ""); /*x0 x1 x2 x3*/
915   out[1] = LLVMBuildLoad(builder, outputs[idx][1], ""); /*y0 y1 y2 y3*/
916   out[2] = LLVMBuildLoad(builder, outputs[idx][2], ""); /*z0 z1 z2 z3*/
917   out[3] = LLVMBuildLoad(builder, outputs[idx][3], ""); /*w0 w1 w2 w3*/
918
919   io0_ptr = LLVMBuildGEP(builder, io_ptr, &ind0, 1, "");
920   io1_ptr = LLVMBuildGEP(builder, io_ptr, &ind1, 1, "");
921   io2_ptr = LLVMBuildGEP(builder, io_ptr, &ind2, 1, "");
922   io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, "");
923
924   if (!pre_clip_pos) {
925      clip_ptr0 = draw_jit_header_clip(gallivm, io0_ptr);
926      clip_ptr1 = draw_jit_header_clip(gallivm, io1_ptr);
927      clip_ptr2 = draw_jit_header_clip(gallivm, io2_ptr);
928      clip_ptr3 = draw_jit_header_clip(gallivm, io3_ptr);
929   } else {
930      clip_ptr0 = draw_jit_header_pre_clip_pos(gallivm, io0_ptr);
931      clip_ptr1 = draw_jit_header_pre_clip_pos(gallivm, io1_ptr);
932      clip_ptr2 = draw_jit_header_pre_clip_pos(gallivm, io2_ptr);
933      clip_ptr3 = draw_jit_header_pre_clip_pos(gallivm, io3_ptr);
934   }
935
936   for (i = 0; i<4; i++) {
937      clip0_ptr = LLVMBuildGEP(builder, clip_ptr0, indices, 2, ""); /* x0 */
938      clip1_ptr = LLVMBuildGEP(builder, clip_ptr1, indices, 2, ""); /* x1 */
939      clip2_ptr = LLVMBuildGEP(builder, clip_ptr2, indices, 2, ""); /* x2 */
940      clip3_ptr = LLVMBuildGEP(builder, clip_ptr3, indices, 2, ""); /* x3 */
941
942      out0elem = LLVMBuildExtractElement(builder, out[i], ind0, ""); /* x0 */
943      out1elem = LLVMBuildExtractElement(builder, out[i], ind1, ""); /* x1 */
944      out2elem = LLVMBuildExtractElement(builder, out[i], ind2, ""); /* x2 */
945      out3elem = LLVMBuildExtractElement(builder, out[i], ind3, ""); /* x3 */
946
947      LLVMBuildStore(builder, out0elem, clip0_ptr);
948      LLVMBuildStore(builder, out1elem, clip1_ptr);
949      LLVMBuildStore(builder, out2elem, clip2_ptr);
950      LLVMBuildStore(builder, out3elem, clip3_ptr);
951
952      indices[1]= LLVMBuildAdd(builder, indices[1], ind1, "");
953   }
954
955}
956
957
958/**
959 * Equivalent of _mm_set1_ps(a)
960 */
961static LLVMValueRef
962vec4f_from_scalar(struct gallivm_state *gallivm,
963                  LLVMValueRef a,
964                  const char *name)
965{
966   LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
967   LLVMValueRef res = LLVMGetUndef(LLVMVectorType(float_type, 4));
968   int i;
969
970   for (i = 0; i < 4; ++i) {
971      LLVMValueRef index = lp_build_const_int32(gallivm, i);
972      res = LLVMBuildInsertElement(gallivm->builder, res, a,
973                                   index, i == 3 ? name : "");
974   }
975
976   return res;
977}
978
979
980/**
981 * Transforms the outputs for viewport mapping
982 */
983static void
984generate_viewport(struct draw_llvm *llvm,
985                  LLVMBuilderRef builder,
986                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
987                  LLVMValueRef context_ptr)
988{
989   int i;
990   struct gallivm_state *gallivm = llvm->gallivm;
991   struct lp_type f32_type = lp_type_float_vec(32);
992   LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/
993   LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0);       /*1.0 1.0 1.0 1.0*/
994   LLVMValueRef vp_ptr = draw_jit_context_viewport(gallivm, context_ptr);
995
996   /* for 1/w convention*/
997   out3 = LLVMBuildFDiv(builder, const1, out3, "");
998   LLVMBuildStore(builder, out3, outputs[0][3]);
999
1000   /* Viewport Mapping */
1001   for (i=0; i<3; i++) {
1002      LLVMValueRef out = LLVMBuildLoad(builder, outputs[0][i], ""); /*x0 x1 x2 x3*/
1003      LLVMValueRef scale;
1004      LLVMValueRef trans;
1005      LLVMValueRef scale_i;
1006      LLVMValueRef trans_i;
1007      LLVMValueRef index;
1008
1009      index = lp_build_const_int32(gallivm, i);
1010      scale_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");
1011
1012      index = lp_build_const_int32(gallivm, i+4);
1013      trans_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");
1014
1015      scale = vec4f_from_scalar(gallivm, LLVMBuildLoad(builder, scale_i, ""), "scale");
1016      trans = vec4f_from_scalar(gallivm, LLVMBuildLoad(builder, trans_i, ""), "trans");
1017
1018      /* divide by w */
1019      out = LLVMBuildFMul(builder, out, out3, "");
1020      /* mult by scale */
1021      out = LLVMBuildFMul(builder, out, scale, "");
1022      /* add translation */
1023      out = LLVMBuildFAdd(builder, out, trans, "");
1024
1025      /* store transformed outputs */
1026      LLVMBuildStore(builder, out, outputs[0][i]);
1027   }
1028
1029}
1030
1031
1032/**
1033 * Returns clipmask as 4xi32 bitmask for the 4 vertices
1034 */
1035static LLVMValueRef
1036generate_clipmask(struct draw_llvm *llvm,
1037                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1038                  boolean clip_xy,
1039                  boolean clip_z,
1040                  boolean clip_user,
1041                  boolean clip_halfz,
1042                  unsigned ucp_enable,
1043                  LLVMValueRef context_ptr,
1044                  boolean *have_clipdist)
1045{
1046   struct gallivm_state *gallivm = llvm->gallivm;
1047   LLVMBuilderRef builder = gallivm->builder;
1048   LLVMValueRef mask; /* stores the <4xi32> clipmasks */
1049   LLVMValueRef test, temp;
1050   LLVMValueRef zero, shift;
1051   LLVMValueRef pos_x, pos_y, pos_z, pos_w;
1052   LLVMValueRef cv_x, cv_y, cv_z, cv_w;
1053   LLVMValueRef plane1, planes, plane_ptr, sum;
1054   struct lp_type f32_type = lp_type_float_vec(32);
1055   const unsigned pos = draw_current_shader_position_output(llvm->draw);
1056   const unsigned cv = draw_current_shader_clipvertex_output(llvm->draw);
1057   int num_written_clipdistance = llvm->draw->vs.vertex_shader->info.num_written_clipdistance;
1058   bool have_cd = false;
1059   unsigned cd[2];
1060
1061   cd[0] = draw_current_shader_clipdistance_output(llvm->draw, 0);
1062   cd[1] = draw_current_shader_clipdistance_output(llvm->draw, 1);
1063
1064   if (cd[0] != pos || cd[1] != pos)
1065      have_cd = true;
1066
1067   mask = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0);
1068   temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0);
1069   zero = lp_build_const_vec(gallivm, f32_type, 0);                    /* 0.0f 0.0f 0.0f 0.0f */
1070   shift = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 1);    /* 1 1 1 1 */
1071
1072   /*
1073    * load clipvertex and position from correct locations.
1074    * if they are the same just load them once.
1075    */
1076   pos_x = LLVMBuildLoad(builder, outputs[pos][0], ""); /*x0 x1 x2 x3*/
1077   pos_y = LLVMBuildLoad(builder, outputs[pos][1], ""); /*y0 y1 y2 y3*/
1078   pos_z = LLVMBuildLoad(builder, outputs[pos][2], ""); /*z0 z1 z2 z3*/
1079   pos_w = LLVMBuildLoad(builder, outputs[pos][3], ""); /*w0 w1 w2 w3*/
1080
1081   if (clip_user && cv != pos) {
1082      cv_x = LLVMBuildLoad(builder, outputs[cv][0], ""); /*x0 x1 x2 x3*/
1083      cv_y = LLVMBuildLoad(builder, outputs[cv][1], ""); /*y0 y1 y2 y3*/
1084      cv_z = LLVMBuildLoad(builder, outputs[cv][2], ""); /*z0 z1 z2 z3*/
1085      cv_w = LLVMBuildLoad(builder, outputs[cv][3], ""); /*w0 w1 w2 w3*/
1086   } else {
1087      cv_x = pos_x;
1088      cv_y = pos_y;
1089      cv_z = pos_z;
1090      cv_w = pos_w;
1091   }
1092
1093   /* Cliptest, for hardwired planes */
1094   if (clip_xy) {
1095      /* plane 1 */
1096      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w);
1097      temp = shift;
1098      test = LLVMBuildAnd(builder, test, temp, "");
1099      mask = test;
1100
1101      /* plane 2 */
1102      test = LLVMBuildFAdd(builder, pos_x, pos_w, "");
1103      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1104      temp = LLVMBuildShl(builder, temp, shift, "");
1105      test = LLVMBuildAnd(builder, test, temp, "");
1106      mask = LLVMBuildOr(builder, mask, test, "");
1107
1108      /* plane 3 */
1109      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w);
1110      temp = LLVMBuildShl(builder, temp, shift, "");
1111      test = LLVMBuildAnd(builder, test, temp, "");
1112      mask = LLVMBuildOr(builder, mask, test, "");
1113
1114      /* plane 4 */
1115      test = LLVMBuildFAdd(builder, pos_y, pos_w, "");
1116      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1117      temp = LLVMBuildShl(builder, temp, shift, "");
1118      test = LLVMBuildAnd(builder, test, temp, "");
1119      mask = LLVMBuildOr(builder, mask, test, "");
1120   }
1121
1122   if (clip_z) {
1123      temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 16);
1124      if (clip_halfz) {
1125         /* plane 5 */
1126         test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z);
1127         test = LLVMBuildAnd(builder, test, temp, "");
1128         mask = LLVMBuildOr(builder, mask, test, "");
1129      }
1130      else {
1131         /* plane 5 */
1132         test = LLVMBuildFAdd(builder, pos_z, pos_w, "");
1133         test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1134         test = LLVMBuildAnd(builder, test, temp, "");
1135         mask = LLVMBuildOr(builder, mask, test, "");
1136      }
1137      /* plane 6 */
1138      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w);
1139      temp = LLVMBuildShl(builder, temp, shift, "");
1140      test = LLVMBuildAnd(builder, test, temp, "");
1141      mask = LLVMBuildOr(builder, mask, test, "");
1142   }
1143
1144   if (clip_user) {
1145      LLVMValueRef planes_ptr = draw_jit_context_planes(gallivm, context_ptr);
1146      LLVMValueRef indices[3];
1147
1148      /* userclip planes */
1149      while (ucp_enable) {
1150         unsigned plane_idx = ffs(ucp_enable)-1;
1151         ucp_enable &= ~(1 << plane_idx);
1152         plane_idx += 6;
1153
1154         if (have_cd && num_written_clipdistance) {
1155            LLVMValueRef clipdist;
1156            int i;
1157            i = plane_idx - 6;
1158
1159            *have_clipdist = TRUE;
1160            if (i < 4) {
1161               clipdist = LLVMBuildLoad(builder, outputs[cd[0]][i], "");
1162            } else {
1163               clipdist = LLVMBuildLoad(builder, outputs[cd[1]][i-4], "");
1164            }
1165            test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, clipdist);
1166            temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 1 << plane_idx);
1167            test = LLVMBuildAnd(builder, test, temp, "");
1168            mask = LLVMBuildOr(builder, mask, test, "");
1169         } else {
1170            indices[0] = lp_build_const_int32(gallivm, 0);
1171            indices[1] = lp_build_const_int32(gallivm, plane_idx);
1172
1173            indices[2] = lp_build_const_int32(gallivm, 0);
1174            plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
1175            plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x");
1176            planes = vec4f_from_scalar(gallivm, plane1, "plane4_x");
1177            sum = LLVMBuildFMul(builder, planes, cv_x, "");
1178
1179            indices[2] = lp_build_const_int32(gallivm, 1);
1180            plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
1181            plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y");
1182            planes = vec4f_from_scalar(gallivm, plane1, "plane4_y");
1183            test = LLVMBuildFMul(builder, planes, cv_y, "");
1184            sum = LLVMBuildFAdd(builder, sum, test, "");
1185
1186            indices[2] = lp_build_const_int32(gallivm, 2);
1187            plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
1188            plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z");
1189            planes = vec4f_from_scalar(gallivm, plane1, "plane4_z");
1190            test = LLVMBuildFMul(builder, planes, cv_z, "");
1191            sum = LLVMBuildFAdd(builder, sum, test, "");
1192
1193            indices[2] = lp_build_const_int32(gallivm, 3);
1194            plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
1195            plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w");
1196            planes = vec4f_from_scalar(gallivm, plane1, "plane4_w");
1197            test = LLVMBuildFMul(builder, planes, cv_w, "");
1198            sum = LLVMBuildFAdd(builder, sum, test, "");
1199
1200            test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, sum);
1201            temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 1 << plane_idx);
1202            test = LLVMBuildAnd(builder, test, temp, "");
1203            mask = LLVMBuildOr(builder, mask, test, "");
1204         }
1205      }
1206   }
1207   return mask;
1208}
1209
1210
1211/**
1212 * Returns boolean if any clipping has occurred
1213 * Used zero/non-zero i32 value to represent boolean
1214 */
1215static void
1216clipmask_bool(struct gallivm_state *gallivm,
1217              LLVMValueRef clipmask,
1218              LLVMValueRef ret_ptr)
1219{
1220   LLVMBuilderRef builder = gallivm->builder;
1221   LLVMValueRef ret = LLVMBuildLoad(builder, ret_ptr, "");
1222   LLVMValueRef temp;
1223   int i;
1224
1225   for (i=0; i<4; i++) {
1226      temp = LLVMBuildExtractElement(builder, clipmask,
1227                                     lp_build_const_int32(gallivm, i) , "");
1228      ret = LLVMBuildOr(builder, ret, temp, "");
1229   }
1230
1231   LLVMBuildStore(builder, ret, ret_ptr);
1232}
1233
1234
/**
 * Build the LLVM function for one shader variant, run the function pass
 * manager on it and store the resulting JIT entry point in the variant.
 *
 * The generated function returns an i32 and takes eight arguments:
 * context, vertex header (io), vbuffers, start or fetch_elts, count or
 * fetch_count, stride, vertex buffer descriptors and instance_id.  Its
 * body is a loop that advances four vertices per iteration: fetch the
 * vertex elements, run the vertex shader, store the clip-space positions,
 * optionally clip-test and viewport-transform, then store the outputs and
 * clipmask through io.  The returned i32 is the OR of all clipmask lanes
 * seen, so it is non-zero if any vertex was flagged.
 *
 * \param llvm     draw LLVM state supplying the gallivm module and builder
 * \param variant  variant whose key drives code generation; receives the
 *                 LLVM function and the JIT function pointer
 * \param elts     TRUE builds the indexed function "draw_llvm_shader_elts"
 *                 (stored in function_elts / jit_func_elts), FALSE builds
 *                 the linear "draw_llvm_shader" (function / jit_func)
 */
1235static void
1236draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
1237                   boolean elts)
1238{
1239   struct gallivm_state *gallivm = llvm->gallivm;
1240   LLVMContextRef context = gallivm->context;
1241   LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
1242   LLVMTypeRef arg_types[8];
1243   LLVMTypeRef func_type;
1244   LLVMValueRef context_ptr;
1245   LLVMBasicBlockRef block;
1246   LLVMBuilderRef builder;
1247   LLVMValueRef end, start;
1248   LLVMValueRef count, fetch_elts, fetch_count;
1249   LLVMValueRef stride, step, io_itr;
1250   LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
1251   LLVMValueRef instance_id;
1252   LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
1253   LLVMValueRef one = lp_build_const_int32(gallivm, 1);
1254   struct draw_context *draw = llvm->draw;
1255   const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info;
1256   unsigned i, j;
1257   struct lp_build_context bld;
1258   struct lp_build_loop_state lp_loop;
1259   const int max_vertices = 4;
1260   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
1261   LLVMValueRef fetch_max;
1262   void *code;
1263   struct lp_build_sampler_soa *sampler = 0;
1264   LLVMValueRef ret, ret_ptr;
1265   const boolean bypass_viewport = variant->key.bypass_viewport;
   /* the clip test is generated only if at least one kind of clipping is on */
1266   const boolean enable_cliptest = variant->key.clip_xy ||
1267                                   variant->key.clip_z  ||
1268                                   variant->key.clip_user;
1269   LLVMValueRef variant_func;
1270   const unsigned pos = draw_current_shader_position_output(llvm->draw);
1271   const unsigned cv = draw_current_shader_clipvertex_output(llvm->draw);
   /* set at code-generation time by generate_clipmask() when a clip
    * distance output is used; passed on to the output store */
1272   boolean have_clipdist = FALSE;
1273
   /* argument 3 is the only one whose type depends on elts */
1274   arg_types[0] = get_context_ptr_type(llvm);       /* context */
1275   arg_types[1] = get_vertex_header_ptr_type(llvm); /* vertex_header */
1276   arg_types[2] = get_buffer_ptr_type(llvm);        /* vbuffers */
1277   if (elts)
1278      arg_types[3] = LLVMPointerType(int32_type, 0);/* fetch_elts * */
1279   else
1280      arg_types[3] = int32_type;                    /* start */
1281   arg_types[4] = int32_type;                       /* fetch_count / count */
1282   arg_types[5] = int32_type;                       /* stride */
1283   arg_types[6] = get_vb_ptr_type(llvm);            /* pipe_vertex_buffer's */
1284   arg_types[7] = int32_type;                       /* instance_id */
1285
1286   func_type = LLVMFunctionType(int32_type, arg_types, Elements(arg_types), 0);
1287
1288   variant_func = LLVMAddFunction(gallivm->module,
1289                                  elts ? "draw_llvm_shader_elts" : "draw_llvm_shader",
1290                                  func_type);
1291
1292   if (elts)
1293      variant->function_elts = variant_func;
1294   else
1295      variant->function = variant_func;
1296
1297   LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
   /* every pointer-typed argument is marked noalias */
1298   for (i = 0; i < Elements(arg_types); ++i)
1299      if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
1300         LLVMAddAttribute(LLVMGetParam(variant_func, i),
1301                          LLVMNoAliasAttribute);
1302
1303   context_ptr  = LLVMGetParam(variant_func, 0);
1304   io_ptr       = LLVMGetParam(variant_func, 1);
1305   vbuffers_ptr = LLVMGetParam(variant_func, 2);
1306   stride       = LLVMGetParam(variant_func, 5);
1307   vb_ptr       = LLVMGetParam(variant_func, 6);
1308   instance_id  = LLVMGetParam(variant_func, 7);
1309
1310   lp_build_name(context_ptr, "context");
1311   lp_build_name(io_ptr, "io");
1312   lp_build_name(vbuffers_ptr, "vbuffers");
1313   lp_build_name(stride, "stride");
1314   lp_build_name(vb_ptr, "vb");
1315   lp_build_name(instance_id, "instance_id");
1316
1317   if (elts) {
1318      fetch_elts   = LLVMGetParam(variant_func, 3);
1319      fetch_count  = LLVMGetParam(variant_func, 4);
1320      lp_build_name(fetch_elts, "fetch_elts");
1321      lp_build_name(fetch_count, "fetch_count");
1322      start = count = NULL;
1323   }
1324   else {
1325      start        = LLVMGetParam(variant_func, 3);
1326      count        = LLVMGetParam(variant_func, 4);
1327      lp_build_name(start, "start");
1328      lp_build_name(count, "count");
1329      fetch_elts = fetch_count = NULL;
1330   }
1331
1332   /*
1333    * Function body
1334    */
1335
1336   block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
1337   builder = gallivm->builder;
1338   LLVMPositionBuilderAtEnd(builder, block);
1339
1340   lp_build_context_init(&bld, gallivm, lp_type_int(32));
1341
1342   /* function will return non-zero i32 value if any clipped vertices */
1343   ret_ptr = lp_build_alloca(gallivm, int32_type, "");
1344   LLVMBuildStore(builder, zero, ret_ptr);
1345
1346   /* code generated texture sampling */
1347   sampler = draw_llvm_sampler_soa_create(
1348      draw_llvm_variant_key_samplers(&variant->key),
1349      context_ptr);
1350
   /* loop range: [0, fetch_count) over the index list when elts is set,
    * [start, start + count) over vertex numbers otherwise */
1351   if (elts) {
1352      start = zero;
1353      end = fetch_count;
1354   }
1355   else {
1356      end = lp_build_add(&bld, start, count);
1357   }
1358
1359   step = lp_build_const_int32(gallivm, max_vertices);
1360
   /* highest valid loop index, used below to clamp the per-vertex fetches */
1361   fetch_max = LLVMBuildSub(builder, end, one, "fetch_max");
1362
1363   lp_build_loop_begin(&lp_loop, gallivm, start);
1364   {
1365      LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
1366      LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS] = { { 0 } };
1367      LLVMValueRef io;
1368      LLVMValueRef clipmask;   /* holds the clipmask value */
1369      const LLVMValueRef (*ptr_aos)[TGSI_NUM_CHANNELS];
1370
      /* io is indexed from zero: in the linear case the counter starts at
       * start, so that offset is subtracted */
1371      if (elts)
1372         io_itr = lp_loop.counter;
1373      else
1374         io_itr = LLVMBuildSub(builder, lp_loop.counter, start, "");
1375
1376      io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
1377#if DEBUG_STORE
1378      lp_build_printf(builder, " --- io %d = %p, loop counter %d\n",
1379                      io_itr, io, lp_loop.counter);
1380#endif
      /* i selects one of the four vertices handled in this iteration */
1381      for (i = 0; i < TGSI_NUM_CHANNELS; ++i) {
1382         LLVMValueRef true_index =
1383            LLVMBuildAdd(builder,
1384                         lp_loop.counter,
1385                         lp_build_const_int32(gallivm, i), "");
1386
1387         /* make sure we're not out of bounds which can happen
1388          * if fetch_count % 4 != 0, because on the last iteration
1389          * a few of the 4 vertex fetches will be out of bounds */
1390         true_index = lp_build_min(&bld, true_index, fetch_max);
1391
         /* indexed draw: translate the position in the index list into
          * the vertex index stored there */
1392         if (elts) {
1393            LLVMValueRef fetch_ptr;
1394            fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
1395                                     &true_index, 1, "");
1396            true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt");
1397         }
1398
1399         for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
1400            struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
1401            LLVMValueRef vb_index =
1402               lp_build_const_int32(gallivm, velem->vertex_buffer_index);
1403            LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, "");
1404            generate_fetch(gallivm, vbuffers_ptr,
1405                           &aos_attribs[j][i], velem, vb, true_index,
1406                           instance_id);
1407         }
1408      }
1409      convert_to_soa(gallivm, aos_attribs, inputs,
1410                     draw->pt.nr_vertex_elements);
1411
      /* despite its name ptr_aos points at the converted inputs array */
1412      ptr_aos = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) inputs;
1413      generate_vs(llvm,
1414                  builder,
1415                  outputs,
1416                  ptr_aos,
1417                  instance_id,
1418                  context_ptr,
1419                  sampler,
1420                  variant->key.clamp_vertex_color);
1421
1422      /* store original positions in clip before further manipulation */
1423      store_clip(gallivm, io, outputs, 0, cv);
1424      store_clip(gallivm, io, outputs, 1, pos);
1425
1426      /* do cliptest */
1427      if (enable_cliptest) {
1428         /* allocate clipmask, assign it integer type */
1429         clipmask = generate_clipmask(llvm, outputs,
1430                                      variant->key.clip_xy,
1431                                      variant->key.clip_z,
1432                                      variant->key.clip_user,
1433                                      variant->key.clip_halfz,
1434                                      variant->key.ucp_enable,
1435                                      context_ptr, &have_clipdist);
1436         /* return clipping boolean value for function */
1437         clipmask_bool(gallivm, clipmask, ret_ptr);
1438      }
1439      else {
1440         clipmask = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0);
1441      }
1442
1443      /* do viewport mapping */
1444      if (!bypass_viewport) {
1445         generate_viewport(llvm, builder, outputs, context_ptr);
1446      }
1447
1448      /* store clipmask in vertex header,
1449       * original positions in clip
1450       * and transformed positions in data
1451       */
1452      convert_to_aos(gallivm, io, outputs, clipmask,
1453                     vs_info->num_outputs, max_vertices, have_clipdist);
1454   }
1455
   /* NOTE(review): presumably advances the counter by step and leaves the
    * loop once counter >= end (unsigned compare) -- confirm in lp_bld_flow */
1456   lp_build_loop_end_cond(&lp_loop, end, step, LLVMIntUGE);
1457
1458   sampler->destroy(sampler);
1459
1460   ret = LLVMBuildLoad(builder, ret_ptr, "");
1461   LLVMBuildRet(builder, ret);
1462
1463   /*
1464    * Translate the LLVM IR into machine code.
1465    */
1466#ifdef DEBUG
1467   if (LLVMVerifyFunction(variant_func, LLVMPrintMessageAction)) {
1468      lp_debug_dump_value(variant_func);
1469      assert(0);
1470   }
1471#endif
1472
1473   LLVMRunFunctionPassManager(gallivm->passmgr, variant_func);
1474
1475   if (gallivm_debug & GALLIVM_DEBUG_IR) {
1476      lp_debug_dump_value(variant_func);
1477      debug_printf("\n");
1478   }
1479
1480   code = LLVMGetPointerToGlobal(gallivm->engine, variant_func);
1481   if (elts)
1482      variant->jit_func_elts = (draw_jit_vert_func_elts) pointer_to_func(code);
1483   else
1484      variant->jit_func = (draw_jit_vert_func) pointer_to_func(code);
1485
1486   if (gallivm_debug & GALLIVM_DEBUG_ASM) {
1487      lp_disassemble(code);
1488   }
   /* NOTE(review): presumably drops the IR body now that machine code
    * exists -- confirm against lp_func_delete_body() */
1489   lp_func_delete_body(variant_func);
1490}
1491
1492
1493struct draw_llvm_variant_key *
1494draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
1495{
1496   unsigned i;
1497   struct draw_llvm_variant_key *key;
1498   struct lp_sampler_static_state *sampler;
1499
1500   key = (struct draw_llvm_variant_key *)store;
1501
1502   key->clamp_vertex_color = llvm->draw->rasterizer->clamp_vertex_color; /**/
1503
1504   /* Presumably all variants of the shader should have the same
1505    * number of vertex elements - ie the number of shader inputs.
1506    */
1507   key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements;
1508
1509   /* will have to rig this up properly later */
1510   key->clip_xy = llvm->draw->clip_xy;
1511   key->clip_z = llvm->draw->clip_z;
1512   key->clip_user = llvm->draw->clip_user;
1513   key->bypass_viewport = llvm->draw->identity_viewport;
1514   key->clip_halfz = !llvm->draw->rasterizer->gl_rasterization_rules;
1515   key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
1516   key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;
1517   key->pad = 0;
1518
1519   /* All variants of this shader will have the same value for
1520    * nr_samplers.  Not yet trying to compact away holes in the
1521    * sampler array.
1522    */
1523   key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
1524
1525   sampler = draw_llvm_variant_key_samplers(key);
1526
1527   memcpy(key->vertex_element,
1528          llvm->draw->pt.vertex_element,
1529          sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
1530
1531   memset(sampler, 0, key->nr_samplers * sizeof *sampler);
1532
1533   for (i = 0 ; i < key->nr_samplers; i++) {
1534      lp_sampler_static_state(&sampler[i],
1535			      llvm->draw->sampler_views[i],
1536			      llvm->draw->samplers[i]);
1537   }
1538
1539   return key;
1540}
1541
1542
1543void
1544draw_llvm_set_mapped_texture(struct draw_context *draw,
1545                             unsigned sampler_idx,
1546                             uint32_t width, uint32_t height, uint32_t depth,
1547                             uint32_t first_level, uint32_t last_level,
1548                             uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
1549                             uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
1550                             const void *data[PIPE_MAX_TEXTURE_LEVELS])
1551{
1552   unsigned j;
1553   struct draw_jit_texture *jit_tex;
1554
1555   assert(sampler_idx < PIPE_MAX_VERTEX_SAMPLERS);
1556
1557   jit_tex = &draw->llvm->jit_context.textures[sampler_idx];
1558
1559   jit_tex->width = width;
1560   jit_tex->height = height;
1561   jit_tex->depth = depth;
1562   jit_tex->first_level = first_level;
1563   jit_tex->last_level = last_level;
1564
1565   for (j = first_level; j <= last_level; j++) {
1566      jit_tex->data[j] = data[j];
1567      jit_tex->row_stride[j] = row_stride[j];
1568      jit_tex->img_stride[j] = img_stride[j];
1569   }
1570}
1571
1572
1573void
1574draw_llvm_set_sampler_state(struct draw_context *draw)
1575{
1576   unsigned i;
1577
1578   for (i = 0; i < draw->num_samplers; i++) {
1579      struct draw_jit_texture *jit_tex = &draw->llvm->jit_context.textures[i];
1580
1581      if (draw->samplers[i]) {
1582         jit_tex->min_lod = draw->samplers[i]->min_lod;
1583         jit_tex->max_lod = draw->samplers[i]->max_lod;
1584         jit_tex->lod_bias = draw->samplers[i]->lod_bias;
1585         COPY_4V(jit_tex->border_color, draw->samplers[i]->border_color.f);
1586      }
1587   }
1588}
1589
1590
1591void
1592draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
1593{
1594   struct draw_llvm *llvm = variant->llvm;
1595
1596   if (variant->function_elts) {
1597      LLVMFreeMachineCodeForFunction(llvm->gallivm->engine,
1598                                     variant->function_elts);
1599      LLVMDeleteFunction(variant->function_elts);
1600   }
1601
1602   if (variant->function) {
1603      LLVMFreeMachineCodeForFunction(llvm->gallivm->engine,
1604                                     variant->function);
1605      LLVMDeleteFunction(variant->function);
1606   }
1607
1608   remove_from_list(&variant->list_item_local);
1609   variant->shader->variants_cached--;
1610   remove_from_list(&variant->list_item_global);
1611   llvm->nr_variants--;
1612   FREE(variant);
1613}
1614