draw_llvm.c revision efc82aef35a2aac5d2ed9774f6d28f2626796416
1/**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include "draw_llvm.h"
29
30#include "draw_context.h"
31#include "draw_vs.h"
32
33#include "gallivm/lp_bld_arit.h"
34#include "gallivm/lp_bld_logic.h"
35#include "gallivm/lp_bld_const.h"
36#include "gallivm/lp_bld_swizzle.h"
37#include "gallivm/lp_bld_struct.h"
38#include "gallivm/lp_bld_type.h"
39#include "gallivm/lp_bld_flow.h"
40#include "gallivm/lp_bld_debug.h"
41#include "gallivm/lp_bld_tgsi.h"
42#include "gallivm/lp_bld_printf.h"
43#include "gallivm/lp_bld_intr.h"
44#include "gallivm/lp_bld_init.h"
45#include "gallivm/lp_bld_type.h"
46
47#include "tgsi/tgsi_exec.h"
48#include "tgsi/tgsi_dump.h"
49
50#include "util/u_math.h"
51#include "util/u_pointer.h"
52#include "util/u_string.h"
53#include "util/u_simple_list.h"
54
55
56#define DEBUG_STORE 0
57
58
59/**
60 * This function is called by the gallivm "garbage collector" when
61 * the LLVM global data structures are freed.  We must free all LLVM-related
62 * data.  Specifically, all JIT'd shader variants.
63 */
64static void
65draw_llvm_garbage_collect_callback(void *cb_data)
66{
67   struct draw_llvm *llvm = (struct draw_llvm *) cb_data;
68   struct draw_llvm_variant_list_item *li;
69
70   /* free all shader variants */
71   li = first_elem(&llvm->vs_variants_list);
72   while (!at_end(&llvm->vs_variants_list, li)) {
73      struct draw_llvm_variant_list_item *next = next_elem(li);
74      draw_llvm_destroy_variant(li->base);
75      li = next;
76   }
77
78   /* Null-out these pointers so they get remade next time they're needed.
79    * See the accessor functions below.
80    */
81   llvm->context_ptr_type = NULL;
82   llvm->buffer_ptr_type = NULL;
83   llvm->vb_ptr_type = NULL;
84   llvm->vertex_header_ptr_type = NULL;
85}
86
87
/* Forward declarations of the two code generators invoked by
 * draw_llvm_create_variant().
 */
static void draw_llvm_generate(struct draw_llvm *llvm,
                               struct draw_llvm_variant *var);

static void draw_llvm_generate_elts(struct draw_llvm *llvm,
                                    struct draw_llvm_variant *var);
93
94
95/**
96 * Create LLVM type for struct draw_jit_texture
97 */
98static LLVMTypeRef
99create_jit_texture_type(struct gallivm_state *gallivm)
100{
101   LLVMTargetDataRef target = gallivm->target;
102   LLVMTypeRef texture_type;
103   LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS];
104   LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
105
106   elem_types[DRAW_JIT_TEXTURE_WIDTH]  =
107   elem_types[DRAW_JIT_TEXTURE_HEIGHT] =
108   elem_types[DRAW_JIT_TEXTURE_DEPTH] =
109   elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = int32_type;
110   elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] =
111   elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] =
112      LLVMArrayType(int32_type, PIPE_MAX_TEXTURE_LEVELS);
113   elem_types[DRAW_JIT_TEXTURE_DATA] =
114      LLVMArrayType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0),
115                    PIPE_MAX_TEXTURE_LEVELS);
116   elem_types[DRAW_JIT_TEXTURE_MIN_LOD] =
117   elem_types[DRAW_JIT_TEXTURE_MAX_LOD] =
118   elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatTypeInContext(gallivm->context);
119   elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] =
120      LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
121
122   texture_type = LLVMStructTypeInContext(gallivm->context, elem_types,
123                                          Elements(elem_types), 0);
124
125   /* Make sure the target's struct layout cache doesn't return
126    * stale/invalid data.
127    */
128   LLVMInvalidateStructLayout(gallivm->target, texture_type);
129
130   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width,
131                          target, texture_type,
132                          DRAW_JIT_TEXTURE_WIDTH);
133   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height,
134                          target, texture_type,
135                          DRAW_JIT_TEXTURE_HEIGHT);
136   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth,
137                          target, texture_type,
138                          DRAW_JIT_TEXTURE_DEPTH);
139   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level,
140                          target, texture_type,
141                          DRAW_JIT_TEXTURE_LAST_LEVEL);
142   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride,
143                          target, texture_type,
144                          DRAW_JIT_TEXTURE_ROW_STRIDE);
145   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride,
146                          target, texture_type,
147                          DRAW_JIT_TEXTURE_IMG_STRIDE);
148   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data,
149                          target, texture_type,
150                          DRAW_JIT_TEXTURE_DATA);
151   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, min_lod,
152                          target, texture_type,
153                          DRAW_JIT_TEXTURE_MIN_LOD);
154   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, max_lod,
155                          target, texture_type,
156                          DRAW_JIT_TEXTURE_MAX_LOD);
157   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, lod_bias,
158                          target, texture_type,
159                          DRAW_JIT_TEXTURE_LOD_BIAS);
160   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, border_color,
161                          target, texture_type,
162                          DRAW_JIT_TEXTURE_BORDER_COLOR);
163
164   LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, target, texture_type);
165
166   return texture_type;
167}
168
169
170/**
171 * Create LLVM type for struct draw_jit_texture
172 */
173static LLVMTypeRef
174create_jit_context_type(struct gallivm_state *gallivm,
175                        LLVMTypeRef texture_type)
176{
177   LLVMTargetDataRef target = gallivm->target;
178   LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
179   LLVMTypeRef elem_types[5];
180   LLVMTypeRef context_type;
181
182   elem_types[0] = LLVMPointerType(float_type, 0); /* vs_constants */
183   elem_types[1] = LLVMPointerType(float_type, 0); /* gs_constants */
184   elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4), 12), 0); /* planes */
185   elem_types[3] = LLVMPointerType(float_type, 0); /* viewport */
186   elem_types[4] = LLVMArrayType(texture_type,
187                                 PIPE_MAX_VERTEX_SAMPLERS); /* textures */
188
189   context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
190                                          Elements(elem_types), 0);
191
192   LLVMInvalidateStructLayout(gallivm->target, context_type);
193
194   LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
195                          target, context_type, 0);
196   LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants,
197                          target, context_type, 1);
198   LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes,
199                          target, context_type, 2);
200   LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,
201                          target, context_type,
202                          DRAW_JIT_CTX_TEXTURES);
203   LP_CHECK_STRUCT_SIZE(struct draw_jit_context,
204                        target, context_type);
205
206   return context_type;
207}
208
209
210/**
211 * Create LLVM type for struct pipe_vertex_buffer
212 */
213static LLVMTypeRef
214create_jit_vertex_buffer_type(struct gallivm_state *gallivm)
215{
216   LLVMTargetDataRef target = gallivm->target;
217   LLVMTypeRef elem_types[4];
218   LLVMTypeRef vb_type;
219
220   elem_types[0] =
221   elem_types[1] =
222   elem_types[2] = LLVMInt32TypeInContext(gallivm->context);
223   elem_types[3] = LLVMPointerType(LLVMOpaqueTypeInContext(gallivm->context), 0); /* vs_constants */
224
225   vb_type = LLVMStructTypeInContext(gallivm->context, elem_types,
226                                     Elements(elem_types), 0);
227
228   LLVMInvalidateStructLayout(gallivm->target, vb_type);
229
230   LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride,
231                          target, vb_type, 0);
232   LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset,
233                          target, vb_type, 2);
234
235   LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type);
236
237   return vb_type;
238}
239
240
241/**
242 * Create LLVM type for struct vertex_header;
243 */
244static LLVMTypeRef
245create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems)
246{
247   LLVMTargetDataRef target = gallivm->target;
248   LLVMTypeRef elem_types[3];
249   LLVMTypeRef vertex_header;
250   char struct_name[24];
251
252   util_snprintf(struct_name, 23, "vertex_header%d", data_elems);
253
254   elem_types[0]  = LLVMIntTypeInContext(gallivm->context, 32);
255   elem_types[1]  = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
256   elem_types[2]  = LLVMArrayType(elem_types[1], data_elems);
257
258   vertex_header = LLVMStructTypeInContext(gallivm->context, elem_types,
259                                           Elements(elem_types), 0);
260
261   LLVMInvalidateStructLayout(gallivm->target, vertex_header);
262
263   /* these are bit-fields and we can't take address of them
264      LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
265      target, vertex_header,
266      DRAW_JIT_VERTEX_CLIPMASK);
267      LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag,
268      target, vertex_header,
269      DRAW_JIT_VERTEX_EDGEFLAG);
270      LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad,
271      target, vertex_header,
272      DRAW_JIT_VERTEX_PAD);
273      LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id,
274      target, vertex_header,
275      DRAW_JIT_VERTEX_VERTEX_ID);
276   */
277   LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip,
278                          target, vertex_header,
279                          DRAW_JIT_VERTEX_CLIP);
280   LP_CHECK_MEMBER_OFFSET(struct vertex_header, data,
281                          target, vertex_header,
282                          DRAW_JIT_VERTEX_DATA);
283
284   LLVMAddTypeName(gallivm->module, struct_name, vertex_header);
285
286   return vertex_header;
287}
288
289
290/**
291 * Create LLVM types for various structures.
292 */
293static void
294create_jit_types(struct draw_llvm *llvm)
295{
296   struct gallivm_state *gallivm = llvm->gallivm;
297   LLVMTypeRef texture_type, context_type, buffer_type, vb_type;
298
299   texture_type = create_jit_texture_type(gallivm);
300   LLVMAddTypeName(gallivm->module, "texture", texture_type);
301
302   context_type = create_jit_context_type(gallivm, texture_type);
303   LLVMAddTypeName(gallivm->module, "draw_jit_context", context_type);
304   llvm->context_ptr_type = LLVMPointerType(context_type, 0);
305
306   buffer_type = LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 8), 0);
307   LLVMAddTypeName(gallivm->module, "buffer", buffer_type);
308   llvm->buffer_ptr_type = LLVMPointerType(buffer_type, 0);
309
310   vb_type = create_jit_vertex_buffer_type(gallivm);
311   LLVMAddTypeName(gallivm->module, "pipe_vertex_buffer", vb_type);
312   llvm->vb_ptr_type = LLVMPointerType(vb_type, 0);
313}
314
315
316static LLVMTypeRef
317get_context_ptr_type(struct draw_llvm *llvm)
318{
319   if (!llvm->context_ptr_type)
320      create_jit_types(llvm);
321   return llvm->context_ptr_type;
322}
323
324
325static LLVMTypeRef
326get_buffer_ptr_type(struct draw_llvm *llvm)
327{
328   if (!llvm->buffer_ptr_type)
329      create_jit_types(llvm);
330   return llvm->buffer_ptr_type;
331}
332
333
334static LLVMTypeRef
335get_vb_ptr_type(struct draw_llvm *llvm)
336{
337   if (!llvm->vb_ptr_type)
338      create_jit_types(llvm);
339   return llvm->vb_ptr_type;
340}
341
342static LLVMTypeRef
343get_vertex_header_ptr_type(struct draw_llvm *llvm)
344{
345   if (!llvm->vertex_header_ptr_type)
346      create_jit_types(llvm);
347   return llvm->vertex_header_ptr_type;
348}
349
350
351/**
352 * Create per-context LLVM info.
353 */
354struct draw_llvm *
355draw_llvm_create(struct draw_context *draw, struct gallivm_state *gallivm)
356{
357   struct draw_llvm *llvm;
358
359   llvm = CALLOC_STRUCT( draw_llvm );
360   if (!llvm)
361      return NULL;
362
363   lp_build_init();
364
365   llvm->draw = draw;
366   llvm->gallivm = gallivm;
367
368   if (gallivm_debug & GALLIVM_DEBUG_IR) {
369      LLVMDumpModule(llvm->gallivm->module);
370   }
371
372   llvm->nr_variants = 0;
373   make_empty_list(&llvm->vs_variants_list);
374
375   gallivm_register_garbage_collector_callback(
376                              draw_llvm_garbage_collect_callback, llvm);
377
378   return llvm;
379}
380
381
382/**
383 * Free per-context LLVM info.
384 */
385void
386draw_llvm_destroy(struct draw_llvm *llvm)
387{
388   gallivm_remove_garbage_collector_callback(
389                              draw_llvm_garbage_collect_callback, llvm);
390
391   /* XXX free other draw_llvm data? */
392   FREE(llvm);
393}
394
395
396/**
397 * Create LLVM-generated code for a vertex shader.
398 */
399struct draw_llvm_variant *
400draw_llvm_create_variant(struct draw_llvm *llvm,
401			 unsigned num_inputs,
402			 const struct draw_llvm_variant_key *key)
403{
404   struct draw_llvm_variant *variant;
405   struct llvm_vertex_shader *shader =
406      llvm_vertex_shader(llvm->draw->vs.vertex_shader);
407   LLVMTypeRef vertex_header;
408
409   variant = MALLOC(sizeof *variant +
410		    shader->variant_key_size -
411		    sizeof variant->key);
412   if (variant == NULL)
413      return NULL;
414
415   variant->llvm = llvm;
416
417   memcpy(&variant->key, key, shader->variant_key_size);
418
419   vertex_header = create_jit_vertex_header(llvm->gallivm, num_inputs);
420
421   llvm->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);
422
423   draw_llvm_generate(llvm, variant);
424   draw_llvm_generate_elts(llvm, variant);
425
426   variant->shader = shader;
427   variant->list_item_global.base = variant;
428   variant->list_item_local.base = variant;
429   /*variant->no = */shader->variants_created++;
430   variant->list_item_global.base = variant;
431
432   return variant;
433}
434
435static void
436generate_vs(struct draw_llvm *llvm,
437            LLVMBuilderRef builder,
438            LLVMValueRef (*outputs)[NUM_CHANNELS],
439            const LLVMValueRef (*inputs)[NUM_CHANNELS],
440            LLVMValueRef context_ptr,
441            struct lp_build_sampler_soa *draw_sampler)
442{
443   const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
444   struct lp_type vs_type;
445   LLVMValueRef consts_ptr = draw_jit_context_vs_constants(llvm->gallivm, context_ptr);
446   struct lp_build_sampler_soa *sampler = 0;
447
448   memset(&vs_type, 0, sizeof vs_type);
449   vs_type.floating = TRUE; /* floating point values */
450   vs_type.sign = TRUE;     /* values are signed */
451   vs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
452   vs_type.width = 32;      /* 32-bit float */
453   vs_type.length = 4;      /* 4 elements per vector */
454#if 0
455   num_vs = 4;              /* number of vertices per block */
456#endif
457
458   if (gallivm_debug & GALLIVM_DEBUG_IR) {
459      tgsi_dump(tokens, 0);
460   }
461
462   if (llvm->draw->num_sampler_views &&
463       llvm->draw->num_samplers)
464      sampler = draw_sampler;
465
466   lp_build_tgsi_soa(llvm->gallivm,
467                     tokens,
468                     vs_type,
469                     NULL /*struct lp_build_mask_context *mask*/,
470                     consts_ptr,
471                     NULL /*pos*/,
472                     inputs,
473                     outputs,
474                     sampler,
475                     &llvm->draw->vs.vertex_shader->info);
476}
477
#if DEBUG_STORE
/**
 * Debug helper: emit code that prints the four lanes of a float vector.
 *
 * No gallivm_state is available in this function, so the 32-bit integer
 * type used for the lane indices is taken from the LLVM context that owns
 * the vector's own type.
 *
 * \param builder  instruction builder to emit into
 * \param vec      vector value whose elements 0..3 are printed
 */
static void print_vectorf(LLVMBuilderRef builder,
                         LLVMValueRef vec)
{
   LLVMTypeRef int32_type =
      LLVMInt32TypeInContext(LLVMGetTypeContext(LLVMTypeOf(vec)));
   LLVMValueRef val[4];
   unsigned i;

   for (i = 0; i < 4; ++i) {
      val[i] = LLVMBuildExtractElement(builder, vec,
                                       LLVMConstInt(int32_type, i, 0), "");
   }

   lp_build_printf(builder, "vector = [%f, %f, %f, %f]\n",
                   val[0], val[1], val[2], val[3]);
}
#endif
495
496static void
497generate_fetch(struct gallivm_state *gallivm,
498               LLVMValueRef vbuffers_ptr,
499               LLVMValueRef *res,
500               struct pipe_vertex_element *velem,
501               LLVMValueRef vbuf,
502               LLVMValueRef index,
503               LLVMValueRef instance_id)
504{
505   LLVMBuilderRef builder = gallivm->builder;
506   LLVMValueRef indices =
507      LLVMConstInt(LLVMInt64TypeInContext(gallivm->context),
508                   velem->vertex_buffer_index, 0);
509   LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr,
510                                           &indices, 1, "");
511   LLVMValueRef vb_stride = draw_jit_vbuffer_stride(gallivm, vbuf);
512   LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(gallivm, vbuf);
513   LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, vbuf);
514   LLVMValueRef cond;
515   LLVMValueRef stride;
516
517   if (velem->instance_divisor) {
518      /* array index = instance_id / instance_divisor */
519      index = LLVMBuildUDiv(builder, instance_id,
520                            lp_build_const_int32(gallivm, velem->instance_divisor),
521                            "instance_divisor");
522   }
523
524   /* limit index to min(index, vb_max_index) */
525   cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, "");
526   index = LLVMBuildSelect(builder, cond, index, vb_max_index, "");
527
528   stride = LLVMBuildMul(builder, vb_stride, index, "");
529
530   vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer");
531
532   stride = LLVMBuildAdd(builder, stride,
533                         vb_buffer_offset,
534                         "");
535   stride = LLVMBuildAdd(builder, stride,
536                         lp_build_const_int32(gallivm, velem->src_offset),
537                         "");
538
539   /*lp_build_printf(builder, "vbuf index = %d, stride is %d\n", indices, stride);*/
540   vbuffer_ptr = LLVMBuildGEP(builder, vbuffer_ptr, &stride, 1, "");
541
542   *res = draw_llvm_translate_from(gallivm, vbuffer_ptr, velem->src_format);
543}
544
545static LLVMValueRef
546aos_to_soa(struct gallivm_state *gallivm,
547           LLVMValueRef val0,
548           LLVMValueRef val1,
549           LLVMValueRef val2,
550           LLVMValueRef val3,
551           LLVMValueRef channel)
552{
553   LLVMBuilderRef builder = gallivm->builder;
554   LLVMValueRef ex, res;
555
556   ex = LLVMBuildExtractElement(builder, val0,
557                                channel, "");
558   res = LLVMBuildInsertElement(builder,
559                                LLVMConstNull(LLVMTypeOf(val0)),
560                                ex,
561                                lp_build_const_int32(gallivm, 0),
562                                "");
563
564   ex = LLVMBuildExtractElement(builder, val1,
565                                channel, "");
566   res = LLVMBuildInsertElement(builder,
567                                res, ex,
568                                lp_build_const_int32(gallivm, 1),
569                                "");
570
571   ex = LLVMBuildExtractElement(builder, val2,
572                                channel, "");
573   res = LLVMBuildInsertElement(builder,
574                                res, ex,
575                                lp_build_const_int32(gallivm, 2),
576                                "");
577
578   ex = LLVMBuildExtractElement(builder, val3,
579                                channel, "");
580   res = LLVMBuildInsertElement(builder,
581                                res, ex,
582                                lp_build_const_int32(gallivm, 3),
583                                "");
584
585   return res;
586}
587
588static void
589soa_to_aos(struct gallivm_state *gallivm,
590           LLVMValueRef soa[NUM_CHANNELS],
591           LLVMValueRef aos[NUM_CHANNELS])
592{
593   LLVMBuilderRef builder = gallivm->builder;
594   LLVMValueRef comp;
595   int i = 0;
596
597   debug_assert(NUM_CHANNELS == 4);
598
599   aos[0] = LLVMConstNull(LLVMTypeOf(soa[0]));
600   aos[1] = aos[2] = aos[3] = aos[0];
601
602   for (i = 0; i < NUM_CHANNELS; ++i) {
603      LLVMValueRef channel = lp_build_const_int32(gallivm, i);
604
605      comp = LLVMBuildExtractElement(builder, soa[i],
606                                     lp_build_const_int32(gallivm, 0), "");
607      aos[0] = LLVMBuildInsertElement(builder, aos[0], comp, channel, "");
608
609      comp = LLVMBuildExtractElement(builder, soa[i],
610                                     lp_build_const_int32(gallivm, 1), "");
611      aos[1] = LLVMBuildInsertElement(builder, aos[1], comp, channel, "");
612
613      comp = LLVMBuildExtractElement(builder, soa[i],
614                                     lp_build_const_int32(gallivm, 2), "");
615      aos[2] = LLVMBuildInsertElement(builder, aos[2], comp, channel, "");
616
617      comp = LLVMBuildExtractElement(builder, soa[i],
618                                     lp_build_const_int32(gallivm, 3), "");
619      aos[3] = LLVMBuildInsertElement(builder, aos[3], comp, channel, "");
620
621   }
622}
623
624static void
625convert_to_soa(struct gallivm_state *gallivm,
626               LLVMValueRef (*aos)[NUM_CHANNELS],
627               LLVMValueRef (*soa)[NUM_CHANNELS],
628               int num_attribs)
629{
630   int i;
631
632   debug_assert(NUM_CHANNELS == 4);
633
634   for (i = 0; i < num_attribs; ++i) {
635      LLVMValueRef val0 = aos[i][0];
636      LLVMValueRef val1 = aos[i][1];
637      LLVMValueRef val2 = aos[i][2];
638      LLVMValueRef val3 = aos[i][3];
639
640      soa[i][0] = aos_to_soa(gallivm, val0, val1, val2, val3,
641                             lp_build_const_int32(gallivm, 0));
642      soa[i][1] = aos_to_soa(gallivm, val0, val1, val2, val3,
643                             lp_build_const_int32(gallivm, 1));
644      soa[i][2] = aos_to_soa(gallivm, val0, val1, val2, val3,
645                             lp_build_const_int32(gallivm, 2));
646      soa[i][3] = aos_to_soa(gallivm, val0, val1, val2, val3,
647                             lp_build_const_int32(gallivm, 3));
648   }
649}
650
651static void
652store_aos(struct gallivm_state *gallivm,
653          LLVMValueRef io_ptr,
654          LLVMValueRef index,
655          LLVMValueRef value,
656          LLVMValueRef clipmask)
657{
658   LLVMBuilderRef builder = gallivm->builder;
659   LLVMValueRef id_ptr = draw_jit_header_id(gallivm, io_ptr);
660   LLVMValueRef data_ptr = draw_jit_header_data(gallivm, io_ptr);
661   LLVMValueRef indices[3];
662   LLVMValueRef val, shift;
663
664   indices[0] = lp_build_const_int32(gallivm, 0);
665   indices[1] = index;
666   indices[2] = lp_build_const_int32(gallivm, 0);
667
668   /* initialize vertex id:16 = 0xffff, pad:3 = 0, edgeflag:1 = 1 */
669   val = lp_build_const_int32(gallivm, 0xffff1);
670   shift = lp_build_const_int32(gallivm, 12);
671   val = LLVMBuildShl(builder, val, shift, "");
672   /* add clipmask:12 */
673   val = LLVMBuildOr(builder, val, clipmask, "");
674
675   /* store vertex header */
676   LLVMBuildStore(builder, val, id_ptr);
677
678
679#if DEBUG_STORE
680   lp_build_printf(builder, "    ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr);
681#endif
682#if 0
683   /*lp_build_printf(builder, " ---- %p storing at %d (%p)  ", io_ptr, index, data_ptr);
684     print_vectorf(builder, value);*/
685   data_ptr = LLVMBuildBitCast(builder, data_ptr,
686                               LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), 0), 0),
687                               "datavec");
688   data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 2, "");
689
690   LLVMBuildStore(builder, value, data_ptr);
691#else
692   {
693      LLVMValueRef x, y, z, w;
694      LLVMValueRef idx0, idx1, idx2, idx3;
695      LLVMValueRef gep0, gep1, gep2, gep3;
696      data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, "");
697
698      idx0 = lp_build_const_int32(gallivm, 0);
699      idx1 = lp_build_const_int32(gallivm, 1);
700      idx2 = lp_build_const_int32(gallivm, 2);
701      idx3 = lp_build_const_int32(gallivm, 3);
702
703      x = LLVMBuildExtractElement(builder, value,
704                                  idx0, "");
705      y = LLVMBuildExtractElement(builder, value,
706                                  idx1, "");
707      z = LLVMBuildExtractElement(builder, value,
708                                  idx2, "");
709      w = LLVMBuildExtractElement(builder, value,
710                                  idx3, "");
711
712      gep0 = LLVMBuildGEP(builder, data_ptr, &idx0, 1, "");
713      gep1 = LLVMBuildGEP(builder, data_ptr, &idx1, 1, "");
714      gep2 = LLVMBuildGEP(builder, data_ptr, &idx2, 1, "");
715      gep3 = LLVMBuildGEP(builder, data_ptr, &idx3, 1, "");
716
717      /*lp_build_printf(builder, "##### x = %f (%p), y = %f (%p), z = %f (%p), w = %f (%p)\n",
718        x, gep0, y, gep1, z, gep2, w, gep3);*/
719      LLVMBuildStore(builder, x, gep0);
720      LLVMBuildStore(builder, y, gep1);
721      LLVMBuildStore(builder, z, gep2);
722      LLVMBuildStore(builder, w, gep3);
723   }
724#endif
725}
726
727static void
728store_aos_array(struct gallivm_state *gallivm,
729                LLVMValueRef io_ptr,
730                LLVMValueRef aos[NUM_CHANNELS],
731                int attrib,
732                int num_outputs,
733                LLVMValueRef clipmask)
734{
735   LLVMBuilderRef builder = gallivm->builder;
736   LLVMValueRef attr_index = lp_build_const_int32(gallivm, attrib);
737   LLVMValueRef ind0 = lp_build_const_int32(gallivm, 0);
738   LLVMValueRef ind1 = lp_build_const_int32(gallivm, 1);
739   LLVMValueRef ind2 = lp_build_const_int32(gallivm, 2);
740   LLVMValueRef ind3 = lp_build_const_int32(gallivm, 3);
741   LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr;
742   LLVMValueRef clipmask0, clipmask1, clipmask2, clipmask3;
743
744   debug_assert(NUM_CHANNELS == 4);
745
746   io0_ptr = LLVMBuildGEP(builder, io_ptr,
747                          &ind0, 1, "");
748   io1_ptr = LLVMBuildGEP(builder, io_ptr,
749                          &ind1, 1, "");
750   io2_ptr = LLVMBuildGEP(builder, io_ptr,
751                          &ind2, 1, "");
752   io3_ptr = LLVMBuildGEP(builder, io_ptr,
753                          &ind3, 1, "");
754
755   clipmask0 = LLVMBuildExtractElement(builder, clipmask,
756                                       ind0, "");
757   clipmask1 = LLVMBuildExtractElement(builder, clipmask,
758                                       ind1, "");
759   clipmask2 = LLVMBuildExtractElement(builder, clipmask,
760                                       ind2, "");
761   clipmask3 = LLVMBuildExtractElement(builder, clipmask,
762                                       ind3, "");
763
764#if DEBUG_STORE
765   lp_build_printf(builder, "io = %p, indexes[%d, %d, %d, %d]\n, clipmask0 = %x, clipmask1 = %x, clipmask2 = %x, clipmask3 = %x\n",
766                   io_ptr, ind0, ind1, ind2, ind3, clipmask0, clipmask1, clipmask2, clipmask3);
767#endif
768   /* store for each of the 4 vertices */
769   store_aos(gallivm, io0_ptr, attr_index, aos[0], clipmask0);
770   store_aos(gallivm, io1_ptr, attr_index, aos[1], clipmask1);
771   store_aos(gallivm, io2_ptr, attr_index, aos[2], clipmask2);
772   store_aos(gallivm, io3_ptr, attr_index, aos[3], clipmask3);
773}
774
775static void
776convert_to_aos(struct gallivm_state *gallivm,
777               LLVMValueRef io,
778               LLVMValueRef (*outputs)[NUM_CHANNELS],
779               LLVMValueRef clipmask,
780               int num_outputs,
781               int max_vertices)
782{
783   LLVMBuilderRef builder = gallivm->builder;
784   unsigned chan, attrib;
785
786#if DEBUG_STORE
787   lp_build_printf(builder, "   # storing begin\n");
788#endif
789   for (attrib = 0; attrib < num_outputs; ++attrib) {
790      LLVMValueRef soa[4];
791      LLVMValueRef aos[4];
792      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
793         if(outputs[attrib][chan]) {
794            LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
795            lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);
796            /*lp_build_printf(builder, "output %d : %d ",
797                            LLVMConstInt(LLVMInt32Type(), attrib, 0),
798                            LLVMConstInt(LLVMInt32Type(), chan, 0));
799              print_vectorf(builder, out);*/
800            soa[chan] = out;
801         } else
802            soa[chan] = 0;
803      }
804      soa_to_aos(gallivm, soa, aos);
805      store_aos_array(gallivm,
806                      io,
807                      aos,
808                      attrib,
809                      num_outputs,
810                      clipmask);
811   }
812#if DEBUG_STORE
813   lp_build_printf(builder, "   # storing end\n");
814#endif
815}
816
817/*
818 * Stores original vertex positions in clip coordinates
819 * There is probably a more efficient way to do this, 4 floats at once
820 * rather than extracting each element one by one.
821 */
822static void
823store_clip(struct gallivm_state *gallivm,
824           LLVMValueRef io_ptr,
825           LLVMValueRef (*outputs)[NUM_CHANNELS])
826{
827   LLVMBuilderRef builder = gallivm->builder;
828   LLVMValueRef out[4];
829   LLVMValueRef indices[2];
830   LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr;
831   LLVMValueRef clip_ptr0, clip_ptr1, clip_ptr2, clip_ptr3;
832   LLVMValueRef clip0_ptr, clip1_ptr, clip2_ptr, clip3_ptr;
833   LLVMValueRef out0elem, out1elem, out2elem, out3elem;
834   int i;
835
836   LLVMValueRef ind0 = lp_build_const_int32(gallivm, 0);
837   LLVMValueRef ind1 = lp_build_const_int32(gallivm, 1);
838   LLVMValueRef ind2 = lp_build_const_int32(gallivm, 2);
839   LLVMValueRef ind3 = lp_build_const_int32(gallivm, 3);
840
841   indices[0] =
842   indices[1] = lp_build_const_int32(gallivm, 0);
843
844   out[0] = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/
845   out[1] = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/
846   out[2] = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/
847   out[3] = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/
848
849   io0_ptr = LLVMBuildGEP(builder, io_ptr, &ind0, 1, "");
850   io1_ptr = LLVMBuildGEP(builder, io_ptr, &ind1, 1, "");
851   io2_ptr = LLVMBuildGEP(builder, io_ptr, &ind2, 1, "");
852   io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, "");
853
854   clip_ptr0 = draw_jit_header_clip(gallivm, io0_ptr);
855   clip_ptr1 = draw_jit_header_clip(gallivm, io1_ptr);
856   clip_ptr2 = draw_jit_header_clip(gallivm, io2_ptr);
857   clip_ptr3 = draw_jit_header_clip(gallivm, io3_ptr);
858
859   for (i = 0; i<4; i++){
860      clip0_ptr = LLVMBuildGEP(builder, clip_ptr0, indices, 2, ""); /* x0 */
861      clip1_ptr = LLVMBuildGEP(builder, clip_ptr1, indices, 2, ""); /* x1 */
862      clip2_ptr = LLVMBuildGEP(builder, clip_ptr2, indices, 2, ""); /* x2 */
863      clip3_ptr = LLVMBuildGEP(builder, clip_ptr3, indices, 2, ""); /* x3 */
864
865      out0elem = LLVMBuildExtractElement(builder, out[i], ind0, ""); /* x0 */
866      out1elem = LLVMBuildExtractElement(builder, out[i], ind1, ""); /* x1 */
867      out2elem = LLVMBuildExtractElement(builder, out[i], ind2, ""); /* x2 */
868      out3elem = LLVMBuildExtractElement(builder, out[i], ind3, ""); /* x3 */
869
870      LLVMBuildStore(builder, out0elem, clip0_ptr);
871      LLVMBuildStore(builder, out1elem, clip1_ptr);
872      LLVMBuildStore(builder, out2elem, clip2_ptr);
873      LLVMBuildStore(builder, out3elem, clip3_ptr);
874
875      indices[1]= LLVMBuildAdd(builder, indices[1], ind1, "");
876   }
877
878}
879
880/* Equivalent of _mm_set1_ps(a)
881 */
882static LLVMValueRef
883vec4f_from_scalar(struct gallivm_state *gallivm,
884                  LLVMValueRef a,
885                  const char *name)
886{
887   LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
888   LLVMValueRef res = LLVMGetUndef(LLVMVectorType(float_type, 4));
889   int i;
890
891   for(i = 0; i < 4; ++i) {
892      LLVMValueRef index = lp_build_const_int32(gallivm, i);
893      res = LLVMBuildInsertElement(gallivm->builder, res, a,
894                                   index, i == 3 ? name : "");
895   }
896
897   return res;
898}
899
900/*
901 * Transforms the outputs for viewport mapping
902 */
903static void
904generate_viewport(struct draw_llvm *llvm,
905                  LLVMBuilderRef builder,
906                  LLVMValueRef (*outputs)[NUM_CHANNELS],
907                  LLVMValueRef context_ptr)
908{
909   int i;
910   struct gallivm_state *gallivm = llvm->gallivm;
911   struct lp_type f32_type = lp_type_float_vec(32);
912   LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/
913   LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0);       /*1.0 1.0 1.0 1.0*/
914   LLVMValueRef vp_ptr = draw_jit_context_viewport(gallivm, context_ptr);
915
916   /* for 1/w convention*/
917   out3 = LLVMBuildFDiv(builder, const1, out3, "");
918   LLVMBuildStore(builder, out3, outputs[0][3]);
919
920   /* Viewport Mapping */
921   for (i=0; i<3; i++){
922      LLVMValueRef out = LLVMBuildLoad(builder, outputs[0][i], ""); /*x0 x1 x2 x3*/
923      LLVMValueRef scale;
924      LLVMValueRef trans;
925      LLVMValueRef scale_i;
926      LLVMValueRef trans_i;
927      LLVMValueRef index;
928
929      index = lp_build_const_int32(gallivm, i);
930      scale_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");
931
932      index = lp_build_const_int32(gallivm, i+4);
933      trans_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");
934
935      scale = vec4f_from_scalar(gallivm, LLVMBuildLoad(builder, scale_i, ""), "scale");
936      trans = vec4f_from_scalar(gallivm, LLVMBuildLoad(builder, trans_i, ""), "trans");
937
938      /* divide by w */
939      out = LLVMBuildFMul(builder, out, out3, "");
940      /* mult by scale */
941      out = LLVMBuildFMul(builder, out, scale, "");
942      /* add translation */
943      out = LLVMBuildFAdd(builder, out, trans, "");
944
945      /* store transformed outputs */
946      LLVMBuildStore(builder, out, outputs[0][i]);
947   }
948
949}
950
951
952/*
953 * Returns clipmask as 4xi32 bitmask for the 4 vertices
954 */
955static LLVMValueRef
956generate_clipmask(struct gallivm_state *gallivm,
957                  LLVMValueRef (*outputs)[NUM_CHANNELS],
958                  boolean clip_xy,
959                  boolean clip_z,
960                  boolean clip_user,
961                  boolean clip_halfz,
962                  unsigned nr,
963                  LLVMValueRef context_ptr)
964{
965   LLVMBuilderRef builder = gallivm->builder;
966   LLVMValueRef mask; /* stores the <4xi32> clipmasks */
967   LLVMValueRef test, temp;
968   LLVMValueRef zero, shift;
969   LLVMValueRef pos_x, pos_y, pos_z, pos_w;
970   LLVMValueRef plane1, planes, plane_ptr, sum;
971
972   unsigned i;
973
974   struct lp_type f32_type = lp_type_float_vec(32);
975
976   mask = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0);
977   temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0);
978   zero = lp_build_const_vec(gallivm, f32_type, 0);                    /* 0.0f 0.0f 0.0f 0.0f */
979   shift = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 1);    /* 1 1 1 1 */
980
981   /* Assuming position stored at output[0] */
982   pos_x = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/
983   pos_y = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/
984   pos_z = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/
985   pos_w = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/
986
987   /* Cliptest, for hardwired planes */
988   if (clip_xy){
989      /* plane 1 */
990      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w);
991      temp = shift;
992      test = LLVMBuildAnd(builder, test, temp, "");
993      mask = test;
994
995      /* plane 2 */
996      test = LLVMBuildFAdd(builder, pos_x, pos_w, "");
997      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
998      temp = LLVMBuildShl(builder, temp, shift, "");
999      test = LLVMBuildAnd(builder, test, temp, "");
1000      mask = LLVMBuildOr(builder, mask, test, "");
1001
1002      /* plane 3 */
1003      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w);
1004      temp = LLVMBuildShl(builder, temp, shift, "");
1005      test = LLVMBuildAnd(builder, test, temp, "");
1006      mask = LLVMBuildOr(builder, mask, test, "");
1007
1008      /* plane 4 */
1009      test = LLVMBuildFAdd(builder, pos_y, pos_w, "");
1010      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1011      temp = LLVMBuildShl(builder, temp, shift, "");
1012      test = LLVMBuildAnd(builder, test, temp, "");
1013      mask = LLVMBuildOr(builder, mask, test, "");
1014   }
1015
1016   if (clip_z){
1017      temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 16);
1018      if (clip_halfz){
1019         /* plane 5 */
1020         test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z);
1021         test = LLVMBuildAnd(builder, test, temp, "");
1022         mask = LLVMBuildOr(builder, mask, test, "");
1023      }
1024      else{
1025         /* plane 5 */
1026         test = LLVMBuildFAdd(builder, pos_z, pos_w, "");
1027         test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1028         test = LLVMBuildAnd(builder, test, temp, "");
1029         mask = LLVMBuildOr(builder, mask, test, "");
1030      }
1031      /* plane 6 */
1032      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w);
1033      temp = LLVMBuildShl(builder, temp, shift, "");
1034      test = LLVMBuildAnd(builder, test, temp, "");
1035      mask = LLVMBuildOr(builder, mask, test, "");
1036   }
1037
1038   if (clip_user){
1039      LLVMValueRef planes_ptr = draw_jit_context_planes(gallivm, context_ptr);
1040      LLVMValueRef indices[3];
1041      temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 32);
1042
1043      /* userclip planes */
1044      for (i = 6; i < nr; i++) {
1045         indices[0] = lp_build_const_int32(gallivm, 0);
1046         indices[1] = lp_build_const_int32(gallivm, i);
1047
1048         indices[2] = lp_build_const_int32(gallivm, 0);
1049         plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
1050         plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x");
1051         planes = vec4f_from_scalar(gallivm, plane1, "plane4_x");
1052         sum = LLVMBuildFMul(builder, planes, pos_x, "");
1053
1054         indices[2] = lp_build_const_int32(gallivm, 1);
1055         plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
1056         plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y");
1057         planes = vec4f_from_scalar(gallivm, plane1, "plane4_y");
1058         test = LLVMBuildFMul(builder, planes, pos_y, "");
1059         sum = LLVMBuildFAdd(builder, sum, test, "");
1060
1061         indices[2] = lp_build_const_int32(gallivm, 2);
1062         plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
1063         plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z");
1064         planes = vec4f_from_scalar(gallivm, plane1, "plane4_z");
1065         test = LLVMBuildFMul(builder, planes, pos_z, "");
1066         sum = LLVMBuildFAdd(builder, sum, test, "");
1067
1068         indices[2] = lp_build_const_int32(gallivm, 3);
1069         plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
1070         plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w");
1071         planes = vec4f_from_scalar(gallivm, plane1, "plane4_w");
1072         test = LLVMBuildFMul(builder, planes, pos_w, "");
1073         sum = LLVMBuildFAdd(builder, sum, test, "");
1074
1075         test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, sum);
1076         temp = LLVMBuildShl(builder, temp, shift, "");
1077         test = LLVMBuildAnd(builder, test, temp, "");
1078         mask = LLVMBuildOr(builder, mask, test, "");
1079      }
1080   }
1081   return mask;
1082}
1083
1084/*
1085 * Returns boolean if any clipping has occurred
1086 * Used zero/non-zero i32 value to represent boolean
1087 */
1088static void
1089clipmask_bool(struct gallivm_state *gallivm,
1090              LLVMValueRef clipmask,
1091              LLVMValueRef ret_ptr)
1092{
1093   LLVMBuilderRef builder = gallivm->builder;
1094   LLVMValueRef ret = LLVMBuildLoad(builder, ret_ptr, "");
1095   LLVMValueRef temp;
1096   int i;
1097
1098   for (i=0; i<4; i++){
1099      temp = LLVMBuildExtractElement(builder, clipmask,
1100                                     lp_build_const_int32(gallivm, i) , "");
1101      ret = LLVMBuildOr(builder, ret, temp, "");
1102   }
1103
1104   LLVMBuildStore(builder, ret, ret_ptr);
1105}
1106
1107static void
1108draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
1109{
1110   struct gallivm_state *gallivm = llvm->gallivm;
1111   LLVMContextRef context = gallivm->context;
1112   LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
1113   LLVMTypeRef arg_types[8];
1114   LLVMTypeRef func_type;
1115   LLVMValueRef context_ptr;
1116   LLVMBasicBlockRef block;
1117   LLVMBuilderRef builder;
1118   LLVMValueRef start, end, count, stride, step, io_itr;
1119   LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
1120   LLVMValueRef instance_id;
1121   struct draw_context *draw = llvm->draw;
1122   unsigned i, j;
1123   struct lp_build_context bld;
1124   struct lp_build_loop_state lp_loop;
1125   const int max_vertices = 4;
1126   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
1127   void *code;
1128   struct lp_build_sampler_soa *sampler = 0;
1129   LLVMValueRef ret, ret_ptr;
1130   boolean bypass_viewport = variant->key.bypass_viewport;
1131   boolean enable_cliptest = variant->key.clip_xy ||
1132                             variant->key.clip_z  ||
1133                             variant->key.clip_user;
1134
1135   arg_types[0] = get_context_ptr_type(llvm);       /* context */
1136   arg_types[1] = get_vertex_header_ptr_type(llvm); /* vertex_header */
1137   arg_types[2] = get_buffer_ptr_type(llvm);        /* vbuffers */
1138   arg_types[3] = int32_type;                       /* start */
1139   arg_types[4] = int32_type;                       /* count */
1140   arg_types[5] = int32_type;                       /* stride */
1141   arg_types[6] = get_vb_ptr_type(llvm);            /* pipe_vertex_buffer's */
1142   arg_types[7] = int32_type;                       /* instance_id */
1143
1144   func_type = LLVMFunctionType(int32_type, arg_types, Elements(arg_types), 0);
1145
1146   variant->function = LLVMAddFunction(gallivm->module, "draw_llvm_shader",
1147                                       func_type);
1148   LLVMSetFunctionCallConv(variant->function, LLVMCCallConv);
1149   for(i = 0; i < Elements(arg_types); ++i)
1150      if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
1151         LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute);
1152
1153   context_ptr  = LLVMGetParam(variant->function, 0);
1154   io_ptr       = LLVMGetParam(variant->function, 1);
1155   vbuffers_ptr = LLVMGetParam(variant->function, 2);
1156   start        = LLVMGetParam(variant->function, 3);
1157   count        = LLVMGetParam(variant->function, 4);
1158   stride       = LLVMGetParam(variant->function, 5);
1159   vb_ptr       = LLVMGetParam(variant->function, 6);
1160   instance_id  = LLVMGetParam(variant->function, 7);
1161
1162   lp_build_name(context_ptr, "context");
1163   lp_build_name(io_ptr, "io");
1164   lp_build_name(vbuffers_ptr, "vbuffers");
1165   lp_build_name(start, "start");
1166   lp_build_name(count, "count");
1167   lp_build_name(stride, "stride");
1168   lp_build_name(vb_ptr, "vb");
1169   lp_build_name(instance_id, "instance_id");
1170
1171   /*
1172    * Function body
1173    */
1174
1175   block = LLVMAppendBasicBlockInContext(gallivm->context, variant->function, "entry");
1176   builder = gallivm->builder;
1177   assert(builder);
1178   LLVMPositionBuilderAtEnd(builder, block);
1179
1180   lp_build_context_init(&bld, llvm->gallivm, lp_type_int(32));
1181
1182   end = lp_build_add(&bld, start, count);
1183
1184   step = lp_build_const_int32(gallivm, max_vertices);
1185
1186   /* function will return non-zero i32 value if any clipped vertices */
1187   ret_ptr = lp_build_alloca(gallivm, int32_type, "");
1188   LLVMBuildStore(builder, lp_build_const_int32(gallivm, 0), ret_ptr);
1189
1190   /* code generated texture sampling */
1191   sampler = draw_llvm_sampler_soa_create(
1192      draw_llvm_variant_key_samplers(&variant->key),
1193      context_ptr);
1194
1195#if DEBUG_STORE
1196   lp_build_printf(builder, "start = %d, end = %d, step = %d\n",
1197                   start, end, step);
1198#endif
1199   lp_build_loop_begin(&lp_loop, llvm->gallivm, start);
1200   {
1201      LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
1202      LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
1203      LLVMValueRef io;
1204      LLVMValueRef clipmask;   /* holds the clipmask value */
1205      const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
1206
1207      io_itr = LLVMBuildSub(builder, lp_loop.counter, start, "");
1208      io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
1209#if DEBUG_STORE
1210      lp_build_printf(builder, " --- io %d = %p, loop counter %d\n",
1211                      io_itr, io, lp_loop.counter);
1212#endif
1213      for (i = 0; i < NUM_CHANNELS; ++i) {
1214         LLVMValueRef true_index = LLVMBuildAdd(
1215            builder,
1216            lp_loop.counter,
1217            lp_build_const_int32(gallivm, i), "");
1218         for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
1219            struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
1220            LLVMValueRef vb_index = lp_build_const_int32(gallivm, velem->vertex_buffer_index);
1221            LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
1222                                           &vb_index, 1, "");
1223            generate_fetch(llvm->gallivm, vbuffers_ptr,
1224                           &aos_attribs[j][i], velem, vb, true_index,
1225                           instance_id);
1226         }
1227      }
1228      convert_to_soa(gallivm, aos_attribs, inputs,
1229                     draw->pt.nr_vertex_elements);
1230
1231      ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs;
1232      generate_vs(llvm,
1233                  builder,
1234                  outputs,
1235                  ptr_aos,
1236                  context_ptr,
1237                  sampler);
1238
1239      /* store original positions in clip before further manipulation */
1240      store_clip(gallivm, io, outputs);
1241
1242      /* do cliptest */
1243      if (enable_cliptest){
1244         /* allocate clipmask, assign it integer type */
1245         clipmask = generate_clipmask(gallivm, outputs,
1246                                      variant->key.clip_xy,
1247                                      variant->key.clip_z,
1248                                      variant->key.clip_user,
1249                                      variant->key.clip_halfz,
1250                                      variant->key.nr_planes,
1251                                      context_ptr);
1252         /* return clipping boolean value for function */
1253         clipmask_bool(gallivm, clipmask, ret_ptr);
1254      }
1255      else{
1256         clipmask = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0);
1257      }
1258
1259      /* do viewport mapping */
1260      if (!bypass_viewport){
1261         generate_viewport(llvm, builder, outputs, context_ptr);
1262      }
1263
1264      /* store clipmask in vertex header and positions in data */
1265      convert_to_aos(gallivm, io, outputs, clipmask,
1266                     draw->vs.vertex_shader->info.num_outputs,
1267                     max_vertices);
1268   }
1269
1270   lp_build_loop_end_cond(&lp_loop, end, step, LLVMIntUGE);
1271
1272   sampler->destroy(sampler);
1273
1274   ret = LLVMBuildLoad(builder, ret_ptr,"");
1275   LLVMBuildRet(builder, ret);
1276
1277   /*
1278    * Translate the LLVM IR into machine code.
1279    */
1280#ifdef DEBUG
1281   if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
1282      lp_debug_dump_value(variant->function);
1283      assert(0);
1284   }
1285#endif
1286
1287   LLVMRunFunctionPassManager(gallivm->passmgr, variant->function);
1288
1289   if (gallivm_debug & GALLIVM_DEBUG_IR) {
1290      lp_debug_dump_value(variant->function);
1291      debug_printf("\n");
1292   }
1293
1294   code = LLVMGetPointerToGlobal(gallivm->engine, variant->function);
1295   variant->jit_func = (draw_jit_vert_func)pointer_to_func(code);
1296
1297   if (gallivm_debug & GALLIVM_DEBUG_ASM) {
1298      lp_disassemble(code);
1299   }
1300   lp_func_delete_body(variant->function);
1301}
1302
1303
1304static void
1305draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
1306{
1307   struct gallivm_state *gallivm = llvm->gallivm;
1308   LLVMContextRef context = gallivm->context;
1309   LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
1310   LLVMTypeRef arg_types[8];
1311   LLVMTypeRef func_type;
1312   LLVMValueRef context_ptr;
1313   LLVMBasicBlockRef block;
1314   LLVMBuilderRef builder;
1315   LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr;
1316   LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
1317   LLVMValueRef instance_id;
1318   struct draw_context *draw = llvm->draw;
1319   unsigned i, j;
1320   struct lp_build_context bld;
1321   struct lp_build_loop_state lp_loop;
1322   const int max_vertices = 4;
1323   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
1324   LLVMValueRef fetch_max;
1325   void *code;
1326   struct lp_build_sampler_soa *sampler = 0;
1327   LLVMValueRef ret, ret_ptr;
1328   boolean bypass_viewport = variant->key.bypass_viewport;
1329   boolean enable_cliptest = variant->key.clip_xy ||
1330                             variant->key.clip_z  ||
1331                             variant->key.clip_user;
1332
1333   arg_types[0] = get_context_ptr_type(llvm);           /* context */
1334   arg_types[1] = get_vertex_header_ptr_type(llvm);     /* vertex_header */
1335   arg_types[2] = get_buffer_ptr_type(llvm);            /* vbuffers */
1336   arg_types[3] = LLVMPointerType(int32_type, 0);       /* fetch_elts * */
1337   arg_types[4] = int32_type;                           /* fetch_count */
1338   arg_types[5] = int32_type;                           /* stride */
1339   arg_types[6] = get_vb_ptr_type(llvm);                /* pipe_vertex_buffer's */
1340   arg_types[7] = int32_type;                           /* instance_id */
1341
1342   func_type = LLVMFunctionType(int32_type, arg_types, Elements(arg_types), 0);
1343
1344   variant->function_elts = LLVMAddFunction(gallivm->module, "draw_llvm_shader_elts", func_type);
1345   LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv);
1346   for(i = 0; i < Elements(arg_types); ++i)
1347      if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
1348         LLVMAddAttribute(LLVMGetParam(variant->function_elts, i),
1349                          LLVMNoAliasAttribute);
1350
1351   context_ptr  = LLVMGetParam(variant->function_elts, 0);
1352   io_ptr       = LLVMGetParam(variant->function_elts, 1);
1353   vbuffers_ptr = LLVMGetParam(variant->function_elts, 2);
1354   fetch_elts   = LLVMGetParam(variant->function_elts, 3);
1355   fetch_count  = LLVMGetParam(variant->function_elts, 4);
1356   stride       = LLVMGetParam(variant->function_elts, 5);
1357   vb_ptr       = LLVMGetParam(variant->function_elts, 6);
1358   instance_id  = LLVMGetParam(variant->function_elts, 7);
1359
1360   lp_build_name(context_ptr, "context");
1361   lp_build_name(io_ptr, "io");
1362   lp_build_name(vbuffers_ptr, "vbuffers");
1363   lp_build_name(fetch_elts, "fetch_elts");
1364   lp_build_name(fetch_count, "fetch_count");
1365   lp_build_name(stride, "stride");
1366   lp_build_name(vb_ptr, "vb");
1367   lp_build_name(instance_id, "instance_id");
1368
1369   /*
1370    * Function body
1371    */
1372
1373   block = LLVMAppendBasicBlockInContext(gallivm->context, variant->function_elts, "entry");
1374   builder = gallivm->builder;
1375   LLVMPositionBuilderAtEnd(builder, block);
1376
1377   lp_build_context_init(&bld, gallivm, lp_type_int(32));
1378
1379   step = lp_build_const_int32(gallivm, max_vertices);
1380
1381   /* code generated texture sampling */
1382   sampler = draw_llvm_sampler_soa_create(
1383      draw_llvm_variant_key_samplers(&variant->key),
1384      context_ptr);
1385
1386   fetch_max = LLVMBuildSub(builder, fetch_count,
1387                            lp_build_const_int32(gallivm, 1),
1388                            "fetch_max");
1389
1390   /* function returns non-zero i32 value if any clipped vertices */
1391   ret_ptr = lp_build_alloca(gallivm, int32_type, "");
1392   LLVMBuildStore(builder, lp_build_const_int32(gallivm, 0), ret_ptr);
1393
1394   lp_build_loop_begin(&lp_loop, gallivm, lp_build_const_int32(gallivm, 0));
1395   {
1396      LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
1397      LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
1398      LLVMValueRef io;
1399      LLVMValueRef clipmask;   /* holds the clipmask value */
1400      const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
1401
1402      io_itr = lp_loop.counter;
1403      io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
1404#if DEBUG_STORE
1405      lp_build_printf(builder, " --- io %d = %p, loop counter %d\n",
1406                      io_itr, io, lp_loop.counter);
1407#endif
1408      for (i = 0; i < NUM_CHANNELS; ++i) {
1409         LLVMValueRef true_index = LLVMBuildAdd(
1410            builder,
1411            lp_loop.counter,
1412            lp_build_const_int32(gallivm, i), "");
1413         LLVMValueRef fetch_ptr;
1414
1415         /* make sure we're not out of bounds which can happen
1416          * if fetch_count % 4 != 0, because on the last iteration
1417          * a few of the 4 vertex fetches will be out of bounds */
1418         true_index = lp_build_min(&bld, true_index, fetch_max);
1419
1420         fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
1421                                  &true_index, 1, "");
1422         true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt");
1423         for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
1424            struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
1425            LLVMValueRef vb_index = lp_build_const_int32(gallivm, velem->vertex_buffer_index);
1426            LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
1427                                           &vb_index, 1, "");
1428            generate_fetch(gallivm, vbuffers_ptr,
1429                           &aos_attribs[j][i], velem, vb, true_index,
1430                           instance_id);
1431         }
1432      }
1433      convert_to_soa(gallivm, aos_attribs, inputs,
1434                     draw->pt.nr_vertex_elements);
1435
1436      ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs;
1437      generate_vs(llvm,
1438                  builder,
1439                  outputs,
1440                  ptr_aos,
1441                  context_ptr,
1442                  sampler);
1443
1444      /* store original positions in clip before further manipulation */
1445      store_clip(gallivm, io, outputs);
1446
1447      /* do cliptest */
1448      if (enable_cliptest){
1449         /* allocate clipmask, assign it integer type */
1450         clipmask = generate_clipmask(gallivm, outputs,
1451                                      variant->key.clip_xy,
1452                                      variant->key.clip_z,
1453                                      variant->key.clip_user,
1454                                      variant->key.clip_halfz,
1455                                      variant->key.nr_planes,
1456                                      context_ptr);
1457         /* return clipping boolean value for function */
1458         clipmask_bool(gallivm, clipmask, ret_ptr);
1459      }
1460      else{
1461         clipmask = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0);
1462      }
1463
1464      /* do viewport mapping */
1465      if (!bypass_viewport){
1466         generate_viewport(llvm, builder, outputs, context_ptr);
1467      }
1468
1469      /* store clipmask in vertex header,
1470       * original positions in clip
1471       * and transformed positions in data
1472       */
1473      convert_to_aos(gallivm, io, outputs, clipmask,
1474                     draw->vs.vertex_shader->info.num_outputs,
1475                     max_vertices);
1476   }
1477
1478   lp_build_loop_end_cond(&lp_loop, fetch_count, step, LLVMIntUGE);
1479
1480   sampler->destroy(sampler);
1481
1482   ret = LLVMBuildLoad(builder, ret_ptr,"");
1483   LLVMBuildRet(builder, ret);
1484
1485   /*
1486    * Translate the LLVM IR into machine code.
1487    */
1488#ifdef DEBUG
1489   if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) {
1490      lp_debug_dump_value(variant->function_elts);
1491      assert(0);
1492   }
1493#endif
1494
1495   LLVMRunFunctionPassManager(gallivm->passmgr, variant->function_elts);
1496
1497   if (gallivm_debug & GALLIVM_DEBUG_IR) {
1498      lp_debug_dump_value(variant->function_elts);
1499      debug_printf("\n");
1500   }
1501
1502   code = LLVMGetPointerToGlobal(gallivm->engine, variant->function_elts);
1503   variant->jit_func_elts = (draw_jit_vert_func_elts)pointer_to_func(code);
1504
1505   if (gallivm_debug & GALLIVM_DEBUG_ASM) {
1506      lp_disassemble(code);
1507   }
1508   lp_func_delete_body(variant->function_elts);
1509}
1510
1511
1512struct draw_llvm_variant_key *
1513draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
1514{
1515   unsigned i;
1516   struct draw_llvm_variant_key *key;
1517   struct lp_sampler_static_state *sampler;
1518
1519   key = (struct draw_llvm_variant_key *)store;
1520
1521   /* Presumably all variants of the shader should have the same
1522    * number of vertex elements - ie the number of shader inputs.
1523    */
1524   key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements;
1525
1526   /* will have to rig this up properly later */
1527   key->clip_xy = llvm->draw->clip_xy;
1528   key->clip_z = llvm->draw->clip_z;
1529   key->clip_user = llvm->draw->clip_user;
1530   key->bypass_viewport = llvm->draw->identity_viewport;
1531   key->clip_halfz = !llvm->draw->rasterizer->gl_rasterization_rules;
1532   key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
1533   key->nr_planes = llvm->draw->nr_planes;
1534   key->pad = 0;
1535
1536   /* All variants of this shader will have the same value for
1537    * nr_samplers.  Not yet trying to compact away holes in the
1538    * sampler array.
1539    */
1540   key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
1541
1542   sampler = draw_llvm_variant_key_samplers(key);
1543
1544   memcpy(key->vertex_element,
1545          llvm->draw->pt.vertex_element,
1546          sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
1547
1548   memset(sampler, 0, key->nr_samplers * sizeof *sampler);
1549
1550   for (i = 0 ; i < key->nr_samplers; i++) {
1551      lp_sampler_static_state(&sampler[i],
1552			      llvm->draw->sampler_views[i],
1553			      llvm->draw->samplers[i]);
1554   }
1555
1556   return key;
1557}
1558
1559void
1560draw_llvm_set_mapped_texture(struct draw_context *draw,
1561                             unsigned sampler_idx,
1562                             uint32_t width, uint32_t height, uint32_t depth,
1563                             uint32_t last_level,
1564                             uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
1565                             uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
1566                             const void *data[PIPE_MAX_TEXTURE_LEVELS])
1567{
1568   unsigned j;
1569   struct draw_jit_texture *jit_tex;
1570
1571   assert(sampler_idx < PIPE_MAX_VERTEX_SAMPLERS);
1572
1573
1574   jit_tex = &draw->llvm->jit_context.textures[sampler_idx];
1575
1576   jit_tex->width = width;
1577   jit_tex->height = height;
1578   jit_tex->depth = depth;
1579   jit_tex->last_level = last_level;
1580
1581   for (j = 0; j <= last_level; j++) {
1582      jit_tex->data[j] = data[j];
1583      jit_tex->row_stride[j] = row_stride[j];
1584      jit_tex->img_stride[j] = img_stride[j];
1585   }
1586}
1587
1588
1589void
1590draw_llvm_set_sampler_state(struct draw_context *draw)
1591{
1592   unsigned i;
1593
1594   for (i = 0; i < draw->num_samplers; i++) {
1595      struct draw_jit_texture *jit_tex = &draw->llvm->jit_context.textures[i];
1596
1597      if (draw->samplers[i]) {
1598         jit_tex->min_lod = draw->samplers[i]->min_lod;
1599         jit_tex->max_lod = draw->samplers[i]->max_lod;
1600         jit_tex->lod_bias = draw->samplers[i]->lod_bias;
1601         COPY_4V(jit_tex->border_color, draw->samplers[i]->border_color);
1602      }
1603   }
1604}
1605
1606
1607void
1608draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
1609{
1610   struct draw_llvm *llvm = variant->llvm;
1611
1612   if (variant->function_elts) {
1613      if (variant->function_elts)
1614         LLVMFreeMachineCodeForFunction(llvm->gallivm->engine,
1615                                        variant->function_elts);
1616      LLVMDeleteFunction(variant->function_elts);
1617   }
1618
1619   if (variant->function) {
1620      if (variant->function)
1621         LLVMFreeMachineCodeForFunction(llvm->gallivm->engine,
1622                                        variant->function);
1623      LLVMDeleteFunction(variant->function);
1624   }
1625
1626   remove_from_list(&variant->list_item_local);
1627   variant->shader->variants_cached--;
1628   remove_from_list(&variant->list_item_global);
1629   llvm->nr_variants--;
1630   FREE(variant);
1631}
1632