draw_llvm.c revision 5700bc6bac8a1a7f8a82f667e561745804317bb8
1/**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include "draw_llvm.h"
29
30#include "draw_context.h"
31#include "draw_vs.h"
32
33#include "gallivm/lp_bld_arit.h"
34#include "gallivm/lp_bld_logic.h"
35#include "gallivm/lp_bld_const.h"
36#include "gallivm/lp_bld_swizzle.h"
37#include "gallivm/lp_bld_struct.h"
38#include "gallivm/lp_bld_type.h"
39#include "gallivm/lp_bld_flow.h"
40#include "gallivm/lp_bld_debug.h"
41#include "gallivm/lp_bld_tgsi.h"
42#include "gallivm/lp_bld_printf.h"
43#include "gallivm/lp_bld_intr.h"
44#include "gallivm/lp_bld_init.h"
45#include "gallivm/lp_bld_type.h"
46
47#include "tgsi/tgsi_exec.h"
48#include "tgsi/tgsi_dump.h"
49
50#include "util/u_math.h"
51#include "util/u_pointer.h"
52#include "util/u_string.h"
53#include "util/u_simple_list.h"
54
55
56#define DEBUG_STORE 0
57
58
59/**
60 * This function is called by the gallivm "garbage collector" when
61 * the LLVM global data structures are freed.  We must free all LLVM-related
62 * data.  Specifically, all JIT'd shader variants.
63 */
64static void
65draw_llvm_garbage_collect_callback(void *cb_data)
66{
67   struct draw_llvm *llvm = (struct draw_llvm *) cb_data;
68   struct draw_llvm_variant_list_item *li;
69
70   /* free all shader variants */
71   li = first_elem(&llvm->vs_variants_list);
72   while (!at_end(&llvm->vs_variants_list, li)) {
73      struct draw_llvm_variant_list_item *next = next_elem(li);
74      draw_llvm_destroy_variant(li->base);
75      li = next;
76   }
77
78   /* Null-out these pointers so they get remade next time they're needed.
79    * See the accessor functions below.
80    */
81   llvm->context_ptr_type = NULL;
82   llvm->buffer_ptr_type = NULL;
83   llvm->vb_ptr_type = NULL;
84   llvm->vertex_header_ptr_type = NULL;
85}
86
87
/* Forward declarations of the two code generators that
 * draw_llvm_create_variant() invokes for each new variant.
 */
static void draw_llvm_generate(struct draw_llvm *llvm,
                               struct draw_llvm_variant *var);

static void draw_llvm_generate_elts(struct draw_llvm *llvm,
                                    struct draw_llvm_variant *var);
93
94
95/**
96 * Create LLVM type for struct draw_jit_texture
97 */
98static LLVMTypeRef
99create_jit_texture_type(struct gallivm_state *gallivm)
100{
101   LLVMTargetDataRef target = gallivm->target;
102   LLVMTypeRef texture_type;
103   LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS];
104   LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
105
106   elem_types[DRAW_JIT_TEXTURE_WIDTH]  =
107   elem_types[DRAW_JIT_TEXTURE_HEIGHT] =
108   elem_types[DRAW_JIT_TEXTURE_DEPTH] =
109   elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = int32_type;
110   elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] =
111   elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] =
112      LLVMArrayType(int32_type, PIPE_MAX_TEXTURE_LEVELS);
113   elem_types[DRAW_JIT_TEXTURE_DATA] =
114      LLVMArrayType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0),
115                    PIPE_MAX_TEXTURE_LEVELS);
116   elem_types[DRAW_JIT_TEXTURE_MIN_LOD] =
117   elem_types[DRAW_JIT_TEXTURE_MAX_LOD] =
118   elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatTypeInContext(gallivm->context);
119   elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] =
120      LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
121
122   texture_type = LLVMStructTypeInContext(gallivm->context, elem_types,
123                                          Elements(elem_types), 0);
124
125   /* Make sure the target's struct layout cache doesn't return
126    * stale/invalid data.
127    */
128   LLVMInvalidateStructLayout(gallivm->target, texture_type);
129
130   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width,
131                          target, texture_type,
132                          DRAW_JIT_TEXTURE_WIDTH);
133   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height,
134                          target, texture_type,
135                          DRAW_JIT_TEXTURE_HEIGHT);
136   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth,
137                          target, texture_type,
138                          DRAW_JIT_TEXTURE_DEPTH);
139   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level,
140                          target, texture_type,
141                          DRAW_JIT_TEXTURE_LAST_LEVEL);
142   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride,
143                          target, texture_type,
144                          DRAW_JIT_TEXTURE_ROW_STRIDE);
145   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride,
146                          target, texture_type,
147                          DRAW_JIT_TEXTURE_IMG_STRIDE);
148   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data,
149                          target, texture_type,
150                          DRAW_JIT_TEXTURE_DATA);
151   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, min_lod,
152                          target, texture_type,
153                          DRAW_JIT_TEXTURE_MIN_LOD);
154   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, max_lod,
155                          target, texture_type,
156                          DRAW_JIT_TEXTURE_MAX_LOD);
157   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, lod_bias,
158                          target, texture_type,
159                          DRAW_JIT_TEXTURE_LOD_BIAS);
160   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, border_color,
161                          target, texture_type,
162                          DRAW_JIT_TEXTURE_BORDER_COLOR);
163
164   LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, target, texture_type);
165
166   return texture_type;
167}
168
169
170/**
171 * Create LLVM type for struct draw_jit_texture
172 */
173static LLVMTypeRef
174create_jit_context_type(struct gallivm_state *gallivm,
175                        LLVMTypeRef texture_type)
176{
177   LLVMTargetDataRef target = gallivm->target;
178   LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
179   LLVMTypeRef elem_types[5];
180   LLVMTypeRef context_type;
181
182   elem_types[0] = LLVMPointerType(float_type, 0); /* vs_constants */
183   elem_types[1] = LLVMPointerType(float_type, 0); /* gs_constants */
184   elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4), 12), 0); /* planes */
185   elem_types[3] = LLVMPointerType(float_type, 0); /* viewport */
186   elem_types[4] = LLVMArrayType(texture_type,
187                                 PIPE_MAX_VERTEX_SAMPLERS); /* textures */
188
189   context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
190                                          Elements(elem_types), 0);
191
192   LLVMInvalidateStructLayout(gallivm->target, context_type);
193
194   LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
195                          target, context_type, 0);
196   LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants,
197                          target, context_type, 1);
198   LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes,
199                          target, context_type, 2);
200   LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,
201                          target, context_type,
202                          DRAW_JIT_CTX_TEXTURES);
203   LP_CHECK_STRUCT_SIZE(struct draw_jit_context,
204                        target, context_type);
205
206   return context_type;
207}
208
209
210/**
211 * Create LLVM type for struct pipe_vertex_buffer
212 */
213static LLVMTypeRef
214create_jit_vertex_buffer_type(struct gallivm_state *gallivm)
215{
216   LLVMTargetDataRef target = gallivm->target;
217   LLVMTypeRef elem_types[4];
218   LLVMTypeRef vb_type;
219
220   elem_types[0] =
221   elem_types[1] =
222   elem_types[2] = LLVMInt32TypeInContext(gallivm->context);
223   elem_types[3] = LLVMPointerType(LLVMOpaqueTypeInContext(gallivm->context), 0); /* vs_constants */
224
225   vb_type = LLVMStructTypeInContext(gallivm->context, elem_types,
226                                     Elements(elem_types), 0);
227
228   LLVMInvalidateStructLayout(gallivm->target, vb_type);
229
230   LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride,
231                          target, vb_type, 0);
232   LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, max_index,
233                          target, vb_type, 1);
234   LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset,
235                          target, vb_type, 2);
236
237   LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type);
238
239   return vb_type;
240}
241
242
243/**
244 * Create LLVM type for struct vertex_header;
245 */
246static LLVMTypeRef
247create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems)
248{
249   LLVMTargetDataRef target = gallivm->target;
250   LLVMTypeRef elem_types[3];
251   LLVMTypeRef vertex_header;
252   char struct_name[24];
253
254   util_snprintf(struct_name, 23, "vertex_header%d", data_elems);
255
256   elem_types[0]  = LLVMIntTypeInContext(gallivm->context, 32);
257   elem_types[1]  = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
258   elem_types[2]  = LLVMArrayType(elem_types[1], data_elems);
259
260   vertex_header = LLVMStructTypeInContext(gallivm->context, elem_types,
261                                           Elements(elem_types), 0);
262
263   LLVMInvalidateStructLayout(gallivm->target, vertex_header);
264
265   /* these are bit-fields and we can't take address of them
266      LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
267      target, vertex_header,
268      DRAW_JIT_VERTEX_CLIPMASK);
269      LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag,
270      target, vertex_header,
271      DRAW_JIT_VERTEX_EDGEFLAG);
272      LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad,
273      target, vertex_header,
274      DRAW_JIT_VERTEX_PAD);
275      LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id,
276      target, vertex_header,
277      DRAW_JIT_VERTEX_VERTEX_ID);
278   */
279   LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip,
280                          target, vertex_header,
281                          DRAW_JIT_VERTEX_CLIP);
282   LP_CHECK_MEMBER_OFFSET(struct vertex_header, data,
283                          target, vertex_header,
284                          DRAW_JIT_VERTEX_DATA);
285
286   LLVMAddTypeName(gallivm->module, struct_name, vertex_header);
287
288   return vertex_header;
289}
290
291
292/**
293 * Create LLVM types for various structures.
294 */
295static void
296create_jit_types(struct draw_llvm *llvm)
297{
298   struct gallivm_state *gallivm = llvm->gallivm;
299   LLVMTypeRef texture_type, context_type, buffer_type, vb_type;
300
301   texture_type = create_jit_texture_type(gallivm);
302   LLVMAddTypeName(gallivm->module, "texture", texture_type);
303
304   context_type = create_jit_context_type(gallivm, texture_type);
305   LLVMAddTypeName(gallivm->module, "draw_jit_context", context_type);
306   llvm->context_ptr_type = LLVMPointerType(context_type, 0);
307
308   buffer_type = LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 8), 0);
309   LLVMAddTypeName(gallivm->module, "buffer", buffer_type);
310   llvm->buffer_ptr_type = LLVMPointerType(buffer_type, 0);
311
312   vb_type = create_jit_vertex_buffer_type(gallivm);
313   LLVMAddTypeName(gallivm->module, "pipe_vertex_buffer", vb_type);
314   llvm->vb_ptr_type = LLVMPointerType(vb_type, 0);
315}
316
317
318static LLVMTypeRef
319get_context_ptr_type(struct draw_llvm *llvm)
320{
321   if (!llvm->context_ptr_type)
322      create_jit_types(llvm);
323   return llvm->context_ptr_type;
324}
325
326
327static LLVMTypeRef
328get_buffer_ptr_type(struct draw_llvm *llvm)
329{
330   if (!llvm->buffer_ptr_type)
331      create_jit_types(llvm);
332   return llvm->buffer_ptr_type;
333}
334
335
336static LLVMTypeRef
337get_vb_ptr_type(struct draw_llvm *llvm)
338{
339   if (!llvm->vb_ptr_type)
340      create_jit_types(llvm);
341   return llvm->vb_ptr_type;
342}
343
344static LLVMTypeRef
345get_vertex_header_ptr_type(struct draw_llvm *llvm)
346{
347   if (!llvm->vertex_header_ptr_type)
348      create_jit_types(llvm);
349   return llvm->vertex_header_ptr_type;
350}
351
352
353/**
354 * Create per-context LLVM info.
355 */
356struct draw_llvm *
357draw_llvm_create(struct draw_context *draw, struct gallivm_state *gallivm)
358{
359   struct draw_llvm *llvm;
360
361   llvm = CALLOC_STRUCT( draw_llvm );
362   if (!llvm)
363      return NULL;
364
365   lp_build_init();
366
367   llvm->draw = draw;
368   llvm->gallivm = gallivm;
369
370   if (gallivm_debug & GALLIVM_DEBUG_IR) {
371      LLVMDumpModule(llvm->gallivm->module);
372   }
373
374   llvm->nr_variants = 0;
375   make_empty_list(&llvm->vs_variants_list);
376
377   gallivm_register_garbage_collector_callback(
378                              draw_llvm_garbage_collect_callback, llvm);
379
380   return llvm;
381}
382
383
384/**
385 * Free per-context LLVM info.
386 */
387void
388draw_llvm_destroy(struct draw_llvm *llvm)
389{
390   gallivm_remove_garbage_collector_callback(
391                              draw_llvm_garbage_collect_callback, llvm);
392
393   /* XXX free other draw_llvm data? */
394   FREE(llvm);
395}
396
397
398/**
399 * Create LLVM-generated code for a vertex shader.
400 */
401struct draw_llvm_variant *
402draw_llvm_create_variant(struct draw_llvm *llvm,
403			 unsigned num_inputs,
404			 const struct draw_llvm_variant_key *key)
405{
406   struct draw_llvm_variant *variant;
407   struct llvm_vertex_shader *shader =
408      llvm_vertex_shader(llvm->draw->vs.vertex_shader);
409   LLVMTypeRef vertex_header;
410
411   variant = MALLOC(sizeof *variant +
412		    shader->variant_key_size -
413		    sizeof variant->key);
414   if (variant == NULL)
415      return NULL;
416
417   variant->llvm = llvm;
418
419   memcpy(&variant->key, key, shader->variant_key_size);
420
421   vertex_header = create_jit_vertex_header(llvm->gallivm, num_inputs);
422
423   llvm->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);
424
425   draw_llvm_generate(llvm, variant);
426   draw_llvm_generate_elts(llvm, variant);
427
428   variant->shader = shader;
429   variant->list_item_global.base = variant;
430   variant->list_item_local.base = variant;
431   /*variant->no = */shader->variants_created++;
432   variant->list_item_global.base = variant;
433
434   return variant;
435}
436
437static void
438generate_vs(struct draw_llvm *llvm,
439            LLVMBuilderRef builder,
440            LLVMValueRef (*outputs)[NUM_CHANNELS],
441            const LLVMValueRef (*inputs)[NUM_CHANNELS],
442            LLVMValueRef system_values_array,
443            LLVMValueRef context_ptr,
444            struct lp_build_sampler_soa *draw_sampler)
445{
446   const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
447   struct lp_type vs_type;
448   LLVMValueRef consts_ptr = draw_jit_context_vs_constants(llvm->gallivm, context_ptr);
449   struct lp_build_sampler_soa *sampler = 0;
450
451   memset(&vs_type, 0, sizeof vs_type);
452   vs_type.floating = TRUE; /* floating point values */
453   vs_type.sign = TRUE;     /* values are signed */
454   vs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
455   vs_type.width = 32;      /* 32-bit float */
456   vs_type.length = 4;      /* 4 elements per vector */
457#if 0
458   num_vs = 4;              /* number of vertices per block */
459#endif
460
461   if (gallivm_debug & GALLIVM_DEBUG_IR) {
462      tgsi_dump(tokens, 0);
463   }
464
465   if (llvm->draw->num_sampler_views &&
466       llvm->draw->num_samplers)
467      sampler = draw_sampler;
468
469   lp_build_tgsi_soa(llvm->gallivm,
470                     tokens,
471                     vs_type,
472                     NULL /*struct lp_build_mask_context *mask*/,
473                     consts_ptr,
474                     system_values_array,
475                     NULL /*pos*/,
476                     inputs,
477                     outputs,
478                     sampler,
479                     &llvm->draw->vs.vertex_shader->info);
480}
481
#if DEBUG_STORE
/**
 * Debug helper: emit IR that prints the four elements of a float vector.
 *
 * \param builder  IR builder positioned where the printf should be emitted
 * \param vec      4-element vector value to print
 */
static void print_vectorf(LLVMBuilderRef builder,
                         LLVMValueRef vec)
{
   /* No gallivm_state is available here, so build the element indices
    * in the LLVM context that 'vec' itself belongs to.
    */
   LLVMTypeRef int32_type =
      LLVMInt32TypeInContext(LLVMGetTypeContext(LLVMTypeOf(vec)));
   LLVMValueRef val[4];
   unsigned i;

   for (i = 0; i < 4; i++) {
      val[i] = LLVMBuildExtractElement(builder, vec,
                                       LLVMConstInt(int32_type, i, 0), "");
   }
   lp_build_printf(builder, "vector = [%f, %f, %f, %f]\n",
                   val[0], val[1], val[2], val[3]);
}
#endif
499
500static void
501generate_fetch(struct gallivm_state *gallivm,
502               LLVMValueRef vbuffers_ptr,
503               LLVMValueRef *res,
504               struct pipe_vertex_element *velem,
505               LLVMValueRef vbuf,
506               LLVMValueRef index,
507               LLVMValueRef instance_id)
508{
509   LLVMBuilderRef builder = gallivm->builder;
510   LLVMValueRef indices =
511      LLVMConstInt(LLVMInt64TypeInContext(gallivm->context),
512                   velem->vertex_buffer_index, 0);
513   LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr,
514                                           &indices, 1, "");
515   LLVMValueRef vb_stride = draw_jit_vbuffer_stride(gallivm, vbuf);
516   LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(gallivm, vbuf);
517   LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, vbuf);
518   LLVMValueRef cond;
519   LLVMValueRef stride;
520
521   if (velem->instance_divisor) {
522      /* array index = instance_id / instance_divisor */
523      index = LLVMBuildUDiv(builder, instance_id,
524                            lp_build_const_int32(gallivm, velem->instance_divisor),
525                            "instance_divisor");
526   }
527
528   /* limit index to min(index, vb_max_index) */
529   cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, "");
530   index = LLVMBuildSelect(builder, cond, index, vb_max_index, "");
531
532   stride = LLVMBuildMul(builder, vb_stride, index, "");
533
534   vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer");
535
536   stride = LLVMBuildAdd(builder, stride,
537                         vb_buffer_offset,
538                         "");
539   stride = LLVMBuildAdd(builder, stride,
540                         lp_build_const_int32(gallivm, velem->src_offset),
541                         "");
542
543   /*lp_build_printf(builder, "vbuf index = %d, stride is %d\n", indices, stride);*/
544   vbuffer_ptr = LLVMBuildGEP(builder, vbuffer_ptr, &stride, 1, "");
545
546   *res = draw_llvm_translate_from(gallivm, vbuffer_ptr, velem->src_format);
547}
548
549static LLVMValueRef
550aos_to_soa(struct gallivm_state *gallivm,
551           LLVMValueRef val0,
552           LLVMValueRef val1,
553           LLVMValueRef val2,
554           LLVMValueRef val3,
555           LLVMValueRef channel)
556{
557   LLVMBuilderRef builder = gallivm->builder;
558   LLVMValueRef ex, res;
559
560   ex = LLVMBuildExtractElement(builder, val0,
561                                channel, "");
562   res = LLVMBuildInsertElement(builder,
563                                LLVMConstNull(LLVMTypeOf(val0)),
564                                ex,
565                                lp_build_const_int32(gallivm, 0),
566                                "");
567
568   ex = LLVMBuildExtractElement(builder, val1,
569                                channel, "");
570   res = LLVMBuildInsertElement(builder,
571                                res, ex,
572                                lp_build_const_int32(gallivm, 1),
573                                "");
574
575   ex = LLVMBuildExtractElement(builder, val2,
576                                channel, "");
577   res = LLVMBuildInsertElement(builder,
578                                res, ex,
579                                lp_build_const_int32(gallivm, 2),
580                                "");
581
582   ex = LLVMBuildExtractElement(builder, val3,
583                                channel, "");
584   res = LLVMBuildInsertElement(builder,
585                                res, ex,
586                                lp_build_const_int32(gallivm, 3),
587                                "");
588
589   return res;
590}
591
592static void
593soa_to_aos(struct gallivm_state *gallivm,
594           LLVMValueRef soa[NUM_CHANNELS],
595           LLVMValueRef aos[NUM_CHANNELS])
596{
597   LLVMBuilderRef builder = gallivm->builder;
598   LLVMValueRef comp;
599   int i = 0;
600
601   debug_assert(NUM_CHANNELS == 4);
602
603   aos[0] = LLVMConstNull(LLVMTypeOf(soa[0]));
604   aos[1] = aos[2] = aos[3] = aos[0];
605
606   for (i = 0; i < NUM_CHANNELS; ++i) {
607      LLVMValueRef channel = lp_build_const_int32(gallivm, i);
608
609      comp = LLVMBuildExtractElement(builder, soa[i],
610                                     lp_build_const_int32(gallivm, 0), "");
611      aos[0] = LLVMBuildInsertElement(builder, aos[0], comp, channel, "");
612
613      comp = LLVMBuildExtractElement(builder, soa[i],
614                                     lp_build_const_int32(gallivm, 1), "");
615      aos[1] = LLVMBuildInsertElement(builder, aos[1], comp, channel, "");
616
617      comp = LLVMBuildExtractElement(builder, soa[i],
618                                     lp_build_const_int32(gallivm, 2), "");
619      aos[2] = LLVMBuildInsertElement(builder, aos[2], comp, channel, "");
620
621      comp = LLVMBuildExtractElement(builder, soa[i],
622                                     lp_build_const_int32(gallivm, 3), "");
623      aos[3] = LLVMBuildInsertElement(builder, aos[3], comp, channel, "");
624
625   }
626}
627
628static void
629convert_to_soa(struct gallivm_state *gallivm,
630               LLVMValueRef (*aos)[NUM_CHANNELS],
631               LLVMValueRef (*soa)[NUM_CHANNELS],
632               int num_attribs)
633{
634   int i;
635
636   debug_assert(NUM_CHANNELS == 4);
637
638   for (i = 0; i < num_attribs; ++i) {
639      LLVMValueRef val0 = aos[i][0];
640      LLVMValueRef val1 = aos[i][1];
641      LLVMValueRef val2 = aos[i][2];
642      LLVMValueRef val3 = aos[i][3];
643
644      soa[i][0] = aos_to_soa(gallivm, val0, val1, val2, val3,
645                             lp_build_const_int32(gallivm, 0));
646      soa[i][1] = aos_to_soa(gallivm, val0, val1, val2, val3,
647                             lp_build_const_int32(gallivm, 1));
648      soa[i][2] = aos_to_soa(gallivm, val0, val1, val2, val3,
649                             lp_build_const_int32(gallivm, 2));
650      soa[i][3] = aos_to_soa(gallivm, val0, val1, val2, val3,
651                             lp_build_const_int32(gallivm, 3));
652   }
653}
654
655static void
656store_aos(struct gallivm_state *gallivm,
657          LLVMValueRef io_ptr,
658          LLVMValueRef index,
659          LLVMValueRef value,
660          LLVMValueRef clipmask)
661{
662   LLVMBuilderRef builder = gallivm->builder;
663   LLVMValueRef id_ptr = draw_jit_header_id(gallivm, io_ptr);
664   LLVMValueRef data_ptr = draw_jit_header_data(gallivm, io_ptr);
665   LLVMValueRef indices[3];
666   LLVMValueRef val, shift;
667
668   indices[0] = lp_build_const_int32(gallivm, 0);
669   indices[1] = index;
670   indices[2] = lp_build_const_int32(gallivm, 0);
671
672   /* initialize vertex id:16 = 0xffff, pad:3 = 0, edgeflag:1 = 1 */
673   val = lp_build_const_int32(gallivm, 0xffff1);
674   shift = lp_build_const_int32(gallivm, 12);
675   val = LLVMBuildShl(builder, val, shift, "");
676   /* add clipmask:12 */
677   val = LLVMBuildOr(builder, val, clipmask, "");
678
679   /* store vertex header */
680   LLVMBuildStore(builder, val, id_ptr);
681
682
683#if DEBUG_STORE
684   lp_build_printf(builder, "    ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr);
685#endif
686#if 0
687   /*lp_build_printf(builder, " ---- %p storing at %d (%p)  ", io_ptr, index, data_ptr);
688     print_vectorf(builder, value);*/
689   data_ptr = LLVMBuildBitCast(builder, data_ptr,
690                               LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), 0), 0),
691                               "datavec");
692   data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 2, "");
693
694   LLVMBuildStore(builder, value, data_ptr);
695#else
696   {
697      LLVMValueRef x, y, z, w;
698      LLVMValueRef idx0, idx1, idx2, idx3;
699      LLVMValueRef gep0, gep1, gep2, gep3;
700      data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, "");
701
702      idx0 = lp_build_const_int32(gallivm, 0);
703      idx1 = lp_build_const_int32(gallivm, 1);
704      idx2 = lp_build_const_int32(gallivm, 2);
705      idx3 = lp_build_const_int32(gallivm, 3);
706
707      x = LLVMBuildExtractElement(builder, value,
708                                  idx0, "");
709      y = LLVMBuildExtractElement(builder, value,
710                                  idx1, "");
711      z = LLVMBuildExtractElement(builder, value,
712                                  idx2, "");
713      w = LLVMBuildExtractElement(builder, value,
714                                  idx3, "");
715
716      gep0 = LLVMBuildGEP(builder, data_ptr, &idx0, 1, "");
717      gep1 = LLVMBuildGEP(builder, data_ptr, &idx1, 1, "");
718      gep2 = LLVMBuildGEP(builder, data_ptr, &idx2, 1, "");
719      gep3 = LLVMBuildGEP(builder, data_ptr, &idx3, 1, "");
720
721      /*lp_build_printf(builder, "##### x = %f (%p), y = %f (%p), z = %f (%p), w = %f (%p)\n",
722        x, gep0, y, gep1, z, gep2, w, gep3);*/
723      LLVMBuildStore(builder, x, gep0);
724      LLVMBuildStore(builder, y, gep1);
725      LLVMBuildStore(builder, z, gep2);
726      LLVMBuildStore(builder, w, gep3);
727   }
728#endif
729}
730
731static void
732store_aos_array(struct gallivm_state *gallivm,
733                LLVMValueRef io_ptr,
734                LLVMValueRef aos[NUM_CHANNELS],
735                int attrib,
736                int num_outputs,
737                LLVMValueRef clipmask)
738{
739   LLVMBuilderRef builder = gallivm->builder;
740   LLVMValueRef attr_index = lp_build_const_int32(gallivm, attrib);
741   LLVMValueRef ind0 = lp_build_const_int32(gallivm, 0);
742   LLVMValueRef ind1 = lp_build_const_int32(gallivm, 1);
743   LLVMValueRef ind2 = lp_build_const_int32(gallivm, 2);
744   LLVMValueRef ind3 = lp_build_const_int32(gallivm, 3);
745   LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr;
746   LLVMValueRef clipmask0, clipmask1, clipmask2, clipmask3;
747
748   debug_assert(NUM_CHANNELS == 4);
749
750   io0_ptr = LLVMBuildGEP(builder, io_ptr,
751                          &ind0, 1, "");
752   io1_ptr = LLVMBuildGEP(builder, io_ptr,
753                          &ind1, 1, "");
754   io2_ptr = LLVMBuildGEP(builder, io_ptr,
755                          &ind2, 1, "");
756   io3_ptr = LLVMBuildGEP(builder, io_ptr,
757                          &ind3, 1, "");
758
759   clipmask0 = LLVMBuildExtractElement(builder, clipmask,
760                                       ind0, "");
761   clipmask1 = LLVMBuildExtractElement(builder, clipmask,
762                                       ind1, "");
763   clipmask2 = LLVMBuildExtractElement(builder, clipmask,
764                                       ind2, "");
765   clipmask3 = LLVMBuildExtractElement(builder, clipmask,
766                                       ind3, "");
767
768#if DEBUG_STORE
769   lp_build_printf(builder, "io = %p, indexes[%d, %d, %d, %d]\n, clipmask0 = %x, clipmask1 = %x, clipmask2 = %x, clipmask3 = %x\n",
770                   io_ptr, ind0, ind1, ind2, ind3, clipmask0, clipmask1, clipmask2, clipmask3);
771#endif
772   /* store for each of the 4 vertices */
773   store_aos(gallivm, io0_ptr, attr_index, aos[0], clipmask0);
774   store_aos(gallivm, io1_ptr, attr_index, aos[1], clipmask1);
775   store_aos(gallivm, io2_ptr, attr_index, aos[2], clipmask2);
776   store_aos(gallivm, io3_ptr, attr_index, aos[3], clipmask3);
777}
778
779static void
780convert_to_aos(struct gallivm_state *gallivm,
781               LLVMValueRef io,
782               LLVMValueRef (*outputs)[NUM_CHANNELS],
783               LLVMValueRef clipmask,
784               int num_outputs,
785               int max_vertices)
786{
787   LLVMBuilderRef builder = gallivm->builder;
788   unsigned chan, attrib;
789
790#if DEBUG_STORE
791   lp_build_printf(builder, "   # storing begin\n");
792#endif
793   for (attrib = 0; attrib < num_outputs; ++attrib) {
794      LLVMValueRef soa[4];
795      LLVMValueRef aos[4];
796      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
797         if(outputs[attrib][chan]) {
798            LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
799            lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);
800            /*lp_build_printf(builder, "output %d : %d ",
801                            LLVMConstInt(LLVMInt32Type(), attrib, 0),
802                            LLVMConstInt(LLVMInt32Type(), chan, 0));
803              print_vectorf(builder, out);*/
804            soa[chan] = out;
805         } else
806            soa[chan] = 0;
807      }
808      soa_to_aos(gallivm, soa, aos);
809      store_aos_array(gallivm,
810                      io,
811                      aos,
812                      attrib,
813                      num_outputs,
814                      clipmask);
815   }
816#if DEBUG_STORE
817   lp_build_printf(builder, "   # storing end\n");
818#endif
819}
820
821/*
822 * Stores original vertex positions in clip coordinates
823 * There is probably a more efficient way to do this, 4 floats at once
824 * rather than extracting each element one by one.
825 */
826static void
827store_clip(struct gallivm_state *gallivm,
828           LLVMValueRef io_ptr,
829           LLVMValueRef (*outputs)[NUM_CHANNELS])
830{
831   LLVMBuilderRef builder = gallivm->builder;
832   LLVMValueRef out[4];
833   LLVMValueRef indices[2];
834   LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr;
835   LLVMValueRef clip_ptr0, clip_ptr1, clip_ptr2, clip_ptr3;
836   LLVMValueRef clip0_ptr, clip1_ptr, clip2_ptr, clip3_ptr;
837   LLVMValueRef out0elem, out1elem, out2elem, out3elem;
838   int i;
839
840   LLVMValueRef ind0 = lp_build_const_int32(gallivm, 0);
841   LLVMValueRef ind1 = lp_build_const_int32(gallivm, 1);
842   LLVMValueRef ind2 = lp_build_const_int32(gallivm, 2);
843   LLVMValueRef ind3 = lp_build_const_int32(gallivm, 3);
844
845   indices[0] =
846   indices[1] = lp_build_const_int32(gallivm, 0);
847
848   out[0] = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/
849   out[1] = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/
850   out[2] = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/
851   out[3] = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/
852
853   io0_ptr = LLVMBuildGEP(builder, io_ptr, &ind0, 1, "");
854   io1_ptr = LLVMBuildGEP(builder, io_ptr, &ind1, 1, "");
855   io2_ptr = LLVMBuildGEP(builder, io_ptr, &ind2, 1, "");
856   io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, "");
857
858   clip_ptr0 = draw_jit_header_clip(gallivm, io0_ptr);
859   clip_ptr1 = draw_jit_header_clip(gallivm, io1_ptr);
860   clip_ptr2 = draw_jit_header_clip(gallivm, io2_ptr);
861   clip_ptr3 = draw_jit_header_clip(gallivm, io3_ptr);
862
863   for (i = 0; i<4; i++){
864      clip0_ptr = LLVMBuildGEP(builder, clip_ptr0, indices, 2, ""); /* x0 */
865      clip1_ptr = LLVMBuildGEP(builder, clip_ptr1, indices, 2, ""); /* x1 */
866      clip2_ptr = LLVMBuildGEP(builder, clip_ptr2, indices, 2, ""); /* x2 */
867      clip3_ptr = LLVMBuildGEP(builder, clip_ptr3, indices, 2, ""); /* x3 */
868
869      out0elem = LLVMBuildExtractElement(builder, out[i], ind0, ""); /* x0 */
870      out1elem = LLVMBuildExtractElement(builder, out[i], ind1, ""); /* x1 */
871      out2elem = LLVMBuildExtractElement(builder, out[i], ind2, ""); /* x2 */
872      out3elem = LLVMBuildExtractElement(builder, out[i], ind3, ""); /* x3 */
873
874      LLVMBuildStore(builder, out0elem, clip0_ptr);
875      LLVMBuildStore(builder, out1elem, clip1_ptr);
876      LLVMBuildStore(builder, out2elem, clip2_ptr);
877      LLVMBuildStore(builder, out3elem, clip3_ptr);
878
879      indices[1]= LLVMBuildAdd(builder, indices[1], ind1, "");
880   }
881
882}
883
884/* Equivalent of _mm_set1_ps(a)
885 */
886static LLVMValueRef
887vec4f_from_scalar(struct gallivm_state *gallivm,
888                  LLVMValueRef a,
889                  const char *name)
890{
891   LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
892   LLVMValueRef res = LLVMGetUndef(LLVMVectorType(float_type, 4));
893   int i;
894
895   for(i = 0; i < 4; ++i) {
896      LLVMValueRef index = lp_build_const_int32(gallivm, i);
897      res = LLVMBuildInsertElement(gallivm->builder, res, a,
898                                   index, i == 3 ? name : "");
899   }
900
901   return res;
902}
903
904/*
905 * Transforms the outputs for viewport mapping
906 */
907static void
908generate_viewport(struct draw_llvm *llvm,
909                  LLVMBuilderRef builder,
910                  LLVMValueRef (*outputs)[NUM_CHANNELS],
911                  LLVMValueRef context_ptr)
912{
913   int i;
914   struct gallivm_state *gallivm = llvm->gallivm;
915   struct lp_type f32_type = lp_type_float_vec(32);
916   LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/
917   LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0);       /*1.0 1.0 1.0 1.0*/
918   LLVMValueRef vp_ptr = draw_jit_context_viewport(gallivm, context_ptr);
919
920   /* for 1/w convention*/
921   out3 = LLVMBuildFDiv(builder, const1, out3, "");
922   LLVMBuildStore(builder, out3, outputs[0][3]);
923
924   /* Viewport Mapping */
925   for (i=0; i<3; i++){
926      LLVMValueRef out = LLVMBuildLoad(builder, outputs[0][i], ""); /*x0 x1 x2 x3*/
927      LLVMValueRef scale;
928      LLVMValueRef trans;
929      LLVMValueRef scale_i;
930      LLVMValueRef trans_i;
931      LLVMValueRef index;
932
933      index = lp_build_const_int32(gallivm, i);
934      scale_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");
935
936      index = lp_build_const_int32(gallivm, i+4);
937      trans_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");
938
939      scale = vec4f_from_scalar(gallivm, LLVMBuildLoad(builder, scale_i, ""), "scale");
940      trans = vec4f_from_scalar(gallivm, LLVMBuildLoad(builder, trans_i, ""), "trans");
941
942      /* divide by w */
943      out = LLVMBuildFMul(builder, out, out3, "");
944      /* mult by scale */
945      out = LLVMBuildFMul(builder, out, scale, "");
946      /* add translation */
947      out = LLVMBuildFAdd(builder, out, trans, "");
948
949      /* store transformed outputs */
950      LLVMBuildStore(builder, out, outputs[0][i]);
951   }
952
953}
954
955
956/*
957 * Returns clipmask as 4xi32 bitmask for the 4 vertices
958 */
959static LLVMValueRef
960generate_clipmask(struct gallivm_state *gallivm,
961                  LLVMValueRef (*outputs)[NUM_CHANNELS],
962                  boolean clip_xy,
963                  boolean clip_z,
964                  boolean clip_user,
965                  boolean clip_halfz,
966                  unsigned nr,
967                  LLVMValueRef context_ptr)
968{
969   LLVMBuilderRef builder = gallivm->builder;
970   LLVMValueRef mask; /* stores the <4xi32> clipmasks */
971   LLVMValueRef test, temp;
972   LLVMValueRef zero, shift;
973   LLVMValueRef pos_x, pos_y, pos_z, pos_w;
974   LLVMValueRef plane1, planes, plane_ptr, sum;
975
976   unsigned i;
977
978   struct lp_type f32_type = lp_type_float_vec(32);
979
980   mask = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0);
981   temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0);
982   zero = lp_build_const_vec(gallivm, f32_type, 0);                    /* 0.0f 0.0f 0.0f 0.0f */
983   shift = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 1);    /* 1 1 1 1 */
984
985   /* Assuming position stored at output[0] */
986   pos_x = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/
987   pos_y = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/
988   pos_z = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/
989   pos_w = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/
990
991   /* Cliptest, for hardwired planes */
992   if (clip_xy){
993      /* plane 1 */
994      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w);
995      temp = shift;
996      test = LLVMBuildAnd(builder, test, temp, "");
997      mask = test;
998
999      /* plane 2 */
1000      test = LLVMBuildFAdd(builder, pos_x, pos_w, "");
1001      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1002      temp = LLVMBuildShl(builder, temp, shift, "");
1003      test = LLVMBuildAnd(builder, test, temp, "");
1004      mask = LLVMBuildOr(builder, mask, test, "");
1005
1006      /* plane 3 */
1007      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w);
1008      temp = LLVMBuildShl(builder, temp, shift, "");
1009      test = LLVMBuildAnd(builder, test, temp, "");
1010      mask = LLVMBuildOr(builder, mask, test, "");
1011
1012      /* plane 4 */
1013      test = LLVMBuildFAdd(builder, pos_y, pos_w, "");
1014      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1015      temp = LLVMBuildShl(builder, temp, shift, "");
1016      test = LLVMBuildAnd(builder, test, temp, "");
1017      mask = LLVMBuildOr(builder, mask, test, "");
1018   }
1019
1020   if (clip_z){
1021      temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 16);
1022      if (clip_halfz){
1023         /* plane 5 */
1024         test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z);
1025         test = LLVMBuildAnd(builder, test, temp, "");
1026         mask = LLVMBuildOr(builder, mask, test, "");
1027      }
1028      else{
1029         /* plane 5 */
1030         test = LLVMBuildFAdd(builder, pos_z, pos_w, "");
1031         test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1032         test = LLVMBuildAnd(builder, test, temp, "");
1033         mask = LLVMBuildOr(builder, mask, test, "");
1034      }
1035      /* plane 6 */
1036      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w);
1037      temp = LLVMBuildShl(builder, temp, shift, "");
1038      test = LLVMBuildAnd(builder, test, temp, "");
1039      mask = LLVMBuildOr(builder, mask, test, "");
1040   }
1041
1042   if (clip_user){
1043      LLVMValueRef planes_ptr = draw_jit_context_planes(gallivm, context_ptr);
1044      LLVMValueRef indices[3];
1045      temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 32);
1046
1047      /* userclip planes */
1048      for (i = 6; i < nr; i++) {
1049         indices[0] = lp_build_const_int32(gallivm, 0);
1050         indices[1] = lp_build_const_int32(gallivm, i);
1051
1052         indices[2] = lp_build_const_int32(gallivm, 0);
1053         plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
1054         plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x");
1055         planes = vec4f_from_scalar(gallivm, plane1, "plane4_x");
1056         sum = LLVMBuildFMul(builder, planes, pos_x, "");
1057
1058         indices[2] = lp_build_const_int32(gallivm, 1);
1059         plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
1060         plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y");
1061         planes = vec4f_from_scalar(gallivm, plane1, "plane4_y");
1062         test = LLVMBuildFMul(builder, planes, pos_y, "");
1063         sum = LLVMBuildFAdd(builder, sum, test, "");
1064
1065         indices[2] = lp_build_const_int32(gallivm, 2);
1066         plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
1067         plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z");
1068         planes = vec4f_from_scalar(gallivm, plane1, "plane4_z");
1069         test = LLVMBuildFMul(builder, planes, pos_z, "");
1070         sum = LLVMBuildFAdd(builder, sum, test, "");
1071
1072         indices[2] = lp_build_const_int32(gallivm, 3);
1073         plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
1074         plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w");
1075         planes = vec4f_from_scalar(gallivm, plane1, "plane4_w");
1076         test = LLVMBuildFMul(builder, planes, pos_w, "");
1077         sum = LLVMBuildFAdd(builder, sum, test, "");
1078
1079         test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, sum);
1080         temp = LLVMBuildShl(builder, temp, shift, "");
1081         test = LLVMBuildAnd(builder, test, temp, "");
1082         mask = LLVMBuildOr(builder, mask, test, "");
1083      }
1084   }
1085   return mask;
1086}
1087
1088/*
1089 * Returns boolean if any clipping has occurred
1090 * Used zero/non-zero i32 value to represent boolean
1091 */
1092static void
1093clipmask_bool(struct gallivm_state *gallivm,
1094              LLVMValueRef clipmask,
1095              LLVMValueRef ret_ptr)
1096{
1097   LLVMBuilderRef builder = gallivm->builder;
1098   LLVMValueRef ret = LLVMBuildLoad(builder, ret_ptr, "");
1099   LLVMValueRef temp;
1100   int i;
1101
1102   for (i=0; i<4; i++){
1103      temp = LLVMBuildExtractElement(builder, clipmask,
1104                                     lp_build_const_int32(gallivm, i) , "");
1105      ret = LLVMBuildOr(builder, ret, temp, "");
1106   }
1107
1108   LLVMBuildStore(builder, ret, ret_ptr);
1109}
1110
1111static void
1112draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
1113{
1114   struct gallivm_state *gallivm = llvm->gallivm;
1115   LLVMContextRef context = gallivm->context;
1116   LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
1117   LLVMTypeRef arg_types[8];
1118   LLVMTypeRef func_type;
1119   LLVMValueRef context_ptr;
1120   LLVMBasicBlockRef block;
1121   LLVMBuilderRef builder;
1122   LLVMValueRef start, end, count, stride, step, io_itr;
1123   LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
1124   LLVMValueRef instance_id;
1125   LLVMValueRef system_values_array;
1126   struct draw_context *draw = llvm->draw;
1127   const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info;
1128   unsigned i, j;
1129   struct lp_build_context bld;
1130   struct lp_build_loop_state lp_loop;
1131   const int max_vertices = 4;
1132   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
1133   void *code;
1134   struct lp_build_sampler_soa *sampler = 0;
1135   LLVMValueRef ret, ret_ptr;
1136   boolean bypass_viewport = variant->key.bypass_viewport;
1137   boolean enable_cliptest = variant->key.clip_xy ||
1138                             variant->key.clip_z  ||
1139                             variant->key.clip_user;
1140
1141   arg_types[0] = get_context_ptr_type(llvm);       /* context */
1142   arg_types[1] = get_vertex_header_ptr_type(llvm); /* vertex_header */
1143   arg_types[2] = get_buffer_ptr_type(llvm);        /* vbuffers */
1144   arg_types[3] = int32_type;                       /* start */
1145   arg_types[4] = int32_type;                       /* count */
1146   arg_types[5] = int32_type;                       /* stride */
1147   arg_types[6] = get_vb_ptr_type(llvm);            /* pipe_vertex_buffer's */
1148   arg_types[7] = int32_type;                       /* instance_id */
1149
1150   func_type = LLVMFunctionType(int32_type, arg_types, Elements(arg_types), 0);
1151
1152   variant->function = LLVMAddFunction(gallivm->module, "draw_llvm_shader",
1153                                       func_type);
1154   LLVMSetFunctionCallConv(variant->function, LLVMCCallConv);
1155   for(i = 0; i < Elements(arg_types); ++i)
1156      if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
1157         LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute);
1158
1159   context_ptr  = LLVMGetParam(variant->function, 0);
1160   io_ptr       = LLVMGetParam(variant->function, 1);
1161   vbuffers_ptr = LLVMGetParam(variant->function, 2);
1162   start        = LLVMGetParam(variant->function, 3);
1163   count        = LLVMGetParam(variant->function, 4);
1164   stride       = LLVMGetParam(variant->function, 5);
1165   vb_ptr       = LLVMGetParam(variant->function, 6);
1166   instance_id  = LLVMGetParam(variant->function, 7);
1167
1168   lp_build_name(context_ptr, "context");
1169   lp_build_name(io_ptr, "io");
1170   lp_build_name(vbuffers_ptr, "vbuffers");
1171   lp_build_name(start, "start");
1172   lp_build_name(count, "count");
1173   lp_build_name(stride, "stride");
1174   lp_build_name(vb_ptr, "vb");
1175   lp_build_name(instance_id, "instance_id");
1176
1177   /*
1178    * Function body
1179    */
1180
1181   block = LLVMAppendBasicBlockInContext(gallivm->context, variant->function, "entry");
1182   builder = gallivm->builder;
1183   assert(builder);
1184   LLVMPositionBuilderAtEnd(builder, block);
1185
1186   lp_build_context_init(&bld, llvm->gallivm, lp_type_int(32));
1187
1188   system_values_array = lp_build_system_values_array(gallivm, vs_info,
1189                                                      instance_id, NULL);
1190
1191   end = lp_build_add(&bld, start, count);
1192
1193   step = lp_build_const_int32(gallivm, max_vertices);
1194
1195   /* function will return non-zero i32 value if any clipped vertices */
1196   ret_ptr = lp_build_alloca(gallivm, int32_type, "");
1197   LLVMBuildStore(builder, lp_build_const_int32(gallivm, 0), ret_ptr);
1198
1199   /* code generated texture sampling */
1200   sampler = draw_llvm_sampler_soa_create(
1201      draw_llvm_variant_key_samplers(&variant->key),
1202      context_ptr);
1203
1204#if DEBUG_STORE
1205   lp_build_printf(builder, "start = %d, end = %d, step = %d\n",
1206                   start, end, step);
1207#endif
1208   lp_build_loop_begin(&lp_loop, llvm->gallivm, start);
1209   {
1210      LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
1211      LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
1212      LLVMValueRef io;
1213      LLVMValueRef clipmask;   /* holds the clipmask value */
1214      const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
1215
1216      io_itr = LLVMBuildSub(builder, lp_loop.counter, start, "");
1217      io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
1218#if DEBUG_STORE
1219      lp_build_printf(builder, " --- io %d = %p, loop counter %d\n",
1220                      io_itr, io, lp_loop.counter);
1221#endif
1222      for (i = 0; i < NUM_CHANNELS; ++i) {
1223         LLVMValueRef true_index = LLVMBuildAdd(
1224            builder,
1225            lp_loop.counter,
1226            lp_build_const_int32(gallivm, i), "");
1227         for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
1228            struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
1229            LLVMValueRef vb_index = lp_build_const_int32(gallivm, velem->vertex_buffer_index);
1230            LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
1231                                           &vb_index, 1, "");
1232            generate_fetch(llvm->gallivm, vbuffers_ptr,
1233                           &aos_attribs[j][i], velem, vb, true_index,
1234                           instance_id);
1235         }
1236      }
1237      convert_to_soa(gallivm, aos_attribs, inputs,
1238                     draw->pt.nr_vertex_elements);
1239
1240      ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs;
1241      generate_vs(llvm,
1242                  builder,
1243                  outputs,
1244                  ptr_aos,
1245                  system_values_array,
1246                  context_ptr,
1247                  sampler);
1248
1249      /* store original positions in clip before further manipulation */
1250      store_clip(gallivm, io, outputs);
1251
1252      /* do cliptest */
1253      if (enable_cliptest){
1254         /* allocate clipmask, assign it integer type */
1255         clipmask = generate_clipmask(gallivm, outputs,
1256                                      variant->key.clip_xy,
1257                                      variant->key.clip_z,
1258                                      variant->key.clip_user,
1259                                      variant->key.clip_halfz,
1260                                      variant->key.nr_planes,
1261                                      context_ptr);
1262         /* return clipping boolean value for function */
1263         clipmask_bool(gallivm, clipmask, ret_ptr);
1264      }
1265      else{
1266         clipmask = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0);
1267      }
1268
1269      /* do viewport mapping */
1270      if (!bypass_viewport){
1271         generate_viewport(llvm, builder, outputs, context_ptr);
1272      }
1273
1274      /* store clipmask in vertex header and positions in data */
1275      convert_to_aos(gallivm, io, outputs, clipmask,
1276                     vs_info->num_outputs, max_vertices);
1277   }
1278
1279   lp_build_loop_end_cond(&lp_loop, end, step, LLVMIntUGE);
1280
1281   sampler->destroy(sampler);
1282
1283   ret = LLVMBuildLoad(builder, ret_ptr,"");
1284   LLVMBuildRet(builder, ret);
1285
1286   /*
1287    * Translate the LLVM IR into machine code.
1288    */
1289#ifdef DEBUG
1290   if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
1291      lp_debug_dump_value(variant->function);
1292      assert(0);
1293   }
1294#endif
1295
1296   LLVMRunFunctionPassManager(gallivm->passmgr, variant->function);
1297
1298   if (gallivm_debug & GALLIVM_DEBUG_IR) {
1299      lp_debug_dump_value(variant->function);
1300      debug_printf("\n");
1301   }
1302
1303   code = LLVMGetPointerToGlobal(gallivm->engine, variant->function);
1304   variant->jit_func = (draw_jit_vert_func)pointer_to_func(code);
1305
1306   if (gallivm_debug & GALLIVM_DEBUG_ASM) {
1307      lp_disassemble(code);
1308   }
1309   lp_func_delete_body(variant->function);
1310}
1311
1312
1313static void
1314draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
1315{
1316   struct gallivm_state *gallivm = llvm->gallivm;
1317   LLVMContextRef context = gallivm->context;
1318   LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
1319   LLVMTypeRef arg_types[8];
1320   LLVMTypeRef func_type;
1321   LLVMValueRef context_ptr;
1322   LLVMBasicBlockRef block;
1323   LLVMBuilderRef builder;
1324   LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr;
1325   LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
1326   LLVMValueRef instance_id;
1327   LLVMValueRef system_values_array;
1328   struct draw_context *draw = llvm->draw;
1329   const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info;
1330   unsigned i, j;
1331   struct lp_build_context bld;
1332   struct lp_build_loop_state lp_loop;
1333   const int max_vertices = 4;
1334   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
1335   LLVMValueRef fetch_max;
1336   void *code;
1337   struct lp_build_sampler_soa *sampler = 0;
1338   LLVMValueRef ret, ret_ptr;
1339   boolean bypass_viewport = variant->key.bypass_viewport;
1340   boolean enable_cliptest = variant->key.clip_xy ||
1341                             variant->key.clip_z  ||
1342                             variant->key.clip_user;
1343
1344   arg_types[0] = get_context_ptr_type(llvm);           /* context */
1345   arg_types[1] = get_vertex_header_ptr_type(llvm);     /* vertex_header */
1346   arg_types[2] = get_buffer_ptr_type(llvm);            /* vbuffers */
1347   arg_types[3] = LLVMPointerType(int32_type, 0);       /* fetch_elts * */
1348   arg_types[4] = int32_type;                           /* fetch_count */
1349   arg_types[5] = int32_type;                           /* stride */
1350   arg_types[6] = get_vb_ptr_type(llvm);                /* pipe_vertex_buffer's */
1351   arg_types[7] = int32_type;                           /* instance_id */
1352
1353   func_type = LLVMFunctionType(int32_type, arg_types, Elements(arg_types), 0);
1354
1355   variant->function_elts = LLVMAddFunction(gallivm->module, "draw_llvm_shader_elts", func_type);
1356   LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv);
1357   for(i = 0; i < Elements(arg_types); ++i)
1358      if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
1359         LLVMAddAttribute(LLVMGetParam(variant->function_elts, i),
1360                          LLVMNoAliasAttribute);
1361
1362   context_ptr  = LLVMGetParam(variant->function_elts, 0);
1363   io_ptr       = LLVMGetParam(variant->function_elts, 1);
1364   vbuffers_ptr = LLVMGetParam(variant->function_elts, 2);
1365   fetch_elts   = LLVMGetParam(variant->function_elts, 3);
1366   fetch_count  = LLVMGetParam(variant->function_elts, 4);
1367   stride       = LLVMGetParam(variant->function_elts, 5);
1368   vb_ptr       = LLVMGetParam(variant->function_elts, 6);
1369   instance_id  = LLVMGetParam(variant->function_elts, 7);
1370
1371   lp_build_name(context_ptr, "context");
1372   lp_build_name(io_ptr, "io");
1373   lp_build_name(vbuffers_ptr, "vbuffers");
1374   lp_build_name(fetch_elts, "fetch_elts");
1375   lp_build_name(fetch_count, "fetch_count");
1376   lp_build_name(stride, "stride");
1377   lp_build_name(vb_ptr, "vb");
1378   lp_build_name(instance_id, "instance_id");
1379
1380   /*
1381    * Function body
1382    */
1383
1384   block = LLVMAppendBasicBlockInContext(gallivm->context, variant->function_elts, "entry");
1385   builder = gallivm->builder;
1386   LLVMPositionBuilderAtEnd(builder, block);
1387
1388   lp_build_context_init(&bld, gallivm, lp_type_int(32));
1389
1390   system_values_array = lp_build_system_values_array(gallivm, vs_info,
1391                                                      instance_id, NULL);
1392
1393
1394   step = lp_build_const_int32(gallivm, max_vertices);
1395
1396   /* code generated texture sampling */
1397   sampler = draw_llvm_sampler_soa_create(
1398      draw_llvm_variant_key_samplers(&variant->key),
1399      context_ptr);
1400
1401   fetch_max = LLVMBuildSub(builder, fetch_count,
1402                            lp_build_const_int32(gallivm, 1),
1403                            "fetch_max");
1404
1405   /* function returns non-zero i32 value if any clipped vertices */
1406   ret_ptr = lp_build_alloca(gallivm, int32_type, "");
1407   LLVMBuildStore(builder, lp_build_const_int32(gallivm, 0), ret_ptr);
1408
1409   lp_build_loop_begin(&lp_loop, gallivm, lp_build_const_int32(gallivm, 0));
1410   {
1411      LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
1412      LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
1413      LLVMValueRef io;
1414      LLVMValueRef clipmask;   /* holds the clipmask value */
1415      const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
1416
1417      io_itr = lp_loop.counter;
1418      io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
1419#if DEBUG_STORE
1420      lp_build_printf(builder, " --- io %d = %p, loop counter %d\n",
1421                      io_itr, io, lp_loop.counter);
1422#endif
1423      for (i = 0; i < NUM_CHANNELS; ++i) {
1424         LLVMValueRef true_index = LLVMBuildAdd(
1425            builder,
1426            lp_loop.counter,
1427            lp_build_const_int32(gallivm, i), "");
1428         LLVMValueRef fetch_ptr;
1429
1430         /* make sure we're not out of bounds which can happen
1431          * if fetch_count % 4 != 0, because on the last iteration
1432          * a few of the 4 vertex fetches will be out of bounds */
1433         true_index = lp_build_min(&bld, true_index, fetch_max);
1434
1435         fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
1436                                  &true_index, 1, "");
1437         true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt");
1438         for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
1439            struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
1440            LLVMValueRef vb_index = lp_build_const_int32(gallivm, velem->vertex_buffer_index);
1441            LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
1442                                           &vb_index, 1, "");
1443            generate_fetch(gallivm, vbuffers_ptr,
1444                           &aos_attribs[j][i], velem, vb, true_index,
1445                           instance_id);
1446         }
1447      }
1448      convert_to_soa(gallivm, aos_attribs, inputs,
1449                     draw->pt.nr_vertex_elements);
1450
1451      ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs;
1452      generate_vs(llvm,
1453                  builder,
1454                  outputs,
1455                  ptr_aos,
1456                  system_values_array,
1457                  context_ptr,
1458                  sampler);
1459
1460      /* store original positions in clip before further manipulation */
1461      store_clip(gallivm, io, outputs);
1462
1463      /* do cliptest */
1464      if (enable_cliptest){
1465         /* allocate clipmask, assign it integer type */
1466         clipmask = generate_clipmask(gallivm, outputs,
1467                                      variant->key.clip_xy,
1468                                      variant->key.clip_z,
1469                                      variant->key.clip_user,
1470                                      variant->key.clip_halfz,
1471                                      variant->key.nr_planes,
1472                                      context_ptr);
1473         /* return clipping boolean value for function */
1474         clipmask_bool(gallivm, clipmask, ret_ptr);
1475      }
1476      else{
1477         clipmask = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0);
1478      }
1479
1480      /* do viewport mapping */
1481      if (!bypass_viewport){
1482         generate_viewport(llvm, builder, outputs, context_ptr);
1483      }
1484
1485      /* store clipmask in vertex header,
1486       * original positions in clip
1487       * and transformed positions in data
1488       */
1489      convert_to_aos(gallivm, io, outputs, clipmask,
1490                     vs_info->num_outputs, max_vertices);
1491   }
1492
1493   lp_build_loop_end_cond(&lp_loop, fetch_count, step, LLVMIntUGE);
1494
1495   sampler->destroy(sampler);
1496
1497   ret = LLVMBuildLoad(builder, ret_ptr,"");
1498   LLVMBuildRet(builder, ret);
1499
1500   /*
1501    * Translate the LLVM IR into machine code.
1502    */
1503#ifdef DEBUG
1504   if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) {
1505      lp_debug_dump_value(variant->function_elts);
1506      assert(0);
1507   }
1508#endif
1509
1510   LLVMRunFunctionPassManager(gallivm->passmgr, variant->function_elts);
1511
1512   if (gallivm_debug & GALLIVM_DEBUG_IR) {
1513      lp_debug_dump_value(variant->function_elts);
1514      debug_printf("\n");
1515   }
1516
1517   code = LLVMGetPointerToGlobal(gallivm->engine, variant->function_elts);
1518   variant->jit_func_elts = (draw_jit_vert_func_elts)pointer_to_func(code);
1519
1520   if (gallivm_debug & GALLIVM_DEBUG_ASM) {
1521      lp_disassemble(code);
1522   }
1523   lp_func_delete_body(variant->function_elts);
1524}
1525
1526
1527struct draw_llvm_variant_key *
1528draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
1529{
1530   unsigned i;
1531   struct draw_llvm_variant_key *key;
1532   struct lp_sampler_static_state *sampler;
1533
1534   key = (struct draw_llvm_variant_key *)store;
1535
1536   /* Presumably all variants of the shader should have the same
1537    * number of vertex elements - ie the number of shader inputs.
1538    */
1539   key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements;
1540
1541   /* will have to rig this up properly later */
1542   key->clip_xy = llvm->draw->clip_xy;
1543   key->clip_z = llvm->draw->clip_z;
1544   key->clip_user = llvm->draw->clip_user;
1545   key->bypass_viewport = llvm->draw->identity_viewport;
1546   key->clip_halfz = !llvm->draw->rasterizer->gl_rasterization_rules;
1547   key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
1548   key->nr_planes = llvm->draw->nr_planes;
1549   key->pad = 0;
1550
1551   /* All variants of this shader will have the same value for
1552    * nr_samplers.  Not yet trying to compact away holes in the
1553    * sampler array.
1554    */
1555   key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
1556
1557   sampler = draw_llvm_variant_key_samplers(key);
1558
1559   memcpy(key->vertex_element,
1560          llvm->draw->pt.vertex_element,
1561          sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
1562
1563   memset(sampler, 0, key->nr_samplers * sizeof *sampler);
1564
1565   for (i = 0 ; i < key->nr_samplers; i++) {
1566      lp_sampler_static_state(&sampler[i],
1567			      llvm->draw->sampler_views[i],
1568			      llvm->draw->samplers[i]);
1569   }
1570
1571   return key;
1572}
1573
1574void
1575draw_llvm_set_mapped_texture(struct draw_context *draw,
1576                             unsigned sampler_idx,
1577                             uint32_t width, uint32_t height, uint32_t depth,
1578                             uint32_t last_level,
1579                             uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
1580                             uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
1581                             const void *data[PIPE_MAX_TEXTURE_LEVELS])
1582{
1583   unsigned j;
1584   struct draw_jit_texture *jit_tex;
1585
1586   assert(sampler_idx < PIPE_MAX_VERTEX_SAMPLERS);
1587
1588
1589   jit_tex = &draw->llvm->jit_context.textures[sampler_idx];
1590
1591   jit_tex->width = width;
1592   jit_tex->height = height;
1593   jit_tex->depth = depth;
1594   jit_tex->last_level = last_level;
1595
1596   for (j = 0; j <= last_level; j++) {
1597      jit_tex->data[j] = data[j];
1598      jit_tex->row_stride[j] = row_stride[j];
1599      jit_tex->img_stride[j] = img_stride[j];
1600   }
1601}
1602
1603
1604void
1605draw_llvm_set_sampler_state(struct draw_context *draw)
1606{
1607   unsigned i;
1608
1609   for (i = 0; i < draw->num_samplers; i++) {
1610      struct draw_jit_texture *jit_tex = &draw->llvm->jit_context.textures[i];
1611
1612      if (draw->samplers[i]) {
1613         jit_tex->min_lod = draw->samplers[i]->min_lod;
1614         jit_tex->max_lod = draw->samplers[i]->max_lod;
1615         jit_tex->lod_bias = draw->samplers[i]->lod_bias;
1616         COPY_4V(jit_tex->border_color, draw->samplers[i]->border_color);
1617      }
1618   }
1619}
1620
1621
1622void
1623draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
1624{
1625   struct draw_llvm *llvm = variant->llvm;
1626
1627   if (variant->function_elts) {
1628      LLVMFreeMachineCodeForFunction(llvm->gallivm->engine,
1629                                     variant->function_elts);
1630      LLVMDeleteFunction(variant->function_elts);
1631   }
1632
1633   if (variant->function) {
1634      LLVMFreeMachineCodeForFunction(llvm->gallivm->engine,
1635                                     variant->function);
1636      LLVMDeleteFunction(variant->function);
1637   }
1638
1639   remove_from_list(&variant->list_item_local);
1640   variant->shader->variants_cached--;
1641   remove_from_list(&variant->list_item_global);
1642   llvm->nr_variants--;
1643   FREE(variant);
1644}
1645