lp_bld_tgsi_soa.c revision efc82aef35a2aac5d2ed9774f6d28f2626796416
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29/**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39#include "pipe/p_config.h"
40#include "pipe/p_shader_tokens.h"
41#include "util/u_debug.h"
42#include "util/u_math.h"
43#include "util/u_memory.h"
44#include "tgsi/tgsi_dump.h"
45#include "tgsi/tgsi_info.h"
46#include "tgsi/tgsi_parse.h"
47#include "tgsi/tgsi_util.h"
48#include "tgsi/tgsi_scan.h"
49#include "lp_bld_type.h"
50#include "lp_bld_const.h"
51#include "lp_bld_arit.h"
52#include "lp_bld_bitarit.h"
53#include "lp_bld_gather.h"
54#include "lp_bld_init.h"
55#include "lp_bld_logic.h"
56#include "lp_bld_swizzle.h"
57#include "lp_bld_flow.h"
58#include "lp_bld_quad.h"
59#include "lp_bld_tgsi.h"
60#include "lp_bld_limits.h"
61#include "lp_bld_debug.h"
62#include "lp_bld_printf.h"
63
64
/*
 * Channel indices of a register (X, Y, Z, W) and the number of channels.
 */
#define NUM_CHANNELS 4
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/*
 * Not referenced in the visible part of this file; presumably an
 * instruction-count constant for the instructions array -- confirm against
 * its users.
 */
#define LP_MAX_INSTRUCTIONS 256

/* Loop header: iterate CHAN over 0 .. NUM_CHANNELS - 1. */
#define FOR_EACH_CHANNEL( CHAN ) \
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Non-zero when bit CHAN is set in the write mask of INST's destination 0. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

/*
 * 'if' header guarding the statement that follows.  Because it expands to a
 * bare 'if', a trailing 'else' would bind to it.
 */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Loop over the channels that destination 0 of INST actually writes. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN ) \
   FOR_EACH_CHANNEL( CHAN ) \
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
85
86
87struct lp_exec_mask {
88   struct lp_build_context *bld;
89
90   boolean has_mask;
91
92   LLVMTypeRef int_vec_type;
93
94   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
95   int cond_stack_size;
96   LLVMValueRef cond_mask;
97
98   LLVMBasicBlockRef loop_block;
99   LLVMValueRef cont_mask;
100   LLVMValueRef break_mask;
101   LLVMValueRef break_var;
102   struct {
103      LLVMBasicBlockRef loop_block;
104      LLVMValueRef cont_mask;
105      LLVMValueRef break_mask;
106      LLVMValueRef break_var;
107   } loop_stack[LP_MAX_TGSI_NESTING];
108   int loop_stack_size;
109
110   LLVMValueRef ret_mask;
111   struct {
112      int pc;
113      LLVMValueRef ret_mask;
114   } call_stack[LP_MAX_TGSI_NESTING];
115   int call_stack_size;
116
117   LLVMValueRef exec_mask;
118};
119
120struct lp_build_tgsi_soa_context
121{
122   struct lp_build_context base;
123
124   /* Builder for vector integer masks and indices */
125   struct lp_build_context uint_bld;
126
127   /* Builder for scalar elements of shader's data type (float) */
128   struct lp_build_context elem_bld;
129
130   LLVMValueRef consts_ptr;
131   const LLVMValueRef *pos;
132   const LLVMValueRef (*inputs)[NUM_CHANNELS];
133   LLVMValueRef (*outputs)[NUM_CHANNELS];
134
135   const struct lp_build_sampler_soa *sampler;
136
137   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
138   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
139   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
140   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];
141
142   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
143    * set in the indirect_files field.
144    * The temps[] array above is unused then.
145    */
146   LLVMValueRef temps_array;
147
148   /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is
149    * set in the indirect_files field.
150    * The outputs[] array above is unused then.
151    */
152   LLVMValueRef outputs_array;
153
154   /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is
155    * set in the indirect_files field.
156    * The inputs[] array above is unused then.
157    */
158   LLVMValueRef inputs_array;
159
160   const struct tgsi_shader_info *info;
161   /** bitmask indicating which register files are accessed indirectly */
162   unsigned indirect_files;
163
164   struct lp_build_mask_context *mask;
165   struct lp_exec_mask exec_mask;
166
167   struct tgsi_full_instruction *instructions;
168   uint max_instructions;
169};
170
171static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
172{
173   mask->bld = bld;
174   mask->has_mask = FALSE;
175   mask->cond_stack_size = 0;
176   mask->loop_stack_size = 0;
177   mask->call_stack_size = 0;
178
179   mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
180   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
181         LLVMConstAllOnes(mask->int_vec_type);
182}
183
184static void lp_exec_mask_update(struct lp_exec_mask *mask)
185{
186   if (mask->loop_stack_size) {
187      /*for loops we need to update the entire mask at runtime */
188      LLVMValueRef tmp;
189      assert(mask->break_mask);
190      tmp = LLVMBuildAnd(mask->bld->builder,
191                         mask->cont_mask,
192                         mask->break_mask,
193                         "maskcb");
194      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
195                                     mask->cond_mask,
196                                     tmp,
197                                     "maskfull");
198   } else
199      mask->exec_mask = mask->cond_mask;
200
201   if (mask->call_stack_size) {
202      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
203                                     mask->exec_mask,
204                                     mask->ret_mask,
205                                     "callmask");
206   }
207
208   mask->has_mask = (mask->cond_stack_size > 0 ||
209                     mask->loop_stack_size > 0 ||
210                     mask->call_stack_size > 0);
211}
212
213static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
214                                   LLVMValueRef val)
215{
216   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
217   if (mask->cond_stack_size == 0) {
218      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
219   }
220   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
221   assert(LLVMTypeOf(val) == mask->int_vec_type);
222   mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
223                                  mask->cond_mask,
224                                  val,
225                                  "");
226   lp_exec_mask_update(mask);
227}
228
229static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
230{
231   LLVMValueRef prev_mask;
232   LLVMValueRef inv_mask;
233
234   assert(mask->cond_stack_size);
235   prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
236   if (mask->cond_stack_size == 1) {
237      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
238   }
239
240   inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");
241
242   mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
243                                  inv_mask,
244                                  prev_mask, "");
245   lp_exec_mask_update(mask);
246}
247
248static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
249{
250   assert(mask->cond_stack_size);
251   mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
252   lp_exec_mask_update(mask);
253}
254
255static void lp_exec_bgnloop(struct lp_exec_mask *mask)
256{
257   if (mask->loop_stack_size == 0) {
258      assert(mask->loop_block == NULL);
259      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
260      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
261      assert(mask->break_var == NULL);
262   }
263
264   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
265
266   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
267   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
268   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
269   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
270   ++mask->loop_stack_size;
271
272   mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
273   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
274
275   mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
276   LLVMBuildBr(mask->bld->builder, mask->loop_block);
277   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
278
279   mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");
280
281   lp_exec_mask_update(mask);
282}
283
284static void lp_exec_break(struct lp_exec_mask *mask)
285{
286   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
287                                         mask->exec_mask,
288                                         "break");
289
290   mask->break_mask = LLVMBuildAnd(mask->bld->builder,
291                                   mask->break_mask,
292                                   exec_mask, "break_full");
293
294   lp_exec_mask_update(mask);
295}
296
297static void lp_exec_continue(struct lp_exec_mask *mask)
298{
299   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
300                                         mask->exec_mask,
301                                         "");
302
303   mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
304                                  mask->cont_mask,
305                                  exec_mask, "");
306
307   lp_exec_mask_update(mask);
308}
309
310
311static void lp_exec_endloop(struct gallivm_state *gallivm,
312                            struct lp_exec_mask *mask)
313{
314   LLVMBasicBlockRef endloop;
315   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
316                                               mask->bld->type.width *
317                                               mask->bld->type.length);
318   LLVMValueRef i1cond;
319
320   assert(mask->break_mask);
321
322   /*
323    * Restore the cont_mask, but don't pop
324    */
325   assert(mask->loop_stack_size);
326   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
327   lp_exec_mask_update(mask);
328
329   /*
330    * Unlike the continue mask, the break_mask must be preserved across loop
331    * iterations
332    */
333   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
334
335   /* i1cond = (mask == 0) */
336   i1cond = LLVMBuildICmp(
337      mask->bld->builder,
338      LLVMIntNE,
339      LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
340      LLVMConstNull(reg_type), "");
341
342   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
343
344   LLVMBuildCondBr(mask->bld->builder,
345                   i1cond, mask->loop_block, endloop);
346
347   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);
348
349   assert(mask->loop_stack_size);
350   --mask->loop_stack_size;
351   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
352   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
353   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
354   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
355
356   lp_exec_mask_update(mask);
357}
358
359/* stores val into an address pointed to by dst.
360 * mask->exec_mask is used to figure out which bits of val
361 * should be stored into the address
362 * (0 means don't store this bit, 1 means do store).
363 */
364static void lp_exec_mask_store(struct lp_exec_mask *mask,
365                               LLVMValueRef pred,
366                               LLVMValueRef val,
367                               LLVMValueRef dst)
368{
369   /* Mix the predicate and execution mask */
370   if (mask->has_mask) {
371      if (pred) {
372         pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
373      } else {
374         pred = mask->exec_mask;
375      }
376   }
377
378   if (pred) {
379      LLVMValueRef real_val, dst_val;
380
381      dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
382      real_val = lp_build_select(mask->bld,
383                                 pred,
384                                 val, dst_val);
385
386      LLVMBuildStore(mask->bld->builder, real_val, dst);
387   } else
388      LLVMBuildStore(mask->bld->builder, val, dst);
389}
390
391static void lp_exec_mask_call(struct lp_exec_mask *mask,
392                              int func,
393                              int *pc)
394{
395   assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
396   mask->call_stack[mask->call_stack_size].pc = *pc;
397   mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
398   mask->call_stack_size++;
399   *pc = func;
400}
401
402static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
403{
404   LLVMValueRef exec_mask;
405
406   if (mask->call_stack_size == 0) {
407      /* returning from main() */
408      *pc = -1;
409      return;
410   }
411   exec_mask = LLVMBuildNot(mask->bld->builder,
412                            mask->exec_mask,
413                            "ret");
414
415   mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
416                                 mask->ret_mask,
417                                 exec_mask, "ret_full");
418
419   lp_exec_mask_update(mask);
420}
421
/**
 * Hook for the start of a subroutine body.  Intentionally does nothing.
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
   (void) mask;
}
425
426static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
427{
428   assert(mask->call_stack_size);
429   mask->call_stack_size--;
430   *pc = mask->call_stack[mask->call_stack_size].pc;
431   mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
432   lp_exec_mask_update(mask);
433}
434
435
436/**
437 * Return pointer to a temporary register channel (src or dest).
438 * Note that indirect addressing cannot be handled here.
439 * \param index  which temporary register
440 * \param chan  which channel of the temp register.
441 */
442static LLVMValueRef
443get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
444             unsigned index,
445             unsigned chan)
446{
447   assert(chan < 4);
448   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
449      LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm, index * 4 + chan);
450      return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
451   }
452   else {
453      return bld->temps[index][chan];
454   }
455}
456
457/**
458 * Return pointer to a output register channel (src or dest).
459 * Note that indirect addressing cannot be handled here.
460 * \param index  which output register
461 * \param chan  which channel of the output register.
462 */
463static LLVMValueRef
464get_output_ptr(struct lp_build_tgsi_soa_context *bld,
465               unsigned index,
466               unsigned chan)
467{
468   assert(chan < 4);
469   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
470      LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm,
471                                                 index * 4 + chan);
472      return LLVMBuildGEP(bld->base.builder, bld->outputs_array, &lindex, 1, "");
473   }
474   else {
475      return bld->outputs[index][chan];
476   }
477}
478
479/**
480 * Gather vector.
481 * XXX the lp_build_gather() function should be capable of doing this
482 * with a little work.
483 */
484static LLVMValueRef
485build_gather(struct lp_build_tgsi_soa_context *bld,
486             LLVMValueRef base_ptr,
487             LLVMValueRef indexes)
488{
489   LLVMValueRef res = bld->base.undef;
490   unsigned i;
491
492   /*
493    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
494    */
495   for (i = 0; i < bld->base.type.length; i++) {
496      LLVMValueRef ii = lp_build_const_int32(bld->base.gallivm, i);
497      LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder,
498                                                   indexes, ii, "");
499      LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr,
500                                             &index, 1, "gather_ptr");
501      LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
502
503      res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, "");
504   }
505
506   return res;
507}
508
509
510/**
511 * Scatter/store vector.
512 */
513static void
514emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
515                  LLVMValueRef base_ptr,
516                  LLVMValueRef indexes,
517                  LLVMValueRef values,
518                  struct lp_exec_mask *mask,
519                  LLVMValueRef pred)
520{
521   struct gallivm_state *gallivm = bld->base.gallivm;
522   LLVMBuilderRef builder = bld->base.builder;
523   unsigned i;
524
525   /* Mix the predicate and execution mask */
526   if (mask->has_mask) {
527      if (pred) {
528         pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
529      }
530      else {
531         pred = mask->exec_mask;
532      }
533   }
534
535   /*
536    * Loop over elements of index_vec, store scalar value.
537    */
538   for (i = 0; i < bld->base.type.length; i++) {
539      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
540      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
541      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
542      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
543      LLVMValueRef scalar_pred = pred ?
544         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
545
546      if (0)
547         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
548                         ii, val, index, scalar_ptr);
549
550      if (scalar_pred) {
551         LLVMValueRef real_val, dst_val;
552         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
553         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
554         LLVMBuildStore(builder, real_val, scalar_ptr);
555      }
556      else {
557         LLVMBuildStore(builder, val, scalar_ptr);
558      }
559   }
560}
561
562
563/**
564 * Read the current value of the ADDR register, convert the floats to
565 * ints, add the base index and return the vector of offsets.
566 * The offsets will be used to index into the constant buffer or
567 * temporary register file.
568 */
569static LLVMValueRef
570get_indirect_index(struct lp_build_tgsi_soa_context *bld,
571                   unsigned reg_file, unsigned reg_index,
572                   const struct tgsi_src_register *indirect_reg)
573{
574   struct lp_build_context *uint_bld = &bld->uint_bld;
575   /* always use X component of address register */
576   unsigned swizzle = indirect_reg->SwizzleX;
577   LLVMValueRef base;
578   LLVMValueRef rel;
579   LLVMValueRef max_index;
580   LLVMValueRef index;
581
582   assert(bld->indirect_files & (1 << reg_file));
583
584   base = lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, reg_index);
585
586   assert(swizzle < 4);
587   rel = LLVMBuildLoad(bld->base.builder,
588                        bld->addr[indirect_reg->Index][swizzle],
589                        "load addr reg");
590
591   /* for indexing we want integers */
592   rel = LLVMBuildFPToSI(bld->base.builder,
593                         rel,
594                         uint_bld->vec_type, "");
595
596   index = lp_build_add(uint_bld, base, rel);
597
598   max_index = lp_build_const_int_vec(bld->base.gallivm,
599                                      uint_bld->type,
600                                      bld->info->file_max[reg_file]);
601
602   assert(!uint_bld->type.sign);
603   index = lp_build_min(uint_bld, index, max_index);
604
605   return index;
606}
607
608
609/**
610 * Register fetch.
611 */
612static LLVMValueRef
613emit_fetch(
614   struct lp_build_tgsi_soa_context *bld,
615   const struct tgsi_full_instruction *inst,
616   unsigned src_op,
617   const unsigned chan_index )
618{
619   struct gallivm_state *gallivm = bld->base.gallivm;
620   struct lp_build_context *uint_bld = &bld->uint_bld;
621   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
622   const unsigned swizzle =
623      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
624   LLVMValueRef res;
625   LLVMValueRef indirect_index = NULL;
626
627   if (swizzle > 3) {
628      assert(0 && "invalid swizzle in emit_fetch()");
629      return bld->base.undef;
630   }
631
632   if (reg->Register.Indirect) {
633      indirect_index = get_indirect_index(bld,
634                                          reg->Register.File,
635                                          reg->Register.Index,
636                                          &reg->Indirect);
637   } else {
638      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
639   }
640
641   switch (reg->Register.File) {
642   case TGSI_FILE_CONSTANT:
643      if (reg->Register.Indirect) {
644         LLVMValueRef swizzle_vec =
645            lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle);
646         LLVMValueRef index_vec;  /* index into the const buffer */
647
648         /* index_vec = indirect_index * 4 + swizzle */
649         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
650         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
651
652         /* Gather values from the constant buffer */
653         res = build_gather(bld, bld->consts_ptr, index_vec);
654      }
655      else {
656         LLVMValueRef index;  /* index into the const buffer */
657         LLVMValueRef scalar, scalar_ptr;
658
659         index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);
660
661         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
662                                   &index, 1, "");
663         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
664
665         res = lp_build_broadcast_scalar(&bld->base, scalar);
666      }
667      break;
668
669   case TGSI_FILE_IMMEDIATE:
670      res = bld->immediates[reg->Register.Index][swizzle];
671      assert(res);
672      break;
673
674   case TGSI_FILE_INPUT:
675      if (reg->Register.Indirect) {
676         LLVMValueRef swizzle_vec =
677            lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
678         LLVMValueRef length_vec =
679            lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length);
680         LLVMValueRef index_vec;  /* index into the const buffer */
681         LLVMValueRef inputs_array;
682         LLVMTypeRef float4_ptr_type;
683
684         /* index_vec = (indirect_index * 4 + swizzle) * length */
685         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
686         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
687         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
688
689         /* cast inputs_array pointer to float* */
690         float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
691         inputs_array = LLVMBuildBitCast(uint_bld->builder, bld->inputs_array,
692                                        float4_ptr_type, "");
693
694         /* Gather values from the temporary register array */
695         res = build_gather(bld, inputs_array, index_vec);
696      } else {
697         if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
698            LLVMValueRef lindex = lp_build_const_int32(gallivm,
699                                           reg->Register.Index * 4 + swizzle);
700            LLVMValueRef input_ptr =  LLVMBuildGEP(bld->base.builder,
701                                                   bld->inputs_array, &lindex, 1, "");
702            res = LLVMBuildLoad(bld->base.builder, input_ptr, "");
703         }
704         else {
705            res = bld->inputs[reg->Register.Index][swizzle];
706         }
707      }
708      assert(res);
709      break;
710
711   case TGSI_FILE_TEMPORARY:
712      if (reg->Register.Indirect) {
713         LLVMValueRef swizzle_vec =
714            lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle);
715         LLVMValueRef length_vec =
716            lp_build_const_int_vec(bld->base.gallivm, uint_bld->type,
717                                   bld->base.type.length);
718         LLVMValueRef index_vec;  /* index into the const buffer */
719         LLVMValueRef temps_array;
720         LLVMTypeRef float4_ptr_type;
721
722         /* index_vec = (indirect_index * 4 + swizzle) * length */
723         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
724         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
725         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
726
727         /* cast temps_array pointer to float* */
728         float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->base.gallivm->context), 0);
729         temps_array = LLVMBuildBitCast(uint_bld->builder, bld->temps_array,
730                                        float4_ptr_type, "");
731
732         /* Gather values from the temporary register array */
733         res = build_gather(bld, temps_array, index_vec);
734      }
735      else {
736         LLVMValueRef temp_ptr;
737         temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
738         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
739         if (!res)
740            return bld->base.undef;
741      }
742      break;
743
744   default:
745      assert(0 && "invalid src register in emit_fetch()");
746      return bld->base.undef;
747   }
748
749   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
750   case TGSI_UTIL_SIGN_CLEAR:
751      res = lp_build_abs( &bld->base, res );
752      break;
753
754   case TGSI_UTIL_SIGN_SET:
755      res = lp_build_abs( &bld->base, res );
756      /* fall through */
757   case TGSI_UTIL_SIGN_TOGGLE:
758      res = lp_build_negate( &bld->base, res );
759      break;
760
761   case TGSI_UTIL_SIGN_KEEP:
762      break;
763   }
764
765   return res;
766}
767
768
769/**
770 * Register fetch with derivatives.
771 */
772static void
773emit_fetch_deriv(
774   struct lp_build_tgsi_soa_context *bld,
775   const struct tgsi_full_instruction *inst,
776   unsigned index,
777   const unsigned chan_index,
778   LLVMValueRef *res,
779   LLVMValueRef *ddx,
780   LLVMValueRef *ddy)
781{
782   LLVMValueRef src;
783
784   src = emit_fetch(bld, inst, index, chan_index);
785
786   if(res)
787      *res = src;
788
789   /* TODO: use interpolation coeffs for inputs */
790
791   if(ddx)
792      *ddx = lp_build_ddx(&bld->base, src);
793
794   if(ddy)
795      *ddy = lp_build_ddy(&bld->base, src);
796}
797
798
799/**
800 * Predicate.
801 */
802static void
803emit_fetch_predicate(
804   struct lp_build_tgsi_soa_context *bld,
805   const struct tgsi_full_instruction *inst,
806   LLVMValueRef *pred)
807{
808   unsigned index;
809   unsigned char swizzles[4];
810   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
811   LLVMValueRef value;
812   unsigned chan;
813
814   if (!inst->Instruction.Predicate) {
815      FOR_EACH_CHANNEL( chan ) {
816         pred[chan] = NULL;
817      }
818      return;
819   }
820
821   swizzles[0] = inst->Predicate.SwizzleX;
822   swizzles[1] = inst->Predicate.SwizzleY;
823   swizzles[2] = inst->Predicate.SwizzleZ;
824   swizzles[3] = inst->Predicate.SwizzleW;
825
826   index = inst->Predicate.Index;
827   assert(index < LP_MAX_TGSI_PREDS);
828
829   FOR_EACH_CHANNEL( chan ) {
830      unsigned swizzle = swizzles[chan];
831
832      /*
833       * Only fetch the predicate register channels that are actually listed
834       * in the swizzles
835       */
836      if (!unswizzled[swizzle]) {
837         value = LLVMBuildLoad(bld->base.builder,
838                               bld->preds[index][swizzle], "");
839
840         /*
841          * Convert the value to an integer mask.
842          *
843          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
844          * is needlessly causing two comparisons due to storing the intermediate
845          * result as float vector instead of an integer mask vector.
846          */
847         value = lp_build_compare(bld->base.gallivm,
848                                  bld->base.type,
849                                  PIPE_FUNC_NOTEQUAL,
850                                  value,
851                                  bld->base.zero);
852         if (inst->Predicate.Negate) {
853            value = LLVMBuildNot(bld->base.builder, value, "");
854         }
855
856         unswizzled[swizzle] = value;
857      } else {
858         value = unswizzled[swizzle];
859      }
860
861      pred[chan] = value;
862   }
863}
864
865
866/**
867 * Register store.
868 */
869static void
870emit_store(
871   struct lp_build_tgsi_soa_context *bld,
872   const struct tgsi_full_instruction *inst,
873   unsigned index,
874   unsigned chan_index,
875   LLVMValueRef pred,
876   LLVMValueRef value)
877{
878   struct gallivm_state *gallivm = bld->base.gallivm;
879   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
880   struct lp_build_context *uint_bld = &bld->uint_bld;
881   LLVMValueRef indirect_index = NULL;
882
883   switch( inst->Instruction.Saturate ) {
884   case TGSI_SAT_NONE:
885      break;
886
887   case TGSI_SAT_ZERO_ONE:
888      value = lp_build_max(&bld->base, value, bld->base.zero);
889      value = lp_build_min(&bld->base, value, bld->base.one);
890      break;
891
892   case TGSI_SAT_MINUS_PLUS_ONE:
893      value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0));
894      value = lp_build_min(&bld->base, value, bld->base.one);
895      break;
896
897   default:
898      assert(0);
899   }
900
901   if (reg->Register.Indirect) {
902      indirect_index = get_indirect_index(bld,
903                                          reg->Register.File,
904                                          reg->Register.Index,
905                                          &reg->Indirect);
906   } else {
907      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
908   }
909
910   switch( reg->Register.File ) {
911   case TGSI_FILE_OUTPUT:
912      if (reg->Register.Indirect) {
913         LLVMBuilderRef builder = bld->base.builder;
914         LLVMValueRef chan_vec =
915            lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
916         LLVMValueRef length_vec =
917            lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length);
918         LLVMValueRef index_vec;  /* indexes into the temp registers */
919         LLVMValueRef outputs_array;
920         LLVMValueRef pixel_offsets;
921         LLVMTypeRef float_ptr_type;
922         int i;
923
924         /* build pixel offset vector: {0, 1, 2, 3, ...} */
925         pixel_offsets = uint_bld->undef;
926         for (i = 0; i < bld->base.type.length; i++) {
927            LLVMValueRef ii = lp_build_const_int32(gallivm, i);
928            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
929                                                   ii, ii, "");
930         }
931
932         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
933         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
934         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
935         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
936         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
937
938         float_ptr_type =
939            LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
940         outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
941                                          float_ptr_type, "");
942
943         /* Scatter store values into temp registers */
944         emit_mask_scatter(bld, outputs_array, index_vec, value,
945                           &bld->exec_mask, pred);
946      }
947      else {
948         LLVMValueRef out_ptr = get_output_ptr(bld, reg->Register.Index,
949                                               chan_index);
950         lp_exec_mask_store(&bld->exec_mask, pred, value, out_ptr);
951      }
952      break;
953
954   case TGSI_FILE_TEMPORARY:
955      if (reg->Register.Indirect) {
956         LLVMBuilderRef builder = bld->base.builder;
957         LLVMValueRef chan_vec =
958            lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
959         LLVMValueRef length_vec =
960            lp_build_const_int_vec(gallivm, uint_bld->type,
961                                   bld->base.type.length);
962         LLVMValueRef index_vec;  /* indexes into the temp registers */
963         LLVMValueRef temps_array;
964         LLVMValueRef pixel_offsets;
965         LLVMTypeRef float_ptr_type;
966         int i;
967
968         /* build pixel offset vector: {0, 1, 2, 3, ...} */
969         pixel_offsets = uint_bld->undef;
970         for (i = 0; i < bld->base.type.length; i++) {
971            LLVMValueRef ii = lp_build_const_int32(gallivm, i);
972            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
973                                                   ii, ii, "");
974         }
975
976         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
977         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
978         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
979         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
980         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
981
982         float_ptr_type =
983            LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
984         temps_array = LLVMBuildBitCast(builder, bld->temps_array,
985                                        float_ptr_type, "");
986
987         /* Scatter store values into temp registers */
988         emit_mask_scatter(bld, temps_array, index_vec, value,
989                           &bld->exec_mask, pred);
990      }
991      else {
992         LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
993                                              chan_index);
994         lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
995      }
996      break;
997
998   case TGSI_FILE_ADDRESS:
999      lp_exec_mask_store(&bld->exec_mask, pred, value,
1000                         bld->addr[reg->Register.Index][chan_index]);
1001      break;
1002
1003   case TGSI_FILE_PREDICATE:
1004      lp_exec_mask_store(&bld->exec_mask, pred, value,
1005                         bld->preds[reg->Register.Index][chan_index]);
1006      break;
1007
1008   default:
1009      assert( 0 );
1010   }
1011}
1012
1013
1014/**
1015 * High-level instruction translators.
1016 */
1017
1018static void
1019emit_tex( struct lp_build_tgsi_soa_context *bld,
1020          const struct tgsi_full_instruction *inst,
1021          enum lp_build_tex_modifier modifier,
1022          LLVMValueRef *texel)
1023{
1024   unsigned unit;
1025   LLVMValueRef lod_bias, explicit_lod;
1026   LLVMValueRef oow = NULL;
1027   LLVMValueRef coords[3];
1028   LLVMValueRef ddx[3];
1029   LLVMValueRef ddy[3];
1030   unsigned num_coords;
1031   unsigned i;
1032
1033   if (!bld->sampler) {
1034      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1035      for (i = 0; i < 4; i++) {
1036         texel[i] = bld->base.undef;
1037      }
1038      return;
1039   }
1040
1041   switch (inst->Texture.Texture) {
1042   case TGSI_TEXTURE_1D:
1043      num_coords = 1;
1044      break;
1045   case TGSI_TEXTURE_2D:
1046   case TGSI_TEXTURE_RECT:
1047      num_coords = 2;
1048      break;
1049   case TGSI_TEXTURE_SHADOW1D:
1050   case TGSI_TEXTURE_SHADOW2D:
1051   case TGSI_TEXTURE_SHADOWRECT:
1052   case TGSI_TEXTURE_3D:
1053   case TGSI_TEXTURE_CUBE:
1054      num_coords = 3;
1055      break;
1056   default:
1057      assert(0);
1058      return;
1059   }
1060
1061   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1062      lod_bias = emit_fetch( bld, inst, 0, 3 );
1063      explicit_lod = NULL;
1064   }
1065   else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1066      lod_bias = NULL;
1067      explicit_lod = emit_fetch( bld, inst, 0, 3 );
1068   }
1069   else {
1070      lod_bias = NULL;
1071      explicit_lod = NULL;
1072   }
1073
1074   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
1075      oow = emit_fetch( bld, inst, 0, 3 );
1076      oow = lp_build_rcp(&bld->base, oow);
1077   }
1078
1079   for (i = 0; i < num_coords; i++) {
1080      coords[i] = emit_fetch( bld, inst, 0, i );
1081      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1082         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
1083   }
1084   for (i = num_coords; i < 3; i++) {
1085      coords[i] = bld->base.undef;
1086   }
1087
1088   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1089      LLVMValueRef index0 = lp_build_const_int32(bld->base.gallivm, 0);
1090      for (i = 0; i < num_coords; i++) {
1091         LLVMValueRef src1 = emit_fetch( bld, inst, 1, i );
1092         LLVMValueRef src2 = emit_fetch( bld, inst, 2, i );
1093         ddx[i] = LLVMBuildExtractElement(bld->base.builder, src1, index0, "");
1094         ddy[i] = LLVMBuildExtractElement(bld->base.builder, src2, index0, "");
1095      }
1096      unit = inst->Src[3].Register.Index;
1097   }  else {
1098      for (i = 0; i < num_coords; i++) {
1099         ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] );
1100         ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] );
1101      }
1102      unit = inst->Src[1].Register.Index;
1103   }
1104   for (i = num_coords; i < 3; i++) {
1105      ddx[i] = LLVMGetUndef(bld->base.elem_type);
1106      ddy[i] = LLVMGetUndef(bld->base.elem_type);
1107   }
1108
1109   bld->sampler->emit_fetch_texel(bld->sampler,
1110                                  bld->base.gallivm,
1111                                  bld->base.type,
1112                                  unit, num_coords, coords,
1113                                  ddx, ddy,
1114                                  lod_bias, explicit_lod,
1115                                  texel);
1116}
1117
1118static boolean
1119near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
1120		   int pc)
1121{
1122   int i;
1123
1124   for (i = 0; i < 5; i++) {
1125      unsigned opcode;
1126
1127      if (pc + i >= bld->info->num_instructions)
1128	 return TRUE;
1129
1130      opcode = bld->instructions[pc + i].Instruction.Opcode;
1131
1132      if (opcode == TGSI_OPCODE_END)
1133	 return TRUE;
1134
1135      if (opcode == TGSI_OPCODE_TEX ||
1136	  opcode == TGSI_OPCODE_TXP ||
1137	  opcode == TGSI_OPCODE_TXD ||
1138	  opcode == TGSI_OPCODE_TXB ||
1139	  opcode == TGSI_OPCODE_TXL ||
1140	  opcode == TGSI_OPCODE_TXF ||
1141	  opcode == TGSI_OPCODE_TXQ ||
1142	  opcode == TGSI_OPCODE_CAL ||
1143	  opcode == TGSI_OPCODE_CALLNZ ||
1144	  opcode == TGSI_OPCODE_IF ||
1145	  opcode == TGSI_OPCODE_IFC ||
1146	  opcode == TGSI_OPCODE_BGNLOOP ||
1147	  opcode == TGSI_OPCODE_SWITCH)
1148	 return FALSE;
1149   }
1150
1151   return TRUE;
1152}
1153
1154
1155
1156/**
1157 * Kill fragment if any of the src register values are negative.
1158 */
1159static void
1160emit_kil(
1161   struct lp_build_tgsi_soa_context *bld,
1162   const struct tgsi_full_instruction *inst,
1163   int pc)
1164{
1165   const struct tgsi_full_src_register *reg = &inst->Src[0];
1166   LLVMValueRef terms[NUM_CHANNELS];
1167   LLVMValueRef mask;
1168   unsigned chan_index;
1169
1170   memset(&terms, 0, sizeof terms);
1171
1172   FOR_EACH_CHANNEL( chan_index ) {
1173      unsigned swizzle;
1174
1175      /* Unswizzle channel */
1176      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1177
1178      /* Check if the component has not been already tested. */
1179      assert(swizzle < NUM_CHANNELS);
1180      if( !terms[swizzle] )
1181         /* TODO: change the comparison operator instead of setting the sign */
1182         terms[swizzle] =  emit_fetch(bld, inst, 0, chan_index );
1183   }
1184
1185   mask = NULL;
1186   FOR_EACH_CHANNEL( chan_index ) {
1187      if(terms[chan_index]) {
1188         LLVMValueRef chan_mask;
1189
1190         /*
1191          * If term < 0 then mask = 0 else mask = ~0.
1192          */
1193         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
1194
1195         if(mask)
1196            mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
1197         else
1198            mask = chan_mask;
1199      }
1200   }
1201
1202   if(mask) {
1203      lp_build_mask_update(bld->mask, mask);
1204
1205      if (!near_end_of_shader(bld, pc))
1206	 lp_build_mask_check(bld->mask);
1207   }
1208}
1209
1210
1211/**
1212 * Predicated fragment kill.
1213 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
1214 * The only predication is the execution mask which will apply if
1215 * we're inside a loop or conditional.
1216 */
1217static void
1218emit_kilp(struct lp_build_tgsi_soa_context *bld,
1219          const struct tgsi_full_instruction *inst,
1220	  int pc)
1221{
1222   LLVMValueRef mask;
1223
1224   /* For those channels which are "alive", disable fragment shader
1225    * execution.
1226    */
1227   if (bld->exec_mask.has_mask) {
1228      mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
1229   }
1230   else {
1231      LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type);
1232      mask = zero;
1233   }
1234
1235   lp_build_mask_update(bld->mask, mask);
1236
1237   if (!near_end_of_shader(bld, pc))
1238      lp_build_mask_check(bld->mask);
1239}
1240
1241
1242/**
1243 * Emit code which will dump the value of all the temporary registers
1244 * to stdout.
1245 */
1246static void
1247emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
1248{
1249   struct gallivm_state *gallivm = bld->base.gallivm;
1250   LLVMBuilderRef builder = gallivm->builder;
1251   LLVMValueRef temp_ptr;
1252   LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
1253   LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
1254   LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
1255   LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
1256   int index;
1257   int n = bld->info->file_max[TGSI_FILE_TEMPORARY];
1258
1259   for (index = 0; index < n; index++) {
1260      LLVMValueRef idx = lp_build_const_int32(gallivm, index);
1261      LLVMValueRef v[4][4], res;
1262      int chan;
1263
1264      lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
1265
1266      for (chan = 0; chan < 4; chan++) {
1267         temp_ptr = get_temp_ptr(bld, index, chan);
1268         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
1269         v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
1270         v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
1271         v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
1272         v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
1273      }
1274
1275      lp_build_printf(gallivm, "  X: %f %f %f %f\n",
1276                      v[0][0], v[0][1], v[0][2], v[0][3]);
1277      lp_build_printf(gallivm, "  Y: %f %f %f %f\n",
1278                      v[1][0], v[1][1], v[1][2], v[1][3]);
1279      lp_build_printf(gallivm, "  Z: %f %f %f %f\n",
1280                      v[2][0], v[2][1], v[2][2], v[2][3]);
1281      lp_build_printf(gallivm, "  W: %f %f %f %f\n",
1282                      v[3][0], v[3][1], v[3][2], v[3][3]);
1283   }
1284}
1285
1286
1287
1288static void
1289emit_declaration(
1290   struct lp_build_tgsi_soa_context *bld,
1291   const struct tgsi_full_declaration *decl)
1292{
1293   struct gallivm_state *gallivm = bld->base.gallivm;
1294   LLVMTypeRef vec_type = bld->base.vec_type;
1295   const unsigned first = decl->Range.First;
1296   const unsigned last = decl->Range.Last;
1297   unsigned idx, i;
1298
1299   for (idx = first; idx <= last; ++idx) {
1300      assert(last <= bld->info->file_max[decl->Declaration.File]);
1301      switch (decl->Declaration.File) {
1302      case TGSI_FILE_TEMPORARY:
1303         assert(idx < LP_MAX_TGSI_TEMPS);
1304         if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
1305            for (i = 0; i < NUM_CHANNELS; i++)
1306               bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
1307         }
1308         break;
1309
1310      case TGSI_FILE_OUTPUT:
1311         if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
1312            for (i = 0; i < NUM_CHANNELS; i++)
1313               bld->outputs[idx][i] = lp_build_alloca(gallivm,
1314                                                      vec_type, "output");
1315         }
1316         break;
1317
1318      case TGSI_FILE_ADDRESS:
1319         assert(idx < LP_MAX_TGSI_ADDRS);
1320         for (i = 0; i < NUM_CHANNELS; i++)
1321            bld->addr[idx][i] = lp_build_alloca(gallivm, vec_type, "addr");
1322         break;
1323
1324      case TGSI_FILE_PREDICATE:
1325         assert(idx < LP_MAX_TGSI_PREDS);
1326         for (i = 0; i < NUM_CHANNELS; i++)
1327            bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
1328                                                 "predicate");
1329         break;
1330
1331      default:
1332         /* don't need to declare other vars */
1333         break;
1334      }
1335   }
1336}
1337
1338
/**
 * Emit LLVM for one TGSI instruction.
 * \return TRUE for success, FALSE otherwise
 */
1343static boolean
1344emit_instruction(
1345   struct lp_build_tgsi_soa_context *bld,
1346   const struct tgsi_full_instruction *inst,
1347   const struct tgsi_opcode_info *info,
1348   int *pc)
1349{
1350   unsigned chan_index;
1351   LLVMValueRef src0, src1, src2;
1352   LLVMValueRef tmp0, tmp1, tmp2;
1353   LLVMValueRef tmp3 = NULL;
1354   LLVMValueRef tmp4 = NULL;
1355   LLVMValueRef tmp5 = NULL;
1356   LLVMValueRef tmp6 = NULL;
1357   LLVMValueRef tmp7 = NULL;
1358   LLVMValueRef res;
1359   LLVMValueRef dst0[NUM_CHANNELS];
1360
1361   /*
1362    * Stores and write masks are handled in a general fashion after the long
1363    * instruction opcode switch statement.
1364    *
1365    * Although not stricitly necessary, we avoid generating instructions for
1366    * channels which won't be stored, in cases where's that easy. For some
1367    * complex instructions, like texture sampling, it is more convenient to
1368    * assume a full writemask and then let LLVM optimization passes eliminate
1369    * redundant code.
1370    */
1371
1372   (*pc)++;
1373
1374   assert(info->num_dst <= 1);
1375   if (info->num_dst) {
1376      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1377         dst0[chan_index] = bld->base.undef;
1378      }
1379   }
1380
1381   switch (inst->Instruction.Opcode) {
1382   case TGSI_OPCODE_ARL:
1383      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1384         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1385         tmp0 = lp_build_floor(&bld->base, tmp0);
1386         dst0[chan_index] = tmp0;
1387      }
1388      break;
1389
1390   case TGSI_OPCODE_MOV:
1391      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1392         dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
1393      }
1394      break;
1395
1396   case TGSI_OPCODE_LIT:
1397      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
1398         dst0[CHAN_X] = bld->base.one;
1399      }
1400      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1401         src0 = emit_fetch( bld, inst, 0, CHAN_X );
1402         dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
1403      }
1404      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1405         /* XMM[1] = SrcReg[0].yyyy */
1406         tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1407         /* XMM[1] = max(XMM[1], 0) */
1408         tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1409         /* XMM[2] = SrcReg[0].wwww */
1410         tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1411         tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1412         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1413         tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1414         dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1415      }
1416      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1417         dst0[CHAN_W] = bld->base.one;
1418      }
1419      break;
1420
1421   case TGSI_OPCODE_RCP:
1422   /* TGSI_OPCODE_RECIP */
1423      src0 = emit_fetch( bld, inst, 0, CHAN_X );
1424      res = lp_build_rcp(&bld->base, src0);
1425      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1426         dst0[chan_index] = res;
1427      }
1428      break;
1429
1430   case TGSI_OPCODE_RSQ:
1431   /* TGSI_OPCODE_RECIPSQRT */
1432      src0 = emit_fetch( bld, inst, 0, CHAN_X );
1433      src0 = lp_build_abs(&bld->base, src0);
1434      res = lp_build_rsqrt(&bld->base, src0);
1435      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1436         dst0[chan_index] = res;
1437      }
1438      break;
1439
1440   case TGSI_OPCODE_EXP:
1441      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1442          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1443          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1444         LLVMValueRef *p_exp2_int_part = NULL;
1445         LLVMValueRef *p_frac_part = NULL;
1446         LLVMValueRef *p_exp2 = NULL;
1447
1448         src0 = emit_fetch( bld, inst, 0, CHAN_X );
1449
1450         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1451            p_exp2_int_part = &tmp0;
1452         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1453            p_frac_part = &tmp1;
1454         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1455            p_exp2 = &tmp2;
1456
1457         lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1458
1459         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1460            dst0[CHAN_X] = tmp0;
1461         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1462            dst0[CHAN_Y] = tmp1;
1463         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1464            dst0[CHAN_Z] = tmp2;
1465      }
1466      /* dst.w = 1.0 */
1467      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1468         dst0[CHAN_W] = bld->base.one;
1469      }
1470      break;
1471
1472   case TGSI_OPCODE_LOG:
1473      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1474          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1475          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1476         LLVMValueRef *p_floor_log2 = NULL;
1477         LLVMValueRef *p_exp = NULL;
1478         LLVMValueRef *p_log2 = NULL;
1479
1480         src0 = emit_fetch( bld, inst, 0, CHAN_X );
1481         src0 = lp_build_abs( &bld->base, src0 );
1482
1483         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1484            p_floor_log2 = &tmp0;
1485         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1486            p_exp = &tmp1;
1487         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1488            p_log2 = &tmp2;
1489
1490         lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1491
1492         /* dst.x = floor(lg2(abs(src.x))) */
1493         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1494            dst0[CHAN_X] = tmp0;
1495         /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1496         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1497            dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1498         }
1499         /* dst.z = lg2(abs(src.x)) */
1500         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1501            dst0[CHAN_Z] = tmp2;
1502      }
1503      /* dst.w = 1.0 */
1504      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1505         dst0[CHAN_W] = bld->base.one;
1506      }
1507      break;
1508
1509   case TGSI_OPCODE_MUL:
1510      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1511         src0 = emit_fetch( bld, inst, 0, chan_index );
1512         src1 = emit_fetch( bld, inst, 1, chan_index );
1513         dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1514      }
1515      break;
1516
1517   case TGSI_OPCODE_ADD:
1518      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1519         src0 = emit_fetch( bld, inst, 0, chan_index );
1520         src1 = emit_fetch( bld, inst, 1, chan_index );
1521         dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1522      }
1523      break;
1524
1525   case TGSI_OPCODE_DP3:
1526   /* TGSI_OPCODE_DOT3 */
1527      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1528      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1529      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1530      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1531      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1532      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1533      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1534      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1535      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1536      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1537      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1538      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1539         dst0[chan_index] = tmp0;
1540      }
1541      break;
1542
1543   case TGSI_OPCODE_DP4:
1544   /* TGSI_OPCODE_DOT4 */
1545      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1546      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1547      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1548      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1549      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1550      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1551      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1552      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1553      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1554      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1555      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1556      tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1557      tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1558      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1559      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1560      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1561         dst0[chan_index] = tmp0;
1562      }
1563      break;
1564
1565   case TGSI_OPCODE_DST:
1566      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1567         dst0[CHAN_X] = bld->base.one;
1568      }
1569      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1570         tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1571         tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1572         dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1573      }
1574      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1575         dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1576      }
1577      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1578         dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1579      }
1580      break;
1581
1582   case TGSI_OPCODE_MIN:
1583      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1584         src0 = emit_fetch( bld, inst, 0, chan_index );
1585         src1 = emit_fetch( bld, inst, 1, chan_index );
1586         dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1587      }
1588      break;
1589
1590   case TGSI_OPCODE_MAX:
1591      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1592         src0 = emit_fetch( bld, inst, 0, chan_index );
1593         src1 = emit_fetch( bld, inst, 1, chan_index );
1594         dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1595      }
1596      break;
1597
1598   case TGSI_OPCODE_SLT:
1599   /* TGSI_OPCODE_SETLT */
1600      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1601         src0 = emit_fetch( bld, inst, 0, chan_index );
1602         src1 = emit_fetch( bld, inst, 1, chan_index );
1603         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1604         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1605      }
1606      break;
1607
1608   case TGSI_OPCODE_SGE:
1609   /* TGSI_OPCODE_SETGE */
1610      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1611         src0 = emit_fetch( bld, inst, 0, chan_index );
1612         src1 = emit_fetch( bld, inst, 1, chan_index );
1613         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1614         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1615      }
1616      break;
1617
1618   case TGSI_OPCODE_MAD:
1619   /* TGSI_OPCODE_MADD */
1620      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1621         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1622         tmp1 = emit_fetch( bld, inst, 1, chan_index );
1623         tmp2 = emit_fetch( bld, inst, 2, chan_index );
1624         tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1625         tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1626         dst0[chan_index] = tmp0;
1627      }
1628      break;
1629
1630   case TGSI_OPCODE_SUB:
1631      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1632         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1633         tmp1 = emit_fetch( bld, inst, 1, chan_index );
1634         dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1635      }
1636      break;
1637
1638   case TGSI_OPCODE_LRP:
1639      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1640         src0 = emit_fetch( bld, inst, 0, chan_index );
1641         src1 = emit_fetch( bld, inst, 1, chan_index );
1642         src2 = emit_fetch( bld, inst, 2, chan_index );
1643         tmp0 = lp_build_sub( &bld->base, src1, src2 );
1644         tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1645         dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1646      }
1647      break;
1648
1649   case TGSI_OPCODE_CND:
1650      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1651         src0 = emit_fetch( bld, inst, 0, chan_index );
1652         src1 = emit_fetch( bld, inst, 1, chan_index );
1653         src2 = emit_fetch( bld, inst, 2, chan_index );
1654         tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type, 0.5);
1655         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1656         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1657      }
1658      break;
1659
1660   case TGSI_OPCODE_DP2A:
1661      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );  /* xmm0 = src[0].x */
1662      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );  /* xmm1 = src[1].x */
1663      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
1664      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );  /* xmm1 = src[0].y */
1665      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );  /* xmm2 = src[1].y */
1666      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
1667      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1668      tmp1 = emit_fetch( bld, inst, 2, CHAN_X );  /* xmm1 = src[2].x */
1669      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1670      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1671         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
1672      }
1673      break;
1674
1675   case TGSI_OPCODE_FRC:
1676      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1677         src0 = emit_fetch( bld, inst, 0, chan_index );
1678         tmp0 = lp_build_floor(&bld->base, src0);
1679         tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1680         dst0[chan_index] = tmp0;
1681      }
1682      break;
1683
1684   case TGSI_OPCODE_CLAMP:
1685      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1686         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1687         src1 = emit_fetch( bld, inst, 1, chan_index );
1688         src2 = emit_fetch( bld, inst, 2, chan_index );
1689         tmp0 = lp_build_max(&bld->base, tmp0, src1);
1690         tmp0 = lp_build_min(&bld->base, tmp0, src2);
1691         dst0[chan_index] = tmp0;
1692      }
1693      break;
1694
1695   case TGSI_OPCODE_FLR:
1696      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1697         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1698         dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1699      }
1700      break;
1701
1702   case TGSI_OPCODE_ROUND:
1703      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1704         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1705         dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1706      }
1707      break;
1708
1709   case TGSI_OPCODE_EX2: {
1710      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1711      tmp0 = lp_build_exp2( &bld->base, tmp0);
1712      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1713         dst0[chan_index] = tmp0;
1714      }
1715      break;
1716   }
1717
1718   case TGSI_OPCODE_LG2:
1719      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1720      tmp0 = lp_build_log2( &bld->base, tmp0);
1721      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1722         dst0[chan_index] = tmp0;
1723      }
1724      break;
1725
1726   case TGSI_OPCODE_POW:
1727      src0 = emit_fetch( bld, inst, 0, CHAN_X );
1728      src1 = emit_fetch( bld, inst, 1, CHAN_X );
1729      res = lp_build_pow( &bld->base, src0, src1 );
1730      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1731         dst0[chan_index] = res;
1732      }
1733      break;
1734
1735   case TGSI_OPCODE_XPD:
1736      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1737          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1738         tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1739         tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1740      }
1741      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1742          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1743         tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1744         tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1745      }
1746      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1747         tmp2 = tmp0;
1748         tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1749         tmp5 = tmp3;
1750         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1751         tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1752         dst0[CHAN_X] = tmp2;
1753      }
1754      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1755          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1756         tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1757         tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1758      }
1759      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1760         tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1761         tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1762         tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1763         dst0[CHAN_Y] = tmp3;
1764      }
1765      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1766         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1767         tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1768         tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1769         dst0[CHAN_Z] = tmp5;
1770      }
1771      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1772         dst0[CHAN_W] = bld->base.one;
1773      }
1774      break;
1775
1776   case TGSI_OPCODE_ABS:
1777      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1778         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1779         dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1780      }
1781      break;
1782
1783   case TGSI_OPCODE_RCC:
1784      /* deprecated? */
1785      assert(0);
1786      return FALSE;
1787
1788   case TGSI_OPCODE_DPH:
1789      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1790      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1791      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1792      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1793      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1794      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1795      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1796      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1797      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1798      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1799      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1800      tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1801      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1802      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1803         dst0[chan_index] = tmp0;
1804      }
1805      break;
1806
1807   case TGSI_OPCODE_COS:
1808      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1809      tmp0 = lp_build_cos( &bld->base, tmp0 );
1810      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1811         dst0[chan_index] = tmp0;
1812      }
1813      break;
1814
1815   case TGSI_OPCODE_DDX:
1816      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1817         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1818      }
1819      break;
1820
1821   case TGSI_OPCODE_DDY:
1822      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1823         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1824      }
1825      break;
1826
1827   case TGSI_OPCODE_KILP:
1828      /* predicated kill */
1829      emit_kilp( bld, inst, (*pc)-1 );
1830      break;
1831
1832   case TGSI_OPCODE_KIL:
1833      /* conditional kill */
1834      emit_kil( bld, inst, (*pc)-1 );
1835      break;
1836
1837   case TGSI_OPCODE_PK2H:
1838      return FALSE;
1839      break;
1840
1841   case TGSI_OPCODE_PK2US:
1842      return FALSE;
1843      break;
1844
1845   case TGSI_OPCODE_PK4B:
1846      return FALSE;
1847      break;
1848
1849   case TGSI_OPCODE_PK4UB:
1850      return FALSE;
1851      break;
1852
1853   case TGSI_OPCODE_RFL:
1854      return FALSE;
1855      break;
1856
1857   case TGSI_OPCODE_SEQ:
1858      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1859         src0 = emit_fetch( bld, inst, 0, chan_index );
1860         src1 = emit_fetch( bld, inst, 1, chan_index );
1861         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1862         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1863      }
1864      break;
1865
1866   case TGSI_OPCODE_SFL:
1867      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1868         dst0[chan_index] = bld->base.zero;
1869      }
1870      break;
1871
1872   case TGSI_OPCODE_SGT:
1873      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1874         src0 = emit_fetch( bld, inst, 0, chan_index );
1875         src1 = emit_fetch( bld, inst, 1, chan_index );
1876         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1877         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1878      }
1879      break;
1880
1881   case TGSI_OPCODE_SIN:
1882      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1883      tmp0 = lp_build_sin( &bld->base, tmp0 );
1884      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1885         dst0[chan_index] = tmp0;
1886      }
1887      break;
1888
1889   case TGSI_OPCODE_SLE:
1890      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1891         src0 = emit_fetch( bld, inst, 0, chan_index );
1892         src1 = emit_fetch( bld, inst, 1, chan_index );
1893         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1894         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1895      }
1896      break;
1897
1898   case TGSI_OPCODE_SNE:
1899      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1900         src0 = emit_fetch( bld, inst, 0, chan_index );
1901         src1 = emit_fetch( bld, inst, 1, chan_index );
1902         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1903         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1904      }
1905      break;
1906
1907   case TGSI_OPCODE_STR:
1908      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1909         dst0[chan_index] = bld->base.one;
1910      }
1911      break;
1912
1913   case TGSI_OPCODE_TEX:
1914      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 );
1915      break;
1916
1917   case TGSI_OPCODE_TXD:
1918      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1919      break;
1920
1921   case TGSI_OPCODE_UP2H:
1922      /* deprecated */
1923      assert (0);
1924      return FALSE;
1925      break;
1926
1927   case TGSI_OPCODE_UP2US:
1928      /* deprecated */
1929      assert(0);
1930      return FALSE;
1931      break;
1932
1933   case TGSI_OPCODE_UP4B:
1934      /* deprecated */
1935      assert(0);
1936      return FALSE;
1937      break;
1938
1939   case TGSI_OPCODE_UP4UB:
1940      /* deprecated */
1941      assert(0);
1942      return FALSE;
1943      break;
1944
1945   case TGSI_OPCODE_X2D:
1946      /* deprecated? */
1947      assert(0);
1948      return FALSE;
1949      break;
1950
1951   case TGSI_OPCODE_ARA:
1952      /* deprecated */
1953      assert(0);
1954      return FALSE;
1955      break;
1956
1957   case TGSI_OPCODE_ARR:
1958      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1959         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1960         tmp0 = lp_build_round(&bld->base, tmp0);
1961         dst0[chan_index] = tmp0;
1962      }
1963      break;
1964
1965   case TGSI_OPCODE_BRA:
1966      /* deprecated */
1967      assert(0);
1968      return FALSE;
1969      break;
1970
1971   case TGSI_OPCODE_CAL:
1972      lp_exec_mask_call(&bld->exec_mask,
1973                        inst->Label.Label,
1974                        pc);
1975
1976      break;
1977
1978   case TGSI_OPCODE_RET:
1979      lp_exec_mask_ret(&bld->exec_mask, pc);
1980      break;
1981
1982   case TGSI_OPCODE_END:
1983      if (0) {
1984         /* for debugging */
1985         emit_dump_temps(bld);
1986      }
1987      *pc = -1;
1988      break;
1989
1990   case TGSI_OPCODE_SSG:
1991   /* TGSI_OPCODE_SGN */
1992      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1993         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1994         dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1995      }
1996      break;
1997
1998   case TGSI_OPCODE_CMP:
1999      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2000         src0 = emit_fetch( bld, inst, 0, chan_index );
2001         src1 = emit_fetch( bld, inst, 1, chan_index );
2002         src2 = emit_fetch( bld, inst, 2, chan_index );
2003         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
2004         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
2005      }
2006      break;
2007
2008   case TGSI_OPCODE_SCS:
2009      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
2010         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
2011         dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
2012      }
2013      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
2014         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
2015         dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
2016      }
2017      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
2018         dst0[CHAN_Z] = bld->base.zero;
2019      }
2020      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
2021         dst0[CHAN_W] = bld->base.one;
2022      }
2023      break;
2024
2025   case TGSI_OPCODE_TXB:
2026      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 );
2027      break;
2028
2029   case TGSI_OPCODE_NRM:
2030      /* fall-through */
2031   case TGSI_OPCODE_NRM4:
2032      /* 3 or 4-component normalization */
2033      {
2034         uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
2035
2036         if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
2037             IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
2038             IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
2039             (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
2040
2041            /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
2042
2043            /* xmm4 = src.x */
2044            /* xmm0 = src.x * src.x */
2045            tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
2046            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
2047               tmp4 = tmp0;
2048            }
2049            tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
2050
2051            /* xmm5 = src.y */
2052            /* xmm0 = xmm0 + src.y * src.y */
2053            tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
2054            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
2055               tmp5 = tmp1;
2056            }
2057            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2058            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2059
2060            /* xmm6 = src.z */
2061            /* xmm0 = xmm0 + src.z * src.z */
2062            tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
2063            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
2064               tmp6 = tmp1;
2065            }
2066            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2067            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2068
2069            if (dims == 4) {
2070               /* xmm7 = src.w */
2071               /* xmm0 = xmm0 + src.w * src.w */
2072               tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
2073               if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
2074                  tmp7 = tmp1;
2075               }
2076               tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2077               tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2078            }
2079
2080            /* xmm1 = 1 / sqrt(xmm0) */
2081            tmp1 = lp_build_rsqrt( &bld->base, tmp0);
2082
2083            /* dst.x = xmm1 * src.x */
2084            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
2085               dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
2086            }
2087
2088            /* dst.y = xmm1 * src.y */
2089            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
2090               dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
2091            }
2092
2093            /* dst.z = xmm1 * src.z */
2094            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
2095               dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
2096            }
2097
2098            /* dst.w = xmm1 * src.w */
2099            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
2100               dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
2101            }
2102         }
2103
2104         /* dst.w = 1.0 */
2105         if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
2106            dst0[CHAN_W] = bld->base.one;
2107         }
2108      }
2109      break;
2110
2111   case TGSI_OPCODE_DIV:
2112      /* deprecated */
2113      assert( 0 );
2114      return FALSE;
2115      break;
2116
2117   case TGSI_OPCODE_DP2:
2118      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );  /* xmm0 = src[0].x */
2119      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );  /* xmm1 = src[1].x */
2120      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
2121      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );  /* xmm1 = src[0].y */
2122      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );  /* xmm2 = src[1].y */
2123      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
2124      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
2125      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2126         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
2127      }
2128      break;
2129
2130   case TGSI_OPCODE_TXL:
2131      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 );
2132      break;
2133
2134   case TGSI_OPCODE_TXP:
2135      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 );
2136      break;
2137
2138   case TGSI_OPCODE_BRK:
2139      lp_exec_break(&bld->exec_mask);
2140      break;
2141
2142   case TGSI_OPCODE_IF:
2143      tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
2144      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
2145                          tmp0, bld->base.zero);
2146      lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
2147      break;
2148
2149   case TGSI_OPCODE_BGNLOOP:
2150      lp_exec_bgnloop(&bld->exec_mask);
2151      break;
2152
2153   case TGSI_OPCODE_BGNSUB:
2154      lp_exec_mask_bgnsub(&bld->exec_mask);
2155      break;
2156
2157   case TGSI_OPCODE_ELSE:
2158      lp_exec_mask_cond_invert(&bld->exec_mask);
2159      break;
2160
2161   case TGSI_OPCODE_ENDIF:
2162      lp_exec_mask_cond_pop(&bld->exec_mask);
2163      break;
2164
2165   case TGSI_OPCODE_ENDLOOP:
2166      lp_exec_endloop(bld->base.gallivm, &bld->exec_mask);
2167      break;
2168
2169   case TGSI_OPCODE_ENDSUB:
2170      lp_exec_mask_endsub(&bld->exec_mask, pc);
2171      break;
2172
2173   case TGSI_OPCODE_PUSHA:
2174      /* deprecated? */
2175      assert(0);
2176      return FALSE;
2177      break;
2178
2179   case TGSI_OPCODE_POPA:
2180      /* deprecated? */
2181      assert(0);
2182      return FALSE;
2183      break;
2184
2185   case TGSI_OPCODE_CEIL:
2186      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2187         tmp0 = emit_fetch( bld, inst, 0, chan_index );
2188         dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
2189      }
2190      break;
2191
2192   case TGSI_OPCODE_I2F:
2193      /* deprecated? */
2194      assert(0);
2195      return FALSE;
2196      break;
2197
2198   case TGSI_OPCODE_NOT:
2199      /* deprecated? */
2200      assert(0);
2201      return FALSE;
2202      break;
2203
2204   case TGSI_OPCODE_TRUNC:
2205      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2206         tmp0 = emit_fetch( bld, inst, 0, chan_index );
2207         dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
2208      }
2209      break;
2210
2211   case TGSI_OPCODE_SHL:
2212      /* deprecated? */
2213      assert(0);
2214      return FALSE;
2215      break;
2216
2217   case TGSI_OPCODE_ISHR:
2218      /* deprecated? */
2219      assert(0);
2220      return FALSE;
2221      break;
2222
2223   case TGSI_OPCODE_AND:
2224      /* deprecated? */
2225      assert(0);
2226      return FALSE;
2227      break;
2228
2229   case TGSI_OPCODE_OR:
2230      /* deprecated? */
2231      assert(0);
2232      return FALSE;
2233      break;
2234
2235   case TGSI_OPCODE_MOD:
2236      /* deprecated? */
2237      assert(0);
2238      return FALSE;
2239      break;
2240
2241   case TGSI_OPCODE_XOR:
2242      /* deprecated? */
2243      assert(0);
2244      return FALSE;
2245      break;
2246
2247   case TGSI_OPCODE_SAD:
2248      /* deprecated? */
2249      assert(0);
2250      return FALSE;
2251      break;
2252
2253   case TGSI_OPCODE_TXF:
2254      /* deprecated? */
2255      assert(0);
2256      return FALSE;
2257      break;
2258
2259   case TGSI_OPCODE_TXQ:
2260      /* deprecated? */
2261      assert(0);
2262      return FALSE;
2263      break;
2264
2265   case TGSI_OPCODE_CONT:
2266      lp_exec_continue(&bld->exec_mask);
2267      break;
2268
2269   case TGSI_OPCODE_EMIT:
2270      return FALSE;
2271      break;
2272
2273   case TGSI_OPCODE_ENDPRIM:
2274      return FALSE;
2275      break;
2276
2277   case TGSI_OPCODE_NOP:
2278      break;
2279
2280   default:
2281      return FALSE;
2282   }
2283
2284   if(info->num_dst) {
2285      LLVMValueRef pred[NUM_CHANNELS];
2286
2287      emit_fetch_predicate( bld, inst, pred );
2288
2289      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2290         emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
2291      }
2292   }
2293
2294   return TRUE;
2295}
2296
2297
2298void
2299lp_build_tgsi_soa(struct gallivm_state *gallivm,
2300                  const struct tgsi_token *tokens,
2301                  struct lp_type type,
2302                  struct lp_build_mask_context *mask,
2303                  LLVMValueRef consts_ptr,
2304                  const LLVMValueRef *pos,
2305                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
2306                  LLVMValueRef (*outputs)[NUM_CHANNELS],
2307                  struct lp_build_sampler_soa *sampler,
2308                  const struct tgsi_shader_info *info)
2309{
2310   struct lp_build_tgsi_soa_context bld;
2311   struct tgsi_parse_context parse;
2312   uint num_immediates = 0;
2313   uint num_instructions = 0;
2314   unsigned i;
2315   int pc = 0;
2316
2317   struct lp_type res_type;
2318
2319   assert(type.length <= LP_MAX_VECTOR_LENGTH);
2320   memset(&res_type, 0, sizeof res_type);
2321   res_type.width = type.width;
2322   res_type.length = type.length;
2323   res_type.sign = 1;
2324
2325   /* Setup build context */
2326   memset(&bld, 0, sizeof bld);
2327   lp_build_context_init(&bld.base, gallivm, type);
2328   lp_build_context_init(&bld.uint_bld, gallivm, lp_uint_type(type));
2329   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
2330   bld.mask = mask;
2331   bld.pos = pos;
2332   bld.inputs = inputs;
2333   bld.outputs = outputs;
2334   bld.consts_ptr = consts_ptr;
2335   bld.sampler = sampler;
2336   bld.info = info;
2337   bld.indirect_files = info->indirect_files;
2338   bld.instructions = (struct tgsi_full_instruction *)
2339                      MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
2340   bld.max_instructions = LP_MAX_INSTRUCTIONS;
2341
2342   if (!bld.instructions) {
2343      return;
2344   }
2345
2346   lp_exec_mask_init(&bld.exec_mask, &bld.base);
2347
2348   if (bld.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
2349      LLVMValueRef array_size =
2350         lp_build_const_int32(gallivm,
2351                              info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
2352      bld.temps_array = lp_build_array_alloca(gallivm,
2353                                              bld.base.vec_type, array_size,
2354                                              "temp_array");
2355   }
2356
2357   if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2358      LLVMValueRef array_size =
2359         lp_build_const_int32(gallivm,
2360                              info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
2361      bld.outputs_array = lp_build_array_alloca(gallivm,
2362                                                bld.base.vec_type, array_size,
2363                                                "output_array");
2364   }
2365
2366   /* If we have indirect addressing in inputs we need to copy them into
2367    * our alloca array to be able to iterate over them */
2368   if (bld.indirect_files & (1 << TGSI_FILE_INPUT)) {
2369      unsigned index, chan;
2370      LLVMTypeRef vec_type = bld.base.vec_type;
2371      LLVMValueRef array_size =
2372         lp_build_const_int32(gallivm, info->file_max[TGSI_FILE_INPUT]*4 + 4);
2373      bld.inputs_array = lp_build_array_alloca(gallivm,
2374                                               vec_type, array_size,
2375                                               "input_array");
2376
2377      assert(info->num_inputs <= info->file_max[TGSI_FILE_INPUT] + 1);
2378
2379      for (index = 0; index < info->num_inputs; ++index) {
2380         for (chan = 0; chan < NUM_CHANNELS; ++chan) {
2381            LLVMValueRef lindex =
2382               lp_build_const_int32(gallivm, index * 4 + chan);
2383            LLVMValueRef input_ptr =
2384               LLVMBuildGEP(bld.base.builder, bld.inputs_array,
2385                            &lindex, 1, "");
2386            LLVMValueRef value = bld.inputs[index][chan];
2387            if (value)
2388               LLVMBuildStore(bld.base.builder, value, input_ptr);
2389         }
2390      }
2391   }
2392
2393   tgsi_parse_init( &parse, tokens );
2394
2395   while( !tgsi_parse_end_of_tokens( &parse ) ) {
2396      tgsi_parse_token( &parse );
2397
2398      switch( parse.FullToken.Token.Type ) {
2399      case TGSI_TOKEN_TYPE_DECLARATION:
2400         /* Inputs already interpolated */
2401         emit_declaration( &bld, &parse.FullToken.FullDeclaration );
2402         break;
2403
2404      case TGSI_TOKEN_TYPE_INSTRUCTION:
2405         {
2406            /* save expanded instruction */
2407            if (num_instructions == bld.max_instructions) {
2408               struct tgsi_full_instruction *instructions;
2409               instructions = REALLOC(bld.instructions,
2410                                      bld.max_instructions
2411                                      * sizeof(struct tgsi_full_instruction),
2412                                      (bld.max_instructions + LP_MAX_INSTRUCTIONS)
2413                                      * sizeof(struct tgsi_full_instruction));
2414               if (!instructions) {
2415                  break;
2416               }
2417               bld.instructions = instructions;
2418               bld.max_instructions += LP_MAX_INSTRUCTIONS;
2419            }
2420
2421            memcpy(bld.instructions + num_instructions,
2422                   &parse.FullToken.FullInstruction,
2423                   sizeof(bld.instructions[0]));
2424
2425            num_instructions++;
2426         }
2427
2428         break;
2429
2430      case TGSI_TOKEN_TYPE_IMMEDIATE:
2431         /* simply copy the immediate values into the next immediates[] slot */
2432         {
2433            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
2434            assert(size <= 4);
2435            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
2436            for( i = 0; i < size; ++i )
2437               bld.immediates[num_immediates][i] =
2438                  lp_build_const_vec(gallivm, type, parse.FullToken.FullImmediate.u[i].Float);
2439            for( i = size; i < 4; ++i )
2440               bld.immediates[num_immediates][i] = bld.base.undef;
2441            num_immediates++;
2442         }
2443         break;
2444
2445      case TGSI_TOKEN_TYPE_PROPERTY:
2446         break;
2447
2448      default:
2449         assert( 0 );
2450      }
2451   }
2452
2453   while (pc != -1) {
2454      struct tgsi_full_instruction *instr = bld.instructions + pc;
2455      const struct tgsi_opcode_info *opcode_info =
2456         tgsi_get_opcode_info(instr->Instruction.Opcode);
2457      if (!emit_instruction( &bld, instr, opcode_info, &pc ))
2458         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
2459                       opcode_info->mnemonic);
2460   }
2461
2462   /* If we have indirect addressing in outputs we need to copy our alloca array
2463    * to the outputs slots specified by the called */
2464   if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2465      unsigned index, chan;
2466      assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] + 1);
2467      for (index = 0; index < info->num_outputs; ++index) {
2468         for (chan = 0; chan < NUM_CHANNELS; ++chan) {
2469            bld.outputs[index][chan] = get_output_ptr(&bld, index, chan);
2470         }
2471      }
2472   }
2473
2474   if (0) {
2475      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
2476      LLVMValueRef function = LLVMGetBasicBlockParent(block);
2477      debug_printf("11111111111111111111111111111 \n");
2478      tgsi_dump(tokens, 0);
2479      lp_debug_dump_value(function);
2480      debug_printf("2222222222222222222222222222 \n");
2481   }
2482   tgsi_parse_free( &parse );
2483
2484   if (0) {
2485      LLVMModuleRef module = LLVMGetGlobalParent(
2486         LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
2487      LLVMDumpModule(module);
2488
2489   }
2490
2491   FREE( bld.instructions );
2492}
2493
2494