lp_bld_tgsi_soa.c revision 6299f241e9fdd86e705d144a42d9b1979c13f9ad
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29/**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39#include "pipe/p_config.h"
40#include "pipe/p_shader_tokens.h"
41#include "util/u_debug.h"
42#include "util/u_math.h"
43#include "util/u_memory.h"
44#include "tgsi/tgsi_dump.h"
45#include "tgsi/tgsi_info.h"
46#include "tgsi/tgsi_parse.h"
47#include "tgsi/tgsi_util.h"
48#include "tgsi/tgsi_scan.h"
49#include "lp_bld_type.h"
50#include "lp_bld_const.h"
51#include "lp_bld_arit.h"
52#include "lp_bld_bitarit.h"
53#include "lp_bld_gather.h"
54#include "lp_bld_init.h"
55#include "lp_bld_logic.h"
56#include "lp_bld_swizzle.h"
57#include "lp_bld_flow.h"
58#include "lp_bld_quad.h"
59#include "lp_bld_tgsi.h"
60#include "lp_bld_limits.h"
61#include "lp_bld_debug.h"
62#include "lp_bld_printf.h"
63
64
/* Component indices of a four-channel register, and the channel count. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
#define NUM_CHANNELS 4

/* Loop header: runs the statement that follows once for CHAN = 0..NUM_CHANNELS-1. */
#define FOR_EACH_CHANNEL( CHAN ) \
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Non-zero when bit CHAN is set in the write mask of INST's first destination. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

/*
 * Statement prefix: guards the statement that follows with the test above.
 * Expands to a bare `if`, so do not follow the guarded statement with `else`.
 */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Loop header: like FOR_EACH_CHANNEL, but skips channels masked out of Dst[0]. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN ) \
   FOR_EACH_CHANNEL( CHAN ) \
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

#define LP_MAX_INSTRUCTIONS 256
85
86
/**
 * Execution-mask state used while emitting structured control flow.
 *
 * The individual masks (conditional, continue, break, return) are integer
 * vectors of type int_vec_type; lp_exec_mask_update() ANDs the relevant ones
 * together into exec_mask.
 */
struct lp_exec_mask {
   /* Build context supplying the gallivm state/builder and the vector type */
   struct lp_build_context *bld;

   /* TRUE while any of the cond/loop/call stacks below is non-empty */
   boolean has_mask;

   /* Integer vector type derived from bld->type; type of every mask value */
   LLVMTypeRef int_vec_type;

   /* Enclosing cond_mask values saved by lp_exec_mask_cond_push() */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   /* Mask of the innermost conditional currently open */
   LLVMValueRef cond_mask;

   /* Head block of the innermost loop; lp_exec_endloop() branches back to it */
   LLVMBasicBlockRef loop_block;
   /* Cleared for the active lanes by lp_exec_continue() */
   LLVMValueRef cont_mask;
   /* Cleared for the active lanes by lp_exec_break() */
   LLVMValueRef break_mask;
   /* Alloca'd variable that carries break_mask across loop iterations */
   LLVMValueRef break_var;
   /* State of the enclosing loops, saved by lp_exec_bgnloop() */
   struct {
      LLVMBasicBlockRef loop_block;
      LLVMValueRef cont_mask;
      LLVMValueRef break_mask;
      LLVMValueRef break_var;
   } loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;

   /* Cleared for the active lanes by lp_exec_mask_ret() inside a subroutine */
   LLVMValueRef ret_mask;
   /* Return pc and caller's ret_mask, saved by lp_exec_mask_call() */
   struct {
      int pc;
      LLVMValueRef ret_mask;
   } call_stack[LP_MAX_TGSI_NESTING];
   int call_stack_size;

   /* Combined mask recomputed by lp_exec_mask_update() */
   LLVMValueRef exec_mask;
};
119
/**
 * Per-shader state for the TGSI -> LLVM IR (SoA) translation.
 */
struct lp_build_tgsi_soa_context
{
   /* Builder for the shader's data type; also provides the gallivm state */
   struct lp_build_context base;

   /* Builder for vector integer masks and indices */
   struct lp_build_context uint_bld;

   /* Builder for scalar elements of shader's data type (float) */
   struct lp_build_context elem_bld;

   /* Base pointer of the constant buffer; indexed as register * 4 + channel */
   LLVMValueRef consts_ptr;
   const LLVMValueRef *pos;
   /* Per-register, per-channel input values (used when inputs are not indirect) */
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   /* Per-register, per-channel output pointers (used when outputs are not indirect) */
   LLVMValueRef (*outputs)[NUM_CHANNELS];

   const struct lp_build_sampler_soa *sampler;

   /* Immediate values, fetched directly by register index and swizzle */
   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
   /* Pointers to the temporary/address/predicate register channels */
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];

   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
    * set in the indirect_files field.
    * The temps[] array above is unused then.
    */
   LLVMValueRef temps_array;

   /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is
    * set in the indirect_files field.
    * The outputs[] array above is unused then.
    */
   LLVMValueRef outputs_array;

   /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is
    * set in the indirect_files field.
    * The inputs[] array above is unused then.
    */
   LLVMValueRef inputs_array;

   /* Shader info; file_max[] bounds register indices (see get_indirect_index) */
   const struct tgsi_shader_info *info;
   /** bitmask indicating which register files are accessed indirectly */
   unsigned indirect_files;

   struct lp_build_mask_context *mask;
   /* Control-flow execution mask applied when storing to registers */
   struct lp_exec_mask exec_mask;

   /* NOTE(review): presumably the instruction buffer and its capacity;
    * their use is not visible in this part of the file -- confirm.
    */
   struct tgsi_full_instruction *instructions;
   uint max_instructions;
};
170
171static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
172{
173   mask->bld = bld;
174   mask->has_mask = FALSE;
175   mask->cond_stack_size = 0;
176   mask->loop_stack_size = 0;
177   mask->call_stack_size = 0;
178
179   mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
180   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
181         LLVMConstAllOnes(mask->int_vec_type);
182}
183
184static void lp_exec_mask_update(struct lp_exec_mask *mask)
185{
186   LLVMBuilderRef builder = mask->bld->gallivm->builder;
187
188   if (mask->loop_stack_size) {
189      /*for loops we need to update the entire mask at runtime */
190      LLVMValueRef tmp;
191      assert(mask->break_mask);
192      tmp = LLVMBuildAnd(builder,
193                         mask->cont_mask,
194                         mask->break_mask,
195                         "maskcb");
196      mask->exec_mask = LLVMBuildAnd(builder,
197                                     mask->cond_mask,
198                                     tmp,
199                                     "maskfull");
200   } else
201      mask->exec_mask = mask->cond_mask;
202
203   if (mask->call_stack_size) {
204      mask->exec_mask = LLVMBuildAnd(builder,
205                                     mask->exec_mask,
206                                     mask->ret_mask,
207                                     "callmask");
208   }
209
210   mask->has_mask = (mask->cond_stack_size > 0 ||
211                     mask->loop_stack_size > 0 ||
212                     mask->call_stack_size > 0);
213}
214
215static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
216                                   LLVMValueRef val)
217{
218   LLVMBuilderRef builder = mask->bld->gallivm->builder;
219
220   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
221   if (mask->cond_stack_size == 0) {
222      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
223   }
224   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
225   assert(LLVMTypeOf(val) == mask->int_vec_type);
226   mask->cond_mask = LLVMBuildAnd(builder,
227                                  mask->cond_mask,
228                                  val,
229                                  "");
230   lp_exec_mask_update(mask);
231}
232
233static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
234{
235   LLVMBuilderRef builder = mask->bld->gallivm->builder;
236   LLVMValueRef prev_mask;
237   LLVMValueRef inv_mask;
238
239   assert(mask->cond_stack_size);
240   prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
241   if (mask->cond_stack_size == 1) {
242      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
243   }
244
245   inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
246
247   mask->cond_mask = LLVMBuildAnd(builder,
248                                  inv_mask,
249                                  prev_mask, "");
250   lp_exec_mask_update(mask);
251}
252
253static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
254{
255   assert(mask->cond_stack_size);
256   mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
257   lp_exec_mask_update(mask);
258}
259
260static void lp_exec_bgnloop(struct lp_exec_mask *mask)
261{
262   LLVMBuilderRef builder = mask->bld->gallivm->builder;
263
264   if (mask->loop_stack_size == 0) {
265      assert(mask->loop_block == NULL);
266      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
267      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
268      assert(mask->break_var == NULL);
269   }
270
271   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
272
273   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
274   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
275   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
276   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
277   ++mask->loop_stack_size;
278
279   mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
280   LLVMBuildStore(builder, mask->break_mask, mask->break_var);
281
282   mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
283   LLVMBuildBr(builder, mask->loop_block);
284   LLVMPositionBuilderAtEnd(builder, mask->loop_block);
285
286   mask->break_mask = LLVMBuildLoad(builder, mask->break_var, "");
287
288   lp_exec_mask_update(mask);
289}
290
291static void lp_exec_break(struct lp_exec_mask *mask)
292{
293   LLVMBuilderRef builder = mask->bld->gallivm->builder;
294   LLVMValueRef exec_mask = LLVMBuildNot(builder,
295                                         mask->exec_mask,
296                                         "break");
297
298   mask->break_mask = LLVMBuildAnd(builder,
299                                   mask->break_mask,
300                                   exec_mask, "break_full");
301
302   lp_exec_mask_update(mask);
303}
304
305static void lp_exec_continue(struct lp_exec_mask *mask)
306{
307   LLVMBuilderRef builder = mask->bld->gallivm->builder;
308   LLVMValueRef exec_mask = LLVMBuildNot(builder,
309                                         mask->exec_mask,
310                                         "");
311
312   mask->cont_mask = LLVMBuildAnd(builder,
313                                  mask->cont_mask,
314                                  exec_mask, "");
315
316   lp_exec_mask_update(mask);
317}
318
319
320static void lp_exec_endloop(struct gallivm_state *gallivm,
321                            struct lp_exec_mask *mask)
322{
323   LLVMBuilderRef builder = mask->bld->gallivm->builder;
324   LLVMBasicBlockRef endloop;
325   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
326                                               mask->bld->type.width *
327                                               mask->bld->type.length);
328   LLVMValueRef i1cond;
329
330   assert(mask->break_mask);
331
332   /*
333    * Restore the cont_mask, but don't pop
334    */
335   assert(mask->loop_stack_size);
336   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
337   lp_exec_mask_update(mask);
338
339   /*
340    * Unlike the continue mask, the break_mask must be preserved across loop
341    * iterations
342    */
343   LLVMBuildStore(builder, mask->break_mask, mask->break_var);
344
345   /* i1cond = (mask == 0) */
346   i1cond = LLVMBuildICmp(
347      builder,
348      LLVMIntNE,
349      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
350      LLVMConstNull(reg_type), "");
351
352   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
353
354   LLVMBuildCondBr(builder,
355                   i1cond, mask->loop_block, endloop);
356
357   LLVMPositionBuilderAtEnd(builder, endloop);
358
359   assert(mask->loop_stack_size);
360   --mask->loop_stack_size;
361   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
362   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
363   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
364   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
365
366   lp_exec_mask_update(mask);
367}
368
369/* stores val into an address pointed to by dst.
370 * mask->exec_mask is used to figure out which bits of val
371 * should be stored into the address
372 * (0 means don't store this bit, 1 means do store).
373 */
374static void lp_exec_mask_store(struct lp_exec_mask *mask,
375                               LLVMValueRef pred,
376                               LLVMValueRef val,
377                               LLVMValueRef dst)
378{
379   LLVMBuilderRef builder = mask->bld->gallivm->builder;
380
381   /* Mix the predicate and execution mask */
382   if (mask->has_mask) {
383      if (pred) {
384         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
385      } else {
386         pred = mask->exec_mask;
387      }
388   }
389
390   if (pred) {
391      LLVMValueRef real_val, dst_val;
392
393      dst_val = LLVMBuildLoad(builder, dst, "");
394      real_val = lp_build_select(mask->bld,
395                                 pred,
396                                 val, dst_val);
397
398      LLVMBuildStore(builder, real_val, dst);
399   } else
400      LLVMBuildStore(builder, val, dst);
401}
402
403static void lp_exec_mask_call(struct lp_exec_mask *mask,
404                              int func,
405                              int *pc)
406{
407   assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
408   mask->call_stack[mask->call_stack_size].pc = *pc;
409   mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
410   mask->call_stack_size++;
411   *pc = func;
412}
413
414static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
415{
416   LLVMBuilderRef builder = mask->bld->gallivm->builder;
417   LLVMValueRef exec_mask;
418
419   if (mask->call_stack_size == 0) {
420      /* returning from main() */
421      *pc = -1;
422      return;
423   }
424   exec_mask = LLVMBuildNot(builder,
425                            mask->exec_mask,
426                            "ret");
427
428   mask->ret_mask = LLVMBuildAnd(builder,
429                                 mask->ret_mask,
430                                 exec_mask, "ret_full");
431
432   lp_exec_mask_update(mask);
433}
434
/**
 * Hook for the start of a subroutine body; no mask state changes here.
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
   (void) mask;
}
438
439static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
440{
441   assert(mask->call_stack_size);
442   mask->call_stack_size--;
443   *pc = mask->call_stack[mask->call_stack_size].pc;
444   mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
445   lp_exec_mask_update(mask);
446}
447
448
449/**
450 * Return pointer to a temporary register channel (src or dest).
451 * Note that indirect addressing cannot be handled here.
452 * \param index  which temporary register
453 * \param chan  which channel of the temp register.
454 */
455static LLVMValueRef
456get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
457             unsigned index,
458             unsigned chan)
459{
460   LLVMBuilderRef builder = bld->base.gallivm->builder;
461   assert(chan < 4);
462   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
463      LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm, index * 4 + chan);
464      return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, "");
465   }
466   else {
467      return bld->temps[index][chan];
468   }
469}
470
471/**
472 * Return pointer to a output register channel (src or dest).
473 * Note that indirect addressing cannot be handled here.
474 * \param index  which output register
475 * \param chan  which channel of the output register.
476 */
477static LLVMValueRef
478get_output_ptr(struct lp_build_tgsi_soa_context *bld,
479               unsigned index,
480               unsigned chan)
481{
482   LLVMBuilderRef builder = bld->base.gallivm->builder;
483   assert(chan < 4);
484   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
485      LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm,
486                                                 index * 4 + chan);
487      return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, "");
488   }
489   else {
490      return bld->outputs[index][chan];
491   }
492}
493
494/**
495 * Gather vector.
496 * XXX the lp_build_gather() function should be capable of doing this
497 * with a little work.
498 */
499static LLVMValueRef
500build_gather(struct lp_build_tgsi_soa_context *bld,
501             LLVMValueRef base_ptr,
502             LLVMValueRef indexes)
503{
504   LLVMBuilderRef builder = bld->base.gallivm->builder;
505   LLVMValueRef res = bld->base.undef;
506   unsigned i;
507
508   /*
509    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
510    */
511   for (i = 0; i < bld->base.type.length; i++) {
512      LLVMValueRef ii = lp_build_const_int32(bld->base.gallivm, i);
513      LLVMValueRef index = LLVMBuildExtractElement(builder,
514                                                   indexes, ii, "");
515      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
516                                             &index, 1, "gather_ptr");
517      LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, "");
518
519      res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
520   }
521
522   return res;
523}
524
525
526/**
527 * Scatter/store vector.
528 */
529static void
530emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
531                  LLVMValueRef base_ptr,
532                  LLVMValueRef indexes,
533                  LLVMValueRef values,
534                  struct lp_exec_mask *mask,
535                  LLVMValueRef pred)
536{
537   struct gallivm_state *gallivm = bld->base.gallivm;
538   LLVMBuilderRef builder = builder;
539   unsigned i;
540
541   /* Mix the predicate and execution mask */
542   if (mask->has_mask) {
543      if (pred) {
544         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
545      }
546      else {
547         pred = mask->exec_mask;
548      }
549   }
550
551   /*
552    * Loop over elements of index_vec, store scalar value.
553    */
554   for (i = 0; i < bld->base.type.length; i++) {
555      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
556      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
557      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
558      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
559      LLVMValueRef scalar_pred = pred ?
560         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
561
562      if (0)
563         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
564                         ii, val, index, scalar_ptr);
565
566      if (scalar_pred) {
567         LLVMValueRef real_val, dst_val;
568         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
569         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
570         LLVMBuildStore(builder, real_val, scalar_ptr);
571      }
572      else {
573         LLVMBuildStore(builder, val, scalar_ptr);
574      }
575   }
576}
577
578
579/**
580 * Read the current value of the ADDR register, convert the floats to
581 * ints, add the base index and return the vector of offsets.
582 * The offsets will be used to index into the constant buffer or
583 * temporary register file.
584 */
585static LLVMValueRef
586get_indirect_index(struct lp_build_tgsi_soa_context *bld,
587                   unsigned reg_file, unsigned reg_index,
588                   const struct tgsi_src_register *indirect_reg)
589{
590   LLVMBuilderRef builder = bld->base.gallivm->builder;
591   struct lp_build_context *uint_bld = &bld->uint_bld;
592   /* always use X component of address register */
593   unsigned swizzle = indirect_reg->SwizzleX;
594   LLVMValueRef base;
595   LLVMValueRef rel;
596   LLVMValueRef max_index;
597   LLVMValueRef index;
598
599   assert(bld->indirect_files & (1 << reg_file));
600
601   base = lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, reg_index);
602
603   assert(swizzle < 4);
604   rel = LLVMBuildLoad(builder,
605                        bld->addr[indirect_reg->Index][swizzle],
606                        "load addr reg");
607
608   /* for indexing we want integers */
609   rel = LLVMBuildFPToSI(builder,
610                         rel,
611                         uint_bld->vec_type, "");
612
613   index = lp_build_add(uint_bld, base, rel);
614
615   max_index = lp_build_const_int_vec(bld->base.gallivm,
616                                      uint_bld->type,
617                                      bld->info->file_max[reg_file]);
618
619   assert(!uint_bld->type.sign);
620   index = lp_build_min(uint_bld, index, max_index);
621
622   return index;
623}
624
625
626/**
627 * Register fetch.
628 */
629static LLVMValueRef
630emit_fetch(
631   struct lp_build_tgsi_soa_context *bld,
632   const struct tgsi_full_instruction *inst,
633   unsigned src_op,
634   const unsigned chan_index )
635{
636   struct gallivm_state *gallivm = bld->base.gallivm;
637   LLVMBuilderRef builder = gallivm->builder;
638   struct lp_build_context *uint_bld = &bld->uint_bld;
639   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
640   const unsigned swizzle =
641      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
642   LLVMValueRef res;
643   LLVMValueRef indirect_index = NULL;
644
645   if (swizzle > 3) {
646      assert(0 && "invalid swizzle in emit_fetch()");
647      return bld->base.undef;
648   }
649
650   if (reg->Register.Indirect) {
651      indirect_index = get_indirect_index(bld,
652                                          reg->Register.File,
653                                          reg->Register.Index,
654                                          &reg->Indirect);
655   } else {
656      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
657   }
658
659   switch (reg->Register.File) {
660   case TGSI_FILE_CONSTANT:
661      if (reg->Register.Indirect) {
662         LLVMValueRef swizzle_vec =
663            lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle);
664         LLVMValueRef index_vec;  /* index into the const buffer */
665
666         /* index_vec = indirect_index * 4 + swizzle */
667         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
668         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
669
670         /* Gather values from the constant buffer */
671         res = build_gather(bld, bld->consts_ptr, index_vec);
672      }
673      else {
674         LLVMValueRef index;  /* index into the const buffer */
675         LLVMValueRef scalar, scalar_ptr;
676
677         index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);
678
679         scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
680                                   &index, 1, "");
681         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
682
683         res = lp_build_broadcast_scalar(&bld->base, scalar);
684      }
685      break;
686
687   case TGSI_FILE_IMMEDIATE:
688      res = bld->immediates[reg->Register.Index][swizzle];
689      assert(res);
690      break;
691
692   case TGSI_FILE_INPUT:
693      if (reg->Register.Indirect) {
694         LLVMValueRef swizzle_vec =
695            lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
696         LLVMValueRef length_vec =
697            lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length);
698         LLVMValueRef index_vec;  /* index into the const buffer */
699         LLVMValueRef inputs_array;
700         LLVMTypeRef float4_ptr_type;
701
702         /* index_vec = (indirect_index * 4 + swizzle) * length */
703         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
704         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
705         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
706
707         /* cast inputs_array pointer to float* */
708         float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
709         inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
710                                         float4_ptr_type, "");
711
712         /* Gather values from the temporary register array */
713         res = build_gather(bld, inputs_array, index_vec);
714      } else {
715         if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
716            LLVMValueRef lindex = lp_build_const_int32(gallivm,
717                                           reg->Register.Index * 4 + swizzle);
718            LLVMValueRef input_ptr =  LLVMBuildGEP(builder,
719                                                   bld->inputs_array, &lindex, 1, "");
720            res = LLVMBuildLoad(builder, input_ptr, "");
721         }
722         else {
723            res = bld->inputs[reg->Register.Index][swizzle];
724         }
725      }
726      assert(res);
727      break;
728
729   case TGSI_FILE_TEMPORARY:
730      if (reg->Register.Indirect) {
731         LLVMValueRef swizzle_vec =
732            lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle);
733         LLVMValueRef length_vec =
734            lp_build_const_int_vec(bld->base.gallivm, uint_bld->type,
735                                   bld->base.type.length);
736         LLVMValueRef index_vec;  /* index into the const buffer */
737         LLVMValueRef temps_array;
738         LLVMTypeRef float4_ptr_type;
739
740         /* index_vec = (indirect_index * 4 + swizzle) * length */
741         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
742         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
743         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
744
745         /* cast temps_array pointer to float* */
746         float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->base.gallivm->context), 0);
747         temps_array = LLVMBuildBitCast(builder, bld->temps_array,
748                                        float4_ptr_type, "");
749
750         /* Gather values from the temporary register array */
751         res = build_gather(bld, temps_array, index_vec);
752      }
753      else {
754         LLVMValueRef temp_ptr;
755         temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
756         res = LLVMBuildLoad(builder, temp_ptr, "");
757         if (!res)
758            return bld->base.undef;
759      }
760      break;
761
762   default:
763      assert(0 && "invalid src register in emit_fetch()");
764      return bld->base.undef;
765   }
766
767   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
768   case TGSI_UTIL_SIGN_CLEAR:
769      res = lp_build_abs( &bld->base, res );
770      break;
771
772   case TGSI_UTIL_SIGN_SET:
773      res = lp_build_abs( &bld->base, res );
774      /* fall through */
775   case TGSI_UTIL_SIGN_TOGGLE:
776      res = lp_build_negate( &bld->base, res );
777      break;
778
779   case TGSI_UTIL_SIGN_KEEP:
780      break;
781   }
782
783   return res;
784}
785
786
787/**
788 * Register fetch with derivatives.
789 */
790static void
791emit_fetch_deriv(
792   struct lp_build_tgsi_soa_context *bld,
793   const struct tgsi_full_instruction *inst,
794   unsigned index,
795   const unsigned chan_index,
796   LLVMValueRef *res,
797   LLVMValueRef *ddx,
798   LLVMValueRef *ddy)
799{
800   LLVMValueRef src;
801
802   src = emit_fetch(bld, inst, index, chan_index);
803
804   if(res)
805      *res = src;
806
807   /* TODO: use interpolation coeffs for inputs */
808
809   if(ddx)
810      *ddx = lp_build_ddx(&bld->base, src);
811
812   if(ddy)
813      *ddy = lp_build_ddy(&bld->base, src);
814}
815
816
817/**
818 * Predicate.
819 */
820static void
821emit_fetch_predicate(
822   struct lp_build_tgsi_soa_context *bld,
823   const struct tgsi_full_instruction *inst,
824   LLVMValueRef *pred)
825{
826   LLVMBuilderRef builder = bld->base.gallivm->builder;
827   unsigned index;
828   unsigned char swizzles[4];
829   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
830   LLVMValueRef value;
831   unsigned chan;
832
833   if (!inst->Instruction.Predicate) {
834      FOR_EACH_CHANNEL( chan ) {
835         pred[chan] = NULL;
836      }
837      return;
838   }
839
840   swizzles[0] = inst->Predicate.SwizzleX;
841   swizzles[1] = inst->Predicate.SwizzleY;
842   swizzles[2] = inst->Predicate.SwizzleZ;
843   swizzles[3] = inst->Predicate.SwizzleW;
844
845   index = inst->Predicate.Index;
846   assert(index < LP_MAX_TGSI_PREDS);
847
848   FOR_EACH_CHANNEL( chan ) {
849      unsigned swizzle = swizzles[chan];
850
851      /*
852       * Only fetch the predicate register channels that are actually listed
853       * in the swizzles
854       */
855      if (!unswizzled[swizzle]) {
856         value = LLVMBuildLoad(builder,
857                               bld->preds[index][swizzle], "");
858
859         /*
860          * Convert the value to an integer mask.
861          *
862          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
863          * is needlessly causing two comparisons due to storing the intermediate
864          * result as float vector instead of an integer mask vector.
865          */
866         value = lp_build_compare(bld->base.gallivm,
867                                  bld->base.type,
868                                  PIPE_FUNC_NOTEQUAL,
869                                  value,
870                                  bld->base.zero);
871         if (inst->Predicate.Negate) {
872            value = LLVMBuildNot(builder, value, "");
873         }
874
875         unswizzled[swizzle] = value;
876      } else {
877         value = unswizzled[swizzle];
878      }
879
880      pred[chan] = value;
881   }
882}
883
884
885/**
886 * Register store.
887 */
888static void
889emit_store(
890   struct lp_build_tgsi_soa_context *bld,
891   const struct tgsi_full_instruction *inst,
892   unsigned index,
893   unsigned chan_index,
894   LLVMValueRef pred,
895   LLVMValueRef value)
896{
897   struct gallivm_state *gallivm = bld->base.gallivm;
898   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
899   struct lp_build_context *uint_bld = &bld->uint_bld;
900   LLVMValueRef indirect_index = NULL;
901
902   switch( inst->Instruction.Saturate ) {
903   case TGSI_SAT_NONE:
904      break;
905
906   case TGSI_SAT_ZERO_ONE:
907      value = lp_build_max(&bld->base, value, bld->base.zero);
908      value = lp_build_min(&bld->base, value, bld->base.one);
909      break;
910
911   case TGSI_SAT_MINUS_PLUS_ONE:
912      value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0));
913      value = lp_build_min(&bld->base, value, bld->base.one);
914      break;
915
916   default:
917      assert(0);
918   }
919
920   if (reg->Register.Indirect) {
921      indirect_index = get_indirect_index(bld,
922                                          reg->Register.File,
923                                          reg->Register.Index,
924                                          &reg->Indirect);
925   } else {
926      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
927   }
928
929   switch( reg->Register.File ) {
930   case TGSI_FILE_OUTPUT:
931      if (reg->Register.Indirect) {
932         LLVMBuilderRef builder = builder;
933         LLVMValueRef chan_vec =
934            lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
935         LLVMValueRef length_vec =
936            lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length);
937         LLVMValueRef index_vec;  /* indexes into the temp registers */
938         LLVMValueRef outputs_array;
939         LLVMValueRef pixel_offsets;
940         LLVMTypeRef float_ptr_type;
941         int i;
942
943         /* build pixel offset vector: {0, 1, 2, 3, ...} */
944         pixel_offsets = uint_bld->undef;
945         for (i = 0; i < bld->base.type.length; i++) {
946            LLVMValueRef ii = lp_build_const_int32(gallivm, i);
947            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
948                                                   ii, ii, "");
949         }
950
951         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
952         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
953         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
954         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
955         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
956
957         float_ptr_type =
958            LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
959         outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
960                                          float_ptr_type, "");
961
962         /* Scatter store values into temp registers */
963         emit_mask_scatter(bld, outputs_array, index_vec, value,
964                           &bld->exec_mask, pred);
965      }
966      else {
967         LLVMValueRef out_ptr = get_output_ptr(bld, reg->Register.Index,
968                                               chan_index);
969         lp_exec_mask_store(&bld->exec_mask, pred, value, out_ptr);
970      }
971      break;
972
973   case TGSI_FILE_TEMPORARY:
974      if (reg->Register.Indirect) {
975         LLVMBuilderRef builder = builder;
976         LLVMValueRef chan_vec =
977            lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
978         LLVMValueRef length_vec =
979            lp_build_const_int_vec(gallivm, uint_bld->type,
980                                   bld->base.type.length);
981         LLVMValueRef index_vec;  /* indexes into the temp registers */
982         LLVMValueRef temps_array;
983         LLVMValueRef pixel_offsets;
984         LLVMTypeRef float_ptr_type;
985         int i;
986
987         /* build pixel offset vector: {0, 1, 2, 3, ...} */
988         pixel_offsets = uint_bld->undef;
989         for (i = 0; i < bld->base.type.length; i++) {
990            LLVMValueRef ii = lp_build_const_int32(gallivm, i);
991            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
992                                                   ii, ii, "");
993         }
994
995         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
996         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
997         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
998         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
999         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1000
1001         float_ptr_type =
1002            LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1003         temps_array = LLVMBuildBitCast(builder, bld->temps_array,
1004                                        float_ptr_type, "");
1005
1006         /* Scatter store values into temp registers */
1007         emit_mask_scatter(bld, temps_array, index_vec, value,
1008                           &bld->exec_mask, pred);
1009      }
1010      else {
1011         LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
1012                                              chan_index);
1013         lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
1014      }
1015      break;
1016
1017   case TGSI_FILE_ADDRESS:
1018      lp_exec_mask_store(&bld->exec_mask, pred, value,
1019                         bld->addr[reg->Register.Index][chan_index]);
1020      break;
1021
1022   case TGSI_FILE_PREDICATE:
1023      lp_exec_mask_store(&bld->exec_mask, pred, value,
1024                         bld->preds[reg->Register.Index][chan_index]);
1025      break;
1026
1027   default:
1028      assert( 0 );
1029   }
1030}
1031
1032
1033/**
1034 * High-level instruction translators.
1035 */
1036
1037static void
1038emit_tex( struct lp_build_tgsi_soa_context *bld,
1039          const struct tgsi_full_instruction *inst,
1040          enum lp_build_tex_modifier modifier,
1041          LLVMValueRef *texel)
1042{
1043   LLVMBuilderRef builder = bld->base.gallivm->builder;
1044   unsigned unit;
1045   LLVMValueRef lod_bias, explicit_lod;
1046   LLVMValueRef oow = NULL;
1047   LLVMValueRef coords[3];
1048   LLVMValueRef ddx[3];
1049   LLVMValueRef ddy[3];
1050   unsigned num_coords;
1051   unsigned i;
1052
1053   if (!bld->sampler) {
1054      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1055      for (i = 0; i < 4; i++) {
1056         texel[i] = bld->base.undef;
1057      }
1058      return;
1059   }
1060
1061   switch (inst->Texture.Texture) {
1062   case TGSI_TEXTURE_1D:
1063      num_coords = 1;
1064      break;
1065   case TGSI_TEXTURE_2D:
1066   case TGSI_TEXTURE_RECT:
1067      num_coords = 2;
1068      break;
1069   case TGSI_TEXTURE_SHADOW1D:
1070   case TGSI_TEXTURE_SHADOW2D:
1071   case TGSI_TEXTURE_SHADOWRECT:
1072   case TGSI_TEXTURE_3D:
1073   case TGSI_TEXTURE_CUBE:
1074      num_coords = 3;
1075      break;
1076   default:
1077      assert(0);
1078      return;
1079   }
1080
1081   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1082      lod_bias = emit_fetch( bld, inst, 0, 3 );
1083      explicit_lod = NULL;
1084   }
1085   else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1086      lod_bias = NULL;
1087      explicit_lod = emit_fetch( bld, inst, 0, 3 );
1088   }
1089   else {
1090      lod_bias = NULL;
1091      explicit_lod = NULL;
1092   }
1093
1094   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
1095      oow = emit_fetch( bld, inst, 0, 3 );
1096      oow = lp_build_rcp(&bld->base, oow);
1097   }
1098
1099   for (i = 0; i < num_coords; i++) {
1100      coords[i] = emit_fetch( bld, inst, 0, i );
1101      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1102         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
1103   }
1104   for (i = num_coords; i < 3; i++) {
1105      coords[i] = bld->base.undef;
1106   }
1107
1108   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1109      LLVMValueRef index0 = lp_build_const_int32(bld->base.gallivm, 0);
1110      for (i = 0; i < num_coords; i++) {
1111         LLVMValueRef src1 = emit_fetch( bld, inst, 1, i );
1112         LLVMValueRef src2 = emit_fetch( bld, inst, 2, i );
1113         ddx[i] = LLVMBuildExtractElement(builder, src1, index0, "");
1114         ddy[i] = LLVMBuildExtractElement(builder, src2, index0, "");
1115      }
1116      unit = inst->Src[3].Register.Index;
1117   }  else {
1118      for (i = 0; i < num_coords; i++) {
1119         ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] );
1120         ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] );
1121      }
1122      unit = inst->Src[1].Register.Index;
1123   }
1124   for (i = num_coords; i < 3; i++) {
1125      ddx[i] = LLVMGetUndef(bld->base.elem_type);
1126      ddy[i] = LLVMGetUndef(bld->base.elem_type);
1127   }
1128
1129   bld->sampler->emit_fetch_texel(bld->sampler,
1130                                  bld->base.gallivm,
1131                                  bld->base.type,
1132                                  unit, num_coords, coords,
1133                                  ddx, ddy,
1134                                  lod_bias, explicit_lod,
1135                                  texel);
1136}
1137
1138static boolean
1139near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
1140		   int pc)
1141{
1142   int i;
1143
1144   for (i = 0; i < 5; i++) {
1145      unsigned opcode;
1146
1147      if (pc + i >= bld->info->num_instructions)
1148	 return TRUE;
1149
1150      opcode = bld->instructions[pc + i].Instruction.Opcode;
1151
1152      if (opcode == TGSI_OPCODE_END)
1153	 return TRUE;
1154
1155      if (opcode == TGSI_OPCODE_TEX ||
1156	  opcode == TGSI_OPCODE_TXP ||
1157	  opcode == TGSI_OPCODE_TXD ||
1158	  opcode == TGSI_OPCODE_TXB ||
1159	  opcode == TGSI_OPCODE_TXL ||
1160	  opcode == TGSI_OPCODE_TXF ||
1161	  opcode == TGSI_OPCODE_TXQ ||
1162	  opcode == TGSI_OPCODE_CAL ||
1163	  opcode == TGSI_OPCODE_CALLNZ ||
1164	  opcode == TGSI_OPCODE_IF ||
1165	  opcode == TGSI_OPCODE_IFC ||
1166	  opcode == TGSI_OPCODE_BGNLOOP ||
1167	  opcode == TGSI_OPCODE_SWITCH)
1168	 return FALSE;
1169   }
1170
1171   return TRUE;
1172}
1173
1174
1175
1176/**
1177 * Kill fragment if any of the src register values are negative.
1178 */
1179static void
1180emit_kil(
1181   struct lp_build_tgsi_soa_context *bld,
1182   const struct tgsi_full_instruction *inst,
1183   int pc)
1184{
1185   LLVMBuilderRef builder = bld->base.gallivm->builder;
1186   const struct tgsi_full_src_register *reg = &inst->Src[0];
1187   LLVMValueRef terms[NUM_CHANNELS];
1188   LLVMValueRef mask;
1189   unsigned chan_index;
1190
1191   memset(&terms, 0, sizeof terms);
1192
1193   FOR_EACH_CHANNEL( chan_index ) {
1194      unsigned swizzle;
1195
1196      /* Unswizzle channel */
1197      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1198
1199      /* Check if the component has not been already tested. */
1200      assert(swizzle < NUM_CHANNELS);
1201      if( !terms[swizzle] )
1202         /* TODO: change the comparison operator instead of setting the sign */
1203         terms[swizzle] =  emit_fetch(bld, inst, 0, chan_index );
1204   }
1205
1206   mask = NULL;
1207   FOR_EACH_CHANNEL( chan_index ) {
1208      if(terms[chan_index]) {
1209         LLVMValueRef chan_mask;
1210
1211         /*
1212          * If term < 0 then mask = 0 else mask = ~0.
1213          */
1214         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
1215
1216         if(mask)
1217            mask = LLVMBuildAnd(builder, mask, chan_mask, "");
1218         else
1219            mask = chan_mask;
1220      }
1221   }
1222
1223   if(mask) {
1224      lp_build_mask_update(bld->mask, mask);
1225
1226      if (!near_end_of_shader(bld, pc))
1227	 lp_build_mask_check(bld->mask);
1228   }
1229}
1230
1231
1232/**
1233 * Predicated fragment kill.
1234 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
1235 * The only predication is the execution mask which will apply if
1236 * we're inside a loop or conditional.
1237 */
1238static void
1239emit_kilp(struct lp_build_tgsi_soa_context *bld,
1240          const struct tgsi_full_instruction *inst,
1241	  int pc)
1242{
1243   LLVMBuilderRef builder = bld->base.gallivm->builder;
1244   LLVMValueRef mask;
1245
1246   /* For those channels which are "alive", disable fragment shader
1247    * execution.
1248    */
1249   if (bld->exec_mask.has_mask) {
1250      mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
1251   }
1252   else {
1253      LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type);
1254      mask = zero;
1255   }
1256
1257   lp_build_mask_update(bld->mask, mask);
1258
1259   if (!near_end_of_shader(bld, pc))
1260      lp_build_mask_check(bld->mask);
1261}
1262
1263
1264/**
1265 * Emit code which will dump the value of all the temporary registers
1266 * to stdout.
1267 */
1268static void
1269emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
1270{
1271   struct gallivm_state *gallivm = bld->base.gallivm;
1272   LLVMBuilderRef builder = gallivm->builder;
1273   LLVMValueRef temp_ptr;
1274   LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
1275   LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
1276   LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
1277   LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
1278   int index;
1279   int n = bld->info->file_max[TGSI_FILE_TEMPORARY];
1280
1281   for (index = 0; index < n; index++) {
1282      LLVMValueRef idx = lp_build_const_int32(gallivm, index);
1283      LLVMValueRef v[4][4], res;
1284      int chan;
1285
1286      lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
1287
1288      for (chan = 0; chan < 4; chan++) {
1289         temp_ptr = get_temp_ptr(bld, index, chan);
1290         res = LLVMBuildLoad(builder, temp_ptr, "");
1291         v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
1292         v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
1293         v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
1294         v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
1295      }
1296
1297      lp_build_printf(gallivm, "  X: %f %f %f %f\n",
1298                      v[0][0], v[0][1], v[0][2], v[0][3]);
1299      lp_build_printf(gallivm, "  Y: %f %f %f %f\n",
1300                      v[1][0], v[1][1], v[1][2], v[1][3]);
1301      lp_build_printf(gallivm, "  Z: %f %f %f %f\n",
1302                      v[2][0], v[2][1], v[2][2], v[2][3]);
1303      lp_build_printf(gallivm, "  W: %f %f %f %f\n",
1304                      v[3][0], v[3][1], v[3][2], v[3][3]);
1305   }
1306}
1307
1308
1309
1310static void
1311emit_declaration(
1312   struct lp_build_tgsi_soa_context *bld,
1313   const struct tgsi_full_declaration *decl)
1314{
1315   struct gallivm_state *gallivm = bld->base.gallivm;
1316   LLVMTypeRef vec_type = bld->base.vec_type;
1317   const unsigned first = decl->Range.First;
1318   const unsigned last = decl->Range.Last;
1319   unsigned idx, i;
1320
1321   for (idx = first; idx <= last; ++idx) {
1322      assert(last <= bld->info->file_max[decl->Declaration.File]);
1323      switch (decl->Declaration.File) {
1324      case TGSI_FILE_TEMPORARY:
1325         assert(idx < LP_MAX_TGSI_TEMPS);
1326         if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
1327            for (i = 0; i < NUM_CHANNELS; i++)
1328               bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
1329         }
1330         break;
1331
1332      case TGSI_FILE_OUTPUT:
1333         if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
1334            for (i = 0; i < NUM_CHANNELS; i++)
1335               bld->outputs[idx][i] = lp_build_alloca(gallivm,
1336                                                      vec_type, "output");
1337         }
1338         break;
1339
1340      case TGSI_FILE_ADDRESS:
1341         assert(idx < LP_MAX_TGSI_ADDRS);
1342         for (i = 0; i < NUM_CHANNELS; i++)
1343            bld->addr[idx][i] = lp_build_alloca(gallivm, vec_type, "addr");
1344         break;
1345
1346      case TGSI_FILE_PREDICATE:
1347         assert(idx < LP_MAX_TGSI_PREDS);
1348         for (i = 0; i < NUM_CHANNELS; i++)
1349            bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
1350                                                 "predicate");
1351         break;
1352
1353      default:
1354         /* don't need to declare other vars */
1355         break;
1356      }
1357   }
1358}
1359
1360
1361/**
1362 * Emit LLVM for one TGSI instruction.
1363 * \param return TRUE for success, FALSE otherwise
1364 */
1365static boolean
1366emit_instruction(
1367   struct lp_build_tgsi_soa_context *bld,
1368   const struct tgsi_full_instruction *inst,
1369   const struct tgsi_opcode_info *info,
1370   int *pc)
1371{
1372   unsigned chan_index;
1373   LLVMValueRef src0, src1, src2;
1374   LLVMValueRef tmp0, tmp1, tmp2;
1375   LLVMValueRef tmp3 = NULL;
1376   LLVMValueRef tmp4 = NULL;
1377   LLVMValueRef tmp5 = NULL;
1378   LLVMValueRef tmp6 = NULL;
1379   LLVMValueRef tmp7 = NULL;
1380   LLVMValueRef res;
1381   LLVMValueRef dst0[NUM_CHANNELS];
1382
1383   /*
1384    * Stores and write masks are handled in a general fashion after the long
1385    * instruction opcode switch statement.
1386    *
1387    * Although not stricitly necessary, we avoid generating instructions for
1388    * channels which won't be stored, in cases where's that easy. For some
1389    * complex instructions, like texture sampling, it is more convenient to
1390    * assume a full writemask and then let LLVM optimization passes eliminate
1391    * redundant code.
1392    */
1393
1394   (*pc)++;
1395
1396   assert(info->num_dst <= 1);
1397   if (info->num_dst) {
1398      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1399         dst0[chan_index] = bld->base.undef;
1400      }
1401   }
1402
1403   switch (inst->Instruction.Opcode) {
1404   case TGSI_OPCODE_ARL:
1405      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1406         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1407         tmp0 = lp_build_floor(&bld->base, tmp0);
1408         dst0[chan_index] = tmp0;
1409      }
1410      break;
1411
1412   case TGSI_OPCODE_MOV:
1413      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1414         dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
1415      }
1416      break;
1417
1418   case TGSI_OPCODE_LIT:
1419      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
1420         dst0[CHAN_X] = bld->base.one;
1421      }
1422      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1423         src0 = emit_fetch( bld, inst, 0, CHAN_X );
1424         dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
1425      }
1426      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1427         /* XMM[1] = SrcReg[0].yyyy */
1428         tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1429         /* XMM[1] = max(XMM[1], 0) */
1430         tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1431         /* XMM[2] = SrcReg[0].wwww */
1432         tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1433         tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1434         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1435         tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1436         dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1437      }
1438      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1439         dst0[CHAN_W] = bld->base.one;
1440      }
1441      break;
1442
1443   case TGSI_OPCODE_RCP:
1444   /* TGSI_OPCODE_RECIP */
1445      src0 = emit_fetch( bld, inst, 0, CHAN_X );
1446      res = lp_build_rcp(&bld->base, src0);
1447      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1448         dst0[chan_index] = res;
1449      }
1450      break;
1451
1452   case TGSI_OPCODE_RSQ:
1453   /* TGSI_OPCODE_RECIPSQRT */
1454      src0 = emit_fetch( bld, inst, 0, CHAN_X );
1455      src0 = lp_build_abs(&bld->base, src0);
1456      res = lp_build_rsqrt(&bld->base, src0);
1457      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1458         dst0[chan_index] = res;
1459      }
1460      break;
1461
1462   case TGSI_OPCODE_EXP:
1463      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1464          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1465          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1466         LLVMValueRef *p_exp2_int_part = NULL;
1467         LLVMValueRef *p_frac_part = NULL;
1468         LLVMValueRef *p_exp2 = NULL;
1469
1470         src0 = emit_fetch( bld, inst, 0, CHAN_X );
1471
1472         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1473            p_exp2_int_part = &tmp0;
1474         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1475            p_frac_part = &tmp1;
1476         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1477            p_exp2 = &tmp2;
1478
1479         lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1480
1481         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1482            dst0[CHAN_X] = tmp0;
1483         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1484            dst0[CHAN_Y] = tmp1;
1485         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1486            dst0[CHAN_Z] = tmp2;
1487      }
1488      /* dst.w = 1.0 */
1489      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1490         dst0[CHAN_W] = bld->base.one;
1491      }
1492      break;
1493
1494   case TGSI_OPCODE_LOG:
1495      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1496          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1497          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1498         LLVMValueRef *p_floor_log2 = NULL;
1499         LLVMValueRef *p_exp = NULL;
1500         LLVMValueRef *p_log2 = NULL;
1501
1502         src0 = emit_fetch( bld, inst, 0, CHAN_X );
1503         src0 = lp_build_abs( &bld->base, src0 );
1504
1505         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1506            p_floor_log2 = &tmp0;
1507         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1508            p_exp = &tmp1;
1509         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1510            p_log2 = &tmp2;
1511
1512         lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1513
1514         /* dst.x = floor(lg2(abs(src.x))) */
1515         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1516            dst0[CHAN_X] = tmp0;
1517         /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1518         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1519            dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1520         }
1521         /* dst.z = lg2(abs(src.x)) */
1522         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1523            dst0[CHAN_Z] = tmp2;
1524      }
1525      /* dst.w = 1.0 */
1526      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1527         dst0[CHAN_W] = bld->base.one;
1528      }
1529      break;
1530
1531   case TGSI_OPCODE_MUL:
1532      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1533         src0 = emit_fetch( bld, inst, 0, chan_index );
1534         src1 = emit_fetch( bld, inst, 1, chan_index );
1535         dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1536      }
1537      break;
1538
1539   case TGSI_OPCODE_ADD:
1540      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1541         src0 = emit_fetch( bld, inst, 0, chan_index );
1542         src1 = emit_fetch( bld, inst, 1, chan_index );
1543         dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1544      }
1545      break;
1546
1547   case TGSI_OPCODE_DP3:
1548   /* TGSI_OPCODE_DOT3 */
1549      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1550      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1551      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1552      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1553      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1554      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1555      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1556      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1557      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1558      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1559      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1560      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1561         dst0[chan_index] = tmp0;
1562      }
1563      break;
1564
1565   case TGSI_OPCODE_DP4:
1566   /* TGSI_OPCODE_DOT4 */
1567      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1568      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1569      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1570      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1571      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1572      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1573      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1574      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1575      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1576      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1577      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1578      tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1579      tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1580      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1581      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1582      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1583         dst0[chan_index] = tmp0;
1584      }
1585      break;
1586
1587   case TGSI_OPCODE_DST:
1588      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1589         dst0[CHAN_X] = bld->base.one;
1590      }
1591      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1592         tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1593         tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1594         dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1595      }
1596      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1597         dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1598      }
1599      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1600         dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1601      }
1602      break;
1603
1604   case TGSI_OPCODE_MIN:
1605      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1606         src0 = emit_fetch( bld, inst, 0, chan_index );
1607         src1 = emit_fetch( bld, inst, 1, chan_index );
1608         dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1609      }
1610      break;
1611
1612   case TGSI_OPCODE_MAX:
1613      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1614         src0 = emit_fetch( bld, inst, 0, chan_index );
1615         src1 = emit_fetch( bld, inst, 1, chan_index );
1616         dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1617      }
1618      break;
1619
1620   case TGSI_OPCODE_SLT:
1621   /* TGSI_OPCODE_SETLT */
1622      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1623         src0 = emit_fetch( bld, inst, 0, chan_index );
1624         src1 = emit_fetch( bld, inst, 1, chan_index );
1625         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1626         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1627      }
1628      break;
1629
1630   case TGSI_OPCODE_SGE:
1631   /* TGSI_OPCODE_SETGE */
1632      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1633         src0 = emit_fetch( bld, inst, 0, chan_index );
1634         src1 = emit_fetch( bld, inst, 1, chan_index );
1635         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1636         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1637      }
1638      break;
1639
1640   case TGSI_OPCODE_MAD:
1641   /* TGSI_OPCODE_MADD */
1642      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1643         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1644         tmp1 = emit_fetch( bld, inst, 1, chan_index );
1645         tmp2 = emit_fetch( bld, inst, 2, chan_index );
1646         tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1647         tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1648         dst0[chan_index] = tmp0;
1649      }
1650      break;
1651
1652   case TGSI_OPCODE_SUB:
1653      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1654         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1655         tmp1 = emit_fetch( bld, inst, 1, chan_index );
1656         dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1657      }
1658      break;
1659
1660   case TGSI_OPCODE_LRP:
1661      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1662         src0 = emit_fetch( bld, inst, 0, chan_index );
1663         src1 = emit_fetch( bld, inst, 1, chan_index );
1664         src2 = emit_fetch( bld, inst, 2, chan_index );
1665         tmp0 = lp_build_sub( &bld->base, src1, src2 );
1666         tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1667         dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1668      }
1669      break;
1670
1671   case TGSI_OPCODE_CND:
1672      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1673         src0 = emit_fetch( bld, inst, 0, chan_index );
1674         src1 = emit_fetch( bld, inst, 1, chan_index );
1675         src2 = emit_fetch( bld, inst, 2, chan_index );
1676         tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type, 0.5);
1677         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1678         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1679      }
1680      break;
1681
1682   case TGSI_OPCODE_DP2A:
1683      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );  /* xmm0 = src[0].x */
1684      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );  /* xmm1 = src[1].x */
1685      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
1686      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );  /* xmm1 = src[0].y */
1687      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );  /* xmm2 = src[1].y */
1688      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
1689      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1690      tmp1 = emit_fetch( bld, inst, 2, CHAN_X );  /* xmm1 = src[2].x */
1691      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1692      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1693         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
1694      }
1695      break;
1696
1697   case TGSI_OPCODE_FRC:
1698      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1699         src0 = emit_fetch( bld, inst, 0, chan_index );
1700         tmp0 = lp_build_floor(&bld->base, src0);
1701         tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1702         dst0[chan_index] = tmp0;
1703      }
1704      break;
1705
1706   case TGSI_OPCODE_CLAMP:
1707      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1708         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1709         src1 = emit_fetch( bld, inst, 1, chan_index );
1710         src2 = emit_fetch( bld, inst, 2, chan_index );
1711         tmp0 = lp_build_max(&bld->base, tmp0, src1);
1712         tmp0 = lp_build_min(&bld->base, tmp0, src2);
1713         dst0[chan_index] = tmp0;
1714      }
1715      break;
1716
1717   case TGSI_OPCODE_FLR:
1718      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1719         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1720         dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1721      }
1722      break;
1723
1724   case TGSI_OPCODE_ROUND:
1725      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1726         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1727         dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1728      }
1729      break;
1730
1731   case TGSI_OPCODE_EX2: {
1732      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1733      tmp0 = lp_build_exp2( &bld->base, tmp0);
1734      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1735         dst0[chan_index] = tmp0;
1736      }
1737      break;
1738   }
1739
1740   case TGSI_OPCODE_LG2:
1741      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1742      tmp0 = lp_build_log2( &bld->base, tmp0);
1743      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1744         dst0[chan_index] = tmp0;
1745      }
1746      break;
1747
1748   case TGSI_OPCODE_POW:
1749      src0 = emit_fetch( bld, inst, 0, CHAN_X );
1750      src1 = emit_fetch( bld, inst, 1, CHAN_X );
1751      res = lp_build_pow( &bld->base, src0, src1 );
1752      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1753         dst0[chan_index] = res;
1754      }
1755      break;
1756
1757   case TGSI_OPCODE_XPD:
1758      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1759          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1760         tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1761         tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1762      }
1763      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1764          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1765         tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1766         tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1767      }
1768      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1769         tmp2 = tmp0;
1770         tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1771         tmp5 = tmp3;
1772         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1773         tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1774         dst0[CHAN_X] = tmp2;
1775      }
1776      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1777          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1778         tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1779         tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1780      }
1781      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1782         tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1783         tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1784         tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1785         dst0[CHAN_Y] = tmp3;
1786      }
1787      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1788         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1789         tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1790         tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1791         dst0[CHAN_Z] = tmp5;
1792      }
1793      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1794         dst0[CHAN_W] = bld->base.one;
1795      }
1796      break;
1797
1798   case TGSI_OPCODE_ABS:
1799      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1800         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1801         dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1802      }
1803      break;
1804
1805   case TGSI_OPCODE_RCC:
1806      /* deprecated? */
1807      assert(0);
1808      return FALSE;
1809
1810   case TGSI_OPCODE_DPH:
1811      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1812      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1813      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1814      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1815      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1816      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1817      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1818      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1819      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1820      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1821      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1822      tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1823      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1824      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1825         dst0[chan_index] = tmp0;
1826      }
1827      break;
1828
1829   case TGSI_OPCODE_COS:
1830      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1831      tmp0 = lp_build_cos( &bld->base, tmp0 );
1832      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1833         dst0[chan_index] = tmp0;
1834      }
1835      break;
1836
1837   case TGSI_OPCODE_DDX:
1838      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1839         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1840      }
1841      break;
1842
1843   case TGSI_OPCODE_DDY:
1844      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1845         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1846      }
1847      break;
1848
1849   case TGSI_OPCODE_KILP:
1850      /* predicated kill */
1851      emit_kilp( bld, inst, (*pc)-1 );
1852      break;
1853
1854   case TGSI_OPCODE_KIL:
1855      /* conditional kill */
1856      emit_kil( bld, inst, (*pc)-1 );
1857      break;
1858
1859   case TGSI_OPCODE_PK2H:
1860      return FALSE;
1861      break;
1862
1863   case TGSI_OPCODE_PK2US:
1864      return FALSE;
1865      break;
1866
1867   case TGSI_OPCODE_PK4B:
1868      return FALSE;
1869      break;
1870
1871   case TGSI_OPCODE_PK4UB:
1872      return FALSE;
1873      break;
1874
1875   case TGSI_OPCODE_RFL:
1876      return FALSE;
1877      break;
1878
1879   case TGSI_OPCODE_SEQ:
1880      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1881         src0 = emit_fetch( bld, inst, 0, chan_index );
1882         src1 = emit_fetch( bld, inst, 1, chan_index );
1883         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1884         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1885      }
1886      break;
1887
1888   case TGSI_OPCODE_SFL:
1889      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1890         dst0[chan_index] = bld->base.zero;
1891      }
1892      break;
1893
1894   case TGSI_OPCODE_SGT:
1895      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1896         src0 = emit_fetch( bld, inst, 0, chan_index );
1897         src1 = emit_fetch( bld, inst, 1, chan_index );
1898         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1899         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1900      }
1901      break;
1902
1903   case TGSI_OPCODE_SIN:
1904      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1905      tmp0 = lp_build_sin( &bld->base, tmp0 );
1906      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1907         dst0[chan_index] = tmp0;
1908      }
1909      break;
1910
1911   case TGSI_OPCODE_SLE:
1912      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1913         src0 = emit_fetch( bld, inst, 0, chan_index );
1914         src1 = emit_fetch( bld, inst, 1, chan_index );
1915         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1916         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1917      }
1918      break;
1919
1920   case TGSI_OPCODE_SNE:
1921      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1922         src0 = emit_fetch( bld, inst, 0, chan_index );
1923         src1 = emit_fetch( bld, inst, 1, chan_index );
1924         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1925         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1926      }
1927      break;
1928
1929   case TGSI_OPCODE_STR:
1930      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1931         dst0[chan_index] = bld->base.one;
1932      }
1933      break;
1934
1935   case TGSI_OPCODE_TEX:
1936      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 );
1937      break;
1938
1939   case TGSI_OPCODE_TXD:
1940      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1941      break;
1942
1943   case TGSI_OPCODE_UP2H:
1944      /* deprecated */
1945      assert (0);
1946      return FALSE;
1947      break;
1948
1949   case TGSI_OPCODE_UP2US:
1950      /* deprecated */
1951      assert(0);
1952      return FALSE;
1953      break;
1954
1955   case TGSI_OPCODE_UP4B:
1956      /* deprecated */
1957      assert(0);
1958      return FALSE;
1959      break;
1960
1961   case TGSI_OPCODE_UP4UB:
1962      /* deprecated */
1963      assert(0);
1964      return FALSE;
1965      break;
1966
1967   case TGSI_OPCODE_X2D:
1968      /* deprecated? */
1969      assert(0);
1970      return FALSE;
1971      break;
1972
1973   case TGSI_OPCODE_ARA:
1974      /* deprecated */
1975      assert(0);
1976      return FALSE;
1977      break;
1978
1979   case TGSI_OPCODE_ARR:
1980      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1981         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1982         tmp0 = lp_build_round(&bld->base, tmp0);
1983         dst0[chan_index] = tmp0;
1984      }
1985      break;
1986
1987   case TGSI_OPCODE_BRA:
1988      /* deprecated */
1989      assert(0);
1990      return FALSE;
1991      break;
1992
1993   case TGSI_OPCODE_CAL:
1994      lp_exec_mask_call(&bld->exec_mask,
1995                        inst->Label.Label,
1996                        pc);
1997
1998      break;
1999
2000   case TGSI_OPCODE_RET:
2001      lp_exec_mask_ret(&bld->exec_mask, pc);
2002      break;
2003
2004   case TGSI_OPCODE_END:
2005      if (0) {
2006         /* for debugging */
2007         emit_dump_temps(bld);
2008      }
2009      *pc = -1;
2010      break;
2011
2012   case TGSI_OPCODE_SSG:
2013   /* TGSI_OPCODE_SGN */
2014      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2015         tmp0 = emit_fetch( bld, inst, 0, chan_index );
2016         dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
2017      }
2018      break;
2019
2020   case TGSI_OPCODE_CMP:
2021      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2022         src0 = emit_fetch( bld, inst, 0, chan_index );
2023         src1 = emit_fetch( bld, inst, 1, chan_index );
2024         src2 = emit_fetch( bld, inst, 2, chan_index );
2025         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
2026         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
2027      }
2028      break;
2029
2030   case TGSI_OPCODE_SCS:
2031      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
2032         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
2033         dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
2034      }
2035      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
2036         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
2037         dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
2038      }
2039      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
2040         dst0[CHAN_Z] = bld->base.zero;
2041      }
2042      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
2043         dst0[CHAN_W] = bld->base.one;
2044      }
2045      break;
2046
2047   case TGSI_OPCODE_TXB:
2048      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 );
2049      break;
2050
2051   case TGSI_OPCODE_NRM:
2052      /* fall-through */
2053   case TGSI_OPCODE_NRM4:
2054      /* 3 or 4-component normalization */
2055      {
2056         uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
2057
2058         if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
2059             IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
2060             IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
2061             (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
2062
2063            /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
2064
2065            /* xmm4 = src.x */
2066            /* xmm0 = src.x * src.x */
2067            tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
2068            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
2069               tmp4 = tmp0;
2070            }
2071            tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
2072
2073            /* xmm5 = src.y */
2074            /* xmm0 = xmm0 + src.y * src.y */
2075            tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
2076            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
2077               tmp5 = tmp1;
2078            }
2079            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2080            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2081
2082            /* xmm6 = src.z */
2083            /* xmm0 = xmm0 + src.z * src.z */
2084            tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
2085            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
2086               tmp6 = tmp1;
2087            }
2088            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2089            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2090
2091            if (dims == 4) {
2092               /* xmm7 = src.w */
2093               /* xmm0 = xmm0 + src.w * src.w */
2094               tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
2095               if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
2096                  tmp7 = tmp1;
2097               }
2098               tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2099               tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2100            }
2101
2102            /* xmm1 = 1 / sqrt(xmm0) */
2103            tmp1 = lp_build_rsqrt( &bld->base, tmp0);
2104
2105            /* dst.x = xmm1 * src.x */
2106            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
2107               dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
2108            }
2109
2110            /* dst.y = xmm1 * src.y */
2111            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
2112               dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
2113            }
2114
2115            /* dst.z = xmm1 * src.z */
2116            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
2117               dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
2118            }
2119
2120            /* dst.w = xmm1 * src.w */
2121            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
2122               dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
2123            }
2124         }
2125
2126         /* dst.w = 1.0 */
2127         if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
2128            dst0[CHAN_W] = bld->base.one;
2129         }
2130      }
2131      break;
2132
2133   case TGSI_OPCODE_DIV:
2134      /* deprecated */
2135      assert( 0 );
2136      return FALSE;
2137      break;
2138
2139   case TGSI_OPCODE_DP2:
2140      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );  /* xmm0 = src[0].x */
2141      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );  /* xmm1 = src[1].x */
2142      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
2143      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );  /* xmm1 = src[0].y */
2144      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );  /* xmm2 = src[1].y */
2145      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
2146      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
2147      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2148         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
2149      }
2150      break;
2151
2152   case TGSI_OPCODE_TXL:
2153      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 );
2154      break;
2155
2156   case TGSI_OPCODE_TXP:
2157      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 );
2158      break;
2159
2160   case TGSI_OPCODE_BRK:
2161      lp_exec_break(&bld->exec_mask);
2162      break;
2163
2164   case TGSI_OPCODE_IF:
2165      tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
2166      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
2167                          tmp0, bld->base.zero);
2168      lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
2169      break;
2170
2171   case TGSI_OPCODE_BGNLOOP:
2172      lp_exec_bgnloop(&bld->exec_mask);
2173      break;
2174
2175   case TGSI_OPCODE_BGNSUB:
2176      lp_exec_mask_bgnsub(&bld->exec_mask);
2177      break;
2178
2179   case TGSI_OPCODE_ELSE:
2180      lp_exec_mask_cond_invert(&bld->exec_mask);
2181      break;
2182
2183   case TGSI_OPCODE_ENDIF:
2184      lp_exec_mask_cond_pop(&bld->exec_mask);
2185      break;
2186
2187   case TGSI_OPCODE_ENDLOOP:
2188      lp_exec_endloop(bld->base.gallivm, &bld->exec_mask);
2189      break;
2190
2191   case TGSI_OPCODE_ENDSUB:
2192      lp_exec_mask_endsub(&bld->exec_mask, pc);
2193      break;
2194
2195   case TGSI_OPCODE_PUSHA:
2196      /* deprecated? */
2197      assert(0);
2198      return FALSE;
2199      break;
2200
2201   case TGSI_OPCODE_POPA:
2202      /* deprecated? */
2203      assert(0);
2204      return FALSE;
2205      break;
2206
2207   case TGSI_OPCODE_CEIL:
2208      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2209         tmp0 = emit_fetch( bld, inst, 0, chan_index );
2210         dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
2211      }
2212      break;
2213
2214   case TGSI_OPCODE_I2F:
2215      /* deprecated? */
2216      assert(0);
2217      return FALSE;
2218      break;
2219
2220   case TGSI_OPCODE_NOT:
2221      /* deprecated? */
2222      assert(0);
2223      return FALSE;
2224      break;
2225
2226   case TGSI_OPCODE_TRUNC:
2227      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2228         tmp0 = emit_fetch( bld, inst, 0, chan_index );
2229         dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
2230      }
2231      break;
2232
2233   case TGSI_OPCODE_SHL:
2234      /* deprecated? */
2235      assert(0);
2236      return FALSE;
2237      break;
2238
2239   case TGSI_OPCODE_ISHR:
2240      /* deprecated? */
2241      assert(0);
2242      return FALSE;
2243      break;
2244
2245   case TGSI_OPCODE_AND:
2246      /* deprecated? */
2247      assert(0);
2248      return FALSE;
2249      break;
2250
2251   case TGSI_OPCODE_OR:
2252      /* deprecated? */
2253      assert(0);
2254      return FALSE;
2255      break;
2256
2257   case TGSI_OPCODE_MOD:
2258      /* deprecated? */
2259      assert(0);
2260      return FALSE;
2261      break;
2262
2263   case TGSI_OPCODE_XOR:
2264      /* deprecated? */
2265      assert(0);
2266      return FALSE;
2267      break;
2268
2269   case TGSI_OPCODE_SAD:
2270      /* deprecated? */
2271      assert(0);
2272      return FALSE;
2273      break;
2274
2275   case TGSI_OPCODE_TXF:
2276      /* deprecated? */
2277      assert(0);
2278      return FALSE;
2279      break;
2280
2281   case TGSI_OPCODE_TXQ:
2282      /* deprecated? */
2283      assert(0);
2284      return FALSE;
2285      break;
2286
2287   case TGSI_OPCODE_CONT:
2288      lp_exec_continue(&bld->exec_mask);
2289      break;
2290
2291   case TGSI_OPCODE_EMIT:
2292      return FALSE;
2293      break;
2294
2295   case TGSI_OPCODE_ENDPRIM:
2296      return FALSE;
2297      break;
2298
2299   case TGSI_OPCODE_NOP:
2300      break;
2301
2302   default:
2303      return FALSE;
2304   }
2305
2306   if(info->num_dst) {
2307      LLVMValueRef pred[NUM_CHANNELS];
2308
2309      emit_fetch_predicate( bld, inst, pred );
2310
2311      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2312         emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
2313      }
2314   }
2315
2316   return TRUE;
2317}
2318
2319
2320void
2321lp_build_tgsi_soa(struct gallivm_state *gallivm,
2322                  const struct tgsi_token *tokens,
2323                  struct lp_type type,
2324                  struct lp_build_mask_context *mask,
2325                  LLVMValueRef consts_ptr,
2326                  const LLVMValueRef *pos,
2327                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
2328                  LLVMValueRef (*outputs)[NUM_CHANNELS],
2329                  struct lp_build_sampler_soa *sampler,
2330                  const struct tgsi_shader_info *info)
2331{
2332   struct lp_build_tgsi_soa_context bld;
2333   struct tgsi_parse_context parse;
2334   uint num_immediates = 0;
2335   uint num_instructions = 0;
2336   unsigned i;
2337   int pc = 0;
2338
2339   struct lp_type res_type;
2340
2341   assert(type.length <= LP_MAX_VECTOR_LENGTH);
2342   memset(&res_type, 0, sizeof res_type);
2343   res_type.width = type.width;
2344   res_type.length = type.length;
2345   res_type.sign = 1;
2346
2347   /* Setup build context */
2348   memset(&bld, 0, sizeof bld);
2349   lp_build_context_init(&bld.base, gallivm, type);
2350   lp_build_context_init(&bld.uint_bld, gallivm, lp_uint_type(type));
2351   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
2352   bld.mask = mask;
2353   bld.pos = pos;
2354   bld.inputs = inputs;
2355   bld.outputs = outputs;
2356   bld.consts_ptr = consts_ptr;
2357   bld.sampler = sampler;
2358   bld.info = info;
2359   bld.indirect_files = info->indirect_files;
2360   bld.instructions = (struct tgsi_full_instruction *)
2361                      MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
2362   bld.max_instructions = LP_MAX_INSTRUCTIONS;
2363
2364   if (!bld.instructions) {
2365      return;
2366   }
2367
2368   lp_exec_mask_init(&bld.exec_mask, &bld.base);
2369
2370   if (bld.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
2371      LLVMValueRef array_size =
2372         lp_build_const_int32(gallivm,
2373                              info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
2374      bld.temps_array = lp_build_array_alloca(gallivm,
2375                                              bld.base.vec_type, array_size,
2376                                              "temp_array");
2377   }
2378
2379   if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2380      LLVMValueRef array_size =
2381         lp_build_const_int32(gallivm,
2382                              info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
2383      bld.outputs_array = lp_build_array_alloca(gallivm,
2384                                                bld.base.vec_type, array_size,
2385                                                "output_array");
2386   }
2387
2388   /* If we have indirect addressing in inputs we need to copy them into
2389    * our alloca array to be able to iterate over them */
2390   if (bld.indirect_files & (1 << TGSI_FILE_INPUT)) {
2391      unsigned index, chan;
2392      LLVMTypeRef vec_type = bld.base.vec_type;
2393      LLVMValueRef array_size =
2394         lp_build_const_int32(gallivm, info->file_max[TGSI_FILE_INPUT]*4 + 4);
2395      bld.inputs_array = lp_build_array_alloca(gallivm,
2396                                               vec_type, array_size,
2397                                               "input_array");
2398
2399      assert(info->num_inputs <= info->file_max[TGSI_FILE_INPUT] + 1);
2400
2401      for (index = 0; index < info->num_inputs; ++index) {
2402         for (chan = 0; chan < NUM_CHANNELS; ++chan) {
2403            LLVMValueRef lindex =
2404               lp_build_const_int32(gallivm, index * 4 + chan);
2405            LLVMValueRef input_ptr =
2406               LLVMBuildGEP(gallivm->builder, bld.inputs_array,
2407                            &lindex, 1, "");
2408            LLVMValueRef value = bld.inputs[index][chan];
2409            if (value)
2410               LLVMBuildStore(gallivm->builder, value, input_ptr);
2411         }
2412      }
2413   }
2414
2415   tgsi_parse_init( &parse, tokens );
2416
2417   while( !tgsi_parse_end_of_tokens( &parse ) ) {
2418      tgsi_parse_token( &parse );
2419
2420      switch( parse.FullToken.Token.Type ) {
2421      case TGSI_TOKEN_TYPE_DECLARATION:
2422         /* Inputs already interpolated */
2423         emit_declaration( &bld, &parse.FullToken.FullDeclaration );
2424         break;
2425
2426      case TGSI_TOKEN_TYPE_INSTRUCTION:
2427         {
2428            /* save expanded instruction */
2429            if (num_instructions == bld.max_instructions) {
2430               struct tgsi_full_instruction *instructions;
2431               instructions = REALLOC(bld.instructions,
2432                                      bld.max_instructions
2433                                      * sizeof(struct tgsi_full_instruction),
2434                                      (bld.max_instructions + LP_MAX_INSTRUCTIONS)
2435                                      * sizeof(struct tgsi_full_instruction));
2436               if (!instructions) {
2437                  break;
2438               }
2439               bld.instructions = instructions;
2440               bld.max_instructions += LP_MAX_INSTRUCTIONS;
2441            }
2442
2443            memcpy(bld.instructions + num_instructions,
2444                   &parse.FullToken.FullInstruction,
2445                   sizeof(bld.instructions[0]));
2446
2447            num_instructions++;
2448         }
2449
2450         break;
2451
2452      case TGSI_TOKEN_TYPE_IMMEDIATE:
2453         /* simply copy the immediate values into the next immediates[] slot */
2454         {
2455            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
2456            assert(size <= 4);
2457            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
2458            for( i = 0; i < size; ++i )
2459               bld.immediates[num_immediates][i] =
2460                  lp_build_const_vec(gallivm, type, parse.FullToken.FullImmediate.u[i].Float);
2461            for( i = size; i < 4; ++i )
2462               bld.immediates[num_immediates][i] = bld.base.undef;
2463            num_immediates++;
2464         }
2465         break;
2466
2467      case TGSI_TOKEN_TYPE_PROPERTY:
2468         break;
2469
2470      default:
2471         assert( 0 );
2472      }
2473   }
2474
2475   while (pc != -1) {
2476      struct tgsi_full_instruction *instr = bld.instructions + pc;
2477      const struct tgsi_opcode_info *opcode_info =
2478         tgsi_get_opcode_info(instr->Instruction.Opcode);
2479      if (!emit_instruction( &bld, instr, opcode_info, &pc ))
2480         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
2481                       opcode_info->mnemonic);
2482   }
2483
2484   /* If we have indirect addressing in outputs we need to copy our alloca array
2485    * to the outputs slots specified by the called */
2486   if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2487      unsigned index, chan;
2488      assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] + 1);
2489      for (index = 0; index < info->num_outputs; ++index) {
2490         for (chan = 0; chan < NUM_CHANNELS; ++chan) {
2491            bld.outputs[index][chan] = get_output_ptr(&bld, index, chan);
2492         }
2493      }
2494   }
2495
2496   if (0) {
2497      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
2498      LLVMValueRef function = LLVMGetBasicBlockParent(block);
2499      debug_printf("11111111111111111111111111111 \n");
2500      tgsi_dump(tokens, 0);
2501      lp_debug_dump_value(function);
2502      debug_printf("2222222222222222222222222222 \n");
2503   }
2504   tgsi_parse_free( &parse );
2505
2506   if (0) {
2507      LLVMModuleRef module = LLVMGetGlobalParent(
2508         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
2509      LLVMDumpModule(module);
2510
2511   }
2512
2513   FREE( bld.instructions );
2514}
2515
2516