lp_bld_tgsi_soa.c revision 82b71db03ddaf0eed504412c9169db37cf9bdadc
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29/**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39#include "pipe/p_config.h"
40#include "pipe/p_shader_tokens.h"
41#include "util/u_debug.h"
42#include "util/u_math.h"
43#include "util/u_memory.h"
44#include "tgsi/tgsi_dump.h"
45#include "tgsi/tgsi_exec.h"
46#include "tgsi/tgsi_info.h"
47#include "tgsi/tgsi_parse.h"
48#include "tgsi/tgsi_util.h"
49#include "tgsi/tgsi_scan.h"
50#include "lp_bld_type.h"
51#include "lp_bld_const.h"
52#include "lp_bld_arit.h"
53#include "lp_bld_bitarit.h"
54#include "lp_bld_gather.h"
55#include "lp_bld_init.h"
56#include "lp_bld_logic.h"
57#include "lp_bld_swizzle.h"
58#include "lp_bld_flow.h"
59#include "lp_bld_quad.h"
60#include "lp_bld_tgsi.h"
61#include "lp_bld_limits.h"
62#include "lp_bld_debug.h"
63#include "lp_bld_printf.h"
64
65
/** Number of channels per register; sizes the per-register channel arrays. */
#define NUM_CHANNELS 4

/* NOTE(review): presumably the initial capacity of the instruction array
 * kept in struct lp_build_tgsi_soa_context -- confirm against its users.
 */
#define LP_MAX_INSTRUCTIONS 256
69
70
71struct lp_exec_mask {
72   struct lp_build_context *bld;
73
74   boolean has_mask;
75
76   LLVMTypeRef int_vec_type;
77
78   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
79   int cond_stack_size;
80   LLVMValueRef cond_mask;
81
82   LLVMBasicBlockRef loop_block;
83   LLVMValueRef cont_mask;
84   LLVMValueRef break_mask;
85   LLVMValueRef break_var;
86   struct {
87      LLVMBasicBlockRef loop_block;
88      LLVMValueRef cont_mask;
89      LLVMValueRef break_mask;
90      LLVMValueRef break_var;
91   } loop_stack[LP_MAX_TGSI_NESTING];
92   int loop_stack_size;
93
94   LLVMValueRef ret_mask;
95   struct {
96      int pc;
97      LLVMValueRef ret_mask;
98   } call_stack[LP_MAX_TGSI_NESTING];
99   int call_stack_size;
100
101   LLVMValueRef exec_mask;
102};
103
104struct lp_build_tgsi_soa_context
105{
106   struct lp_build_context base;
107
108   /* Builder for vector integer masks and indices */
109   struct lp_build_context uint_bld;
110
111   /* Builder for scalar elements of shader's data type (float) */
112   struct lp_build_context elem_bld;
113
114   LLVMValueRef consts_ptr;
115   const LLVMValueRef *pos;
116   const LLVMValueRef (*inputs)[NUM_CHANNELS];
117   LLVMValueRef (*outputs)[NUM_CHANNELS];
118
119   const struct lp_build_sampler_soa *sampler;
120
121   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
122   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
123   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
124   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];
125
126   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
127    * set in the indirect_files field.
128    * The temps[] array above is unused then.
129    */
130   LLVMValueRef temps_array;
131
132   /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is
133    * set in the indirect_files field.
134    * The outputs[] array above is unused then.
135    */
136   LLVMValueRef outputs_array;
137
138   /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is
139    * set in the indirect_files field.
140    * The inputs[] array above is unused then.
141    */
142   LLVMValueRef inputs_array;
143
144   LLVMValueRef system_values_array;
145
146   const struct tgsi_shader_info *info;
147   /** bitmask indicating which register files are accessed indirectly */
148   unsigned indirect_files;
149
150   struct lp_build_mask_context *mask;
151   struct lp_exec_mask exec_mask;
152
153   struct tgsi_full_instruction *instructions;
154   uint max_instructions;
155};
156
157static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
158{
159   mask->bld = bld;
160   mask->has_mask = FALSE;
161   mask->cond_stack_size = 0;
162   mask->loop_stack_size = 0;
163   mask->call_stack_size = 0;
164
165   mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
166   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
167         LLVMConstAllOnes(mask->int_vec_type);
168}
169
170static void lp_exec_mask_update(struct lp_exec_mask *mask)
171{
172   LLVMBuilderRef builder = mask->bld->gallivm->builder;
173
174   if (mask->loop_stack_size) {
175      /*for loops we need to update the entire mask at runtime */
176      LLVMValueRef tmp;
177      assert(mask->break_mask);
178      tmp = LLVMBuildAnd(builder,
179                         mask->cont_mask,
180                         mask->break_mask,
181                         "maskcb");
182      mask->exec_mask = LLVMBuildAnd(builder,
183                                     mask->cond_mask,
184                                     tmp,
185                                     "maskfull");
186   } else
187      mask->exec_mask = mask->cond_mask;
188
189   if (mask->call_stack_size) {
190      mask->exec_mask = LLVMBuildAnd(builder,
191                                     mask->exec_mask,
192                                     mask->ret_mask,
193                                     "callmask");
194   }
195
196   mask->has_mask = (mask->cond_stack_size > 0 ||
197                     mask->loop_stack_size > 0 ||
198                     mask->call_stack_size > 0);
199}
200
201static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
202                                   LLVMValueRef val)
203{
204   LLVMBuilderRef builder = mask->bld->gallivm->builder;
205
206   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
207   if (mask->cond_stack_size == 0) {
208      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
209   }
210   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
211   assert(LLVMTypeOf(val) == mask->int_vec_type);
212   mask->cond_mask = LLVMBuildAnd(builder,
213                                  mask->cond_mask,
214                                  val,
215                                  "");
216   lp_exec_mask_update(mask);
217}
218
219static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
220{
221   LLVMBuilderRef builder = mask->bld->gallivm->builder;
222   LLVMValueRef prev_mask;
223   LLVMValueRef inv_mask;
224
225   assert(mask->cond_stack_size);
226   prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
227   if (mask->cond_stack_size == 1) {
228      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
229   }
230
231   inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
232
233   mask->cond_mask = LLVMBuildAnd(builder,
234                                  inv_mask,
235                                  prev_mask, "");
236   lp_exec_mask_update(mask);
237}
238
239static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
240{
241   assert(mask->cond_stack_size);
242   mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
243   lp_exec_mask_update(mask);
244}
245
246static void lp_exec_bgnloop(struct lp_exec_mask *mask)
247{
248   LLVMBuilderRef builder = mask->bld->gallivm->builder;
249
250   if (mask->loop_stack_size == 0) {
251      assert(mask->loop_block == NULL);
252      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
253      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
254      assert(mask->break_var == NULL);
255   }
256
257   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
258
259   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
260   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
261   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
262   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
263   ++mask->loop_stack_size;
264
265   mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
266   LLVMBuildStore(builder, mask->break_mask, mask->break_var);
267
268   mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
269   LLVMBuildBr(builder, mask->loop_block);
270   LLVMPositionBuilderAtEnd(builder, mask->loop_block);
271
272   mask->break_mask = LLVMBuildLoad(builder, mask->break_var, "");
273
274   lp_exec_mask_update(mask);
275}
276
277static void lp_exec_break(struct lp_exec_mask *mask)
278{
279   LLVMBuilderRef builder = mask->bld->gallivm->builder;
280   LLVMValueRef exec_mask = LLVMBuildNot(builder,
281                                         mask->exec_mask,
282                                         "break");
283
284   mask->break_mask = LLVMBuildAnd(builder,
285                                   mask->break_mask,
286                                   exec_mask, "break_full");
287
288   lp_exec_mask_update(mask);
289}
290
291static void lp_exec_continue(struct lp_exec_mask *mask)
292{
293   LLVMBuilderRef builder = mask->bld->gallivm->builder;
294   LLVMValueRef exec_mask = LLVMBuildNot(builder,
295                                         mask->exec_mask,
296                                         "");
297
298   mask->cont_mask = LLVMBuildAnd(builder,
299                                  mask->cont_mask,
300                                  exec_mask, "");
301
302   lp_exec_mask_update(mask);
303}
304
305
306static void lp_exec_endloop(struct gallivm_state *gallivm,
307                            struct lp_exec_mask *mask)
308{
309   LLVMBuilderRef builder = mask->bld->gallivm->builder;
310   LLVMBasicBlockRef endloop;
311   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
312                                               mask->bld->type.width *
313                                               mask->bld->type.length);
314   LLVMValueRef i1cond;
315
316   assert(mask->break_mask);
317
318   /*
319    * Restore the cont_mask, but don't pop
320    */
321   assert(mask->loop_stack_size);
322   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
323   lp_exec_mask_update(mask);
324
325   /*
326    * Unlike the continue mask, the break_mask must be preserved across loop
327    * iterations
328    */
329   LLVMBuildStore(builder, mask->break_mask, mask->break_var);
330
331   /* i1cond = (mask == 0) */
332   i1cond = LLVMBuildICmp(
333      builder,
334      LLVMIntNE,
335      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
336      LLVMConstNull(reg_type), "");
337
338   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
339
340   LLVMBuildCondBr(builder,
341                   i1cond, mask->loop_block, endloop);
342
343   LLVMPositionBuilderAtEnd(builder, endloop);
344
345   assert(mask->loop_stack_size);
346   --mask->loop_stack_size;
347   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
348   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
349   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
350   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
351
352   lp_exec_mask_update(mask);
353}
354
355/* stores val into an address pointed to by dst.
356 * mask->exec_mask is used to figure out which bits of val
357 * should be stored into the address
358 * (0 means don't store this bit, 1 means do store).
359 */
360static void lp_exec_mask_store(struct lp_exec_mask *mask,
361                               LLVMValueRef pred,
362                               LLVMValueRef val,
363                               LLVMValueRef dst)
364{
365   LLVMBuilderRef builder = mask->bld->gallivm->builder;
366
367   /* Mix the predicate and execution mask */
368   if (mask->has_mask) {
369      if (pred) {
370         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
371      } else {
372         pred = mask->exec_mask;
373      }
374   }
375
376   if (pred) {
377      LLVMValueRef real_val, dst_val;
378
379      dst_val = LLVMBuildLoad(builder, dst, "");
380      real_val = lp_build_select(mask->bld,
381                                 pred,
382                                 val, dst_val);
383
384      LLVMBuildStore(builder, real_val, dst);
385   } else
386      LLVMBuildStore(builder, val, dst);
387}
388
389static void lp_exec_mask_call(struct lp_exec_mask *mask,
390                              int func,
391                              int *pc)
392{
393   assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
394   mask->call_stack[mask->call_stack_size].pc = *pc;
395   mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
396   mask->call_stack_size++;
397   *pc = func;
398}
399
400static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
401{
402   LLVMBuilderRef builder = mask->bld->gallivm->builder;
403   LLVMValueRef exec_mask;
404
405   if (mask->call_stack_size == 0) {
406      /* returning from main() */
407      *pc = -1;
408      return;
409   }
410   exec_mask = LLVMBuildNot(builder,
411                            mask->exec_mask,
412                            "ret");
413
414   mask->ret_mask = LLVMBuildAnd(builder,
415                                 mask->ret_mask,
416                                 exec_mask, "ret_full");
417
418   lp_exec_mask_update(mask);
419}
420
/**
 * Begin a subroutine. Intentionally empty: no mask state changes here.
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
   (void) mask;
}
424
425static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
426{
427   assert(mask->call_stack_size);
428   mask->call_stack_size--;
429   *pc = mask->call_stack[mask->call_stack_size].pc;
430   mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
431   lp_exec_mask_update(mask);
432}
433
434
435/**
436 * Return pointer to a temporary register channel (src or dest).
437 * Note that indirect addressing cannot be handled here.
438 * \param index  which temporary register
439 * \param chan  which channel of the temp register.
440 */
441static LLVMValueRef
442get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
443             unsigned index,
444             unsigned chan)
445{
446   LLVMBuilderRef builder = bld->base.gallivm->builder;
447   assert(chan < 4);
448   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
449      LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm, index * 4 + chan);
450      return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, "");
451   }
452   else {
453      return bld->temps[index][chan];
454   }
455}
456
457/**
458 * Return pointer to a output register channel (src or dest).
459 * Note that indirect addressing cannot be handled here.
460 * \param index  which output register
461 * \param chan  which channel of the output register.
462 */
463static LLVMValueRef
464get_output_ptr(struct lp_build_tgsi_soa_context *bld,
465               unsigned index,
466               unsigned chan)
467{
468   LLVMBuilderRef builder = bld->base.gallivm->builder;
469   assert(chan < 4);
470   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
471      LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm,
472                                                 index * 4 + chan);
473      return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, "");
474   }
475   else {
476      return bld->outputs[index][chan];
477   }
478}
479
480/**
481 * Gather vector.
482 * XXX the lp_build_gather() function should be capable of doing this
483 * with a little work.
484 */
485static LLVMValueRef
486build_gather(struct lp_build_tgsi_soa_context *bld,
487             LLVMValueRef base_ptr,
488             LLVMValueRef indexes)
489{
490   LLVMBuilderRef builder = bld->base.gallivm->builder;
491   LLVMValueRef res = bld->base.undef;
492   unsigned i;
493
494   /*
495    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
496    */
497   for (i = 0; i < bld->base.type.length; i++) {
498      LLVMValueRef ii = lp_build_const_int32(bld->base.gallivm, i);
499      LLVMValueRef index = LLVMBuildExtractElement(builder,
500                                                   indexes, ii, "");
501      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
502                                             &index, 1, "gather_ptr");
503      LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, "");
504
505      res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
506   }
507
508   return res;
509}
510
511
512/**
513 * Scatter/store vector.
514 */
515static void
516emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
517                  LLVMValueRef base_ptr,
518                  LLVMValueRef indexes,
519                  LLVMValueRef values,
520                  struct lp_exec_mask *mask,
521                  LLVMValueRef pred)
522{
523   struct gallivm_state *gallivm = bld->base.gallivm;
524   LLVMBuilderRef builder = gallivm->builder;
525   unsigned i;
526
527   /* Mix the predicate and execution mask */
528   if (mask->has_mask) {
529      if (pred) {
530         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
531      }
532      else {
533         pred = mask->exec_mask;
534      }
535   }
536
537   /*
538    * Loop over elements of index_vec, store scalar value.
539    */
540   for (i = 0; i < bld->base.type.length; i++) {
541      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
542      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
543      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
544      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
545      LLVMValueRef scalar_pred = pred ?
546         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
547
548      if (0)
549         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
550                         ii, val, index, scalar_ptr);
551
552      if (scalar_pred) {
553         LLVMValueRef real_val, dst_val;
554         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
555         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
556         LLVMBuildStore(builder, real_val, scalar_ptr);
557      }
558      else {
559         LLVMBuildStore(builder, val, scalar_ptr);
560      }
561   }
562}
563
564
565/**
566 * Read the current value of the ADDR register, convert the floats to
567 * ints, add the base index and return the vector of offsets.
568 * The offsets will be used to index into the constant buffer or
569 * temporary register file.
570 */
571static LLVMValueRef
572get_indirect_index(struct lp_build_tgsi_soa_context *bld,
573                   unsigned reg_file, unsigned reg_index,
574                   const struct tgsi_src_register *indirect_reg)
575{
576   LLVMBuilderRef builder = bld->base.gallivm->builder;
577   struct lp_build_context *uint_bld = &bld->uint_bld;
578   /* always use X component of address register */
579   unsigned swizzle = indirect_reg->SwizzleX;
580   LLVMValueRef base;
581   LLVMValueRef rel;
582   LLVMValueRef max_index;
583   LLVMValueRef index;
584
585   assert(bld->indirect_files & (1 << reg_file));
586
587   base = lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, reg_index);
588
589   assert(swizzle < 4);
590   rel = LLVMBuildLoad(builder,
591                        bld->addr[indirect_reg->Index][swizzle],
592                        "load addr reg");
593
594   /* for indexing we want integers */
595   rel = LLVMBuildFPToSI(builder,
596                         rel,
597                         uint_bld->vec_type, "");
598
599   index = lp_build_add(uint_bld, base, rel);
600
601   max_index = lp_build_const_int_vec(bld->base.gallivm,
602                                      uint_bld->type,
603                                      bld->info->file_max[reg_file]);
604
605   assert(!uint_bld->type.sign);
606   index = lp_build_min(uint_bld, index, max_index);
607
608   return index;
609}
610
611
612/**
613 * Register fetch.
614 */
615static LLVMValueRef
616emit_fetch(
617   struct lp_build_tgsi_soa_context *bld,
618   const struct tgsi_full_instruction *inst,
619   unsigned src_op,
620   const unsigned chan_index )
621{
622   struct gallivm_state *gallivm = bld->base.gallivm;
623   LLVMBuilderRef builder = gallivm->builder;
624   struct lp_build_context *uint_bld = &bld->uint_bld;
625   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
626   const unsigned swizzle =
627      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
628   LLVMValueRef res;
629   LLVMValueRef indirect_index = NULL;
630
631   if (swizzle > 3) {
632      assert(0 && "invalid swizzle in emit_fetch()");
633      return bld->base.undef;
634   }
635
636   if (reg->Register.Indirect) {
637      indirect_index = get_indirect_index(bld,
638                                          reg->Register.File,
639                                          reg->Register.Index,
640                                          &reg->Indirect);
641   } else {
642      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
643   }
644
645   switch (reg->Register.File) {
646   case TGSI_FILE_CONSTANT:
647      if (reg->Register.Indirect) {
648         LLVMValueRef swizzle_vec =
649            lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle);
650         LLVMValueRef index_vec;  /* index into the const buffer */
651
652         /* index_vec = indirect_index * 4 + swizzle */
653         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
654         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
655
656         /* Gather values from the constant buffer */
657         res = build_gather(bld, bld->consts_ptr, index_vec);
658      }
659      else {
660         LLVMValueRef index;  /* index into the const buffer */
661         LLVMValueRef scalar, scalar_ptr;
662
663         index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);
664
665         scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
666                                   &index, 1, "");
667         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
668
669         res = lp_build_broadcast_scalar(&bld->base, scalar);
670      }
671      break;
672
673   case TGSI_FILE_IMMEDIATE:
674      res = bld->immediates[reg->Register.Index][swizzle];
675      assert(res);
676      break;
677
678   case TGSI_FILE_INPUT:
679      if (reg->Register.Indirect) {
680         LLVMValueRef swizzle_vec =
681            lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
682         LLVMValueRef length_vec =
683            lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length);
684         LLVMValueRef index_vec;  /* index into the const buffer */
685         LLVMValueRef inputs_array;
686         LLVMTypeRef float4_ptr_type;
687
688         /* index_vec = (indirect_index * 4 + swizzle) * length */
689         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
690         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
691         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
692
693         /* cast inputs_array pointer to float* */
694         float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
695         inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
696                                         float4_ptr_type, "");
697
698         /* Gather values from the temporary register array */
699         res = build_gather(bld, inputs_array, index_vec);
700      } else {
701         if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
702            LLVMValueRef lindex = lp_build_const_int32(gallivm,
703                                           reg->Register.Index * 4 + swizzle);
704            LLVMValueRef input_ptr =  LLVMBuildGEP(builder,
705                                                   bld->inputs_array, &lindex, 1, "");
706            res = LLVMBuildLoad(builder, input_ptr, "");
707         }
708         else {
709            res = bld->inputs[reg->Register.Index][swizzle];
710         }
711      }
712      assert(res);
713      break;
714
715   case TGSI_FILE_TEMPORARY:
716      if (reg->Register.Indirect) {
717         LLVMValueRef swizzle_vec =
718            lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle);
719         LLVMValueRef length_vec =
720            lp_build_const_int_vec(bld->base.gallivm, uint_bld->type,
721                                   bld->base.type.length);
722         LLVMValueRef index_vec;  /* index into the const buffer */
723         LLVMValueRef temps_array;
724         LLVMTypeRef float4_ptr_type;
725
726         /* index_vec = (indirect_index * 4 + swizzle) * length */
727         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
728         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
729         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
730
731         /* cast temps_array pointer to float* */
732         float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->base.gallivm->context), 0);
733         temps_array = LLVMBuildBitCast(builder, bld->temps_array,
734                                        float4_ptr_type, "");
735
736         /* Gather values from the temporary register array */
737         res = build_gather(bld, temps_array, index_vec);
738      }
739      else {
740         LLVMValueRef temp_ptr;
741         temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
742         res = LLVMBuildLoad(builder, temp_ptr, "");
743         if (!res)
744            return bld->base.undef;
745      }
746      break;
747
748   case TGSI_FILE_SYSTEM_VALUE:
749      assert(!reg->Register.Indirect);
750      {
751         LLVMValueRef index;  /* index into the system value array */
752         LLVMValueRef scalar, scalar_ptr;
753
754         index = lp_build_const_int32(gallivm,
755                                      reg->Register.Index * 4 + swizzle);
756
757         scalar_ptr = LLVMBuildGEP(builder, bld->system_values_array,
758                                   &index, 1, "");
759         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
760
761         res = lp_build_broadcast_scalar(&bld->base, scalar);
762      }
763      break;
764
765   default:
766      assert(0 && "invalid src register in emit_fetch()");
767      return bld->base.undef;
768   }
769
770   if (reg->Register.Absolute) {
771      res = lp_build_abs( &bld->base, res );
772   }
773
774   if (reg->Register.Negate) {
775      res = lp_build_negate( &bld->base, res );
776   }
777
778   return res;
779}
780
781
782/**
783 * Register fetch with derivatives.
784 */
785static void
786emit_fetch_deriv(
787   struct lp_build_tgsi_soa_context *bld,
788   const struct tgsi_full_instruction *inst,
789   unsigned index,
790   const unsigned chan_index,
791   LLVMValueRef *res,
792   LLVMValueRef *ddx,
793   LLVMValueRef *ddy)
794{
795   LLVMValueRef src;
796
797   src = emit_fetch(bld, inst, index, chan_index);
798
799   if(res)
800      *res = src;
801
802   /* TODO: use interpolation coeffs for inputs */
803
804   if(ddx)
805      *ddx = lp_build_ddx(&bld->base, src);
806
807   if(ddy)
808      *ddy = lp_build_ddy(&bld->base, src);
809}
810
811
812/**
813 * Predicate.
814 */
815static void
816emit_fetch_predicate(
817   struct lp_build_tgsi_soa_context *bld,
818   const struct tgsi_full_instruction *inst,
819   LLVMValueRef *pred)
820{
821   LLVMBuilderRef builder = bld->base.gallivm->builder;
822   unsigned index;
823   unsigned char swizzles[4];
824   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
825   LLVMValueRef value;
826   unsigned chan;
827
828   if (!inst->Instruction.Predicate) {
829      TGSI_FOR_EACH_CHANNEL( chan ) {
830         pred[chan] = NULL;
831      }
832      return;
833   }
834
835   swizzles[0] = inst->Predicate.SwizzleX;
836   swizzles[1] = inst->Predicate.SwizzleY;
837   swizzles[2] = inst->Predicate.SwizzleZ;
838   swizzles[3] = inst->Predicate.SwizzleW;
839
840   index = inst->Predicate.Index;
841   assert(index < LP_MAX_TGSI_PREDS);
842
843   TGSI_FOR_EACH_CHANNEL( chan ) {
844      unsigned swizzle = swizzles[chan];
845
846      /*
847       * Only fetch the predicate register channels that are actually listed
848       * in the swizzles
849       */
850      if (!unswizzled[swizzle]) {
851         value = LLVMBuildLoad(builder,
852                               bld->preds[index][swizzle], "");
853
854         /*
855          * Convert the value to an integer mask.
856          *
857          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
858          * is needlessly causing two comparisons due to storing the intermediate
859          * result as float vector instead of an integer mask vector.
860          */
861         value = lp_build_compare(bld->base.gallivm,
862                                  bld->base.type,
863                                  PIPE_FUNC_NOTEQUAL,
864                                  value,
865                                  bld->base.zero);
866         if (inst->Predicate.Negate) {
867            value = LLVMBuildNot(builder, value, "");
868         }
869
870         unswizzled[swizzle] = value;
871      } else {
872         value = unswizzled[swizzle];
873      }
874
875      pred[chan] = value;
876   }
877}
878
879
880/**
881 * Register store.
882 */
883static void
884emit_store(
885   struct lp_build_tgsi_soa_context *bld,
886   const struct tgsi_full_instruction *inst,
887   unsigned index,
888   unsigned chan_index,
889   LLVMValueRef pred,
890   LLVMValueRef value)
891{
892   struct gallivm_state *gallivm = bld->base.gallivm;
893   LLVMBuilderRef builder = gallivm->builder;
894   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
895   struct lp_build_context *uint_bld = &bld->uint_bld;
896   LLVMValueRef indirect_index = NULL;
897
898   switch( inst->Instruction.Saturate ) {
899   case TGSI_SAT_NONE:
900      break;
901
902   case TGSI_SAT_ZERO_ONE:
903      value = lp_build_max(&bld->base, value, bld->base.zero);
904      value = lp_build_min(&bld->base, value, bld->base.one);
905      break;
906
907   case TGSI_SAT_MINUS_PLUS_ONE:
908      value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0));
909      value = lp_build_min(&bld->base, value, bld->base.one);
910      break;
911
912   default:
913      assert(0);
914   }
915
916   if (reg->Register.Indirect) {
917      indirect_index = get_indirect_index(bld,
918                                          reg->Register.File,
919                                          reg->Register.Index,
920                                          &reg->Indirect);
921   } else {
922      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
923   }
924
925   switch( reg->Register.File ) {
926   case TGSI_FILE_OUTPUT:
927      if (reg->Register.Indirect) {
928         LLVMValueRef chan_vec =
929            lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
930         LLVMValueRef length_vec =
931            lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length);
932         LLVMValueRef index_vec;  /* indexes into the temp registers */
933         LLVMValueRef outputs_array;
934         LLVMValueRef pixel_offsets;
935         LLVMTypeRef float_ptr_type;
936         int i;
937
938         /* build pixel offset vector: {0, 1, 2, 3, ...} */
939         pixel_offsets = uint_bld->undef;
940         for (i = 0; i < bld->base.type.length; i++) {
941            LLVMValueRef ii = lp_build_const_int32(gallivm, i);
942            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
943                                                   ii, ii, "");
944         }
945
946         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
947         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
948         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
949         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
950         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
951
952         float_ptr_type =
953            LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
954         outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
955                                          float_ptr_type, "");
956
957         /* Scatter store values into temp registers */
958         emit_mask_scatter(bld, outputs_array, index_vec, value,
959                           &bld->exec_mask, pred);
960      }
961      else {
962         LLVMValueRef out_ptr = get_output_ptr(bld, reg->Register.Index,
963                                               chan_index);
964         lp_exec_mask_store(&bld->exec_mask, pred, value, out_ptr);
965      }
966      break;
967
968   case TGSI_FILE_TEMPORARY:
969      if (reg->Register.Indirect) {
970         LLVMValueRef chan_vec =
971            lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
972         LLVMValueRef length_vec =
973            lp_build_const_int_vec(gallivm, uint_bld->type,
974                                   bld->base.type.length);
975         LLVMValueRef index_vec;  /* indexes into the temp registers */
976         LLVMValueRef temps_array;
977         LLVMValueRef pixel_offsets;
978         LLVMTypeRef float_ptr_type;
979         int i;
980
981         /* build pixel offset vector: {0, 1, 2, 3, ...} */
982         pixel_offsets = uint_bld->undef;
983         for (i = 0; i < bld->base.type.length; i++) {
984            LLVMValueRef ii = lp_build_const_int32(gallivm, i);
985            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
986                                                   ii, ii, "");
987         }
988
989         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
990         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
991         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
992         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
993         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
994
995         float_ptr_type =
996            LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
997         temps_array = LLVMBuildBitCast(builder, bld->temps_array,
998                                        float_ptr_type, "");
999
1000         /* Scatter store values into temp registers */
1001         emit_mask_scatter(bld, temps_array, index_vec, value,
1002                           &bld->exec_mask, pred);
1003      }
1004      else {
1005         LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
1006                                              chan_index);
1007         lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
1008      }
1009      break;
1010
1011   case TGSI_FILE_ADDRESS:
1012      lp_exec_mask_store(&bld->exec_mask, pred, value,
1013                         bld->addr[reg->Register.Index][chan_index]);
1014      break;
1015
1016   case TGSI_FILE_PREDICATE:
1017      lp_exec_mask_store(&bld->exec_mask, pred, value,
1018                         bld->preds[reg->Register.Index][chan_index]);
1019      break;
1020
1021   default:
1022      assert( 0 );
1023   }
1024}
1025
1026
1027/**
1028 * High-level instruction translators.
1029 */
1030
1031static void
1032emit_tex( struct lp_build_tgsi_soa_context *bld,
1033          const struct tgsi_full_instruction *inst,
1034          enum lp_build_tex_modifier modifier,
1035          LLVMValueRef *texel)
1036{
1037   LLVMBuilderRef builder = bld->base.gallivm->builder;
1038   unsigned unit;
1039   LLVMValueRef lod_bias, explicit_lod;
1040   LLVMValueRef oow = NULL;
1041   LLVMValueRef coords[3];
1042   LLVMValueRef ddx[3];
1043   LLVMValueRef ddy[3];
1044   unsigned num_coords;
1045   unsigned i;
1046
1047   if (!bld->sampler) {
1048      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1049      for (i = 0; i < 4; i++) {
1050         texel[i] = bld->base.undef;
1051      }
1052      return;
1053   }
1054
1055   switch (inst->Texture.Texture) {
1056   case TGSI_TEXTURE_1D:
1057      num_coords = 1;
1058      break;
1059   case TGSI_TEXTURE_1D_ARRAY:
1060   case TGSI_TEXTURE_2D:
1061   case TGSI_TEXTURE_RECT:
1062      num_coords = 2;
1063      break;
1064   case TGSI_TEXTURE_SHADOW1D:
1065   case TGSI_TEXTURE_SHADOW1D_ARRAY:
1066   case TGSI_TEXTURE_SHADOW2D:
1067   case TGSI_TEXTURE_SHADOWRECT:
1068   case TGSI_TEXTURE_2D_ARRAY:
1069   case TGSI_TEXTURE_3D:
1070   case TGSI_TEXTURE_CUBE:
1071      num_coords = 3;
1072      break;
1073   case TGSI_TEXTURE_SHADOW2D_ARRAY:
1074      num_coords = 4;
1075      break;
1076   default:
1077      assert(0);
1078      return;
1079   }
1080
1081   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1082      lod_bias = emit_fetch( bld, inst, 0, 3 );
1083      explicit_lod = NULL;
1084   }
1085   else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1086      lod_bias = NULL;
1087      explicit_lod = emit_fetch( bld, inst, 0, 3 );
1088   }
1089   else {
1090      lod_bias = NULL;
1091      explicit_lod = NULL;
1092   }
1093
1094   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
1095      oow = emit_fetch( bld, inst, 0, 3 );
1096      oow = lp_build_rcp(&bld->base, oow);
1097   }
1098
1099   for (i = 0; i < num_coords; i++) {
1100      coords[i] = emit_fetch( bld, inst, 0, i );
1101      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1102         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
1103   }
1104   for (i = num_coords; i < 3; i++) {
1105      coords[i] = bld->base.undef;
1106   }
1107
1108   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1109      LLVMValueRef index0 = lp_build_const_int32(bld->base.gallivm, 0);
1110      for (i = 0; i < num_coords; i++) {
1111         LLVMValueRef src1 = emit_fetch( bld, inst, 1, i );
1112         LLVMValueRef src2 = emit_fetch( bld, inst, 2, i );
1113         ddx[i] = LLVMBuildExtractElement(builder, src1, index0, "");
1114         ddy[i] = LLVMBuildExtractElement(builder, src2, index0, "");
1115      }
1116      unit = inst->Src[3].Register.Index;
1117   }  else {
1118      for (i = 0; i < num_coords; i++) {
1119         ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] );
1120         ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] );
1121      }
1122      unit = inst->Src[1].Register.Index;
1123   }
1124   for (i = num_coords; i < 3; i++) {
1125      ddx[i] = LLVMGetUndef(bld->base.elem_type);
1126      ddy[i] = LLVMGetUndef(bld->base.elem_type);
1127   }
1128
1129   bld->sampler->emit_fetch_texel(bld->sampler,
1130                                  bld->base.gallivm,
1131                                  bld->base.type,
1132                                  unit, num_coords, coords,
1133                                  ddx, ddy,
1134                                  lod_bias, explicit_lod,
1135                                  texel);
1136}
1137
1138static boolean
1139near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
1140		   int pc)
1141{
1142   int i;
1143
1144   for (i = 0; i < 5; i++) {
1145      unsigned opcode;
1146
1147      if (pc + i >= bld->info->num_instructions)
1148	 return TRUE;
1149
1150      opcode = bld->instructions[pc + i].Instruction.Opcode;
1151
1152      if (opcode == TGSI_OPCODE_END)
1153	 return TRUE;
1154
1155      if (opcode == TGSI_OPCODE_TEX ||
1156	  opcode == TGSI_OPCODE_TXP ||
1157	  opcode == TGSI_OPCODE_TXD ||
1158	  opcode == TGSI_OPCODE_TXB ||
1159	  opcode == TGSI_OPCODE_TXL ||
1160	  opcode == TGSI_OPCODE_TXF ||
1161	  opcode == TGSI_OPCODE_TXQ ||
1162	  opcode == TGSI_OPCODE_CAL ||
1163	  opcode == TGSI_OPCODE_CALLNZ ||
1164	  opcode == TGSI_OPCODE_IF ||
1165	  opcode == TGSI_OPCODE_IFC ||
1166	  opcode == TGSI_OPCODE_BGNLOOP ||
1167	  opcode == TGSI_OPCODE_SWITCH)
1168	 return FALSE;
1169   }
1170
1171   return TRUE;
1172}
1173
1174
1175
1176/**
1177 * Kill fragment if any of the src register values are negative.
1178 */
1179static void
1180emit_kil(
1181   struct lp_build_tgsi_soa_context *bld,
1182   const struct tgsi_full_instruction *inst,
1183   int pc)
1184{
1185   LLVMBuilderRef builder = bld->base.gallivm->builder;
1186   const struct tgsi_full_src_register *reg = &inst->Src[0];
1187   LLVMValueRef terms[NUM_CHANNELS];
1188   LLVMValueRef mask;
1189   unsigned chan_index;
1190
1191   memset(&terms, 0, sizeof terms);
1192
1193   TGSI_FOR_EACH_CHANNEL( chan_index ) {
1194      unsigned swizzle;
1195
1196      /* Unswizzle channel */
1197      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1198
1199      /* Check if the component has not been already tested. */
1200      assert(swizzle < NUM_CHANNELS);
1201      if( !terms[swizzle] )
1202         /* TODO: change the comparison operator instead of setting the sign */
1203         terms[swizzle] =  emit_fetch(bld, inst, 0, chan_index );
1204   }
1205
1206   mask = NULL;
1207   TGSI_FOR_EACH_CHANNEL( chan_index ) {
1208      if(terms[chan_index]) {
1209         LLVMValueRef chan_mask;
1210
1211         /*
1212          * If term < 0 then mask = 0 else mask = ~0.
1213          */
1214         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
1215
1216         if(mask)
1217            mask = LLVMBuildAnd(builder, mask, chan_mask, "");
1218         else
1219            mask = chan_mask;
1220      }
1221   }
1222
1223   if(mask) {
1224      lp_build_mask_update(bld->mask, mask);
1225
1226      if (!near_end_of_shader(bld, pc))
1227	 lp_build_mask_check(bld->mask);
1228   }
1229}
1230
1231
1232/**
1233 * Predicated fragment kill.
1234 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
1235 * The only predication is the execution mask which will apply if
1236 * we're inside a loop or conditional.
1237 */
1238static void
1239emit_kilp(struct lp_build_tgsi_soa_context *bld,
1240          const struct tgsi_full_instruction *inst,
1241	  int pc)
1242{
1243   LLVMBuilderRef builder = bld->base.gallivm->builder;
1244   LLVMValueRef mask;
1245
1246   /* For those channels which are "alive", disable fragment shader
1247    * execution.
1248    */
1249   if (bld->exec_mask.has_mask) {
1250      mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
1251   }
1252   else {
1253      LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type);
1254      mask = zero;
1255   }
1256
1257   lp_build_mask_update(bld->mask, mask);
1258
1259   if (!near_end_of_shader(bld, pc))
1260      lp_build_mask_check(bld->mask);
1261}
1262
1263
1264/**
1265 * Emit code which will dump the value of all the temporary registers
1266 * to stdout.
1267 */
1268static void
1269emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
1270{
1271   struct gallivm_state *gallivm = bld->base.gallivm;
1272   LLVMBuilderRef builder = gallivm->builder;
1273   LLVMValueRef temp_ptr;
1274   LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
1275   LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
1276   LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
1277   LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
1278   int index;
1279   int n = bld->info->file_max[TGSI_FILE_TEMPORARY];
1280
1281   for (index = 0; index < n; index++) {
1282      LLVMValueRef idx = lp_build_const_int32(gallivm, index);
1283      LLVMValueRef v[4][4], res;
1284      int chan;
1285
1286      lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
1287
1288      for (chan = 0; chan < 4; chan++) {
1289         temp_ptr = get_temp_ptr(bld, index, chan);
1290         res = LLVMBuildLoad(builder, temp_ptr, "");
1291         v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
1292         v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
1293         v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
1294         v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
1295      }
1296
1297      lp_build_printf(gallivm, "  X: %f %f %f %f\n",
1298                      v[0][0], v[0][1], v[0][2], v[0][3]);
1299      lp_build_printf(gallivm, "  Y: %f %f %f %f\n",
1300                      v[1][0], v[1][1], v[1][2], v[1][3]);
1301      lp_build_printf(gallivm, "  Z: %f %f %f %f\n",
1302                      v[2][0], v[2][1], v[2][2], v[2][3]);
1303      lp_build_printf(gallivm, "  W: %f %f %f %f\n",
1304                      v[3][0], v[3][1], v[3][2], v[3][3]);
1305   }
1306}
1307
1308
1309
1310static void
1311emit_declaration(
1312   struct lp_build_tgsi_soa_context *bld,
1313   const struct tgsi_full_declaration *decl)
1314{
1315   struct gallivm_state *gallivm = bld->base.gallivm;
1316   LLVMTypeRef vec_type = bld->base.vec_type;
1317   const unsigned first = decl->Range.First;
1318   const unsigned last = decl->Range.Last;
1319   unsigned idx, i;
1320
1321   for (idx = first; idx <= last; ++idx) {
1322      assert(last <= bld->info->file_max[decl->Declaration.File]);
1323      switch (decl->Declaration.File) {
1324      case TGSI_FILE_TEMPORARY:
1325         assert(idx < LP_MAX_TGSI_TEMPS);
1326         if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
1327            for (i = 0; i < NUM_CHANNELS; i++)
1328               bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
1329         }
1330         break;
1331
1332      case TGSI_FILE_OUTPUT:
1333         if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
1334            for (i = 0; i < NUM_CHANNELS; i++)
1335               bld->outputs[idx][i] = lp_build_alloca(gallivm,
1336                                                      vec_type, "output");
1337         }
1338         break;
1339
1340      case TGSI_FILE_ADDRESS:
1341         assert(idx < LP_MAX_TGSI_ADDRS);
1342         for (i = 0; i < NUM_CHANNELS; i++)
1343            bld->addr[idx][i] = lp_build_alloca(gallivm, vec_type, "addr");
1344         break;
1345
1346      case TGSI_FILE_PREDICATE:
1347         assert(idx < LP_MAX_TGSI_PREDS);
1348         for (i = 0; i < NUM_CHANNELS; i++)
1349            bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
1350                                                 "predicate");
1351         break;
1352
1353      default:
1354         /* don't need to declare other vars */
1355         break;
1356      }
1357   }
1358}
1359
1360
1361/**
1362 * Emit LLVM for one TGSI instruction.
 * \return TRUE for success, FALSE otherwise
1364 */
1365static boolean
1366emit_instruction(
1367   struct lp_build_tgsi_soa_context *bld,
1368   const struct tgsi_full_instruction *inst,
1369   const struct tgsi_opcode_info *info,
1370   int *pc)
1371{
1372   unsigned chan_index;
1373   LLVMValueRef src0, src1, src2;
1374   LLVMValueRef tmp0, tmp1, tmp2;
1375   LLVMValueRef tmp3 = NULL;
1376   LLVMValueRef tmp4 = NULL;
1377   LLVMValueRef tmp5 = NULL;
1378   LLVMValueRef tmp6 = NULL;
1379   LLVMValueRef tmp7 = NULL;
1380   LLVMValueRef res;
1381   LLVMValueRef dst0[NUM_CHANNELS];
1382
1383   /*
1384    * Stores and write masks are handled in a general fashion after the long
1385    * instruction opcode switch statement.
1386    *
    * Although not strictly necessary, we avoid generating instructions for
    * channels which won't be stored, in cases where that's easy. For some
1389    * complex instructions, like texture sampling, it is more convenient to
1390    * assume a full writemask and then let LLVM optimization passes eliminate
1391    * redundant code.
1392    */
1393
1394   (*pc)++;
1395
1396   assert(info->num_dst <= 1);
1397   if (info->num_dst) {
1398      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1399         dst0[chan_index] = bld->base.undef;
1400      }
1401   }
1402
1403   switch (inst->Instruction.Opcode) {
1404   case TGSI_OPCODE_ARL:
1405      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1406         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1407         tmp0 = lp_build_floor(&bld->base, tmp0);
1408         dst0[chan_index] = tmp0;
1409      }
1410      break;
1411
1412   case TGSI_OPCODE_MOV:
1413      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1414         dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
1415      }
1416      break;
1417
1418   case TGSI_OPCODE_LIT:
1419      if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ) {
1420         dst0[TGSI_CHAN_X] = bld->base.one;
1421      }
1422      if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ) {
1423         src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1424         dst0[TGSI_CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
1425      }
1426      if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) ) {
1427         /* XMM[1] = SrcReg[0].yyyy */
1428         tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
1429         /* XMM[1] = max(XMM[1], 0) */
1430         tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1431         /* XMM[2] = SrcReg[0].wwww */
1432         tmp2 = emit_fetch( bld, inst, 0, TGSI_CHAN_W );
1433         tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1434         tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1435         tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1436         dst0[TGSI_CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1437      }
1438      if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) ) {
1439         dst0[TGSI_CHAN_W] = bld->base.one;
1440      }
1441      break;
1442
1443   case TGSI_OPCODE_RCP:
1444   /* TGSI_OPCODE_RECIP */
1445      src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1446      res = lp_build_rcp(&bld->base, src0);
1447      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1448         dst0[chan_index] = res;
1449      }
1450      break;
1451
1452   case TGSI_OPCODE_RSQ:
1453   /* TGSI_OPCODE_RECIPSQRT */
1454      src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1455      src0 = lp_build_abs(&bld->base, src0);
1456      res = lp_build_rsqrt(&bld->base, src0);
1457      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1458         dst0[chan_index] = res;
1459      }
1460      break;
1461
1462   case TGSI_OPCODE_EXP:
1463      if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ||
1464         TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ||
1465         TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z )) {
1466         LLVMValueRef *p_exp2_int_part = NULL;
1467         LLVMValueRef *p_frac_part = NULL;
1468         LLVMValueRef *p_exp2 = NULL;
1469
1470         src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1471
1472         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ))
1473            p_exp2_int_part = &tmp0;
1474         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ))
1475            p_frac_part = &tmp1;
1476         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ))
1477            p_exp2 = &tmp2;
1478
1479         lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1480
1481         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ))
1482            dst0[TGSI_CHAN_X] = tmp0;
1483         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ))
1484            dst0[TGSI_CHAN_Y] = tmp1;
1485         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ))
1486            dst0[TGSI_CHAN_Z] = tmp2;
1487      }
1488      /* dst.w = 1.0 */
1489      if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W )) {
1490         dst0[TGSI_CHAN_W] = bld->base.one;
1491      }
1492      break;
1493
1494   case TGSI_OPCODE_LOG:
1495      if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ||
1496         TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ||
1497         TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z )) {
1498         LLVMValueRef *p_floor_log2 = NULL;
1499         LLVMValueRef *p_exp = NULL;
1500         LLVMValueRef *p_log2 = NULL;
1501
1502         src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1503         src0 = lp_build_abs( &bld->base, src0 );
1504
1505         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ))
1506            p_floor_log2 = &tmp0;
1507         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ))
1508            p_exp = &tmp1;
1509         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ))
1510            p_log2 = &tmp2;
1511
1512         lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1513
1514         /* dst.x = floor(lg2(abs(src.x))) */
1515         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ))
1516            dst0[TGSI_CHAN_X] = tmp0;
1517         /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1518         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y )) {
1519            dst0[TGSI_CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1520         }
1521         /* dst.z = lg2(abs(src.x)) */
1522         if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ))
1523            dst0[TGSI_CHAN_Z] = tmp2;
1524      }
1525      /* dst.w = 1.0 */
1526      if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W )) {
1527         dst0[TGSI_CHAN_W] = bld->base.one;
1528      }
1529      break;
1530
1531   case TGSI_OPCODE_MUL:
1532      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1533         src0 = emit_fetch( bld, inst, 0, chan_index );
1534         src1 = emit_fetch( bld, inst, 1, chan_index );
1535         dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1536      }
1537      break;
1538
1539   case TGSI_OPCODE_ADD:
1540      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1541         src0 = emit_fetch( bld, inst, 0, chan_index );
1542         src1 = emit_fetch( bld, inst, 1, chan_index );
1543         dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1544      }
1545      break;
1546
1547   case TGSI_OPCODE_DP3:
1548   /* TGSI_OPCODE_DOT3 */
1549      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1550      tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
1551      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1552      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
1553      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
1554      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1555      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1556      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
1557      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z );
1558      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1559      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1560      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1561         dst0[chan_index] = tmp0;
1562      }
1563      break;
1564
1565   case TGSI_OPCODE_DP4:
1566   /* TGSI_OPCODE_DOT4 */
1567      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1568      tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
1569      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1570      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
1571      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
1572      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1573      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1574      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
1575      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z );
1576      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1577      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1578      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_W );
1579      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_W );
1580      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1581      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1582      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1583         dst0[chan_index] = tmp0;
1584      }
1585      break;
1586
1587   case TGSI_OPCODE_DST:
1588      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) {
1589         dst0[TGSI_CHAN_X] = bld->base.one;
1590      }
1591      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) {
1592         tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
1593         tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
1594         dst0[TGSI_CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1595      }
1596      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) {
1597         dst0[TGSI_CHAN_Z] = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
1598      }
1599      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) {
1600         dst0[TGSI_CHAN_W] = emit_fetch( bld, inst, 1, TGSI_CHAN_W );
1601      }
1602      break;
1603
1604   case TGSI_OPCODE_MIN:
1605      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1606         src0 = emit_fetch( bld, inst, 0, chan_index );
1607         src1 = emit_fetch( bld, inst, 1, chan_index );
1608         dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1609      }
1610      break;
1611
1612   case TGSI_OPCODE_MAX:
1613      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1614         src0 = emit_fetch( bld, inst, 0, chan_index );
1615         src1 = emit_fetch( bld, inst, 1, chan_index );
1616         dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1617      }
1618      break;
1619
1620   case TGSI_OPCODE_SLT:
1621   /* TGSI_OPCODE_SETLT */
1622      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1623         src0 = emit_fetch( bld, inst, 0, chan_index );
1624         src1 = emit_fetch( bld, inst, 1, chan_index );
1625         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1626         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1627      }
1628      break;
1629
1630   case TGSI_OPCODE_SGE:
1631   /* TGSI_OPCODE_SETGE */
1632      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1633         src0 = emit_fetch( bld, inst, 0, chan_index );
1634         src1 = emit_fetch( bld, inst, 1, chan_index );
1635         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1636         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1637      }
1638      break;
1639
1640   case TGSI_OPCODE_MAD:
1641   /* TGSI_OPCODE_MADD */
1642      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1643         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1644         tmp1 = emit_fetch( bld, inst, 1, chan_index );
1645         tmp2 = emit_fetch( bld, inst, 2, chan_index );
1646         tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1647         tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1648         dst0[chan_index] = tmp0;
1649      }
1650      break;
1651
1652   case TGSI_OPCODE_SUB:
1653      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1654         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1655         tmp1 = emit_fetch( bld, inst, 1, chan_index );
1656         dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1657      }
1658      break;
1659
1660   case TGSI_OPCODE_LRP:
1661      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1662         src0 = emit_fetch( bld, inst, 0, chan_index );
1663         src1 = emit_fetch( bld, inst, 1, chan_index );
1664         src2 = emit_fetch( bld, inst, 2, chan_index );
1665         tmp0 = lp_build_sub( &bld->base, src1, src2 );
1666         tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1667         dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1668      }
1669      break;
1670
1671   case TGSI_OPCODE_CND:
1672      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1673         src0 = emit_fetch( bld, inst, 0, chan_index );
1674         src1 = emit_fetch( bld, inst, 1, chan_index );
1675         src2 = emit_fetch( bld, inst, 2, chan_index );
1676         tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type, 0.5);
1677         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1678         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1679      }
1680      break;
1681
1682   case TGSI_OPCODE_DP2A:
1683      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );  /* xmm0 = src[0].x */
1684      tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );  /* xmm1 = src[1].x */
1685      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
1686      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );  /* xmm1 = src[0].y */
1687      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );  /* xmm2 = src[1].y */
1688      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
1689      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1690      tmp1 = emit_fetch( bld, inst, 2, TGSI_CHAN_X );  /* xmm1 = src[2].x */
1691      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1692      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1693         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
1694      }
1695      break;
1696
1697   case TGSI_OPCODE_FRC:
1698      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1699         src0 = emit_fetch( bld, inst, 0, chan_index );
1700         tmp0 = lp_build_floor(&bld->base, src0);
1701         tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1702         dst0[chan_index] = tmp0;
1703      }
1704      break;
1705
1706   case TGSI_OPCODE_CLAMP:
1707      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1708         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1709         src1 = emit_fetch( bld, inst, 1, chan_index );
1710         src2 = emit_fetch( bld, inst, 2, chan_index );
1711         tmp0 = lp_build_max(&bld->base, tmp0, src1);
1712         tmp0 = lp_build_min(&bld->base, tmp0, src2);
1713         dst0[chan_index] = tmp0;
1714      }
1715      break;
1716
1717   case TGSI_OPCODE_FLR:
1718      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1719         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1720         dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1721      }
1722      break;
1723
1724   case TGSI_OPCODE_ROUND:
1725      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1726         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1727         dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1728      }
1729      break;
1730
1731   case TGSI_OPCODE_EX2: {
1732      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1733      tmp0 = lp_build_exp2( &bld->base, tmp0);
1734      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1735         dst0[chan_index] = tmp0;
1736      }
1737      break;
1738   }
1739
1740   case TGSI_OPCODE_LG2:
1741      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1742      tmp0 = lp_build_log2( &bld->base, tmp0);
1743      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1744         dst0[chan_index] = tmp0;
1745      }
1746      break;
1747
1748   case TGSI_OPCODE_POW:
1749      src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1750      src1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
1751      res = lp_build_pow( &bld->base, src0, src1 );
1752      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1753         dst0[chan_index] = res;
1754      }
1755      break;
1756
1757   case TGSI_OPCODE_XPD:
1758      if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ||
1759         TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ) {
1760         tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z );
1761         tmp3 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
1762      }
1763      if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ||
1764         TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) ) {
1765         tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
1766         tmp4 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
1767      }
1768      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) {
1769         tmp2 = tmp0;
1770         tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1771         tmp5 = tmp3;
1772         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1773         tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1774         dst0[TGSI_CHAN_X] = tmp2;
1775      }
1776      if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ||
1777         TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) ) {
1778         tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
1779         tmp5 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1780      }
1781      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) {
1782         tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1783         tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1784         tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1785         dst0[TGSI_CHAN_Y] = tmp3;
1786      }
1787      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) {
1788         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1789         tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1790         tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1791         dst0[TGSI_CHAN_Z] = tmp5;
1792      }
1793      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) {
1794         dst0[TGSI_CHAN_W] = bld->base.one;
1795      }
1796      break;
1797
1798   case TGSI_OPCODE_ABS:
1799      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1800         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1801         dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1802      }
1803      break;
1804
1805   case TGSI_OPCODE_RCC:
1806      /* deprecated? */
1807      assert(0);
1808      return FALSE;
1809
1810   case TGSI_OPCODE_DPH:
1811      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1812      tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );
1813      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1814      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );
1815      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );
1816      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1817      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1818      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z );
1819      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z );
1820      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1821      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1822      tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_W );
1823      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1824      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1825         dst0[chan_index] = tmp0;
1826      }
1827      break;
1828
1829   case TGSI_OPCODE_COS:
1830      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1831      tmp0 = lp_build_cos( &bld->base, tmp0 );
1832      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1833         dst0[chan_index] = tmp0;
1834      }
1835      break;
1836
1837   case TGSI_OPCODE_DDX:
1838      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1839         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1840      }
1841      break;
1842
1843   case TGSI_OPCODE_DDY:
1844      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1845         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1846      }
1847      break;
1848
1849   case TGSI_OPCODE_KILP:
1850      /* predicated kill */
1851      emit_kilp( bld, inst, (*pc)-1 );
1852      break;
1853
1854   case TGSI_OPCODE_KIL:
1855      /* conditional kill */
1856      emit_kil( bld, inst, (*pc)-1 );
1857      break;
1858
1859   case TGSI_OPCODE_PK2H:
1860      return FALSE;
1861      break;
1862
1863   case TGSI_OPCODE_PK2US:
1864      return FALSE;
1865      break;
1866
1867   case TGSI_OPCODE_PK4B:
1868      return FALSE;
1869      break;
1870
1871   case TGSI_OPCODE_PK4UB:
1872      return FALSE;
1873      break;
1874
1875   case TGSI_OPCODE_RFL:
1876      return FALSE;
1877      break;
1878
1879   case TGSI_OPCODE_SEQ:
1880      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1881         src0 = emit_fetch( bld, inst, 0, chan_index );
1882         src1 = emit_fetch( bld, inst, 1, chan_index );
1883         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1884         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1885      }
1886      break;
1887
1888   case TGSI_OPCODE_SFL:
1889      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1890         dst0[chan_index] = bld->base.zero;
1891      }
1892      break;
1893
1894   case TGSI_OPCODE_SGT:
1895      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1896         src0 = emit_fetch( bld, inst, 0, chan_index );
1897         src1 = emit_fetch( bld, inst, 1, chan_index );
1898         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1899         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1900      }
1901      break;
1902
1903   case TGSI_OPCODE_SIN:
1904      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
1905      tmp0 = lp_build_sin( &bld->base, tmp0 );
1906      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1907         dst0[chan_index] = tmp0;
1908      }
1909      break;
1910
1911   case TGSI_OPCODE_SLE:
1912      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1913         src0 = emit_fetch( bld, inst, 0, chan_index );
1914         src1 = emit_fetch( bld, inst, 1, chan_index );
1915         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1916         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1917      }
1918      break;
1919
1920   case TGSI_OPCODE_SNE:
1921      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1922         src0 = emit_fetch( bld, inst, 0, chan_index );
1923         src1 = emit_fetch( bld, inst, 1, chan_index );
1924         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1925         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1926      }
1927      break;
1928
1929   case TGSI_OPCODE_STR:
1930      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1931         dst0[chan_index] = bld->base.one;
1932      }
1933      break;
1934
1935   case TGSI_OPCODE_TEX:
1936      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 );
1937      break;
1938
1939   case TGSI_OPCODE_TXD:
1940      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1941      break;
1942
1943   case TGSI_OPCODE_UP2H:
1944      /* deprecated */
1945      assert (0);
1946      return FALSE;
1947      break;
1948
1949   case TGSI_OPCODE_UP2US:
1950      /* deprecated */
1951      assert(0);
1952      return FALSE;
1953      break;
1954
1955   case TGSI_OPCODE_UP4B:
1956      /* deprecated */
1957      assert(0);
1958      return FALSE;
1959      break;
1960
1961   case TGSI_OPCODE_UP4UB:
1962      /* deprecated */
1963      assert(0);
1964      return FALSE;
1965      break;
1966
1967   case TGSI_OPCODE_X2D:
1968      /* deprecated? */
1969      assert(0);
1970      return FALSE;
1971      break;
1972
1973   case TGSI_OPCODE_ARA:
1974      /* deprecated */
1975      assert(0);
1976      return FALSE;
1977      break;
1978
1979   case TGSI_OPCODE_ARR:
1980      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1981         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1982         tmp0 = lp_build_round(&bld->base, tmp0);
1983         dst0[chan_index] = tmp0;
1984      }
1985      break;
1986
1987   case TGSI_OPCODE_BRA:
1988      /* deprecated */
1989      assert(0);
1990      return FALSE;
1991      break;
1992
1993   case TGSI_OPCODE_CAL:
1994      lp_exec_mask_call(&bld->exec_mask,
1995                        inst->Label.Label,
1996                        pc);
1997
1998      break;
1999
2000   case TGSI_OPCODE_RET:
2001      lp_exec_mask_ret(&bld->exec_mask, pc);
2002      break;
2003
2004   case TGSI_OPCODE_END:
2005      if (0) {
2006         /* for debugging */
2007         emit_dump_temps(bld);
2008      }
2009      *pc = -1;
2010      break;
2011
2012   case TGSI_OPCODE_SSG:
2013   /* TGSI_OPCODE_SGN */
2014      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2015         tmp0 = emit_fetch( bld, inst, 0, chan_index );
2016         dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
2017      }
2018      break;
2019
2020   case TGSI_OPCODE_CMP:
2021      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2022         src0 = emit_fetch( bld, inst, 0, chan_index );
2023         src1 = emit_fetch( bld, inst, 1, chan_index );
2024         src2 = emit_fetch( bld, inst, 2, chan_index );
2025         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
2026         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
2027      }
2028      break;
2029
2030   case TGSI_OPCODE_SCS:
2031      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) {
2032         tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
2033         dst0[TGSI_CHAN_X] = lp_build_cos( &bld->base, tmp0 );
2034      }
2035      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) {
2036         tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );
2037         dst0[TGSI_CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
2038      }
2039      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) {
2040         dst0[TGSI_CHAN_Z] = bld->base.zero;
2041      }
2042      TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) {
2043         dst0[TGSI_CHAN_W] = bld->base.one;
2044      }
2045      break;
2046
2047   case TGSI_OPCODE_TXB:
2048      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 );
2049      break;
2050
2051   case TGSI_OPCODE_NRM:
2052      /* fall-through */
2053   case TGSI_OPCODE_NRM4:
2054      /* 3 or 4-component normalization */
2055      {
2056         uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
2057
2058         if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X) ||
2059            TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y) ||
2060            TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z) ||
2061             (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W) && dims == 4)) {
2062
2063            /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
2064
2065            /* xmm4 = src.x */
2066            /* xmm0 = src.x * src.x */
2067            tmp0 = emit_fetch(bld, inst, 0, TGSI_CHAN_X);
2068            if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X)) {
2069               tmp4 = tmp0;
2070            }
2071            tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
2072
2073            /* xmm5 = src.y */
2074            /* xmm0 = xmm0 + src.y * src.y */
2075            tmp1 = emit_fetch(bld, inst, 0, TGSI_CHAN_Y);
2076            if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y)) {
2077               tmp5 = tmp1;
2078            }
2079            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2080            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2081
2082            /* xmm6 = src.z */
2083            /* xmm0 = xmm0 + src.z * src.z */
2084            tmp1 = emit_fetch(bld, inst, 0, TGSI_CHAN_Z);
2085            if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z)) {
2086               tmp6 = tmp1;
2087            }
2088            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2089            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2090
2091            if (dims == 4) {
2092               /* xmm7 = src.w */
2093               /* xmm0 = xmm0 + src.w * src.w */
2094               tmp1 = emit_fetch(bld, inst, 0, TGSI_CHAN_W);
2095               if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W)) {
2096                  tmp7 = tmp1;
2097               }
2098               tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2099               tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2100            }
2101
2102            /* xmm1 = 1 / sqrt(xmm0) */
2103            tmp1 = lp_build_rsqrt( &bld->base, tmp0);
2104
2105            /* dst.x = xmm1 * src.x */
2106            if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X)) {
2107               dst0[TGSI_CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
2108            }
2109
2110            /* dst.y = xmm1 * src.y */
2111            if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y)) {
2112               dst0[TGSI_CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
2113            }
2114
2115            /* dst.z = xmm1 * src.z */
2116            if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z)) {
2117               dst0[TGSI_CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
2118            }
2119
2120            /* dst.w = xmm1 * src.w */
2121            if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X) && dims == 4) {
2122               dst0[TGSI_CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
2123            }
2124         }
2125
2126         /* dst.w = 1.0 */
2127         if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W) && dims == 3) {
2128            dst0[TGSI_CHAN_W] = bld->base.one;
2129         }
2130      }
2131      break;
2132
2133   case TGSI_OPCODE_DIV:
2134      /* deprecated */
2135      assert( 0 );
2136      return FALSE;
2137      break;
2138
2139   case TGSI_OPCODE_DP2:
2140      tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X );  /* xmm0 = src[0].x */
2141      tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X );  /* xmm1 = src[1].x */
2142      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
2143      tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y );  /* xmm1 = src[0].y */
2144      tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y );  /* xmm2 = src[1].y */
2145      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
2146      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
2147      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2148         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
2149      }
2150      break;
2151
2152   case TGSI_OPCODE_TXL:
2153      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 );
2154      break;
2155
2156   case TGSI_OPCODE_TXP:
2157      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 );
2158      break;
2159
2160   case TGSI_OPCODE_BRK:
2161      lp_exec_break(&bld->exec_mask);
2162      break;
2163
2164   case TGSI_OPCODE_IF:
2165      tmp0 = emit_fetch(bld, inst, 0, TGSI_CHAN_X);
2166      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
2167                          tmp0, bld->base.zero);
2168      lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
2169      break;
2170
2171   case TGSI_OPCODE_BGNLOOP:
2172      lp_exec_bgnloop(&bld->exec_mask);
2173      break;
2174
2175   case TGSI_OPCODE_BGNSUB:
2176      lp_exec_mask_bgnsub(&bld->exec_mask);
2177      break;
2178
2179   case TGSI_OPCODE_ELSE:
2180      lp_exec_mask_cond_invert(&bld->exec_mask);
2181      break;
2182
2183   case TGSI_OPCODE_ENDIF:
2184      lp_exec_mask_cond_pop(&bld->exec_mask);
2185      break;
2186
2187   case TGSI_OPCODE_ENDLOOP:
2188      lp_exec_endloop(bld->base.gallivm, &bld->exec_mask);
2189      break;
2190
2191   case TGSI_OPCODE_ENDSUB:
2192      lp_exec_mask_endsub(&bld->exec_mask, pc);
2193      break;
2194
2195   case TGSI_OPCODE_PUSHA:
2196      /* deprecated? */
2197      assert(0);
2198      return FALSE;
2199      break;
2200
2201   case TGSI_OPCODE_POPA:
2202      /* deprecated? */
2203      assert(0);
2204      return FALSE;
2205      break;
2206
2207   case TGSI_OPCODE_CEIL:
2208      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2209         tmp0 = emit_fetch( bld, inst, 0, chan_index );
2210         dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
2211      }
2212      break;
2213
2214   case TGSI_OPCODE_I2F:
2215      /* deprecated? */
2216      assert(0);
2217      return FALSE;
2218      break;
2219
2220   case TGSI_OPCODE_NOT:
2221      /* deprecated? */
2222      assert(0);
2223      return FALSE;
2224      break;
2225
2226   case TGSI_OPCODE_TRUNC:
2227      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2228         tmp0 = emit_fetch( bld, inst, 0, chan_index );
2229         dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
2230      }
2231      break;
2232
2233   case TGSI_OPCODE_SHL:
2234      /* deprecated? */
2235      assert(0);
2236      return FALSE;
2237      break;
2238
2239   case TGSI_OPCODE_ISHR:
2240      /* deprecated? */
2241      assert(0);
2242      return FALSE;
2243      break;
2244
2245   case TGSI_OPCODE_AND:
2246      /* deprecated? */
2247      assert(0);
2248      return FALSE;
2249      break;
2250
2251   case TGSI_OPCODE_OR:
2252      /* deprecated? */
2253      assert(0);
2254      return FALSE;
2255      break;
2256
2257   case TGSI_OPCODE_MOD:
2258      /* deprecated? */
2259      assert(0);
2260      return FALSE;
2261      break;
2262
2263   case TGSI_OPCODE_XOR:
2264      /* deprecated? */
2265      assert(0);
2266      return FALSE;
2267      break;
2268
2269   case TGSI_OPCODE_SAD:
2270      /* deprecated? */
2271      assert(0);
2272      return FALSE;
2273      break;
2274
2275   case TGSI_OPCODE_TXF:
2276      /* deprecated? */
2277      assert(0);
2278      return FALSE;
2279      break;
2280
2281   case TGSI_OPCODE_TXQ:
2282      /* deprecated? */
2283      assert(0);
2284      return FALSE;
2285      break;
2286
2287   case TGSI_OPCODE_CONT:
2288      lp_exec_continue(&bld->exec_mask);
2289      break;
2290
2291   case TGSI_OPCODE_EMIT:
2292      return FALSE;
2293      break;
2294
2295   case TGSI_OPCODE_ENDPRIM:
2296      return FALSE;
2297      break;
2298
2299   case TGSI_OPCODE_NOP:
2300      break;
2301
2302   default:
2303      return FALSE;
2304   }
2305
2306   if(info->num_dst) {
2307      LLVMValueRef pred[NUM_CHANNELS];
2308
2309      emit_fetch_predicate( bld, inst, pred );
2310
2311      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2312         emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
2313      }
2314   }
2315
2316   return TRUE;
2317}
2318
2319
2320void
2321lp_build_tgsi_soa(struct gallivm_state *gallivm,
2322                  const struct tgsi_token *tokens,
2323                  struct lp_type type,
2324                  struct lp_build_mask_context *mask,
2325                  LLVMValueRef consts_ptr,
2326                  LLVMValueRef system_values_array,
2327                  const LLVMValueRef *pos,
2328                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
2329                  LLVMValueRef (*outputs)[NUM_CHANNELS],
2330                  struct lp_build_sampler_soa *sampler,
2331                  const struct tgsi_shader_info *info)
2332{
2333   struct lp_build_tgsi_soa_context bld;
2334   struct tgsi_parse_context parse;
2335   uint num_immediates = 0;
2336   uint num_instructions = 0;
2337   unsigned i;
2338   int pc = 0;
2339
2340   struct lp_type res_type;
2341
2342   assert(type.length <= LP_MAX_VECTOR_LENGTH);
2343   memset(&res_type, 0, sizeof res_type);
2344   res_type.width = type.width;
2345   res_type.length = type.length;
2346   res_type.sign = 1;
2347
2348   /* Setup build context */
2349   memset(&bld, 0, sizeof bld);
2350   lp_build_context_init(&bld.base, gallivm, type);
2351   lp_build_context_init(&bld.uint_bld, gallivm, lp_uint_type(type));
2352   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
2353   bld.mask = mask;
2354   bld.pos = pos;
2355   bld.inputs = inputs;
2356   bld.outputs = outputs;
2357   bld.consts_ptr = consts_ptr;
2358   bld.sampler = sampler;
2359   bld.info = info;
2360   bld.indirect_files = info->indirect_files;
2361   bld.instructions = (struct tgsi_full_instruction *)
2362                      MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
2363   bld.max_instructions = LP_MAX_INSTRUCTIONS;
2364
2365   if (!bld.instructions) {
2366      return;
2367   }
2368
2369   lp_exec_mask_init(&bld.exec_mask, &bld.base);
2370
2371   if (bld.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
2372      LLVMValueRef array_size =
2373         lp_build_const_int32(gallivm,
2374                              info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
2375      bld.temps_array = lp_build_array_alloca(gallivm,
2376                                              bld.base.vec_type, array_size,
2377                                              "temp_array");
2378   }
2379
2380   if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2381      LLVMValueRef array_size =
2382         lp_build_const_int32(gallivm,
2383                              info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
2384      bld.outputs_array = lp_build_array_alloca(gallivm,
2385                                                bld.base.vec_type, array_size,
2386                                                "output_array");
2387   }
2388
2389   /* If we have indirect addressing in inputs we need to copy them into
2390    * our alloca array to be able to iterate over them */
2391   if (bld.indirect_files & (1 << TGSI_FILE_INPUT)) {
2392      unsigned index, chan;
2393      LLVMTypeRef vec_type = bld.base.vec_type;
2394      LLVMValueRef array_size =
2395         lp_build_const_int32(gallivm, info->file_max[TGSI_FILE_INPUT]*4 + 4);
2396      bld.inputs_array = lp_build_array_alloca(gallivm,
2397                                               vec_type, array_size,
2398                                               "input_array");
2399
2400      assert(info->num_inputs <= info->file_max[TGSI_FILE_INPUT] + 1);
2401
2402      for (index = 0; index < info->num_inputs; ++index) {
2403         for (chan = 0; chan < NUM_CHANNELS; ++chan) {
2404            LLVMValueRef lindex =
2405               lp_build_const_int32(gallivm, index * 4 + chan);
2406            LLVMValueRef input_ptr =
2407               LLVMBuildGEP(gallivm->builder, bld.inputs_array,
2408                            &lindex, 1, "");
2409            LLVMValueRef value = bld.inputs[index][chan];
2410            if (value)
2411               LLVMBuildStore(gallivm->builder, value, input_ptr);
2412         }
2413      }
2414   }
2415
2416   bld.system_values_array = system_values_array;
2417
2418   tgsi_parse_init( &parse, tokens );
2419
2420   while( !tgsi_parse_end_of_tokens( &parse ) ) {
2421      tgsi_parse_token( &parse );
2422
2423      switch( parse.FullToken.Token.Type ) {
2424      case TGSI_TOKEN_TYPE_DECLARATION:
2425         /* Inputs already interpolated */
2426         emit_declaration( &bld, &parse.FullToken.FullDeclaration );
2427         break;
2428
2429      case TGSI_TOKEN_TYPE_INSTRUCTION:
2430         {
2431            /* save expanded instruction */
2432            if (num_instructions == bld.max_instructions) {
2433               struct tgsi_full_instruction *instructions;
2434               instructions = REALLOC(bld.instructions,
2435                                      bld.max_instructions
2436                                      * sizeof(struct tgsi_full_instruction),
2437                                      (bld.max_instructions + LP_MAX_INSTRUCTIONS)
2438                                      * sizeof(struct tgsi_full_instruction));
2439               if (!instructions) {
2440                  break;
2441               }
2442               bld.instructions = instructions;
2443               bld.max_instructions += LP_MAX_INSTRUCTIONS;
2444            }
2445
2446            memcpy(bld.instructions + num_instructions,
2447                   &parse.FullToken.FullInstruction,
2448                   sizeof(bld.instructions[0]));
2449
2450            num_instructions++;
2451         }
2452
2453         break;
2454
2455      case TGSI_TOKEN_TYPE_IMMEDIATE:
2456         /* simply copy the immediate values into the next immediates[] slot */
2457         {
2458            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
2459            assert(size <= 4);
2460            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
2461            for( i = 0; i < size; ++i )
2462               bld.immediates[num_immediates][i] =
2463                  lp_build_const_vec(gallivm, type, parse.FullToken.FullImmediate.u[i].Float);
2464            for( i = size; i < 4; ++i )
2465               bld.immediates[num_immediates][i] = bld.base.undef;
2466            num_immediates++;
2467         }
2468         break;
2469
2470      case TGSI_TOKEN_TYPE_PROPERTY:
2471         break;
2472
2473      default:
2474         assert( 0 );
2475      }
2476   }
2477
2478   while (pc != -1) {
2479      struct tgsi_full_instruction *instr = bld.instructions + pc;
2480      const struct tgsi_opcode_info *opcode_info =
2481         tgsi_get_opcode_info(instr->Instruction.Opcode);
2482      if (!emit_instruction( &bld, instr, opcode_info, &pc ))
2483         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
2484                       opcode_info->mnemonic);
2485   }
2486
2487   /* If we have indirect addressing in outputs we need to copy our alloca array
2488    * to the outputs slots specified by the called */
2489   if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2490      unsigned index, chan;
2491      assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] + 1);
2492      for (index = 0; index < info->num_outputs; ++index) {
2493         for (chan = 0; chan < NUM_CHANNELS; ++chan) {
2494            bld.outputs[index][chan] = get_output_ptr(&bld, index, chan);
2495         }
2496      }
2497   }
2498
2499   if (0) {
2500      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
2501      LLVMValueRef function = LLVMGetBasicBlockParent(block);
2502      debug_printf("11111111111111111111111111111 \n");
2503      tgsi_dump(tokens, 0);
2504      lp_debug_dump_value(function);
2505      debug_printf("2222222222222222222222222222 \n");
2506   }
2507   tgsi_parse_free( &parse );
2508
2509   if (0) {
2510      LLVMModuleRef module = LLVMGetGlobalParent(
2511         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
2512      LLVMDumpModule(module);
2513
2514   }
2515
2516   FREE( bld.instructions );
2517}
2518
2519
2520/**
2521 * Build up the system values array out of individual values such as
2522 * the instance ID, front-face, primitive ID, etc.  The shader info is
2523 * used to determine which system values are needed and where to put
2524 * them in the system values array.
2525 *
2526 * XXX only instance ID is implemented at this time.
2527 *
2528 * The system values register file is similar to the constants buffer.
2529 * Example declaration:
2530 *    DCL SV[0], INSTANCEID
2531 * Example instruction:
2532 *    MOVE foo, SV[0].xxxx;
2533 *
2534 * \return  LLVM float array (interpreted as float [][4])
2535 */
2536LLVMValueRef
2537lp_build_system_values_array(struct gallivm_state *gallivm,
2538                             const struct tgsi_shader_info *info,
2539                             LLVMValueRef instance_id,
2540                             LLVMValueRef facing)
2541{
2542   LLVMValueRef size = lp_build_const_int32(gallivm, 4 * info->num_system_values);
2543   LLVMTypeRef float_t = LLVMFloatTypeInContext(gallivm->context);
2544   LLVMValueRef array = lp_build_array_alloca(gallivm, float_t,
2545                                              size, "sysvals_array");
2546   unsigned i;
2547
2548   for (i = 0; i < info->num_system_values; i++) {
2549      LLVMValueRef index = lp_build_const_int32(gallivm, i * 4);
2550      LLVMValueRef ptr, value = 0;
2551
2552      switch (info->system_value_semantic_name[i]) {
2553      case TGSI_SEMANTIC_INSTANCEID:
2554         /* convert instance ID from int to float */
2555         value = LLVMBuildSIToFP(gallivm->builder, instance_id, float_t,
2556                                 "sysval_instanceid");
2557         break;
2558      case TGSI_SEMANTIC_FACE:
2559         /* fall-through */
2560      default:
2561         assert(0 && "unexpected semantic in build_system_values_array()");
2562      }
2563
2564      ptr = LLVMBuildGEP(gallivm->builder, array, &index, 1, "");
2565      LLVMBuildStore(gallivm->builder, value, ptr);
2566   }
2567
2568   return array;
2569}
2570