lp_bld_tgsi_soa.c revision 6c8c88f02f0dc9cf39ce51d068525a94fccd5dc7
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29/**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39#include "pipe/p_config.h"
40#include "pipe/p_shader_tokens.h"
41#include "util/u_debug.h"
42#include "util/u_math.h"
43#include "util/u_memory.h"
44#include "tgsi/tgsi_dump.h"
45#include "tgsi/tgsi_info.h"
46#include "tgsi/tgsi_parse.h"
47#include "tgsi/tgsi_util.h"
48#include "tgsi/tgsi_exec.h"
49#include "tgsi/tgsi_scan.h"
50#include "lp_bld_type.h"
51#include "lp_bld_const.h"
52#include "lp_bld_arit.h"
53#include "lp_bld_logic.h"
54#include "lp_bld_swizzle.h"
55#include "lp_bld_flow.h"
56#include "lp_bld_tgsi.h"
57#include "lp_bld_limits.h"
58#include "lp_bld_debug.h"
59
60
/**
 * Loop header that steps CHAN over every register channel, from 0 up to
 * (but not including) NUM_CHANNELS.  The statement that follows the macro
 * invocation becomes the loop body.
 */
#define FOR_EACH_CHANNEL( CHAN )\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)

/**
 * Non-zero when bit CHAN is set in the write mask of the first destination
 * register of INST.  The bit is built from an unsigned constant so the
 * shift never operates on a signed value.
 */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1u << (CHAN)))

/**
 * Unbraced `if` guarding the statement that follows the macro invocation.
 * Because no braces are emitted, an `else` written after the guarded
 * statement would bind to this hidden `if`.
 */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/**
 * Run the following statement once for every channel that is enabled in
 * the write mask of the first destination register of INST.
 */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* Element indices used by the swizzle tables, named after quad positions. */
#define QUAD_TOP_LEFT     0
#define QUAD_TOP_RIGHT    1
#define QUAD_BOTTOM_LEFT  2
#define QUAD_BOTTOM_RIGHT 3
83
84
85struct lp_exec_mask {
86   struct lp_build_context *bld;
87
88   boolean has_mask;
89
90   LLVMTypeRef int_vec_type;
91
92   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
93   int cond_stack_size;
94   LLVMValueRef cond_mask;
95
96   LLVMValueRef break_stack[LP_MAX_TGSI_NESTING];
97   int break_stack_size;
98   LLVMValueRef break_mask;
99
100   LLVMValueRef cont_stack[LP_MAX_TGSI_NESTING];
101   int cont_stack_size;
102   LLVMValueRef cont_mask;
103
104   LLVMBasicBlockRef loop_stack[LP_MAX_TGSI_NESTING];
105   int loop_stack_size;
106   LLVMBasicBlockRef loop_block;
107
108
109   LLVMValueRef exec_mask;
110};
111
112struct lp_build_tgsi_soa_context
113{
114   struct lp_build_context base;
115
116   LLVMValueRef consts_ptr;
117   const LLVMValueRef *pos;
118   const LLVMValueRef (*inputs)[NUM_CHANNELS];
119   LLVMValueRef (*outputs)[NUM_CHANNELS];
120
121   struct lp_build_sampler_soa *sampler;
122
123   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
124   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
125   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
126
127   /* we allocate an array of temps if we have indirect
128    * addressing and then the temps above is unused */
129   LLVMValueRef temps_array;
130   boolean has_indirect_addressing;
131
132   struct lp_build_mask_context *mask;
133   struct lp_exec_mask exec_mask;
134};
135
136static const unsigned char
137swizzle_left[4] = {
138   QUAD_TOP_LEFT,     QUAD_TOP_LEFT,
139   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_LEFT
140};
141
142static const unsigned char
143swizzle_right[4] = {
144   QUAD_TOP_RIGHT,    QUAD_TOP_RIGHT,
145   QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
146};
147
148static const unsigned char
149swizzle_top[4] = {
150   QUAD_TOP_LEFT,     QUAD_TOP_RIGHT,
151   QUAD_TOP_LEFT,     QUAD_TOP_RIGHT
152};
153
154static const unsigned char
155swizzle_bottom[4] = {
156   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_RIGHT,
157   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_RIGHT
158};
159
160static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
161{
162   mask->bld = bld;
163   mask->has_mask = FALSE;
164   mask->cond_stack_size = 0;
165   mask->loop_stack_size = 0;
166   mask->break_stack_size = 0;
167   mask->cont_stack_size = 0;
168
169   mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
170}
171
172static void lp_exec_mask_update(struct lp_exec_mask *mask)
173{
174   if (mask->loop_stack_size) {
175      /*for loops we need to update the entire mask at runtime */
176      LLVMValueRef tmp;
177      assert(mask->break_mask);
178      tmp = LLVMBuildAnd(mask->bld->builder,
179                         mask->cont_mask,
180                         mask->break_mask,
181                         "maskcb");
182      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
183                                     mask->cond_mask,
184                                     tmp,
185                                     "maskfull");
186   } else
187      mask->exec_mask = mask->cond_mask;
188
189
190   mask->has_mask = (mask->cond_stack_size > 0 ||
191                     mask->loop_stack_size > 0);
192}
193
194static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
195                                   LLVMValueRef val)
196{
197   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
198   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
199   mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val,
200                                      mask->int_vec_type, "");
201
202   lp_exec_mask_update(mask);
203}
204
205static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
206{
207   LLVMValueRef prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
208   LLVMValueRef inv_mask = LLVMBuildNot(mask->bld->builder,
209                                        mask->cond_mask, "");
210
211   /* means that we didn't have any mask before and that
212    * we were fully enabled */
213   if (mask->cond_stack_size <= 1) {
214      prev_mask = LLVMConstAllOnes(mask->int_vec_type);
215   }
216
217   mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
218                                  inv_mask,
219                                  prev_mask, "");
220   lp_exec_mask_update(mask);
221}
222
223static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
224{
225   mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
226   lp_exec_mask_update(mask);
227}
228
229static void lp_exec_bgnloop(struct lp_exec_mask *mask)
230{
231
232   if (mask->cont_stack_size == 0)
233      mask->cont_mask = LLVMConstAllOnes(mask->int_vec_type);
234   if (mask->break_stack_size == 0)
235      mask->break_mask = LLVMConstAllOnes(mask->int_vec_type);
236   if (mask->cond_stack_size == 0)
237      mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type);
238
239   assert(mask->break_stack_size < LP_MAX_TGSI_NESTING);
240   assert(mask->cont_stack_size < LP_MAX_TGSI_NESTING);
241   assert(mask->break_stack_size < LP_MAX_TGSI_NESTING);
242
243   mask->break_stack[mask->break_stack_size++] = mask->break_mask;
244   mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask;
245   mask->loop_stack[mask->loop_stack_size++] = mask->loop_block;
246   mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
247   LLVMBuildBr(mask->bld->builder, mask->loop_block);
248   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
249
250   lp_exec_mask_update(mask);
251}
252
253static void lp_exec_break(struct lp_exec_mask *mask)
254{
255   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
256                                         mask->exec_mask,
257                                         "break");
258
259   mask->break_mask = LLVMBuildAnd(mask->bld->builder,
260                                   mask->break_mask,
261                                   exec_mask, "break_full");
262
263   lp_exec_mask_update(mask);
264}
265
266static void lp_exec_continue(struct lp_exec_mask *mask)
267{
268   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
269                                         mask->exec_mask,
270                                         "");
271
272   mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
273                                  mask->cont_mask,
274                                  exec_mask, "");
275
276   lp_exec_mask_update(mask);
277}
278
279
280static void lp_exec_endloop(struct lp_exec_mask *mask)
281{
282   LLVMBasicBlockRef endloop;
283   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
284                                      mask->bld->type.length);
285   LLVMValueRef i1cond;
286
287   assert(mask->break_mask);
288
289   /* i1cond = (mask == 0) */
290   i1cond = LLVMBuildICmp(
291      mask->bld->builder,
292      LLVMIntNE,
293      LLVMBuildBitCast(mask->bld->builder, mask->break_mask, reg_type, ""),
294      LLVMConstNull(reg_type), "");
295
296   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");
297
298   LLVMBuildCondBr(mask->bld->builder,
299                   i1cond, mask->loop_block, endloop);
300
301   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);
302
303   mask->loop_block = mask->loop_stack[--mask->loop_stack_size];
304   /* pop the cont mask */
305   if (mask->cont_stack_size) {
306      mask->cont_mask = mask->cont_stack[--mask->cont_stack_size];
307   }
308   /* pop the break mask */
309   if (mask->break_stack_size) {
310      mask->break_mask = mask->break_stack[--mask->break_stack_size];
311   }
312
313   lp_exec_mask_update(mask);
314}
315
316/* stores val into an address pointed to by dst.
317 * mask->exec_mask is used to figure out which bits of val
318 * should be stored into the address
319 * (0 means don't store this bit, 1 means do store).
320 */
321static void lp_exec_mask_store(struct lp_exec_mask *mask,
322                               LLVMValueRef val,
323                               LLVMValueRef dst)
324{
325   if (mask->has_mask) {
326      LLVMValueRef real_val, dst_val;
327
328      dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
329      real_val = lp_build_select(mask->bld,
330                                 mask->exec_mask,
331                                 val, dst_val);
332
333      LLVMBuildStore(mask->bld->builder, real_val, dst);
334   } else
335      LLVMBuildStore(mask->bld->builder, val, dst);
336}
337
338
339static LLVMValueRef
340emit_ddx(struct lp_build_tgsi_soa_context *bld,
341         LLVMValueRef src)
342{
343   LLVMValueRef src_left  = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
344   LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
345   return lp_build_sub(&bld->base, src_right, src_left);
346}
347
348
349static LLVMValueRef
350emit_ddy(struct lp_build_tgsi_soa_context *bld,
351         LLVMValueRef src)
352{
353   LLVMValueRef src_top    = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
354   LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
355   return lp_build_sub(&bld->base, src_top, src_bottom);
356}
357
358static LLVMValueRef
359get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
360             unsigned index,
361             unsigned swizzle,
362             boolean is_indirect,
363             LLVMValueRef addr)
364{
365   if (!bld->has_indirect_addressing) {
366      return bld->temps[index][swizzle];
367   } else {
368      LLVMValueRef lindex =
369         LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0);
370      if (is_indirect)
371         lindex = lp_build_add(&bld->base, lindex, addr);
372      return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
373   }
374}
375
376/**
377 * Register fetch.
378 */
379static LLVMValueRef
380emit_fetch(
381   struct lp_build_tgsi_soa_context *bld,
382   const struct tgsi_full_instruction *inst,
383   unsigned index,
384   const unsigned chan_index )
385{
386   const struct tgsi_full_src_register *reg = &inst->Src[index];
387   unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
388   LLVMValueRef res;
389   LLVMValueRef addr;
390
391   switch (swizzle) {
392   case TGSI_SWIZZLE_X:
393   case TGSI_SWIZZLE_Y:
394   case TGSI_SWIZZLE_Z:
395   case TGSI_SWIZZLE_W:
396
397      if (reg->Register.Indirect) {
398         LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
399         unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
400         addr = LLVMBuildLoad(bld->base.builder,
401                              bld->addr[reg->Indirect.Index][swizzle],
402                              "");
403         /* for indexing we want integers */
404         addr = LLVMBuildFPToSI(bld->base.builder, addr,
405                                int_vec_type, "");
406         addr = LLVMBuildExtractElement(bld->base.builder,
407                                        addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
408                                        "");
409         addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
410      }
411
412      switch (reg->Register.File) {
413      case TGSI_FILE_CONSTANT: {
414         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
415         LLVMValueRef scalar, scalar_ptr;
416
417         if (reg->Register.Indirect) {
418            /*lp_build_printf(bld->base.builder,
419              "\taddr = %d\n", addr);*/
420            index = lp_build_add(&bld->base, index, addr);
421         }
422         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
423         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
424
425         res = lp_build_broadcast_scalar(&bld->base, scalar);
426         break;
427      }
428
429      case TGSI_FILE_IMMEDIATE:
430         res = bld->immediates[reg->Register.Index][swizzle];
431         assert(res);
432         break;
433
434      case TGSI_FILE_INPUT:
435         res = bld->inputs[reg->Register.Index][swizzle];
436         assert(res);
437         break;
438
439      case TGSI_FILE_TEMPORARY: {
440         LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
441                                              swizzle,
442                                              reg->Register.Indirect,
443                                              addr);
444         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
445         if(!res)
446            return bld->base.undef;
447         break;
448      }
449
450      default:
451         assert( 0 );
452         return bld->base.undef;
453      }
454      break;
455
456   default:
457      assert( 0 );
458      return bld->base.undef;
459   }
460
461   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
462   case TGSI_UTIL_SIGN_CLEAR:
463      res = lp_build_abs( &bld->base, res );
464      break;
465
466   case TGSI_UTIL_SIGN_SET:
467      /* TODO: Use bitwese OR for floating point */
468      res = lp_build_abs( &bld->base, res );
469      res = LLVMBuildNeg( bld->base.builder, res, "" );
470      break;
471
472   case TGSI_UTIL_SIGN_TOGGLE:
473      res = LLVMBuildNeg( bld->base.builder, res, "" );
474      break;
475
476   case TGSI_UTIL_SIGN_KEEP:
477      break;
478   }
479
480   return res;
481}
482
483
484/**
485 * Register fetch with derivatives.
486 */
487static void
488emit_fetch_deriv(
489   struct lp_build_tgsi_soa_context *bld,
490   const struct tgsi_full_instruction *inst,
491   unsigned index,
492   const unsigned chan_index,
493   LLVMValueRef *res,
494   LLVMValueRef *ddx,
495   LLVMValueRef *ddy)
496{
497   LLVMValueRef src;
498
499   src = emit_fetch(bld, inst, index, chan_index);
500
501   if(res)
502      *res = src;
503
504   /* TODO: use interpolation coeffs for inputs */
505
506   if(ddx)
507      *ddx = emit_ddx(bld, src);
508
509   if(ddy)
510      *ddy = emit_ddy(bld, src);
511}
512
513
514/**
515 * Register store.
516 */
517static void
518emit_store(
519   struct lp_build_tgsi_soa_context *bld,
520   const struct tgsi_full_instruction *inst,
521   unsigned index,
522   unsigned chan_index,
523   LLVMValueRef value)
524{
525   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
526   LLVMValueRef addr;
527
528   switch( inst->Instruction.Saturate ) {
529   case TGSI_SAT_NONE:
530      break;
531
532   case TGSI_SAT_ZERO_ONE:
533      value = lp_build_max(&bld->base, value, bld->base.zero);
534      value = lp_build_min(&bld->base, value, bld->base.one);
535      break;
536
537   case TGSI_SAT_MINUS_PLUS_ONE:
538      value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
539      value = lp_build_min(&bld->base, value, bld->base.one);
540      break;
541
542   default:
543      assert(0);
544   }
545
546   if (reg->Register.Indirect) {
547      LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
548      unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
549      addr = LLVMBuildLoad(bld->base.builder,
550                           bld->addr[reg->Indirect.Index][swizzle],
551                           "");
552      /* for indexing we want integers */
553      addr = LLVMBuildFPToSI(bld->base.builder, addr,
554                             int_vec_type, "");
555      addr = LLVMBuildExtractElement(bld->base.builder,
556                                     addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
557                                     "");
558      addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
559   }
560
561   switch( reg->Register.File ) {
562   case TGSI_FILE_OUTPUT:
563      lp_exec_mask_store(&bld->exec_mask, value,
564                         bld->outputs[reg->Register.Index][chan_index]);
565      break;
566
567   case TGSI_FILE_TEMPORARY: {
568      LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
569                                           chan_index,
570                                           reg->Register.Indirect,
571                                           addr);
572      lp_exec_mask_store(&bld->exec_mask, value, temp_ptr);
573      break;
574   }
575
576   case TGSI_FILE_ADDRESS:
577      lp_exec_mask_store(&bld->exec_mask, value,
578                         bld->addr[reg->Indirect.Index][chan_index]);
579      break;
580
581   case TGSI_FILE_PREDICATE:
582      /* FIXME */
583      break;
584
585   default:
586      assert( 0 );
587   }
588}
589
590
591/**
592 * High-level instruction translators.
593 */
594
595
596static void
597emit_tex( struct lp_build_tgsi_soa_context *bld,
598          const struct tgsi_full_instruction *inst,
599          boolean apply_lodbias,
600          boolean projected,
601          LLVMValueRef *texel)
602{
603   const uint unit = inst->Src[1].Register.Index;
604   LLVMValueRef lodbias;
605   LLVMValueRef oow = NULL;
606   LLVMValueRef coords[3];
607   unsigned num_coords;
608   unsigned i;
609
610   if (!bld->sampler) {
611      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
612      for (i = 0; i < 4; i++) {
613         texel[i] = bld->base.undef;
614      }
615      return;
616   }
617
618   switch (inst->Texture.Texture) {
619   case TGSI_TEXTURE_1D:
620      num_coords = 1;
621      break;
622   case TGSI_TEXTURE_2D:
623   case TGSI_TEXTURE_RECT:
624      num_coords = 2;
625      break;
626   case TGSI_TEXTURE_SHADOW1D:
627   case TGSI_TEXTURE_SHADOW2D:
628   case TGSI_TEXTURE_SHADOWRECT:
629   case TGSI_TEXTURE_3D:
630   case TGSI_TEXTURE_CUBE:
631      num_coords = 3;
632      break;
633   default:
634      assert(0);
635      return;
636   }
637
638   if(apply_lodbias)
639      lodbias = emit_fetch( bld, inst, 0, 3 );
640   else
641      lodbias = bld->base.zero;
642
643   if (projected) {
644      oow = emit_fetch( bld, inst, 0, 3 );
645      oow = lp_build_rcp(&bld->base, oow);
646   }
647
648   for (i = 0; i < num_coords; i++) {
649      coords[i] = emit_fetch( bld, inst, 0, i );
650      if (projected)
651         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
652   }
653   for (i = num_coords; i < 3; i++) {
654      coords[i] = bld->base.undef;
655   }
656
657   bld->sampler->emit_fetch_texel(bld->sampler,
658                                  bld->base.builder,
659                                  bld->base.type,
660                                  unit, num_coords, coords, lodbias,
661                                  texel);
662}
663
664
665/**
666 * Kill fragment if any of the src register values are negative.
667 */
668static void
669emit_kil(
670   struct lp_build_tgsi_soa_context *bld,
671   const struct tgsi_full_instruction *inst )
672{
673   const struct tgsi_full_src_register *reg = &inst->Src[0];
674   LLVMValueRef terms[NUM_CHANNELS];
675   LLVMValueRef mask;
676   unsigned chan_index;
677
678   memset(&terms, 0, sizeof terms);
679
680   FOR_EACH_CHANNEL( chan_index ) {
681      unsigned swizzle;
682
683      /* Unswizzle channel */
684      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
685
686      /* Check if the component has not been already tested. */
687      assert(swizzle < NUM_CHANNELS);
688      if( !terms[swizzle] )
689         /* TODO: change the comparison operator instead of setting the sign */
690         terms[swizzle] =  emit_fetch(bld, inst, 0, chan_index );
691   }
692
693   mask = NULL;
694   FOR_EACH_CHANNEL( chan_index ) {
695      if(terms[chan_index]) {
696         LLVMValueRef chan_mask;
697
698         /*
699          * If term < 0 then mask = 0 else mask = ~0.
700          */
701         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
702
703         if(mask)
704            mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
705         else
706            mask = chan_mask;
707      }
708   }
709
710   if(mask)
711      lp_build_mask_update(bld->mask, mask);
712}
713
714
715/**
716 * Predicated fragment kill.
717 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
718 * The only predication is the execution mask which will apply if
719 * we're inside a loop or conditional.
720 */
721static void
722emit_kilp(struct lp_build_tgsi_soa_context *bld,
723          const struct tgsi_full_instruction *inst)
724{
725   LLVMValueRef mask;
726
727   /* For those channels which are "alive", disable fragment shader
728    * execution.
729    */
730   if (bld->exec_mask.has_mask) {
731      mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
732   }
733   else {
734      mask = bld->base.zero;
735   }
736
737   lp_build_mask_update(bld->mask, mask);
738}
739
740static void
741emit_declaration(
742   struct lp_build_tgsi_soa_context *bld,
743   const struct tgsi_full_declaration *decl)
744{
745   LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
746
747   unsigned first = decl->Range.First;
748   unsigned last = decl->Range.Last;
749   unsigned idx, i;
750
751   for (idx = first; idx <= last; ++idx) {
752      switch (decl->Declaration.File) {
753      case TGSI_FILE_TEMPORARY:
754         assert(idx < LP_MAX_TGSI_TEMPS);
755         if (bld->has_indirect_addressing) {
756            LLVMValueRef val = LLVMConstInt(LLVMInt32Type(),
757                                            last*4 + 4, 0);
758            bld->temps_array = lp_build_array_alloca(bld->base.builder,
759                                                     vec_type, val, "");
760         } else {
761            for (i = 0; i < NUM_CHANNELS; i++)
762               bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
763                                                    vec_type, "");
764         }
765         break;
766
767      case TGSI_FILE_OUTPUT:
768         for (i = 0; i < NUM_CHANNELS; i++)
769            bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
770                                                   vec_type, "");
771         break;
772
773      case TGSI_FILE_ADDRESS:
774         assert(idx < LP_MAX_TGSI_ADDRS);
775         for (i = 0; i < NUM_CHANNELS; i++)
776            bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
777                                                vec_type, "");
778         break;
779
780      case TGSI_FILE_PREDICATE:
781         _debug_printf("warning: predicate registers not yet implemented\n");
782         break;
783
784      default:
785         /* don't need to declare other vars */
786         break;
787      }
788   }
789}
790
791
792/**
793 * Emit LLVM for one TGSI instruction.
794 * \param return TRUE for success, FALSE otherwise
795 */
796static boolean
797emit_instruction(
798   struct lp_build_tgsi_soa_context *bld,
799   const struct tgsi_full_instruction *inst,
800   const struct tgsi_opcode_info *info)
801{
802   unsigned chan_index;
803   LLVMValueRef src0, src1, src2;
804   LLVMValueRef tmp0, tmp1, tmp2;
805   LLVMValueRef tmp3 = NULL;
806   LLVMValueRef tmp4 = NULL;
807   LLVMValueRef tmp5 = NULL;
808   LLVMValueRef tmp6 = NULL;
809   LLVMValueRef tmp7 = NULL;
810   LLVMValueRef res;
811   LLVMValueRef dst0[NUM_CHANNELS];
812
   /*
    * Stores and write masks are handled in a general fashion after the long
    * instruction opcode switch statement.
    *
    * Although not strictly necessary, we avoid generating instructions for
    * channels which won't be stored, in cases where that's easy. For some
    * complex instructions, like texture sampling, it is more convenient to
    * assume a full writemask and then let LLVM optimization passes eliminate
    * redundant code.
    */
823
824   assert(info->num_dst <= 1);
825   if(info->num_dst) {
826      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
827         dst0[chan_index] = bld->base.undef;
828      }
829   }
830
831   switch (inst->Instruction.Opcode) {
832   case TGSI_OPCODE_ARL:
833      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
834         tmp0 = emit_fetch( bld, inst, 0, chan_index );
835         tmp0 = lp_build_floor(&bld->base, tmp0);
836         dst0[chan_index] = tmp0;
837      }
838      break;
839
840   case TGSI_OPCODE_MOV:
841      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
842         dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
843      }
844      break;
845
846   case TGSI_OPCODE_LIT:
847      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
848         dst0[CHAN_X] = bld->base.one;
849      }
850      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
851         src0 = emit_fetch( bld, inst, 0, CHAN_X );
852         dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
853      }
854      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
855         /* XMM[1] = SrcReg[0].yyyy */
856         tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
857         /* XMM[1] = max(XMM[1], 0) */
858         tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
859         /* XMM[2] = SrcReg[0].wwww */
860         tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
861         tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
862         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
863         tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
864         dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
865      }
866      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
867         dst0[CHAN_W] = bld->base.one;
868      }
869      break;
870
871   case TGSI_OPCODE_RCP:
872   /* TGSI_OPCODE_RECIP */
873      src0 = emit_fetch( bld, inst, 0, CHAN_X );
874      res = lp_build_rcp(&bld->base, src0);
875      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
876         dst0[chan_index] = res;
877      }
878      break;
879
880   case TGSI_OPCODE_RSQ:
881   /* TGSI_OPCODE_RECIPSQRT */
882      src0 = emit_fetch( bld, inst, 0, CHAN_X );
883      src0 = lp_build_abs(&bld->base, src0);
884      res = lp_build_rsqrt(&bld->base, src0);
885      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
886         dst0[chan_index] = res;
887      }
888      break;
889
890   case TGSI_OPCODE_EXP:
891      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
892          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
893          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
894         LLVMValueRef *p_exp2_int_part = NULL;
895         LLVMValueRef *p_frac_part = NULL;
896         LLVMValueRef *p_exp2 = NULL;
897
898         src0 = emit_fetch( bld, inst, 0, CHAN_X );
899
900         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
901            p_exp2_int_part = &tmp0;
902         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
903            p_frac_part = &tmp1;
904         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
905            p_exp2 = &tmp2;
906
907         lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
908
909         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
910            dst0[CHAN_X] = tmp0;
911         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
912            dst0[CHAN_Y] = tmp1;
913         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
914            dst0[CHAN_Z] = tmp2;
915      }
916      /* dst.w = 1.0 */
917      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
918         dst0[CHAN_W] = bld->base.one;
919      }
920      break;
921
922   case TGSI_OPCODE_LOG:
923      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
924          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
925          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
926         LLVMValueRef *p_floor_log2 = NULL;
927         LLVMValueRef *p_exp = NULL;
928         LLVMValueRef *p_log2 = NULL;
929
930         src0 = emit_fetch( bld, inst, 0, CHAN_X );
931         src0 = lp_build_abs( &bld->base, src0 );
932
933         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
934            p_floor_log2 = &tmp0;
935         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
936            p_exp = &tmp1;
937         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
938            p_log2 = &tmp2;
939
940         lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
941
942         /* dst.x = floor(lg2(abs(src.x))) */
943         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
944            dst0[CHAN_X] = tmp0;
945         /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
946         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
947            dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
948         }
949         /* dst.z = lg2(abs(src.x)) */
950         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
951            dst0[CHAN_Z] = tmp2;
952      }
953      /* dst.w = 1.0 */
954      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
955         dst0[CHAN_W] = bld->base.one;
956      }
957      break;
958
959   case TGSI_OPCODE_MUL:
960      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
961         src0 = emit_fetch( bld, inst, 0, chan_index );
962         src1 = emit_fetch( bld, inst, 1, chan_index );
963         dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
964      }
965      break;
966
967   case TGSI_OPCODE_ADD:
968      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
969         src0 = emit_fetch( bld, inst, 0, chan_index );
970         src1 = emit_fetch( bld, inst, 1, chan_index );
971         dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
972      }
973      break;
974
975   case TGSI_OPCODE_DP3:
976   /* TGSI_OPCODE_DOT3 */
977      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
978      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
979      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
980      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
981      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
982      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
983      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
984      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
985      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
986      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
987      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
988      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
989         dst0[chan_index] = tmp0;
990      }
991      break;
992
993   case TGSI_OPCODE_DP4:
994   /* TGSI_OPCODE_DOT4 */
995      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
996      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
997      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
998      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
999      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1000      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1001      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1002      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1003      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1004      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1005      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1006      tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1007      tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1008      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1009      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1010      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1011         dst0[chan_index] = tmp0;
1012      }
1013      break;
1014
1015   case TGSI_OPCODE_DST:
1016      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1017         dst0[CHAN_X] = bld->base.one;
1018      }
1019      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1020         tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1021         tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1022         dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1023      }
1024      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1025         dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1026      }
1027      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1028         dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1029      }
1030      break;
1031
1032   case TGSI_OPCODE_MIN:
1033      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1034         src0 = emit_fetch( bld, inst, 0, chan_index );
1035         src1 = emit_fetch( bld, inst, 1, chan_index );
1036         dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1037      }
1038      break;
1039
1040   case TGSI_OPCODE_MAX:
1041      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1042         src0 = emit_fetch( bld, inst, 0, chan_index );
1043         src1 = emit_fetch( bld, inst, 1, chan_index );
1044         dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1045      }
1046      break;
1047
1048   case TGSI_OPCODE_SLT:
1049   /* TGSI_OPCODE_SETLT */
1050      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1051         src0 = emit_fetch( bld, inst, 0, chan_index );
1052         src1 = emit_fetch( bld, inst, 1, chan_index );
1053         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1054         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1055      }
1056      break;
1057
1058   case TGSI_OPCODE_SGE:
1059   /* TGSI_OPCODE_SETGE */
1060      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1061         src0 = emit_fetch( bld, inst, 0, chan_index );
1062         src1 = emit_fetch( bld, inst, 1, chan_index );
1063         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1064         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1065      }
1066      break;
1067
1068   case TGSI_OPCODE_MAD:
1069   /* TGSI_OPCODE_MADD */
1070      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1071         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1072         tmp1 = emit_fetch( bld, inst, 1, chan_index );
1073         tmp2 = emit_fetch( bld, inst, 2, chan_index );
1074         tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1075         tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1076         dst0[chan_index] = tmp0;
1077      }
1078      break;
1079
1080   case TGSI_OPCODE_SUB:
1081      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1082         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1083         tmp1 = emit_fetch( bld, inst, 1, chan_index );
1084         dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1085      }
1086      break;
1087
1088   case TGSI_OPCODE_LRP:
1089      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1090         src0 = emit_fetch( bld, inst, 0, chan_index );
1091         src1 = emit_fetch( bld, inst, 1, chan_index );
1092         src2 = emit_fetch( bld, inst, 2, chan_index );
1093         tmp0 = lp_build_sub( &bld->base, src1, src2 );
1094         tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1095         dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1096      }
1097      break;
1098
1099   case TGSI_OPCODE_CND:
1100      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1101         src0 = emit_fetch( bld, inst, 0, chan_index );
1102         src1 = emit_fetch( bld, inst, 1, chan_index );
1103         src2 = emit_fetch( bld, inst, 2, chan_index );
1104         tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1105         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1106         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1107      }
1108      break;
1109
1110   case TGSI_OPCODE_DP2A:
1111      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );  /* xmm0 = src[0].x */
1112      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );  /* xmm1 = src[1].x */
1113      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
1114      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );  /* xmm1 = src[0].y */
1115      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );  /* xmm2 = src[1].y */
1116      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
1117      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1118      tmp1 = emit_fetch( bld, inst, 2, CHAN_X );  /* xmm1 = src[2].x */
1119      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1120      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1121         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
1122      }
1123      break;
1124
1125   case TGSI_OPCODE_FRC:
1126      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1127         src0 = emit_fetch( bld, inst, 0, chan_index );
1128         tmp0 = lp_build_floor(&bld->base, src0);
1129         tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1130         dst0[chan_index] = tmp0;
1131      }
1132      break;
1133
1134   case TGSI_OPCODE_CLAMP:
1135      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1136         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1137         src1 = emit_fetch( bld, inst, 1, chan_index );
1138         src2 = emit_fetch( bld, inst, 2, chan_index );
1139         tmp0 = lp_build_max(&bld->base, tmp0, src1);
1140         tmp0 = lp_build_min(&bld->base, tmp0, src2);
1141         dst0[chan_index] = tmp0;
1142      }
1143      break;
1144
1145   case TGSI_OPCODE_FLR:
1146      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1147         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1148         dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1149      }
1150      break;
1151
1152   case TGSI_OPCODE_ROUND:
1153      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1154         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1155         dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1156      }
1157      break;
1158
1159   case TGSI_OPCODE_EX2: {
1160      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1161      tmp0 = lp_build_exp2( &bld->base, tmp0);
1162      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1163         dst0[chan_index] = tmp0;
1164      }
1165      break;
1166   }
1167
1168   case TGSI_OPCODE_LG2:
1169      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1170      tmp0 = lp_build_log2( &bld->base, tmp0);
1171      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1172         dst0[chan_index] = tmp0;
1173      }
1174      break;
1175
1176   case TGSI_OPCODE_POW:
1177      src0 = emit_fetch( bld, inst, 0, CHAN_X );
1178      src1 = emit_fetch( bld, inst, 1, CHAN_X );
1179      res = lp_build_pow( &bld->base, src0, src1 );
1180      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1181         dst0[chan_index] = res;
1182      }
1183      break;
1184
1185   case TGSI_OPCODE_XPD:
1186      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1187          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1188         tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1189         tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1190      }
1191      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1192          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1193         tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1194         tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1195      }
1196      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1197         tmp2 = tmp0;
1198         tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1199         tmp5 = tmp3;
1200         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1201         tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1202         dst0[CHAN_X] = tmp2;
1203      }
1204      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1205          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1206         tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1207         tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1208      }
1209      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1210         tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1211         tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1212         tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1213         dst0[CHAN_Y] = tmp3;
1214      }
1215      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1216         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1217         tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1218         tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1219         dst0[CHAN_Z] = tmp5;
1220      }
1221      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1222         dst0[CHAN_W] = bld->base.one;
1223      }
1224      break;
1225
1226   case TGSI_OPCODE_ABS:
1227      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1228         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1229         dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1230      }
1231      break;
1232
1233   case TGSI_OPCODE_RCC:
1234      /* deprecated? */
1235      assert(0);
1236      return FALSE;
1237
1238   case TGSI_OPCODE_DPH:
1239      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1240      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1241      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1242      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1243      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1244      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1245      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1246      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1247      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1248      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1249      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1250      tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1251      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1252      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1253         dst0[chan_index] = tmp0;
1254      }
1255      break;
1256
1257   case TGSI_OPCODE_COS:
1258      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1259      tmp0 = lp_build_cos( &bld->base, tmp0 );
1260      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1261         dst0[chan_index] = tmp0;
1262      }
1263      break;
1264
1265   case TGSI_OPCODE_DDX:
1266      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1267         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1268      }
1269      break;
1270
1271   case TGSI_OPCODE_DDY:
1272      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1273         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1274      }
1275      break;
1276
1277   case TGSI_OPCODE_KILP:
1278      /* predicated kill */
1279      emit_kilp( bld, inst );
1280      break;
1281
1282   case TGSI_OPCODE_KIL:
1283      /* conditional kill */
1284      emit_kil( bld, inst );
1285      break;
1286
1287   case TGSI_OPCODE_PK2H:
1288      return FALSE;
1289      break;
1290
1291   case TGSI_OPCODE_PK2US:
1292      return FALSE;
1293      break;
1294
1295   case TGSI_OPCODE_PK4B:
1296      return FALSE;
1297      break;
1298
1299   case TGSI_OPCODE_PK4UB:
1300      return FALSE;
1301      break;
1302
1303   case TGSI_OPCODE_RFL:
1304      return FALSE;
1305      break;
1306
1307   case TGSI_OPCODE_SEQ:
1308      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1309         src0 = emit_fetch( bld, inst, 0, chan_index );
1310         src1 = emit_fetch( bld, inst, 1, chan_index );
1311         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1312         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1313      }
1314      break;
1315
1316   case TGSI_OPCODE_SFL:
1317      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1318         dst0[chan_index] = bld->base.zero;
1319      }
1320      break;
1321
1322   case TGSI_OPCODE_SGT:
1323      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1324         src0 = emit_fetch( bld, inst, 0, chan_index );
1325         src1 = emit_fetch( bld, inst, 1, chan_index );
1326         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1327         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1328      }
1329      break;
1330
1331   case TGSI_OPCODE_SIN:
1332      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1333      tmp0 = lp_build_sin( &bld->base, tmp0 );
1334      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1335         dst0[chan_index] = tmp0;
1336      }
1337      break;
1338
1339   case TGSI_OPCODE_SLE:
1340      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1341         src0 = emit_fetch( bld, inst, 0, chan_index );
1342         src1 = emit_fetch( bld, inst, 1, chan_index );
1343         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1344         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1345      }
1346      break;
1347
1348   case TGSI_OPCODE_SNE:
1349      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1350         src0 = emit_fetch( bld, inst, 0, chan_index );
1351         src1 = emit_fetch( bld, inst, 1, chan_index );
1352         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1353         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1354      }
1355      break;
1356
1357   case TGSI_OPCODE_STR:
1358      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1359         dst0[chan_index] = bld->base.one;
1360      }
1361      break;
1362
1363   case TGSI_OPCODE_TEX:
1364      emit_tex( bld, inst, FALSE, FALSE, dst0 );
1365      break;
1366
1367   case TGSI_OPCODE_TXD:
1368      /* FIXME */
1369      return FALSE;
1370      break;
1371
1372   case TGSI_OPCODE_UP2H:
1373      /* deprecated */
1374      assert (0);
1375      return FALSE;
1376      break;
1377
1378   case TGSI_OPCODE_UP2US:
1379      /* deprecated */
1380      assert(0);
1381      return FALSE;
1382      break;
1383
1384   case TGSI_OPCODE_UP4B:
1385      /* deprecated */
1386      assert(0);
1387      return FALSE;
1388      break;
1389
1390   case TGSI_OPCODE_UP4UB:
1391      /* deprecated */
1392      assert(0);
1393      return FALSE;
1394      break;
1395
1396   case TGSI_OPCODE_X2D:
1397      /* deprecated? */
1398      assert(0);
1399      return FALSE;
1400      break;
1401
1402   case TGSI_OPCODE_ARA:
1403      /* deprecated */
1404      assert(0);
1405      return FALSE;
1406      break;
1407
1408   case TGSI_OPCODE_ARR:
1409      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1410         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1411         tmp0 = lp_build_round(&bld->base, tmp0);
1412         dst0[chan_index] = tmp0;
1413      }
1414      break;
1415
1416   case TGSI_OPCODE_BRA:
1417      /* deprecated */
1418      assert(0);
1419      return FALSE;
1420      break;
1421
1422   case TGSI_OPCODE_CAL:
1423      /* FIXME */
1424      return FALSE;
1425      break;
1426
1427   case TGSI_OPCODE_RET:
1428      /* FIXME */
1429      return FALSE;
1430      break;
1431
1432   case TGSI_OPCODE_END:
1433      break;
1434
1435   case TGSI_OPCODE_SSG:
1436   /* TGSI_OPCODE_SGN */
1437      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1438         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1439         dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1440      }
1441      break;
1442
1443   case TGSI_OPCODE_CMP:
1444      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1445         src0 = emit_fetch( bld, inst, 0, chan_index );
1446         src1 = emit_fetch( bld, inst, 1, chan_index );
1447         src2 = emit_fetch( bld, inst, 2, chan_index );
1448         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1449         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1450      }
1451      break;
1452
1453   case TGSI_OPCODE_SCS:
1454      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1455         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1456         dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1457      }
1458      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1459         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1460         dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1461      }
1462      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1463         dst0[CHAN_Z] = bld->base.zero;
1464      }
1465      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1466         dst0[CHAN_W] = bld->base.one;
1467      }
1468      break;
1469
1470   case TGSI_OPCODE_TXB:
1471      emit_tex( bld, inst, TRUE, FALSE, dst0 );
1472      break;
1473
1474   case TGSI_OPCODE_NRM:
1475      /* fall-through */
1476   case TGSI_OPCODE_NRM4:
1477      /* 3 or 4-component normalization */
1478      {
1479         uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1480
1481         if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1482             IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1483             IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1484             (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1485
1486            /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1487
1488            /* xmm4 = src.x */
1489            /* xmm0 = src.x * src.x */
1490            tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1491            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1492               tmp4 = tmp0;
1493            }
1494            tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1495
1496            /* xmm5 = src.y */
1497            /* xmm0 = xmm0 + src.y * src.y */
1498            tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1499            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1500               tmp5 = tmp1;
1501            }
1502            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1503            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1504
1505            /* xmm6 = src.z */
1506            /* xmm0 = xmm0 + src.z * src.z */
1507            tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1508            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1509               tmp6 = tmp1;
1510            }
1511            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1512            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1513
1514            if (dims == 4) {
1515               /* xmm7 = src.w */
1516               /* xmm0 = xmm0 + src.w * src.w */
1517               tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1518               if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1519                  tmp7 = tmp1;
1520               }
1521               tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1522               tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1523            }
1524
1525            /* xmm1 = 1 / sqrt(xmm0) */
1526            tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1527
1528            /* dst.x = xmm1 * src.x */
1529            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1530               dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1531            }
1532
1533            /* dst.y = xmm1 * src.y */
1534            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1535               dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1536            }
1537
1538            /* dst.z = xmm1 * src.z */
1539            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1540               dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1541            }
1542
1543            /* dst.w = xmm1 * src.w */
1544            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1545               dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1546            }
1547         }
1548
1549         /* dst.w = 1.0 */
1550         if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1551            dst0[CHAN_W] = bld->base.one;
1552         }
1553      }
1554      break;
1555
1556   case TGSI_OPCODE_DIV:
1557      /* deprecated */
1558      assert( 0 );
1559      return FALSE;
1560      break;
1561
1562   case TGSI_OPCODE_DP2:
1563      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );  /* xmm0 = src[0].x */
1564      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );  /* xmm1 = src[1].x */
1565      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
1566      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );  /* xmm1 = src[0].y */
1567      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );  /* xmm2 = src[1].y */
1568      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
1569      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1570      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1571         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
1572      }
1573      break;
1574
1575   case TGSI_OPCODE_TXL:
1576      emit_tex( bld, inst, TRUE, FALSE, dst0 );
1577      break;
1578
1579   case TGSI_OPCODE_TXP:
1580      emit_tex( bld, inst, FALSE, TRUE, dst0 );
1581      break;
1582
1583   case TGSI_OPCODE_BRK:
1584      lp_exec_break(&bld->exec_mask);
1585      break;
1586
1587   case TGSI_OPCODE_IF:
1588      tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1589      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
1590                          tmp0, bld->base.zero);
1591      lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1592      break;
1593
1594   case TGSI_OPCODE_BGNLOOP:
1595      lp_exec_bgnloop(&bld->exec_mask);
1596      break;
1597
1598   case TGSI_OPCODE_ELSE:
1599      lp_exec_mask_cond_invert(&bld->exec_mask);
1600      break;
1601
1602   case TGSI_OPCODE_ENDIF:
1603      lp_exec_mask_cond_pop(&bld->exec_mask);
1604      break;
1605
1606   case TGSI_OPCODE_ENDLOOP:
1607      lp_exec_endloop(&bld->exec_mask);
1608      break;
1609
1610   case TGSI_OPCODE_PUSHA:
1611      /* deprecated? */
1612      assert(0);
1613      return FALSE;
1614      break;
1615
1616   case TGSI_OPCODE_POPA:
1617      /* deprecated? */
1618      assert(0);
1619      return FALSE;
1620      break;
1621
1622   case TGSI_OPCODE_CEIL:
1623      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1624         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1625         dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1626      }
1627      break;
1628
1629   case TGSI_OPCODE_I2F:
1630      /* deprecated? */
1631      assert(0);
1632      return FALSE;
1633      break;
1634
1635   case TGSI_OPCODE_NOT:
1636      /* deprecated? */
1637      assert(0);
1638      return FALSE;
1639      break;
1640
1641   case TGSI_OPCODE_TRUNC:
1642      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1643         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1644         dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1645      }
1646      break;
1647
1648   case TGSI_OPCODE_SHL:
1649      /* deprecated? */
1650      assert(0);
1651      return FALSE;
1652      break;
1653
1654   case TGSI_OPCODE_ISHR:
1655      /* deprecated? */
1656      assert(0);
1657      return FALSE;
1658      break;
1659
1660   case TGSI_OPCODE_AND:
1661      /* deprecated? */
1662      assert(0);
1663      return FALSE;
1664      break;
1665
1666   case TGSI_OPCODE_OR:
1667      /* deprecated? */
1668      assert(0);
1669      return FALSE;
1670      break;
1671
1672   case TGSI_OPCODE_MOD:
1673      /* deprecated? */
1674      assert(0);
1675      return FALSE;
1676      break;
1677
1678   case TGSI_OPCODE_XOR:
1679      /* deprecated? */
1680      assert(0);
1681      return FALSE;
1682      break;
1683
1684   case TGSI_OPCODE_SAD:
1685      /* deprecated? */
1686      assert(0);
1687      return FALSE;
1688      break;
1689
1690   case TGSI_OPCODE_TXF:
1691      /* deprecated? */
1692      assert(0);
1693      return FALSE;
1694      break;
1695
1696   case TGSI_OPCODE_TXQ:
1697      /* deprecated? */
1698      assert(0);
1699      return FALSE;
1700      break;
1701
1702   case TGSI_OPCODE_CONT:
1703      lp_exec_continue(&bld->exec_mask);
1704      break;
1705
1706   case TGSI_OPCODE_EMIT:
1707      return FALSE;
1708      break;
1709
1710   case TGSI_OPCODE_ENDPRIM:
1711      return FALSE;
1712      break;
1713
1714   case TGSI_OPCODE_NOP:
1715      break;
1716
1717   default:
1718      return FALSE;
1719   }
1720
1721   if(info->num_dst) {
1722      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1723         emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
1724      }
1725   }
1726
1727   return TRUE;
1728}
1729
1730
1731void
1732lp_build_tgsi_soa(LLVMBuilderRef builder,
1733                  const struct tgsi_token *tokens,
1734                  struct lp_type type,
1735                  struct lp_build_mask_context *mask,
1736                  LLVMValueRef consts_ptr,
1737                  const LLVMValueRef *pos,
1738                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
1739                  LLVMValueRef (*outputs)[NUM_CHANNELS],
1740                  struct lp_build_sampler_soa *sampler,
1741                  struct tgsi_shader_info *info)
1742{
1743   struct lp_build_tgsi_soa_context bld;
1744   struct tgsi_parse_context parse;
1745   uint num_immediates = 0;
1746   unsigned i;
1747
1748   /* Setup build context */
1749   memset(&bld, 0, sizeof bld);
1750   lp_build_context_init(&bld.base, builder, type);
1751   bld.mask = mask;
1752   bld.pos = pos;
1753   bld.inputs = inputs;
1754   bld.outputs = outputs;
1755   bld.consts_ptr = consts_ptr;
1756   bld.sampler = sampler;
1757   bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 ||
1758                                 info->opcode_count[TGSI_OPCODE_ARL] > 0;
1759
1760   lp_exec_mask_init(&bld.exec_mask, &bld.base);
1761
1762   tgsi_parse_init( &parse, tokens );
1763
1764   while( !tgsi_parse_end_of_tokens( &parse ) ) {
1765      tgsi_parse_token( &parse );
1766
1767      switch( parse.FullToken.Token.Type ) {
1768      case TGSI_TOKEN_TYPE_DECLARATION:
1769         /* Inputs already interpolated */
1770         emit_declaration( &bld, &parse.FullToken.FullDeclaration );
1771         break;
1772
1773      case TGSI_TOKEN_TYPE_INSTRUCTION:
1774         {
1775            unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1776            const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(opcode);
1777            if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, opcode_info ))
1778               _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1779                             opcode_info->mnemonic);
1780         }
1781
1782         break;
1783
1784      case TGSI_TOKEN_TYPE_IMMEDIATE:
1785         /* simply copy the immediate values into the next immediates[] slot */
1786         {
1787            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1788            assert(size <= 4);
1789            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
1790            for( i = 0; i < size; ++i )
1791               bld.immediates[num_immediates][i] =
1792                  lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
1793            for( i = size; i < 4; ++i )
1794               bld.immediates[num_immediates][i] = bld.base.undef;
1795            num_immediates++;
1796         }
1797         break;
1798
1799      case TGSI_TOKEN_TYPE_PROPERTY:
1800         break;
1801
1802      default:
1803         assert( 0 );
1804      }
1805   }
1806   if (0) {
1807      LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
1808      LLVMValueRef function = LLVMGetBasicBlockParent(block);
1809      debug_printf("11111111111111111111111111111 \n");
1810      tgsi_dump(tokens, 0);
1811      LLVMDumpValue(function);
1812      debug_printf("2222222222222222222222222222 \n");
1813   }
1814   tgsi_parse_free( &parse );
1815}
1816
1817