lp_bld_tgsi_soa.c revision 32a7209c0a0d5ae63f12056ed969087d942c6298
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29/**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39#include "pipe/p_config.h"
40#include "pipe/p_shader_tokens.h"
41#include "util/u_debug.h"
42#include "util/u_math.h"
43#include "util/u_memory.h"
44#include "tgsi/tgsi_dump.h"
45#include "tgsi/tgsi_info.h"
46#include "tgsi/tgsi_parse.h"
47#include "tgsi/tgsi_util.h"
48#include "tgsi/tgsi_exec.h"
49#include "tgsi/tgsi_scan.h"
50#include "lp_bld_type.h"
51#include "lp_bld_const.h"
52#include "lp_bld_arit.h"
53#include "lp_bld_logic.h"
54#include "lp_bld_swizzle.h"
55#include "lp_bld_flow.h"
56#include "lp_bld_tgsi.h"
57#include "lp_bld_limits.h"
58#include "lp_bld_debug.h"
59
60
/* Iterate CHAN over every channel index, 0 .. NUM_CHANNELS-1. */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Non-zero when bit CHAN is set in the write mask of INST's first destination. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

/*
 * Guards the following statement with the write-mask test above.
 * Expands to a bare `if`, so an `else` placed after it would bind to this
 * hidden `if`.
 */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Iterate CHAN over the channels enabled in INST's first destination write mask. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Channel (vector component) indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* Element positions inside a 2x2 quad; used to build the swizzle_* tables. */
#define QUAD_TOP_LEFT     0
#define QUAD_TOP_RIGHT    1
#define QUAD_BOTTOM_LEFT  2
#define QUAD_BOTTOM_RIGHT 3

/* NOTE(review): not referenced in this part of the file; presumably a
 * capacity for the stored instruction array -- confirm against its users. */
#define LP_MAX_INSTRUCTIONS 256
85
86
/**
 * Execution-mask state tracked while translating control flow.
 *
 * Conditionals, loops and subroutine calls each contribute a mask; the
 * masks are combined into exec_mask by lp_exec_mask_update().
 */
struct lp_exec_mask {
   /* build context whose builder emits all mask operations */
   struct lp_build_context *bld;

   /* TRUE while any of the cond/loop/call stacks is non-empty */
   boolean has_mask;

   /* integer vector type derived from bld->type; the type of every mask */
   LLVMTypeRef int_vec_type;

   /* condition masks of the enclosing conditionals, saved on push */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   /* condition mask of the innermost open conditional */
   LLVMValueRef cond_mask;

   /* state of the innermost open loop */
   LLVMBasicBlockRef loop_block;   /* block branched back to by lp_exec_endloop() */
   LLVMValueRef cont_mask;         /* cleared for lanes that executed a continue */
   LLVMValueRef break_mask;        /* cleared for lanes that executed a break */
   LLVMValueRef break_var;         /* alloca slot carrying break_mask across iterations */
   /* state of the enclosing loops, saved by lp_exec_bgnloop() */
   struct {
      LLVMBasicBlockRef loop_block;
      LLVMValueRef cont_mask;
      LLVMValueRef break_mask;
      LLVMValueRef break_var;
   } loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;

   /* cleared for lanes that executed a return inside the current subroutine */
   LLVMValueRef ret_mask;
   /* return address and caller ret_mask, saved by lp_exec_mask_call() */
   struct {
      int pc;
      LLVMValueRef ret_mask;
   } call_stack[LP_MAX_TGSI_NESTING];
   int call_stack_size;

   /* combined mask, recomputed by lp_exec_mask_update() */
   LLVMValueRef exec_mask;
};
119
/**
 * Per-shader state for the TGSI -> LLVM IR (SoA) translation.
 */
struct lp_build_tgsi_soa_context
{
   /* Builder used for register values (fetches, stores, arithmetic) */
   struct lp_build_context base;

   /* Builder for integer masks and indices */
   struct lp_build_context int_bld;

   /* base pointer indexed with (register * 4 + component) for constants */
   LLVMValueRef consts_ptr;
   /* NOTE(review): not referenced in this part of the file; presumably the
    * fragment position values -- confirm against the caller */
   const LLVMValueRef *pos;
   /* input values, indexed [register][channel]; read directly by emit_fetch() */
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   /* output storage, indexed [register][channel]; filled with allocas by
    * emit_declaration() and written by emit_store() */
   LLVMValueRef (*outputs)[NUM_CHANNELS];

   /* texture sampling code generator; emit_tex() yields undef when NULL */
   const struct lp_build_sampler_soa *sampler;

   /* immediates hold values; temps/addr/preds hold pointers to allocas */
   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];

   /* we allocate an array of temps if we have indirect
    * addressing and then the temps above is unused */
   LLVMValueRef temps_array;
   boolean has_indirect_addressing;

   /* mask updated by emit_kil()/emit_kilp() through lp_build_mask_update() */
   struct lp_build_mask_context *mask;
   /* control-flow execution mask applied to register stores */
   struct lp_exec_mask exec_mask;

   /* NOTE(review): not referenced in this part of the file; presumably the
    * recorded instruction list and its capacity -- confirm against users */
   struct tgsi_full_instruction *instructions;
   uint max_instructions;
};
150
/*
 * Quad swizzle tables used by emit_ddx()/emit_ddy().
 *
 * Entry i names the quad element that is moved into position i.
 */

/* every position takes the left element of its own row */
static const unsigned char
swizzle_left[4] = {
   QUAD_TOP_LEFT,     QUAD_TOP_LEFT,
   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_LEFT
};

/* every position takes the right element of its own row */
static const unsigned char
swizzle_right[4] = {
   QUAD_TOP_RIGHT,    QUAD_TOP_RIGHT,
   QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
};

/* every position takes the top element of its own column */
static const unsigned char
swizzle_top[4] = {
   QUAD_TOP_LEFT,     QUAD_TOP_RIGHT,
   QUAD_TOP_LEFT,     QUAD_TOP_RIGHT
};

/* every position takes the bottom element of its own column */
static const unsigned char
swizzle_bottom[4] = {
   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_RIGHT,
   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_RIGHT
};
174
175static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
176{
177   mask->bld = bld;
178   mask->has_mask = FALSE;
179   mask->cond_stack_size = 0;
180   mask->loop_stack_size = 0;
181   mask->call_stack_size = 0;
182
183   mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
184   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
185         LLVMConstAllOnes(mask->int_vec_type);
186}
187
188static void lp_exec_mask_update(struct lp_exec_mask *mask)
189{
190   if (mask->loop_stack_size) {
191      /*for loops we need to update the entire mask at runtime */
192      LLVMValueRef tmp;
193      assert(mask->break_mask);
194      tmp = LLVMBuildAnd(mask->bld->builder,
195                         mask->cont_mask,
196                         mask->break_mask,
197                         "maskcb");
198      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
199                                     mask->cond_mask,
200                                     tmp,
201                                     "maskfull");
202   } else
203      mask->exec_mask = mask->cond_mask;
204
205   if (mask->call_stack_size) {
206      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
207                                     mask->exec_mask,
208                                     mask->ret_mask,
209                                     "callmask");
210   }
211
212   mask->has_mask = (mask->cond_stack_size > 0 ||
213                     mask->loop_stack_size > 0 ||
214                     mask->call_stack_size > 0);
215}
216
217static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
218                                   LLVMValueRef val)
219{
220   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
221   if (mask->cond_stack_size == 0) {
222      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
223   }
224   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
225   assert(LLVMTypeOf(val) == mask->int_vec_type);
226   mask->cond_mask = val;
227
228   lp_exec_mask_update(mask);
229}
230
231static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
232{
233   LLVMValueRef prev_mask;
234   LLVMValueRef inv_mask;
235
236   assert(mask->cond_stack_size);
237   prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
238   if (mask->cond_stack_size == 1) {
239      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
240   }
241
242   inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");
243
244   mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
245                                  inv_mask,
246                                  prev_mask, "");
247   lp_exec_mask_update(mask);
248}
249
250static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
251{
252   assert(mask->cond_stack_size);
253   mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
254   lp_exec_mask_update(mask);
255}
256
257static void lp_exec_bgnloop(struct lp_exec_mask *mask)
258{
259   if (mask->loop_stack_size == 0) {
260      assert(mask->loop_block == NULL);
261      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
262      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
263      assert(mask->break_var == NULL);
264   }
265
266   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
267
268   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
269   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
270   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
271   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
272   ++mask->loop_stack_size;
273
274   mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
275   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
276
277   mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
278   LLVMBuildBr(mask->bld->builder, mask->loop_block);
279   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
280
281   mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");
282
283   lp_exec_mask_update(mask);
284}
285
286static void lp_exec_break(struct lp_exec_mask *mask)
287{
288   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
289                                         mask->exec_mask,
290                                         "break");
291
292   mask->break_mask = LLVMBuildAnd(mask->bld->builder,
293                                   mask->break_mask,
294                                   exec_mask, "break_full");
295
296   lp_exec_mask_update(mask);
297}
298
299static void lp_exec_continue(struct lp_exec_mask *mask)
300{
301   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
302                                         mask->exec_mask,
303                                         "");
304
305   mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
306                                  mask->cont_mask,
307                                  exec_mask, "");
308
309   lp_exec_mask_update(mask);
310}
311
312
313static void lp_exec_endloop(struct lp_exec_mask *mask)
314{
315   LLVMBasicBlockRef endloop;
316   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
317                                      mask->bld->type.length);
318   LLVMValueRef i1cond;
319
320   assert(mask->break_mask);
321
322   /*
323    * Restore the cont_mask, but don't pop
324    */
325   assert(mask->loop_stack_size);
326   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
327   lp_exec_mask_update(mask);
328
329   /*
330    * Unlike the continue mask, the break_mask must be preserved across loop
331    * iterations
332    */
333   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
334
335   /* i1cond = (mask == 0) */
336   i1cond = LLVMBuildICmp(
337      mask->bld->builder,
338      LLVMIntNE,
339      LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
340      LLVMConstNull(reg_type), "");
341
342   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");
343
344   LLVMBuildCondBr(mask->bld->builder,
345                   i1cond, mask->loop_block, endloop);
346
347   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);
348
349   assert(mask->loop_stack_size);
350   --mask->loop_stack_size;
351   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
352   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
353   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
354   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
355
356   lp_exec_mask_update(mask);
357}
358
359/* stores val into an address pointed to by dst.
360 * mask->exec_mask is used to figure out which bits of val
361 * should be stored into the address
362 * (0 means don't store this bit, 1 means do store).
363 */
364static void lp_exec_mask_store(struct lp_exec_mask *mask,
365                               LLVMValueRef pred,
366                               LLVMValueRef val,
367                               LLVMValueRef dst)
368{
369   /* Mix the predicate and execution mask */
370   if (mask->has_mask) {
371      if (pred) {
372         pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
373      } else {
374         pred = mask->exec_mask;
375      }
376   }
377
378   if (pred) {
379      LLVMValueRef real_val, dst_val;
380
381      dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
382      real_val = lp_build_select(mask->bld,
383                                 pred,
384                                 val, dst_val);
385
386      LLVMBuildStore(mask->bld->builder, real_val, dst);
387   } else
388      LLVMBuildStore(mask->bld->builder, val, dst);
389}
390
391static void lp_exec_mask_call(struct lp_exec_mask *mask,
392                              int func,
393                              int *pc)
394{
395   assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
396   mask->call_stack[mask->call_stack_size].pc = *pc;
397   mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
398   mask->call_stack_size++;
399   *pc = func;
400}
401
402static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
403{
404   LLVMValueRef exec_mask;
405
406   if (mask->call_stack_size == 0) {
407      /* returning from main() */
408      *pc = -1;
409      return;
410   }
411   exec_mask = LLVMBuildNot(mask->bld->builder,
412                            mask->exec_mask,
413                            "ret");
414
415   mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
416                                 mask->ret_mask,
417                                 exec_mask, "ret_full");
418
419   lp_exec_mask_update(mask);
420}
421
/**
 * Start of a subroutine body.
 *
 * Intentionally empty: the call state is pushed by lp_exec_mask_call() and
 * popped by lp_exec_mask_endsub(), so nothing is done here.
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
425
426static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
427{
428   assert(mask->call_stack_size);
429   mask->call_stack_size--;
430   *pc = mask->call_stack[mask->call_stack_size].pc;
431   mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
432   lp_exec_mask_update(mask);
433}
434
435static LLVMValueRef
436emit_ddx(struct lp_build_tgsi_soa_context *bld,
437         LLVMValueRef src)
438{
439   LLVMValueRef src_left  = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
440   LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
441   return lp_build_sub(&bld->base, src_right, src_left);
442}
443
444
445static LLVMValueRef
446emit_ddy(struct lp_build_tgsi_soa_context *bld,
447         LLVMValueRef src)
448{
449   LLVMValueRef src_top    = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
450   LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
451   return lp_build_sub(&bld->base, src_top, src_bottom);
452}
453
454static LLVMValueRef
455get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
456             unsigned index,
457             unsigned chan,
458             boolean is_indirect,
459             LLVMValueRef addr)
460{
461   assert(chan < 4);
462   if (!bld->has_indirect_addressing) {
463      return bld->temps[index][chan];
464   } else {
465      LLVMValueRef lindex =
466         LLVMConstInt(LLVMInt32Type(), index * 4 + chan, 0);
467      if (is_indirect)
468         lindex = lp_build_add(&bld->base, lindex, addr);
469      return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
470   }
471}
472
473/**
474 * Register fetch.
475 */
476static LLVMValueRef
477emit_fetch(
478   struct lp_build_tgsi_soa_context *bld,
479   const struct tgsi_full_instruction *inst,
480   unsigned index,
481   const unsigned chan_index )
482{
483   const struct tgsi_full_src_register *reg = &inst->Src[index];
484   const unsigned swizzle =
485      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
486   LLVMValueRef res;
487   LLVMValueRef addr = NULL;
488
489   if (swizzle > 3) {
490      assert(0 && "invalid swizzle in emit_fetch()");
491      return bld->base.undef;
492   }
493
494   if (reg->Register.Indirect) {
495      LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
496      unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
497      addr = LLVMBuildLoad(bld->base.builder,
498                           bld->addr[reg->Indirect.Index][swizzle],
499                           "");
500      /* for indexing we want integers */
501      addr = LLVMBuildFPToSI(bld->base.builder, addr,
502                             int_vec_type, "");
503      addr = LLVMBuildExtractElement(bld->base.builder,
504                                     addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
505                                     "");
506      addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
507   }
508
509   switch (reg->Register.File) {
510   case TGSI_FILE_CONSTANT:
511      {
512         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(),
513                                           reg->Register.Index*4 + swizzle, 0);
514         LLVMValueRef scalar, scalar_ptr;
515
516         if (reg->Register.Indirect) {
517            /*lp_build_printf(bld->base.builder,
518              "\taddr = %d\n", addr);*/
519            index = lp_build_add(&bld->base, index, addr);
520         }
521         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
522                                   &index, 1, "");
523         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
524
525         res = lp_build_broadcast_scalar(&bld->base, scalar);
526      }
527      break;
528
529   case TGSI_FILE_IMMEDIATE:
530      res = bld->immediates[reg->Register.Index][swizzle];
531      assert(res);
532      break;
533
534   case TGSI_FILE_INPUT:
535      res = bld->inputs[reg->Register.Index][swizzle];
536      assert(res);
537      break;
538
539   case TGSI_FILE_TEMPORARY:
540      {
541         LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
542                                              swizzle,
543                                              reg->Register.Indirect,
544                                              addr);
545         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
546         if(!res)
547            return bld->base.undef;
548      }
549      break;
550
551   default:
552      assert(0 && "invalid src register in emit_fetch()");
553      return bld->base.undef;
554   }
555
556   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
557   case TGSI_UTIL_SIGN_CLEAR:
558      res = lp_build_abs( &bld->base, res );
559      break;
560
561   case TGSI_UTIL_SIGN_SET:
562      /* TODO: Use bitwese OR for floating point */
563      res = lp_build_abs( &bld->base, res );
564      res = LLVMBuildNeg( bld->base.builder, res, "" );
565      break;
566
567   case TGSI_UTIL_SIGN_TOGGLE:
568      res = LLVMBuildNeg( bld->base.builder, res, "" );
569      break;
570
571   case TGSI_UTIL_SIGN_KEEP:
572      break;
573   }
574
575   return res;
576}
577
578
579/**
580 * Register fetch with derivatives.
581 */
582static void
583emit_fetch_deriv(
584   struct lp_build_tgsi_soa_context *bld,
585   const struct tgsi_full_instruction *inst,
586   unsigned index,
587   const unsigned chan_index,
588   LLVMValueRef *res,
589   LLVMValueRef *ddx,
590   LLVMValueRef *ddy)
591{
592   LLVMValueRef src;
593
594   src = emit_fetch(bld, inst, index, chan_index);
595
596   if(res)
597      *res = src;
598
599   /* TODO: use interpolation coeffs for inputs */
600
601   if(ddx)
602      *ddx = emit_ddx(bld, src);
603
604   if(ddy)
605      *ddy = emit_ddy(bld, src);
606}
607
608
609/**
610 * Predicate.
611 */
612static void
613emit_fetch_predicate(
614   struct lp_build_tgsi_soa_context *bld,
615   const struct tgsi_full_instruction *inst,
616   LLVMValueRef *pred)
617{
618   unsigned index;
619   unsigned char swizzles[4];
620   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
621   LLVMValueRef value;
622   unsigned chan;
623
624   if (!inst->Instruction.Predicate) {
625      FOR_EACH_CHANNEL( chan ) {
626         pred[chan] = NULL;
627      }
628      return;
629   }
630
631   swizzles[0] = inst->Predicate.SwizzleX;
632   swizzles[1] = inst->Predicate.SwizzleY;
633   swizzles[2] = inst->Predicate.SwizzleZ;
634   swizzles[3] = inst->Predicate.SwizzleW;
635
636   index = inst->Predicate.Index;
637   assert(index < LP_MAX_TGSI_PREDS);
638
639   FOR_EACH_CHANNEL( chan ) {
640      unsigned swizzle = swizzles[chan];
641
642      /*
643       * Only fetch the predicate register channels that are actually listed
644       * in the swizzles
645       */
646      if (!unswizzled[swizzle]) {
647         value = LLVMBuildLoad(bld->base.builder,
648                               bld->preds[index][swizzle], "");
649
650         /*
651          * Convert the value to an integer mask.
652          *
653          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
654          * is needlessly causing two comparisons due to storing the intermediate
655          * result as float vector instead of an integer mask vector.
656          */
657         value = lp_build_compare(bld->base.builder,
658                                  bld->base.type,
659                                  PIPE_FUNC_NOTEQUAL,
660                                  value,
661                                  bld->base.zero);
662         if (inst->Predicate.Negate) {
663            value = LLVMBuildNot(bld->base.builder, value, "");
664         }
665
666         unswizzled[swizzle] = value;
667      } else {
668         value = unswizzled[swizzle];
669      }
670
671      pred[chan] = value;
672   }
673}
674
675
676/**
677 * Register store.
678 */
679static void
680emit_store(
681   struct lp_build_tgsi_soa_context *bld,
682   const struct tgsi_full_instruction *inst,
683   unsigned index,
684   unsigned chan_index,
685   LLVMValueRef pred,
686   LLVMValueRef value)
687{
688   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
689   LLVMValueRef addr = NULL;
690
691   switch( inst->Instruction.Saturate ) {
692   case TGSI_SAT_NONE:
693      break;
694
695   case TGSI_SAT_ZERO_ONE:
696      value = lp_build_max(&bld->base, value, bld->base.zero);
697      value = lp_build_min(&bld->base, value, bld->base.one);
698      break;
699
700   case TGSI_SAT_MINUS_PLUS_ONE:
701      value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
702      value = lp_build_min(&bld->base, value, bld->base.one);
703      break;
704
705   default:
706      assert(0);
707   }
708
709   if (reg->Register.Indirect) {
710      LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
711      unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
712      addr = LLVMBuildLoad(bld->base.builder,
713                           bld->addr[reg->Indirect.Index][swizzle],
714                           "");
715      /* for indexing we want integers */
716      addr = LLVMBuildFPToSI(bld->base.builder, addr,
717                             int_vec_type, "");
718      addr = LLVMBuildExtractElement(bld->base.builder,
719                                     addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
720                                     "");
721      addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
722   }
723
724   switch( reg->Register.File ) {
725   case TGSI_FILE_OUTPUT:
726      lp_exec_mask_store(&bld->exec_mask, pred, value,
727                         bld->outputs[reg->Register.Index][chan_index]);
728      break;
729
730   case TGSI_FILE_TEMPORARY: {
731      LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
732                                           chan_index,
733                                           reg->Register.Indirect,
734                                           addr);
735      lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
736      break;
737   }
738
739   case TGSI_FILE_ADDRESS:
740      lp_exec_mask_store(&bld->exec_mask, pred, value,
741                         bld->addr[reg->Indirect.Index][chan_index]);
742      break;
743
744   case TGSI_FILE_PREDICATE:
745      lp_exec_mask_store(&bld->exec_mask, pred, value,
746                         bld->preds[index][chan_index]);
747      break;
748
749   default:
750      assert( 0 );
751   }
752}
753
754
755/**
756 * High-level instruction translators.
757 */
758
/**
 * Variant of a texture instruction, as interpreted by emit_tex().
 */
enum tex_modifier {
   TEX_MODIFIER_NONE = 0,          /* plain lookup */
   TEX_MODIFIER_PROJECTED,         /* coordinates are multiplied by 1/w of source 0 */
   TEX_MODIFIER_LOD_BIAS,          /* w of source 0 is passed on as the LOD bias */
   TEX_MODIFIER_EXPLICIT_LOD,      /* w of source 0 is passed on as the explicit LOD */
   TEX_MODIFIER_EXPLICIT_DERIV     /* derivatives come from sources 1 and 2, unit from source 3 */
};
766
767static void
768emit_tex( struct lp_build_tgsi_soa_context *bld,
769          const struct tgsi_full_instruction *inst,
770          enum tex_modifier modifier,
771          LLVMValueRef *texel)
772{
773   unsigned unit;
774   LLVMValueRef lod_bias, explicit_lod;
775   LLVMValueRef oow = NULL;
776   LLVMValueRef coords[3];
777   LLVMValueRef ddx[3];
778   LLVMValueRef ddy[3];
779   unsigned num_coords;
780   unsigned i;
781
782   if (!bld->sampler) {
783      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
784      for (i = 0; i < 4; i++) {
785         texel[i] = bld->base.undef;
786      }
787      return;
788   }
789
790   switch (inst->Texture.Texture) {
791   case TGSI_TEXTURE_1D:
792      num_coords = 1;
793      break;
794   case TGSI_TEXTURE_2D:
795   case TGSI_TEXTURE_RECT:
796      num_coords = 2;
797      break;
798   case TGSI_TEXTURE_SHADOW1D:
799   case TGSI_TEXTURE_SHADOW2D:
800   case TGSI_TEXTURE_SHADOWRECT:
801   case TGSI_TEXTURE_3D:
802   case TGSI_TEXTURE_CUBE:
803      num_coords = 3;
804      break;
805   default:
806      assert(0);
807      return;
808   }
809
810   if (modifier == TEX_MODIFIER_LOD_BIAS) {
811      lod_bias = emit_fetch( bld, inst, 0, 3 );
812      explicit_lod = NULL;
813   }
814   else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
815      lod_bias = NULL;
816      explicit_lod = emit_fetch( bld, inst, 0, 3 );
817   }
818   else {
819      lod_bias = NULL;
820      explicit_lod = NULL;
821   }
822
823   if (modifier == TEX_MODIFIER_PROJECTED) {
824      oow = emit_fetch( bld, inst, 0, 3 );
825      oow = lp_build_rcp(&bld->base, oow);
826   }
827
828   for (i = 0; i < num_coords; i++) {
829      coords[i] = emit_fetch( bld, inst, 0, i );
830      if (modifier == TEX_MODIFIER_PROJECTED)
831         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
832   }
833   for (i = num_coords; i < 3; i++) {
834      coords[i] = bld->base.undef;
835   }
836
837   if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) {
838      for (i = 0; i < num_coords; i++) {
839         ddx[i] = emit_fetch( bld, inst, 1, i );
840         ddy[i] = emit_fetch( bld, inst, 2, i );
841      }
842      unit = inst->Src[3].Register.Index;
843   }  else {
844      for (i = 0; i < num_coords; i++) {
845         ddx[i] = emit_ddx( bld, coords[i] );
846         ddy[i] = emit_ddy( bld, coords[i] );
847      }
848      unit = inst->Src[1].Register.Index;
849   }
850   for (i = num_coords; i < 3; i++) {
851      ddx[i] = bld->base.undef;
852      ddy[i] = bld->base.undef;
853   }
854
855   bld->sampler->emit_fetch_texel(bld->sampler,
856                                  bld->base.builder,
857                                  bld->base.type,
858                                  unit, num_coords, coords,
859                                  ddx, ddy,
860                                  lod_bias, explicit_lod,
861                                  texel);
862}
863
864
865/**
866 * Kill fragment if any of the src register values are negative.
867 */
868static void
869emit_kil(
870   struct lp_build_tgsi_soa_context *bld,
871   const struct tgsi_full_instruction *inst )
872{
873   const struct tgsi_full_src_register *reg = &inst->Src[0];
874   LLVMValueRef terms[NUM_CHANNELS];
875   LLVMValueRef mask;
876   unsigned chan_index;
877
878   memset(&terms, 0, sizeof terms);
879
880   FOR_EACH_CHANNEL( chan_index ) {
881      unsigned swizzle;
882
883      /* Unswizzle channel */
884      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
885
886      /* Check if the component has not been already tested. */
887      assert(swizzle < NUM_CHANNELS);
888      if( !terms[swizzle] )
889         /* TODO: change the comparison operator instead of setting the sign */
890         terms[swizzle] =  emit_fetch(bld, inst, 0, chan_index );
891   }
892
893   mask = NULL;
894   FOR_EACH_CHANNEL( chan_index ) {
895      if(terms[chan_index]) {
896         LLVMValueRef chan_mask;
897
898         /*
899          * If term < 0 then mask = 0 else mask = ~0.
900          */
901         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
902
903         if(mask)
904            mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
905         else
906            mask = chan_mask;
907      }
908   }
909
910   if(mask)
911      lp_build_mask_update(bld->mask, mask);
912}
913
914
915/**
916 * Predicated fragment kill.
917 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
918 * The only predication is the execution mask which will apply if
919 * we're inside a loop or conditional.
920 */
921static void
922emit_kilp(struct lp_build_tgsi_soa_context *bld,
923          const struct tgsi_full_instruction *inst)
924{
925   LLVMValueRef mask;
926
927   /* For those channels which are "alive", disable fragment shader
928    * execution.
929    */
930   if (bld->exec_mask.has_mask) {
931      mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
932   }
933   else {
934      mask = bld->base.zero;
935   }
936
937   lp_build_mask_update(bld->mask, mask);
938}
939
940static void
941emit_declaration(
942   struct lp_build_tgsi_soa_context *bld,
943   const struct tgsi_full_declaration *decl)
944{
945   LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
946
947   unsigned first = decl->Range.First;
948   unsigned last = decl->Range.Last;
949   unsigned idx, i;
950
951   for (idx = first; idx <= last; ++idx) {
952      switch (decl->Declaration.File) {
953      case TGSI_FILE_TEMPORARY:
954         assert(idx < LP_MAX_TGSI_TEMPS);
955         if (bld->has_indirect_addressing) {
956            LLVMValueRef val = LLVMConstInt(LLVMInt32Type(),
957                                            last*4 + 4, 0);
958            bld->temps_array = lp_build_array_alloca(bld->base.builder,
959                                                     vec_type, val, "");
960         } else {
961            for (i = 0; i < NUM_CHANNELS; i++)
962               bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
963                                                    vec_type, "");
964         }
965         break;
966
967      case TGSI_FILE_OUTPUT:
968         for (i = 0; i < NUM_CHANNELS; i++)
969            bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
970                                                   vec_type, "");
971         break;
972
973      case TGSI_FILE_ADDRESS:
974         assert(idx < LP_MAX_TGSI_ADDRS);
975         for (i = 0; i < NUM_CHANNELS; i++)
976            bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
977                                                vec_type, "");
978         break;
979
980      case TGSI_FILE_PREDICATE:
981         assert(idx < LP_MAX_TGSI_PREDS);
982         for (i = 0; i < NUM_CHANNELS; i++)
983            bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
984                                                 vec_type, "");
985         break;
986
987      default:
988         /* don't need to declare other vars */
989         break;
990      }
991   }
992}
993
994
/**
 * Emit LLVM for one TGSI instruction.
 * \return TRUE for success, FALSE otherwise
 */
999static boolean
1000emit_instruction(
1001   struct lp_build_tgsi_soa_context *bld,
1002   const struct tgsi_full_instruction *inst,
1003   const struct tgsi_opcode_info *info,
1004   int *pc)
1005{
1006   unsigned chan_index;
1007   LLVMValueRef src0, src1, src2;
1008   LLVMValueRef tmp0, tmp1, tmp2;
1009   LLVMValueRef tmp3 = NULL;
1010   LLVMValueRef tmp4 = NULL;
1011   LLVMValueRef tmp5 = NULL;
1012   LLVMValueRef tmp6 = NULL;
1013   LLVMValueRef tmp7 = NULL;
1014   LLVMValueRef res;
1015   LLVMValueRef dst0[NUM_CHANNELS];
1016
1017   /*
1018    * Stores and write masks are handled in a general fashion after the long
1019    * instruction opcode switch statement.
1020    *
    * Although not strictly necessary, we avoid generating instructions for
    * channels which won't be stored, in cases where that's easy. For some
1023    * complex instructions, like texture sampling, it is more convenient to
1024    * assume a full writemask and then let LLVM optimization passes eliminate
1025    * redundant code.
1026    */
1027
1028   (*pc)++;
1029
1030   assert(info->num_dst <= 1);
1031   if (info->num_dst) {
1032      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1033         dst0[chan_index] = bld->base.undef;
1034      }
1035   }
1036
1037   switch (inst->Instruction.Opcode) {
1038   case TGSI_OPCODE_ARL:
1039      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1040         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1041         tmp0 = lp_build_floor(&bld->base, tmp0);
1042         dst0[chan_index] = tmp0;
1043      }
1044      break;
1045
1046   case TGSI_OPCODE_MOV:
1047      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1048         dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
1049      }
1050      break;
1051
1052   case TGSI_OPCODE_LIT:
1053      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
1054         dst0[CHAN_X] = bld->base.one;
1055      }
1056      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1057         src0 = emit_fetch( bld, inst, 0, CHAN_X );
1058         dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
1059      }
1060      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1061         /* XMM[1] = SrcReg[0].yyyy */
1062         tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1063         /* XMM[1] = max(XMM[1], 0) */
1064         tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1065         /* XMM[2] = SrcReg[0].wwww */
1066         tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1067         tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1068         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1069         tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1070         dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1071      }
1072      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1073         dst0[CHAN_W] = bld->base.one;
1074      }
1075      break;
1076
1077   case TGSI_OPCODE_RCP:
1078   /* TGSI_OPCODE_RECIP */
1079      src0 = emit_fetch( bld, inst, 0, CHAN_X );
1080      res = lp_build_rcp(&bld->base, src0);
1081      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1082         dst0[chan_index] = res;
1083      }
1084      break;
1085
1086   case TGSI_OPCODE_RSQ:
1087   /* TGSI_OPCODE_RECIPSQRT */
1088      src0 = emit_fetch( bld, inst, 0, CHAN_X );
1089      src0 = lp_build_abs(&bld->base, src0);
1090      res = lp_build_rsqrt(&bld->base, src0);
1091      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1092         dst0[chan_index] = res;
1093      }
1094      break;
1095
1096   case TGSI_OPCODE_EXP:
1097      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1098          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1099          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1100         LLVMValueRef *p_exp2_int_part = NULL;
1101         LLVMValueRef *p_frac_part = NULL;
1102         LLVMValueRef *p_exp2 = NULL;
1103
1104         src0 = emit_fetch( bld, inst, 0, CHAN_X );
1105
1106         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1107            p_exp2_int_part = &tmp0;
1108         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1109            p_frac_part = &tmp1;
1110         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1111            p_exp2 = &tmp2;
1112
1113         lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1114
1115         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1116            dst0[CHAN_X] = tmp0;
1117         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1118            dst0[CHAN_Y] = tmp1;
1119         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1120            dst0[CHAN_Z] = tmp2;
1121      }
1122      /* dst.w = 1.0 */
1123      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1124         dst0[CHAN_W] = bld->base.one;
1125      }
1126      break;
1127
1128   case TGSI_OPCODE_LOG:
1129      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1130          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1131          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1132         LLVMValueRef *p_floor_log2 = NULL;
1133         LLVMValueRef *p_exp = NULL;
1134         LLVMValueRef *p_log2 = NULL;
1135
1136         src0 = emit_fetch( bld, inst, 0, CHAN_X );
1137         src0 = lp_build_abs( &bld->base, src0 );
1138
1139         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1140            p_floor_log2 = &tmp0;
1141         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1142            p_exp = &tmp1;
1143         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1144            p_log2 = &tmp2;
1145
1146         lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1147
1148         /* dst.x = floor(lg2(abs(src.x))) */
1149         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1150            dst0[CHAN_X] = tmp0;
1151         /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1152         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1153            dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1154         }
1155         /* dst.z = lg2(abs(src.x)) */
1156         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1157            dst0[CHAN_Z] = tmp2;
1158      }
1159      /* dst.w = 1.0 */
1160      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1161         dst0[CHAN_W] = bld->base.one;
1162      }
1163      break;
1164
1165   case TGSI_OPCODE_MUL:
1166      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1167         src0 = emit_fetch( bld, inst, 0, chan_index );
1168         src1 = emit_fetch( bld, inst, 1, chan_index );
1169         dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1170      }
1171      break;
1172
1173   case TGSI_OPCODE_ADD:
1174      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1175         src0 = emit_fetch( bld, inst, 0, chan_index );
1176         src1 = emit_fetch( bld, inst, 1, chan_index );
1177         dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1178      }
1179      break;
1180
1181   case TGSI_OPCODE_DP3:
1182   /* TGSI_OPCODE_DOT3 */
1183      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1184      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1185      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1186      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1187      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1188      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1189      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1190      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1191      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1192      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1193      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1194      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1195         dst0[chan_index] = tmp0;
1196      }
1197      break;
1198
1199   case TGSI_OPCODE_DP4:
1200   /* TGSI_OPCODE_DOT4 */
1201      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1202      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1203      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1204      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1205      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1206      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1207      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1208      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1209      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1210      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1211      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1212      tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1213      tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1214      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1215      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1216      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1217         dst0[chan_index] = tmp0;
1218      }
1219      break;
1220
1221   case TGSI_OPCODE_DST:
1222      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1223         dst0[CHAN_X] = bld->base.one;
1224      }
1225      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1226         tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1227         tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1228         dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1229      }
1230      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1231         dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1232      }
1233      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1234         dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1235      }
1236      break;
1237
1238   case TGSI_OPCODE_MIN:
1239      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1240         src0 = emit_fetch( bld, inst, 0, chan_index );
1241         src1 = emit_fetch( bld, inst, 1, chan_index );
1242         dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1243      }
1244      break;
1245
1246   case TGSI_OPCODE_MAX:
1247      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1248         src0 = emit_fetch( bld, inst, 0, chan_index );
1249         src1 = emit_fetch( bld, inst, 1, chan_index );
1250         dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1251      }
1252      break;
1253
1254   case TGSI_OPCODE_SLT:
1255   /* TGSI_OPCODE_SETLT */
1256      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1257         src0 = emit_fetch( bld, inst, 0, chan_index );
1258         src1 = emit_fetch( bld, inst, 1, chan_index );
1259         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1260         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1261      }
1262      break;
1263
1264   case TGSI_OPCODE_SGE:
1265   /* TGSI_OPCODE_SETGE */
1266      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1267         src0 = emit_fetch( bld, inst, 0, chan_index );
1268         src1 = emit_fetch( bld, inst, 1, chan_index );
1269         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1270         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1271      }
1272      break;
1273
1274   case TGSI_OPCODE_MAD:
1275   /* TGSI_OPCODE_MADD */
1276      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1277         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1278         tmp1 = emit_fetch( bld, inst, 1, chan_index );
1279         tmp2 = emit_fetch( bld, inst, 2, chan_index );
1280         tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1281         tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1282         dst0[chan_index] = tmp0;
1283      }
1284      break;
1285
1286   case TGSI_OPCODE_SUB:
1287      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1288         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1289         tmp1 = emit_fetch( bld, inst, 1, chan_index );
1290         dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1291      }
1292      break;
1293
1294   case TGSI_OPCODE_LRP:
1295      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1296         src0 = emit_fetch( bld, inst, 0, chan_index );
1297         src1 = emit_fetch( bld, inst, 1, chan_index );
1298         src2 = emit_fetch( bld, inst, 2, chan_index );
1299         tmp0 = lp_build_sub( &bld->base, src1, src2 );
1300         tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1301         dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1302      }
1303      break;
1304
1305   case TGSI_OPCODE_CND:
1306      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1307         src0 = emit_fetch( bld, inst, 0, chan_index );
1308         src1 = emit_fetch( bld, inst, 1, chan_index );
1309         src2 = emit_fetch( bld, inst, 2, chan_index );
1310         tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1311         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1312         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1313      }
1314      break;
1315
1316   case TGSI_OPCODE_DP2A:
1317      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );  /* xmm0 = src[0].x */
1318      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );  /* xmm1 = src[1].x */
1319      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
1320      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );  /* xmm1 = src[0].y */
1321      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );  /* xmm2 = src[1].y */
1322      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
1323      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1324      tmp1 = emit_fetch( bld, inst, 2, CHAN_X );  /* xmm1 = src[2].x */
1325      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1326      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1327         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
1328      }
1329      break;
1330
1331   case TGSI_OPCODE_FRC:
1332      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1333         src0 = emit_fetch( bld, inst, 0, chan_index );
1334         tmp0 = lp_build_floor(&bld->base, src0);
1335         tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1336         dst0[chan_index] = tmp0;
1337      }
1338      break;
1339
1340   case TGSI_OPCODE_CLAMP:
1341      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1342         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1343         src1 = emit_fetch( bld, inst, 1, chan_index );
1344         src2 = emit_fetch( bld, inst, 2, chan_index );
1345         tmp0 = lp_build_max(&bld->base, tmp0, src1);
1346         tmp0 = lp_build_min(&bld->base, tmp0, src2);
1347         dst0[chan_index] = tmp0;
1348      }
1349      break;
1350
1351   case TGSI_OPCODE_FLR:
1352      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1353         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1354         dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1355      }
1356      break;
1357
1358   case TGSI_OPCODE_ROUND:
1359      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1360         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1361         dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1362      }
1363      break;
1364
1365   case TGSI_OPCODE_EX2: {
1366      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1367      tmp0 = lp_build_exp2( &bld->base, tmp0);
1368      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1369         dst0[chan_index] = tmp0;
1370      }
1371      break;
1372   }
1373
1374   case TGSI_OPCODE_LG2:
1375      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1376      tmp0 = lp_build_log2( &bld->base, tmp0);
1377      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1378         dst0[chan_index] = tmp0;
1379      }
1380      break;
1381
1382   case TGSI_OPCODE_POW:
1383      src0 = emit_fetch( bld, inst, 0, CHAN_X );
1384      src1 = emit_fetch( bld, inst, 1, CHAN_X );
1385      res = lp_build_pow( &bld->base, src0, src1 );
1386      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1387         dst0[chan_index] = res;
1388      }
1389      break;
1390
1391   case TGSI_OPCODE_XPD:
1392      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1393          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1394         tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1395         tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1396      }
1397      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1398          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1399         tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1400         tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1401      }
1402      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1403         tmp2 = tmp0;
1404         tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1405         tmp5 = tmp3;
1406         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1407         tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1408         dst0[CHAN_X] = tmp2;
1409      }
1410      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1411          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1412         tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1413         tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1414      }
1415      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1416         tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1417         tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1418         tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1419         dst0[CHAN_Y] = tmp3;
1420      }
1421      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1422         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1423         tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1424         tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1425         dst0[CHAN_Z] = tmp5;
1426      }
1427      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1428         dst0[CHAN_W] = bld->base.one;
1429      }
1430      break;
1431
1432   case TGSI_OPCODE_ABS:
1433      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1434         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1435         dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1436      }
1437      break;
1438
1439   case TGSI_OPCODE_RCC:
1440      /* deprecated? */
1441      assert(0);
1442      return FALSE;
1443
1444   case TGSI_OPCODE_DPH:
1445      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1446      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1447      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1448      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1449      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1450      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1451      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1452      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1453      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1454      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1455      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1456      tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1457      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1458      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1459         dst0[chan_index] = tmp0;
1460      }
1461      break;
1462
1463   case TGSI_OPCODE_COS:
1464      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1465      tmp0 = lp_build_cos( &bld->base, tmp0 );
1466      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1467         dst0[chan_index] = tmp0;
1468      }
1469      break;
1470
1471   case TGSI_OPCODE_DDX:
1472      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1473         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1474      }
1475      break;
1476
1477   case TGSI_OPCODE_DDY:
1478      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1479         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1480      }
1481      break;
1482
1483   case TGSI_OPCODE_KILP:
1484      /* predicated kill */
1485      emit_kilp( bld, inst );
1486      break;
1487
1488   case TGSI_OPCODE_KIL:
1489      /* conditional kill */
1490      emit_kil( bld, inst );
1491      break;
1492
1493   case TGSI_OPCODE_PK2H:
1494      return FALSE;
1495      break;
1496
1497   case TGSI_OPCODE_PK2US:
1498      return FALSE;
1499      break;
1500
1501   case TGSI_OPCODE_PK4B:
1502      return FALSE;
1503      break;
1504
1505   case TGSI_OPCODE_PK4UB:
1506      return FALSE;
1507      break;
1508
1509   case TGSI_OPCODE_RFL:
1510      return FALSE;
1511      break;
1512
1513   case TGSI_OPCODE_SEQ:
1514      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1515         src0 = emit_fetch( bld, inst, 0, chan_index );
1516         src1 = emit_fetch( bld, inst, 1, chan_index );
1517         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1518         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1519      }
1520      break;
1521
1522   case TGSI_OPCODE_SFL:
1523      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1524         dst0[chan_index] = bld->base.zero;
1525      }
1526      break;
1527
1528   case TGSI_OPCODE_SGT:
1529      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1530         src0 = emit_fetch( bld, inst, 0, chan_index );
1531         src1 = emit_fetch( bld, inst, 1, chan_index );
1532         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1533         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1534      }
1535      break;
1536
1537   case TGSI_OPCODE_SIN:
1538      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1539      tmp0 = lp_build_sin( &bld->base, tmp0 );
1540      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1541         dst0[chan_index] = tmp0;
1542      }
1543      break;
1544
1545   case TGSI_OPCODE_SLE:
1546      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1547         src0 = emit_fetch( bld, inst, 0, chan_index );
1548         src1 = emit_fetch( bld, inst, 1, chan_index );
1549         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1550         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1551      }
1552      break;
1553
1554   case TGSI_OPCODE_SNE:
1555      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1556         src0 = emit_fetch( bld, inst, 0, chan_index );
1557         src1 = emit_fetch( bld, inst, 1, chan_index );
1558         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1559         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1560      }
1561      break;
1562
1563   case TGSI_OPCODE_STR:
1564      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1565         dst0[chan_index] = bld->base.one;
1566      }
1567      break;
1568
1569   case TGSI_OPCODE_TEX:
1570      emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 );
1571      break;
1572
1573   case TGSI_OPCODE_TXD:
1574      emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1575      break;
1576
1577   case TGSI_OPCODE_UP2H:
1578      /* deprecated */
1579      assert (0);
1580      return FALSE;
1581      break;
1582
1583   case TGSI_OPCODE_UP2US:
1584      /* deprecated */
1585      assert(0);
1586      return FALSE;
1587      break;
1588
1589   case TGSI_OPCODE_UP4B:
1590      /* deprecated */
1591      assert(0);
1592      return FALSE;
1593      break;
1594
1595   case TGSI_OPCODE_UP4UB:
1596      /* deprecated */
1597      assert(0);
1598      return FALSE;
1599      break;
1600
1601   case TGSI_OPCODE_X2D:
1602      /* deprecated? */
1603      assert(0);
1604      return FALSE;
1605      break;
1606
1607   case TGSI_OPCODE_ARA:
1608      /* deprecated */
1609      assert(0);
1610      return FALSE;
1611      break;
1612
1613   case TGSI_OPCODE_ARR:
1614      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1615         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1616         tmp0 = lp_build_round(&bld->base, tmp0);
1617         dst0[chan_index] = tmp0;
1618      }
1619      break;
1620
1621   case TGSI_OPCODE_BRA:
1622      /* deprecated */
1623      assert(0);
1624      return FALSE;
1625      break;
1626
1627   case TGSI_OPCODE_CAL:
1628      lp_exec_mask_call(&bld->exec_mask,
1629                        inst->Label.Label,
1630                        pc);
1631
1632      break;
1633
1634   case TGSI_OPCODE_RET:
1635      lp_exec_mask_ret(&bld->exec_mask, pc);
1636      break;
1637
1638   case TGSI_OPCODE_END:
1639      *pc = -1;
1640      break;
1641
1642   case TGSI_OPCODE_SSG:
1643   /* TGSI_OPCODE_SGN */
1644      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1645         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1646         dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1647      }
1648      break;
1649
1650   case TGSI_OPCODE_CMP:
1651      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1652         src0 = emit_fetch( bld, inst, 0, chan_index );
1653         src1 = emit_fetch( bld, inst, 1, chan_index );
1654         src2 = emit_fetch( bld, inst, 2, chan_index );
1655         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1656         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1657      }
1658      break;
1659
1660   case TGSI_OPCODE_SCS:
1661      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1662         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1663         dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1664      }
1665      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1666         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1667         dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1668      }
1669      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1670         dst0[CHAN_Z] = bld->base.zero;
1671      }
1672      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1673         dst0[CHAN_W] = bld->base.one;
1674      }
1675      break;
1676
1677   case TGSI_OPCODE_TXB:
1678      emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 );
1679      break;
1680
1681   case TGSI_OPCODE_NRM:
1682      /* fall-through */
1683   case TGSI_OPCODE_NRM4:
1684      /* 3 or 4-component normalization */
1685      {
1686         uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1687
1688         if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1689             IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1690             IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1691             (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1692
1693            /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1694
1695            /* xmm4 = src.x */
1696            /* xmm0 = src.x * src.x */
1697            tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1698            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1699               tmp4 = tmp0;
1700            }
1701            tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1702
1703            /* xmm5 = src.y */
1704            /* xmm0 = xmm0 + src.y * src.y */
1705            tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1706            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1707               tmp5 = tmp1;
1708            }
1709            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1710            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1711
1712            /* xmm6 = src.z */
1713            /* xmm0 = xmm0 + src.z * src.z */
1714            tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1715            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1716               tmp6 = tmp1;
1717            }
1718            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1719            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1720
1721            if (dims == 4) {
1722               /* xmm7 = src.w */
1723               /* xmm0 = xmm0 + src.w * src.w */
1724               tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1725               if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1726                  tmp7 = tmp1;
1727               }
1728               tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1729               tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1730            }
1731
1732            /* xmm1 = 1 / sqrt(xmm0) */
1733            tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1734
1735            /* dst.x = xmm1 * src.x */
1736            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1737               dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1738            }
1739
1740            /* dst.y = xmm1 * src.y */
1741            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1742               dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1743            }
1744
1745            /* dst.z = xmm1 * src.z */
1746            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1747               dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1748            }
1749
1750            /* dst.w = xmm1 * src.w */
1751            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1752               dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1753            }
1754         }
1755
1756         /* dst.w = 1.0 */
1757         if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1758            dst0[CHAN_W] = bld->base.one;
1759         }
1760      }
1761      break;
1762
1763   case TGSI_OPCODE_DIV:
1764      /* deprecated */
1765      assert( 0 );
1766      return FALSE;
1767      break;
1768
1769   case TGSI_OPCODE_DP2:
1770      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );  /* xmm0 = src[0].x */
1771      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );  /* xmm1 = src[1].x */
1772      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
1773      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );  /* xmm1 = src[0].y */
1774      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );  /* xmm2 = src[1].y */
1775      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
1776      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1777      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1778         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
1779      }
1780      break;
1781
1782   case TGSI_OPCODE_TXL:
1783      emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 );
1784      break;
1785
1786   case TGSI_OPCODE_TXP:
1787      emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 );
1788      break;
1789
1790   case TGSI_OPCODE_BRK:
1791      lp_exec_break(&bld->exec_mask);
1792      break;
1793
1794   case TGSI_OPCODE_IF:
1795      tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1796      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
1797                          tmp0, bld->base.zero);
1798      lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1799      break;
1800
1801   case TGSI_OPCODE_BGNLOOP:
1802      lp_exec_bgnloop(&bld->exec_mask);
1803      break;
1804
1805   case TGSI_OPCODE_BGNSUB:
1806      lp_exec_mask_bgnsub(&bld->exec_mask);
1807      break;
1808
1809   case TGSI_OPCODE_ELSE:
1810      lp_exec_mask_cond_invert(&bld->exec_mask);
1811      break;
1812
1813   case TGSI_OPCODE_ENDIF:
1814      lp_exec_mask_cond_pop(&bld->exec_mask);
1815      break;
1816
1817   case TGSI_OPCODE_ENDLOOP:
1818      lp_exec_endloop(&bld->exec_mask);
1819      break;
1820
1821   case TGSI_OPCODE_ENDSUB:
1822      lp_exec_mask_endsub(&bld->exec_mask, pc);
1823      break;
1824
1825   case TGSI_OPCODE_PUSHA:
1826      /* deprecated? */
1827      assert(0);
1828      return FALSE;
1829      break;
1830
1831   case TGSI_OPCODE_POPA:
1832      /* deprecated? */
1833      assert(0);
1834      return FALSE;
1835      break;
1836
1837   case TGSI_OPCODE_CEIL:
1838      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1839         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1840         dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1841      }
1842      break;
1843
1844   case TGSI_OPCODE_I2F:
1845      /* deprecated? */
1846      assert(0);
1847      return FALSE;
1848      break;
1849
1850   case TGSI_OPCODE_NOT:
1851      /* deprecated? */
1852      assert(0);
1853      return FALSE;
1854      break;
1855
1856   case TGSI_OPCODE_TRUNC:
1857      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1858         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1859         dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1860      }
1861      break;
1862
1863   case TGSI_OPCODE_SHL:
1864      /* deprecated? */
1865      assert(0);
1866      return FALSE;
1867      break;
1868
1869   case TGSI_OPCODE_ISHR:
1870      /* deprecated? */
1871      assert(0);
1872      return FALSE;
1873      break;
1874
1875   case TGSI_OPCODE_AND:
1876      /* deprecated? */
1877      assert(0);
1878      return FALSE;
1879      break;
1880
1881   case TGSI_OPCODE_OR:
1882      /* deprecated? */
1883      assert(0);
1884      return FALSE;
1885      break;
1886
1887   case TGSI_OPCODE_MOD:
1888      /* deprecated? */
1889      assert(0);
1890      return FALSE;
1891      break;
1892
1893   case TGSI_OPCODE_XOR:
1894      /* deprecated? */
1895      assert(0);
1896      return FALSE;
1897      break;
1898
1899   case TGSI_OPCODE_SAD:
1900      /* deprecated? */
1901      assert(0);
1902      return FALSE;
1903      break;
1904
1905   case TGSI_OPCODE_TXF:
1906      /* deprecated? */
1907      assert(0);
1908      return FALSE;
1909      break;
1910
1911   case TGSI_OPCODE_TXQ:
1912      /* deprecated? */
1913      assert(0);
1914      return FALSE;
1915      break;
1916
1917   case TGSI_OPCODE_CONT:
1918      lp_exec_continue(&bld->exec_mask);
1919      break;
1920
1921   case TGSI_OPCODE_EMIT:
1922      return FALSE;
1923      break;
1924
1925   case TGSI_OPCODE_ENDPRIM:
1926      return FALSE;
1927      break;
1928
1929   case TGSI_OPCODE_NOP:
1930      break;
1931
1932   default:
1933      return FALSE;
1934   }
1935
1936   if(info->num_dst) {
1937      LLVMValueRef pred[NUM_CHANNELS];
1938
1939      emit_fetch_predicate( bld, inst, pred );
1940
1941      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1942         emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
1943      }
1944   }
1945
1946   return TRUE;
1947}
1948
1949
1950void
1951lp_build_tgsi_soa(LLVMBuilderRef builder,
1952                  const struct tgsi_token *tokens,
1953                  struct lp_type type,
1954                  struct lp_build_mask_context *mask,
1955                  LLVMValueRef consts_ptr,
1956                  const LLVMValueRef *pos,
1957                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
1958                  LLVMValueRef (*outputs)[NUM_CHANNELS],
1959                  struct lp_build_sampler_soa *sampler,
1960                  const struct tgsi_shader_info *info)
1961{
1962   struct lp_build_tgsi_soa_context bld;
1963   struct tgsi_parse_context parse;
1964   uint num_immediates = 0;
1965   uint num_instructions = 0;
1966   unsigned i;
1967   int pc = 0;
1968
1969   /* Setup build context */
1970   memset(&bld, 0, sizeof bld);
1971   lp_build_context_init(&bld.base, builder, type);
1972   lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
1973   bld.mask = mask;
1974   bld.pos = pos;
1975   bld.inputs = inputs;
1976   bld.outputs = outputs;
1977   bld.consts_ptr = consts_ptr;
1978   bld.sampler = sampler;
1979   bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 ||
1980                                 info->opcode_count[TGSI_OPCODE_ARL] > 0;
1981   bld.instructions = (struct tgsi_full_instruction *)
1982                      MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
1983   bld.max_instructions = LP_MAX_INSTRUCTIONS;
1984
1985   if (!bld.instructions) {
1986      return;
1987   }
1988
1989   lp_exec_mask_init(&bld.exec_mask, &bld.base);
1990
1991   tgsi_parse_init( &parse, tokens );
1992
1993   while( !tgsi_parse_end_of_tokens( &parse ) ) {
1994      tgsi_parse_token( &parse );
1995
1996      switch( parse.FullToken.Token.Type ) {
1997      case TGSI_TOKEN_TYPE_DECLARATION:
1998         /* Inputs already interpolated */
1999         emit_declaration( &bld, &parse.FullToken.FullDeclaration );
2000         break;
2001
2002      case TGSI_TOKEN_TYPE_INSTRUCTION:
2003         {
2004            /* save expanded instruction */
2005            if (num_instructions == bld.max_instructions) {
2006               bld.instructions = REALLOC(bld.instructions,
2007                                          bld.max_instructions
2008                                          * sizeof(struct tgsi_full_instruction),
2009                                          (bld.max_instructions + LP_MAX_INSTRUCTIONS)
2010                                          * sizeof(struct tgsi_full_instruction));
2011               bld.max_instructions += LP_MAX_INSTRUCTIONS;
2012            }
2013
2014            memcpy(bld.instructions + num_instructions,
2015                   &parse.FullToken.FullInstruction,
2016                   sizeof(bld.instructions[0]));
2017
2018            num_instructions++;
2019         }
2020
2021         break;
2022
2023      case TGSI_TOKEN_TYPE_IMMEDIATE:
2024         /* simply copy the immediate values into the next immediates[] slot */
2025         {
2026            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
2027            assert(size <= 4);
2028            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
2029            for( i = 0; i < size; ++i )
2030               bld.immediates[num_immediates][i] =
2031                  lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
2032            for( i = size; i < 4; ++i )
2033               bld.immediates[num_immediates][i] = bld.base.undef;
2034            num_immediates++;
2035         }
2036         break;
2037
2038      case TGSI_TOKEN_TYPE_PROPERTY:
2039         break;
2040
2041      default:
2042         assert( 0 );
2043      }
2044   }
2045
2046   while (pc != -1) {
2047      struct tgsi_full_instruction *instr = bld.instructions + pc;
2048      const struct tgsi_opcode_info *opcode_info =
2049         tgsi_get_opcode_info(instr->Instruction.Opcode);
2050      if (!emit_instruction( &bld, instr, opcode_info, &pc ))
2051         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
2052                       opcode_info->mnemonic);
2053   }
2054
2055   if (0) {
2056      LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
2057      LLVMValueRef function = LLVMGetBasicBlockParent(block);
2058      debug_printf("11111111111111111111111111111 \n");
2059      tgsi_dump(tokens, 0);
2060      lp_debug_dump_value(function);
2061      debug_printf("2222222222222222222222222222 \n");
2062   }
2063   tgsi_parse_free( &parse );
2064
2065   if (0) {
2066      LLVMModuleRef module = LLVMGetGlobalParent(
2067         LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
2068      LLVMDumpModule(module);
2069
2070   }
2071
2072   FREE( bld.instructions );
2073}
2074
2075