lp_bld_tgsi_soa.c revision 54b94ee96a6d750d57d99ae9819fcf8206d4680d
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29/**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39#include "pipe/p_config.h"
40#include "pipe/p_shader_tokens.h"
41#include "util/u_debug.h"
42#include "util/u_math.h"
43#include "util/u_memory.h"
44#include "tgsi/tgsi_dump.h"
45#include "tgsi/tgsi_info.h"
46#include "tgsi/tgsi_parse.h"
47#include "tgsi/tgsi_util.h"
48#include "tgsi/tgsi_exec.h"
49#include "tgsi/tgsi_scan.h"
50#include "lp_bld_type.h"
51#include "lp_bld_const.h"
52#include "lp_bld_arit.h"
53#include "lp_bld_logic.h"
54#include "lp_bld_swizzle.h"
55#include "lp_bld_flow.h"
56#include "lp_bld_tgsi.h"
57#include "lp_bld_limits.h"
58#include "lp_bld_debug.h"
59
60
/*
 * Channel iteration helpers.
 *
 * NOTE: the IF_ and FOR_EACH_DST0_ variants expand to an unbraced `if`, so
 * the statement that follows the macro becomes the body of that hidden `if`.
 * An `else` written after that statement would bind to the hidden `if`.
 */

/* Loop header: run CHAN over 0 .. NUM_CHANNELS-1. */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Non-zero when bit CHAN is set in the write mask of destination 0 of INST. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

/* `if` header guarding a statement on IS_DST0_CHANNEL_ENABLED. */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Loop header: visit only the channels enabled in destination 0's write mask. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/*
 * Vector element indices used by the swizzle_* tables below.
 * NOTE(review): presumably the four pixels of a 2x2 quad -- confirm.
 */
#define QUAD_TOP_LEFT     0
#define QUAD_TOP_RIGHT    1
#define QUAD_BOTTOM_LEFT  2
#define QUAD_BOTTOM_RIGHT 3
83
84
/**
 * Execution mask state for conditionals and loops.
 *
 * exec_mask is recomputed by lp_exec_mask_update() from cond_mask and, while
 * inside a loop, from cont_mask and break_mask as well.
 */
struct lp_exec_mask {
   /* Build context whose builder emits the mask instructions */
   struct lp_build_context *bld;

   /* TRUE while at least one conditional or loop is open */
   boolean has_mask;

   /* Integer vector type matching bld->type; the type of every mask below */
   LLVMTypeRef int_vec_type;

   /* Saved cond_mask values of the enclosing conditionals */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   /* Condition mask of the innermost open conditional (all ones if none) */
   LLVMValueRef cond_mask;

   /* Basic block at the head of the innermost open loop */
   LLVMBasicBlockRef loop_block;
   /* Lanes are cleared here by lp_exec_continue() */
   LLVMValueRef cont_mask;
   /* Lanes are cleared here by lp_exec_break() */
   LLVMValueRef break_mask;
   /* Stack slot that carries break_mask from one loop iteration to the next */
   LLVMValueRef break_var;
   /* Saved loop state of the enclosing loops */
   struct {
      LLVMBasicBlockRef loop_block;
      LLVMValueRef cont_mask;
      LLVMValueRef break_mask;
      LLVMValueRef break_var;
   } loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;

   /* Combined mask; only meaningful to consumers when has_mask is TRUE */
   LLVMValueRef exec_mask;
};
110
/**
 * State shared by the TGSI translation helpers in this file.
 */
struct lp_build_tgsi_soa_context
{
   /* Build context used for the shader's register values */
   struct lp_build_context base;

   /* Builder for integer masks and indices */
   struct lp_build_context int_bld;

   /* Base pointer indexed with (register * 4 + channel) to load constants */
   LLVMValueRef consts_ptr;
   const LLVMValueRef *pos;
   /* Input values, read directly (not through pointers) */
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   /* Pointers to per-channel output storage, filled in by emit_declaration() */
   LLVMValueRef (*outputs)[NUM_CHANNELS];

   /* Texture sampling code generator; may be NULL (see emit_tex()) */
   struct lp_build_sampler_soa *sampler;

   /* Immediate values, read directly */
   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
   /* Pointers to per-channel storage, filled in by emit_declaration() */
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];

   /* we allocate an array of temps if we have indirect
    * addressing and then the temps above is unused */
   LLVMValueRef temps_array;
   boolean has_indirect_addressing;

   /* Mask updated by the kill instructions (emit_kil(), emit_kilp()) */
   struct lp_build_mask_context *mask;
   /* Execution mask for conditionals and loops */
   struct lp_exec_mask exec_mask;
};
138
139static const unsigned char
140swizzle_left[4] = {
141   QUAD_TOP_LEFT,     QUAD_TOP_LEFT,
142   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_LEFT
143};
144
145static const unsigned char
146swizzle_right[4] = {
147   QUAD_TOP_RIGHT,    QUAD_TOP_RIGHT,
148   QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
149};
150
151static const unsigned char
152swizzle_top[4] = {
153   QUAD_TOP_LEFT,     QUAD_TOP_RIGHT,
154   QUAD_TOP_LEFT,     QUAD_TOP_RIGHT
155};
156
157static const unsigned char
158swizzle_bottom[4] = {
159   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_RIGHT,
160   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_RIGHT
161};
162
163static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
164{
165   mask->bld = bld;
166   mask->has_mask = FALSE;
167   mask->cond_stack_size = 0;
168   mask->loop_stack_size = 0;
169
170   mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
171   mask->break_mask = mask->cont_mask = mask->cond_mask =
172         LLVMConstAllOnes(mask->int_vec_type);
173}
174
175static void lp_exec_mask_update(struct lp_exec_mask *mask)
176{
177   if (mask->loop_stack_size) {
178      /*for loops we need to update the entire mask at runtime */
179      LLVMValueRef tmp;
180      assert(mask->break_mask);
181      tmp = LLVMBuildAnd(mask->bld->builder,
182                         mask->cont_mask,
183                         mask->break_mask,
184                         "maskcb");
185      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
186                                     mask->cond_mask,
187                                     tmp,
188                                     "maskfull");
189   } else
190      mask->exec_mask = mask->cond_mask;
191
192
193   mask->has_mask = (mask->cond_stack_size > 0 ||
194                     mask->loop_stack_size > 0);
195}
196
197static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
198                                   LLVMValueRef val)
199{
200   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
201   if (mask->cond_stack_size == 0) {
202      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
203   }
204   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
205   assert(LLVMTypeOf(val) == mask->int_vec_type);
206   mask->cond_mask = val;
207
208   lp_exec_mask_update(mask);
209}
210
211static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
212{
213   LLVMValueRef prev_mask;
214   LLVMValueRef inv_mask;
215
216   assert(mask->cond_stack_size);
217   prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
218   if (mask->cond_stack_size == 1) {
219      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
220   }
221
222   inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");
223
224   mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
225                                  inv_mask,
226                                  prev_mask, "");
227   lp_exec_mask_update(mask);
228}
229
230static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
231{
232   assert(mask->cond_stack_size);
233   mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
234   lp_exec_mask_update(mask);
235}
236
237static void lp_exec_bgnloop(struct lp_exec_mask *mask)
238{
239   if (mask->loop_stack_size == 0) {
240      assert(mask->loop_block == NULL);
241      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
242      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
243      assert(mask->break_var == NULL);
244   }
245
246   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
247
248   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
249   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
250   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
251   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
252   ++mask->loop_stack_size;
253
254   mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
255   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
256
257   mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
258   LLVMBuildBr(mask->bld->builder, mask->loop_block);
259   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
260
261   mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");
262
263   lp_exec_mask_update(mask);
264}
265
266static void lp_exec_break(struct lp_exec_mask *mask)
267{
268   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
269                                         mask->exec_mask,
270                                         "break");
271
272   mask->break_mask = LLVMBuildAnd(mask->bld->builder,
273                                   mask->break_mask,
274                                   exec_mask, "break_full");
275
276   lp_exec_mask_update(mask);
277}
278
279static void lp_exec_continue(struct lp_exec_mask *mask)
280{
281   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
282                                         mask->exec_mask,
283                                         "");
284
285   mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
286                                  mask->cont_mask,
287                                  exec_mask, "");
288
289   lp_exec_mask_update(mask);
290}
291
292
293static void lp_exec_endloop(struct lp_exec_mask *mask)
294{
295   LLVMBasicBlockRef endloop;
296   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
297                                      mask->bld->type.length);
298   LLVMValueRef i1cond;
299
300   assert(mask->break_mask);
301
302   /*
303    * Restore the cont_mask, but don't pop
304    */
305   assert(mask->loop_stack_size);
306   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
307   lp_exec_mask_update(mask);
308
309   /*
310    * Unlike the continue mask, the break_mask must be preserved across loop
311    * iterations
312    */
313   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
314
315   /* i1cond = (mask == 0) */
316   i1cond = LLVMBuildICmp(
317      mask->bld->builder,
318      LLVMIntNE,
319      LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
320      LLVMConstNull(reg_type), "");
321
322   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");
323
324   LLVMBuildCondBr(mask->bld->builder,
325                   i1cond, mask->loop_block, endloop);
326
327   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);
328
329   assert(mask->loop_stack_size);
330   --mask->loop_stack_size;
331   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
332   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
333   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
334   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
335
336   lp_exec_mask_update(mask);
337}
338
339/* stores val into an address pointed to by dst.
340 * mask->exec_mask is used to figure out which bits of val
341 * should be stored into the address
342 * (0 means don't store this bit, 1 means do store).
343 */
344static void lp_exec_mask_store(struct lp_exec_mask *mask,
345                               LLVMValueRef pred,
346                               LLVMValueRef val,
347                               LLVMValueRef dst)
348{
349   /* Mix the predicate and execution mask */
350   if (mask->has_mask) {
351      if (pred) {
352         pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
353      } else {
354         pred = mask->exec_mask;
355      }
356   }
357
358   if (pred) {
359      LLVMValueRef real_val, dst_val;
360
361      dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
362      real_val = lp_build_select(mask->bld,
363                                 pred,
364                                 val, dst_val);
365
366      LLVMBuildStore(mask->bld->builder, real_val, dst);
367   } else
368      LLVMBuildStore(mask->bld->builder, val, dst);
369}
370
371
372static LLVMValueRef
373emit_ddx(struct lp_build_tgsi_soa_context *bld,
374         LLVMValueRef src)
375{
376   LLVMValueRef src_left  = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
377   LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
378   return lp_build_sub(&bld->base, src_right, src_left);
379}
380
381
382static LLVMValueRef
383emit_ddy(struct lp_build_tgsi_soa_context *bld,
384         LLVMValueRef src)
385{
386   LLVMValueRef src_top    = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
387   LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
388   return lp_build_sub(&bld->base, src_top, src_bottom);
389}
390
391static LLVMValueRef
392get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
393             unsigned index,
394             unsigned swizzle,
395             boolean is_indirect,
396             LLVMValueRef addr)
397{
398   if (!bld->has_indirect_addressing) {
399      return bld->temps[index][swizzle];
400   } else {
401      LLVMValueRef lindex =
402         LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0);
403      if (is_indirect)
404         lindex = lp_build_add(&bld->base, lindex, addr);
405      return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
406   }
407}
408
409/**
410 * Register fetch.
411 */
412static LLVMValueRef
413emit_fetch(
414   struct lp_build_tgsi_soa_context *bld,
415   const struct tgsi_full_instruction *inst,
416   unsigned index,
417   const unsigned chan_index )
418{
419   const struct tgsi_full_src_register *reg = &inst->Src[index];
420   unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
421   LLVMValueRef res;
422   LLVMValueRef addr;
423
424   switch (swizzle) {
425   case TGSI_SWIZZLE_X:
426   case TGSI_SWIZZLE_Y:
427   case TGSI_SWIZZLE_Z:
428   case TGSI_SWIZZLE_W:
429
430      if (reg->Register.Indirect) {
431         LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
432         unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
433         addr = LLVMBuildLoad(bld->base.builder,
434                              bld->addr[reg->Indirect.Index][swizzle],
435                              "");
436         /* for indexing we want integers */
437         addr = LLVMBuildFPToSI(bld->base.builder, addr,
438                                int_vec_type, "");
439         addr = LLVMBuildExtractElement(bld->base.builder,
440                                        addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
441                                        "");
442         addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
443      }
444
445      switch (reg->Register.File) {
446      case TGSI_FILE_CONSTANT: {
447         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
448         LLVMValueRef scalar, scalar_ptr;
449
450         if (reg->Register.Indirect) {
451            /*lp_build_printf(bld->base.builder,
452              "\taddr = %d\n", addr);*/
453            index = lp_build_add(&bld->base, index, addr);
454         }
455         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
456         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
457
458         res = lp_build_broadcast_scalar(&bld->base, scalar);
459         break;
460      }
461
462      case TGSI_FILE_IMMEDIATE:
463         res = bld->immediates[reg->Register.Index][swizzle];
464         assert(res);
465         break;
466
467      case TGSI_FILE_INPUT:
468         res = bld->inputs[reg->Register.Index][swizzle];
469         assert(res);
470         break;
471
472      case TGSI_FILE_TEMPORARY: {
473         LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
474                                              swizzle,
475                                              reg->Register.Indirect,
476                                              addr);
477         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
478         if(!res)
479            return bld->base.undef;
480         break;
481      }
482
483      default:
484         assert( 0 );
485         return bld->base.undef;
486      }
487      break;
488
489   default:
490      assert( 0 );
491      return bld->base.undef;
492   }
493
494   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
495   case TGSI_UTIL_SIGN_CLEAR:
496      res = lp_build_abs( &bld->base, res );
497      break;
498
499   case TGSI_UTIL_SIGN_SET:
500      /* TODO: Use bitwese OR for floating point */
501      res = lp_build_abs( &bld->base, res );
502      res = LLVMBuildNeg( bld->base.builder, res, "" );
503      break;
504
505   case TGSI_UTIL_SIGN_TOGGLE:
506      res = LLVMBuildNeg( bld->base.builder, res, "" );
507      break;
508
509   case TGSI_UTIL_SIGN_KEEP:
510      break;
511   }
512
513   return res;
514}
515
516
517/**
518 * Register fetch with derivatives.
519 */
520static void
521emit_fetch_deriv(
522   struct lp_build_tgsi_soa_context *bld,
523   const struct tgsi_full_instruction *inst,
524   unsigned index,
525   const unsigned chan_index,
526   LLVMValueRef *res,
527   LLVMValueRef *ddx,
528   LLVMValueRef *ddy)
529{
530   LLVMValueRef src;
531
532   src = emit_fetch(bld, inst, index, chan_index);
533
534   if(res)
535      *res = src;
536
537   /* TODO: use interpolation coeffs for inputs */
538
539   if(ddx)
540      *ddx = emit_ddx(bld, src);
541
542   if(ddy)
543      *ddy = emit_ddy(bld, src);
544}
545
546
547/**
548 * Predicate.
549 */
550static void
551emit_fetch_predicate(
552   struct lp_build_tgsi_soa_context *bld,
553   const struct tgsi_full_instruction *inst,
554   LLVMValueRef *pred)
555{
556   unsigned index;
557   unsigned char swizzles[4];
558   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
559   LLVMValueRef value;
560   unsigned chan;
561
562   if (!inst->Instruction.Predicate) {
563      FOR_EACH_CHANNEL( chan ) {
564         pred[chan] = NULL;
565      }
566      return;
567   }
568
569   swizzles[0] = inst->Predicate.SwizzleX;
570   swizzles[1] = inst->Predicate.SwizzleY;
571   swizzles[2] = inst->Predicate.SwizzleZ;
572   swizzles[3] = inst->Predicate.SwizzleW;
573
574   index = inst->Predicate.Index;
575   assert(index < LP_MAX_TGSI_PREDS);
576
577   FOR_EACH_CHANNEL( chan ) {
578      unsigned swizzle = swizzles[chan];
579
580      /*
581       * Only fetch the predicate register channels that are actually listed
582       * in the swizzles
583       */
584      if (!unswizzled[swizzle]) {
585         value = LLVMBuildLoad(bld->base.builder,
586                               bld->preds[index][swizzle], "");
587
588         /*
589          * Convert the value to an integer mask.
590          *
591          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
592          * is needlessly causing two comparisons due to storing the intermediate
593          * result as float vector instead of an integer mask vector.
594          */
595         value = lp_build_compare(bld->base.builder,
596                                  bld->base.type,
597                                  PIPE_FUNC_NOTEQUAL,
598                                  value,
599                                  bld->base.zero);
600         if (inst->Predicate.Negate) {
601            value = LLVMBuildNot(bld->base.builder, value, "");
602         }
603
604         unswizzled[swizzle] = value;
605      } else {
606         value = unswizzled[swizzle];
607      }
608
609      pred[chan] = value;
610   }
611}
612
613
614/**
615 * Register store.
616 */
617static void
618emit_store(
619   struct lp_build_tgsi_soa_context *bld,
620   const struct tgsi_full_instruction *inst,
621   unsigned index,
622   unsigned chan_index,
623   LLVMValueRef pred,
624   LLVMValueRef value)
625{
626   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
627   LLVMValueRef addr;
628
629   switch( inst->Instruction.Saturate ) {
630   case TGSI_SAT_NONE:
631      break;
632
633   case TGSI_SAT_ZERO_ONE:
634      value = lp_build_max(&bld->base, value, bld->base.zero);
635      value = lp_build_min(&bld->base, value, bld->base.one);
636      break;
637
638   case TGSI_SAT_MINUS_PLUS_ONE:
639      value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
640      value = lp_build_min(&bld->base, value, bld->base.one);
641      break;
642
643   default:
644      assert(0);
645   }
646
647   if (reg->Register.Indirect) {
648      LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
649      unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
650      addr = LLVMBuildLoad(bld->base.builder,
651                           bld->addr[reg->Indirect.Index][swizzle],
652                           "");
653      /* for indexing we want integers */
654      addr = LLVMBuildFPToSI(bld->base.builder, addr,
655                             int_vec_type, "");
656      addr = LLVMBuildExtractElement(bld->base.builder,
657                                     addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
658                                     "");
659      addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
660   }
661
662   switch( reg->Register.File ) {
663   case TGSI_FILE_OUTPUT:
664      lp_exec_mask_store(&bld->exec_mask, pred, value,
665                         bld->outputs[reg->Register.Index][chan_index]);
666      break;
667
668   case TGSI_FILE_TEMPORARY: {
669      LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
670                                           chan_index,
671                                           reg->Register.Indirect,
672                                           addr);
673      lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
674      break;
675   }
676
677   case TGSI_FILE_ADDRESS:
678      lp_exec_mask_store(&bld->exec_mask, pred, value,
679                         bld->addr[reg->Indirect.Index][chan_index]);
680      break;
681
682   case TGSI_FILE_PREDICATE:
683      lp_exec_mask_store(&bld->exec_mask, pred, value,
684                         bld->preds[index][chan_index]);
685      break;
686
687   default:
688      assert( 0 );
689   }
690}
691
692
693/**
694 * High-level instruction translators.
695 */
696
/**
 * Variants of texture sampling understood by emit_tex().
 */
enum tex_modifier {
   /* plain lookup, derivatives computed from the coordinates */
   TEX_MODIFIER_NONE           = 0,
   /* coordinates are multiplied by the reciprocal of channel 3 of operand 0 */
   TEX_MODIFIER_PROJECTED      = 1,
   /* channel 3 of operand 0 is passed on as LOD bias */
   TEX_MODIFIER_LOD_BIAS       = 2,
   /* channel 3 of operand 0 is passed on as explicit LOD */
   TEX_MODIFIER_EXPLICIT_LOD   = 3,
   /* derivatives are fetched from operands 1 and 2 */
   TEX_MODIFIER_EXPLICIT_DERIV = 4
};
704
705static void
706emit_tex( struct lp_build_tgsi_soa_context *bld,
707          const struct tgsi_full_instruction *inst,
708          enum tex_modifier modifier,
709          LLVMValueRef *texel)
710{
711   unsigned unit;
712   LLVMValueRef lod_bias, explicit_lod;
713   LLVMValueRef oow = NULL;
714   LLVMValueRef coords[3];
715   LLVMValueRef ddx[3];
716   LLVMValueRef ddy[3];
717   unsigned num_coords;
718   unsigned i;
719
720   if (!bld->sampler) {
721      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
722      for (i = 0; i < 4; i++) {
723         texel[i] = bld->base.undef;
724      }
725      return;
726   }
727
728   switch (inst->Texture.Texture) {
729   case TGSI_TEXTURE_1D:
730      num_coords = 1;
731      break;
732   case TGSI_TEXTURE_2D:
733   case TGSI_TEXTURE_RECT:
734      num_coords = 2;
735      break;
736   case TGSI_TEXTURE_SHADOW1D:
737   case TGSI_TEXTURE_SHADOW2D:
738   case TGSI_TEXTURE_SHADOWRECT:
739   case TGSI_TEXTURE_3D:
740   case TGSI_TEXTURE_CUBE:
741      num_coords = 3;
742      break;
743   default:
744      assert(0);
745      return;
746   }
747
748   if (modifier == TEX_MODIFIER_LOD_BIAS) {
749      lod_bias = emit_fetch( bld, inst, 0, 3 );
750      explicit_lod = NULL;
751   }
752   else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
753      lod_bias = NULL;
754      explicit_lod = emit_fetch( bld, inst, 0, 3 );
755   }
756   else {
757      lod_bias = NULL;
758      explicit_lod = NULL;
759   }
760
761   if (modifier == TEX_MODIFIER_PROJECTED) {
762      oow = emit_fetch( bld, inst, 0, 3 );
763      oow = lp_build_rcp(&bld->base, oow);
764   }
765
766   for (i = 0; i < num_coords; i++) {
767      coords[i] = emit_fetch( bld, inst, 0, i );
768      if (modifier == TEX_MODIFIER_PROJECTED)
769         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
770   }
771   for (i = num_coords; i < 3; i++) {
772      coords[i] = bld->base.undef;
773   }
774
775   if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) {
776      for (i = 0; i < num_coords; i++) {
777         ddx[i] = emit_fetch( bld, inst, 1, i );
778         ddy[i] = emit_fetch( bld, inst, 2, i );
779      }
780      unit = inst->Src[3].Register.Index;
781   }  else {
782      for (i = 0; i < num_coords; i++) {
783         ddx[i] = emit_ddx( bld, coords[i] );
784         ddy[i] = emit_ddy( bld, coords[i] );
785      }
786      unit = inst->Src[1].Register.Index;
787   }
788   for (i = num_coords; i < 3; i++) {
789      ddx[i] = bld->base.undef;
790      ddy[i] = bld->base.undef;
791   }
792
793   bld->sampler->emit_fetch_texel(bld->sampler,
794                                  bld->base.builder,
795                                  bld->base.type,
796                                  unit, num_coords, coords,
797                                  ddx, ddy,
798                                  lod_bias, explicit_lod,
799                                  texel);
800}
801
802
803/**
804 * Kill fragment if any of the src register values are negative.
805 */
806static void
807emit_kil(
808   struct lp_build_tgsi_soa_context *bld,
809   const struct tgsi_full_instruction *inst )
810{
811   const struct tgsi_full_src_register *reg = &inst->Src[0];
812   LLVMValueRef terms[NUM_CHANNELS];
813   LLVMValueRef mask;
814   unsigned chan_index;
815
816   memset(&terms, 0, sizeof terms);
817
818   FOR_EACH_CHANNEL( chan_index ) {
819      unsigned swizzle;
820
821      /* Unswizzle channel */
822      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
823
824      /* Check if the component has not been already tested. */
825      assert(swizzle < NUM_CHANNELS);
826      if( !terms[swizzle] )
827         /* TODO: change the comparison operator instead of setting the sign */
828         terms[swizzle] =  emit_fetch(bld, inst, 0, chan_index );
829   }
830
831   mask = NULL;
832   FOR_EACH_CHANNEL( chan_index ) {
833      if(terms[chan_index]) {
834         LLVMValueRef chan_mask;
835
836         /*
837          * If term < 0 then mask = 0 else mask = ~0.
838          */
839         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
840
841         if(mask)
842            mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
843         else
844            mask = chan_mask;
845      }
846   }
847
848   if(mask)
849      lp_build_mask_update(bld->mask, mask);
850}
851
852
853/**
854 * Predicated fragment kill.
855 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
856 * The only predication is the execution mask which will apply if
857 * we're inside a loop or conditional.
858 */
859static void
860emit_kilp(struct lp_build_tgsi_soa_context *bld,
861          const struct tgsi_full_instruction *inst)
862{
863   LLVMValueRef mask;
864
865   /* For those channels which are "alive", disable fragment shader
866    * execution.
867    */
868   if (bld->exec_mask.has_mask) {
869      mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
870   }
871   else {
872      mask = bld->base.zero;
873   }
874
875   lp_build_mask_update(bld->mask, mask);
876}
877
878static void
879emit_declaration(
880   struct lp_build_tgsi_soa_context *bld,
881   const struct tgsi_full_declaration *decl)
882{
883   LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
884
885   unsigned first = decl->Range.First;
886   unsigned last = decl->Range.Last;
887   unsigned idx, i;
888
889   for (idx = first; idx <= last; ++idx) {
890      switch (decl->Declaration.File) {
891      case TGSI_FILE_TEMPORARY:
892         assert(idx < LP_MAX_TGSI_TEMPS);
893         if (bld->has_indirect_addressing) {
894            LLVMValueRef val = LLVMConstInt(LLVMInt32Type(),
895                                            last*4 + 4, 0);
896            bld->temps_array = lp_build_array_alloca(bld->base.builder,
897                                                     vec_type, val, "");
898         } else {
899            for (i = 0; i < NUM_CHANNELS; i++)
900               bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
901                                                    vec_type, "");
902         }
903         break;
904
905      case TGSI_FILE_OUTPUT:
906         for (i = 0; i < NUM_CHANNELS; i++)
907            bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
908                                                   vec_type, "");
909         break;
910
911      case TGSI_FILE_ADDRESS:
912         assert(idx < LP_MAX_TGSI_ADDRS);
913         for (i = 0; i < NUM_CHANNELS; i++)
914            bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
915                                                vec_type, "");
916         break;
917
918      case TGSI_FILE_PREDICATE:
919         assert(idx < LP_MAX_TGSI_PREDS);
920         for (i = 0; i < NUM_CHANNELS; i++)
921            bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
922                                                 vec_type, "");
923         break;
924
925      default:
926         /* don't need to declare other vars */
927         break;
928      }
929   }
930}
931
932
933/**
934 * Emit LLVM for one TGSI instruction.
935 * \param return TRUE for success, FALSE otherwise
936 */
937static boolean
938emit_instruction(
939   struct lp_build_tgsi_soa_context *bld,
940   const struct tgsi_full_instruction *inst,
941   const struct tgsi_opcode_info *info)
942{
943   unsigned chan_index;
944   LLVMValueRef src0, src1, src2;
945   LLVMValueRef tmp0, tmp1, tmp2;
946   LLVMValueRef tmp3 = NULL;
947   LLVMValueRef tmp4 = NULL;
948   LLVMValueRef tmp5 = NULL;
949   LLVMValueRef tmp6 = NULL;
950   LLVMValueRef tmp7 = NULL;
951   LLVMValueRef res;
952   LLVMValueRef dst0[NUM_CHANNELS];
953
954   /*
955    * Stores and write masks are handled in a general fashion after the long
956    * instruction opcode switch statement.
957    *
958    * Although not stricitly necessary, we avoid generating instructions for
959    * channels which won't be stored, in cases where's that easy. For some
960    * complex instructions, like texture sampling, it is more convenient to
961    * assume a full writemask and then let LLVM optimization passes eliminate
962    * redundant code.
963    */
964
965   assert(info->num_dst <= 1);
966   if (info->num_dst) {
967      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
968         dst0[chan_index] = bld->base.undef;
969      }
970   }
971
972   switch (inst->Instruction.Opcode) {
973   case TGSI_OPCODE_ARL:
974      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
975         tmp0 = emit_fetch( bld, inst, 0, chan_index );
976         tmp0 = lp_build_floor(&bld->base, tmp0);
977         dst0[chan_index] = tmp0;
978      }
979      break;
980
981   case TGSI_OPCODE_MOV:
982      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
983         dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
984      }
985      break;
986
987   case TGSI_OPCODE_LIT:
988      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
989         dst0[CHAN_X] = bld->base.one;
990      }
991      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
992         src0 = emit_fetch( bld, inst, 0, CHAN_X );
993         dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
994      }
995      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
996         /* XMM[1] = SrcReg[0].yyyy */
997         tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
998         /* XMM[1] = max(XMM[1], 0) */
999         tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1000         /* XMM[2] = SrcReg[0].wwww */
1001         tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1002         tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1003         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1004         tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1005         dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1006      }
1007      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1008         dst0[CHAN_W] = bld->base.one;
1009      }
1010      break;
1011
1012   case TGSI_OPCODE_RCP:
1013   /* TGSI_OPCODE_RECIP */
1014      src0 = emit_fetch( bld, inst, 0, CHAN_X );
1015      res = lp_build_rcp(&bld->base, src0);
1016      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1017         dst0[chan_index] = res;
1018      }
1019      break;
1020
1021   case TGSI_OPCODE_RSQ:
1022   /* TGSI_OPCODE_RECIPSQRT */
1023      src0 = emit_fetch( bld, inst, 0, CHAN_X );
1024      src0 = lp_build_abs(&bld->base, src0);
1025      res = lp_build_rsqrt(&bld->base, src0);
1026      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1027         dst0[chan_index] = res;
1028      }
1029      break;
1030
1031   case TGSI_OPCODE_EXP:
1032      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1033          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1034          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1035         LLVMValueRef *p_exp2_int_part = NULL;
1036         LLVMValueRef *p_frac_part = NULL;
1037         LLVMValueRef *p_exp2 = NULL;
1038
1039         src0 = emit_fetch( bld, inst, 0, CHAN_X );
1040
1041         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1042            p_exp2_int_part = &tmp0;
1043         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1044            p_frac_part = &tmp1;
1045         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1046            p_exp2 = &tmp2;
1047
1048         lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1049
1050         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1051            dst0[CHAN_X] = tmp0;
1052         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1053            dst0[CHAN_Y] = tmp1;
1054         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1055            dst0[CHAN_Z] = tmp2;
1056      }
1057      /* dst.w = 1.0 */
1058      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1059         dst0[CHAN_W] = bld->base.one;
1060      }
1061      break;
1062
1063   case TGSI_OPCODE_LOG:
1064      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1065          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1066          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1067         LLVMValueRef *p_floor_log2 = NULL;
1068         LLVMValueRef *p_exp = NULL;
1069         LLVMValueRef *p_log2 = NULL;
1070
1071         src0 = emit_fetch( bld, inst, 0, CHAN_X );
1072         src0 = lp_build_abs( &bld->base, src0 );
1073
1074         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1075            p_floor_log2 = &tmp0;
1076         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1077            p_exp = &tmp1;
1078         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1079            p_log2 = &tmp2;
1080
1081         lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1082
1083         /* dst.x = floor(lg2(abs(src.x))) */
1084         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1085            dst0[CHAN_X] = tmp0;
1086         /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1087         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1088            dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1089         }
1090         /* dst.z = lg2(abs(src.x)) */
1091         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1092            dst0[CHAN_Z] = tmp2;
1093      }
1094      /* dst.w = 1.0 */
1095      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1096         dst0[CHAN_W] = bld->base.one;
1097      }
1098      break;
1099
1100   case TGSI_OPCODE_MUL:
1101      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1102         src0 = emit_fetch( bld, inst, 0, chan_index );
1103         src1 = emit_fetch( bld, inst, 1, chan_index );
1104         dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1105      }
1106      break;
1107
1108   case TGSI_OPCODE_ADD:
1109      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1110         src0 = emit_fetch( bld, inst, 0, chan_index );
1111         src1 = emit_fetch( bld, inst, 1, chan_index );
1112         dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1113      }
1114      break;
1115
1116   case TGSI_OPCODE_DP3:
1117   /* TGSI_OPCODE_DOT3 */
1118      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1119      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1120      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1121      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1122      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1123      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1124      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1125      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1126      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1127      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1128      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1129      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1130         dst0[chan_index] = tmp0;
1131      }
1132      break;
1133
1134   case TGSI_OPCODE_DP4:
1135   /* TGSI_OPCODE_DOT4 */
1136      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1137      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1138      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1139      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1140      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1141      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1142      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1143      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1144      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1145      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1146      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1147      tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1148      tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1149      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1150      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1151      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1152         dst0[chan_index] = tmp0;
1153      }
1154      break;
1155
1156   case TGSI_OPCODE_DST:
1157      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1158         dst0[CHAN_X] = bld->base.one;
1159      }
1160      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1161         tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1162         tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1163         dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1164      }
1165      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1166         dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1167      }
1168      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1169         dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1170      }
1171      break;
1172
1173   case TGSI_OPCODE_MIN:
1174      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1175         src0 = emit_fetch( bld, inst, 0, chan_index );
1176         src1 = emit_fetch( bld, inst, 1, chan_index );
1177         dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1178      }
1179      break;
1180
1181   case TGSI_OPCODE_MAX:
1182      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1183         src0 = emit_fetch( bld, inst, 0, chan_index );
1184         src1 = emit_fetch( bld, inst, 1, chan_index );
1185         dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1186      }
1187      break;
1188
1189   case TGSI_OPCODE_SLT:
1190   /* TGSI_OPCODE_SETLT */
1191      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1192         src0 = emit_fetch( bld, inst, 0, chan_index );
1193         src1 = emit_fetch( bld, inst, 1, chan_index );
1194         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1195         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1196      }
1197      break;
1198
1199   case TGSI_OPCODE_SGE:
1200   /* TGSI_OPCODE_SETGE */
1201      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1202         src0 = emit_fetch( bld, inst, 0, chan_index );
1203         src1 = emit_fetch( bld, inst, 1, chan_index );
1204         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1205         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1206      }
1207      break;
1208
1209   case TGSI_OPCODE_MAD:
1210   /* TGSI_OPCODE_MADD */
1211      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1212         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1213         tmp1 = emit_fetch( bld, inst, 1, chan_index );
1214         tmp2 = emit_fetch( bld, inst, 2, chan_index );
1215         tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1216         tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1217         dst0[chan_index] = tmp0;
1218      }
1219      break;
1220
1221   case TGSI_OPCODE_SUB:
1222      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1223         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1224         tmp1 = emit_fetch( bld, inst, 1, chan_index );
1225         dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1226      }
1227      break;
1228
1229   case TGSI_OPCODE_LRP:
1230      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1231         src0 = emit_fetch( bld, inst, 0, chan_index );
1232         src1 = emit_fetch( bld, inst, 1, chan_index );
1233         src2 = emit_fetch( bld, inst, 2, chan_index );
1234         tmp0 = lp_build_sub( &bld->base, src1, src2 );
1235         tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1236         dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1237      }
1238      break;
1239
1240   case TGSI_OPCODE_CND:
1241      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1242         src0 = emit_fetch( bld, inst, 0, chan_index );
1243         src1 = emit_fetch( bld, inst, 1, chan_index );
1244         src2 = emit_fetch( bld, inst, 2, chan_index );
1245         tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1246         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1247         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1248      }
1249      break;
1250
1251   case TGSI_OPCODE_DP2A:
1252      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );  /* xmm0 = src[0].x */
1253      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );  /* xmm1 = src[1].x */
1254      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
1255      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );  /* xmm1 = src[0].y */
1256      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );  /* xmm2 = src[1].y */
1257      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
1258      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1259      tmp1 = emit_fetch( bld, inst, 2, CHAN_X );  /* xmm1 = src[2].x */
1260      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1261      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1262         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
1263      }
1264      break;
1265
1266   case TGSI_OPCODE_FRC:
1267      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1268         src0 = emit_fetch( bld, inst, 0, chan_index );
1269         tmp0 = lp_build_floor(&bld->base, src0);
1270         tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1271         dst0[chan_index] = tmp0;
1272      }
1273      break;
1274
1275   case TGSI_OPCODE_CLAMP:
1276      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1277         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1278         src1 = emit_fetch( bld, inst, 1, chan_index );
1279         src2 = emit_fetch( bld, inst, 2, chan_index );
1280         tmp0 = lp_build_max(&bld->base, tmp0, src1);
1281         tmp0 = lp_build_min(&bld->base, tmp0, src2);
1282         dst0[chan_index] = tmp0;
1283      }
1284      break;
1285
1286   case TGSI_OPCODE_FLR:
1287      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1288         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1289         dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1290      }
1291      break;
1292
1293   case TGSI_OPCODE_ROUND:
1294      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1295         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1296         dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1297      }
1298      break;
1299
1300   case TGSI_OPCODE_EX2: {
1301      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1302      tmp0 = lp_build_exp2( &bld->base, tmp0);
1303      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1304         dst0[chan_index] = tmp0;
1305      }
1306      break;
1307   }
1308
1309   case TGSI_OPCODE_LG2:
1310      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1311      tmp0 = lp_build_log2( &bld->base, tmp0);
1312      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1313         dst0[chan_index] = tmp0;
1314      }
1315      break;
1316
1317   case TGSI_OPCODE_POW:
1318      src0 = emit_fetch( bld, inst, 0, CHAN_X );
1319      src1 = emit_fetch( bld, inst, 1, CHAN_X );
1320      res = lp_build_pow( &bld->base, src0, src1 );
1321      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1322         dst0[chan_index] = res;
1323      }
1324      break;
1325
1326   case TGSI_OPCODE_XPD:
1327      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1328          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1329         tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1330         tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1331      }
1332      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1333          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1334         tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1335         tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1336      }
1337      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1338         tmp2 = tmp0;
1339         tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1340         tmp5 = tmp3;
1341         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1342         tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1343         dst0[CHAN_X] = tmp2;
1344      }
1345      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1346          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1347         tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1348         tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1349      }
1350      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1351         tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1352         tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1353         tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1354         dst0[CHAN_Y] = tmp3;
1355      }
1356      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1357         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1358         tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1359         tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1360         dst0[CHAN_Z] = tmp5;
1361      }
1362      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1363         dst0[CHAN_W] = bld->base.one;
1364      }
1365      break;
1366
1367   case TGSI_OPCODE_ABS:
1368      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1369         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1370         dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1371      }
1372      break;
1373
1374   case TGSI_OPCODE_RCC:
1375      /* deprecated? */
1376      assert(0);
1377      return FALSE;
1378
1379   case TGSI_OPCODE_DPH:
1380      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1381      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1382      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1383      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1384      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1385      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1386      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1387      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1388      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1389      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1390      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1391      tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1392      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1393      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1394         dst0[chan_index] = tmp0;
1395      }
1396      break;
1397
1398   case TGSI_OPCODE_COS:
1399      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1400      tmp0 = lp_build_cos( &bld->base, tmp0 );
1401      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1402         dst0[chan_index] = tmp0;
1403      }
1404      break;
1405
1406   case TGSI_OPCODE_DDX:
1407      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1408         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1409      }
1410      break;
1411
1412   case TGSI_OPCODE_DDY:
1413      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1414         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1415      }
1416      break;
1417
1418   case TGSI_OPCODE_KILP:
1419      /* predicated kill */
1420      emit_kilp( bld, inst );
1421      break;
1422
1423   case TGSI_OPCODE_KIL:
1424      /* conditional kill */
1425      emit_kil( bld, inst );
1426      break;
1427
1428   case TGSI_OPCODE_PK2H:
1429      return FALSE;
1430      break;
1431
1432   case TGSI_OPCODE_PK2US:
1433      return FALSE;
1434      break;
1435
1436   case TGSI_OPCODE_PK4B:
1437      return FALSE;
1438      break;
1439
1440   case TGSI_OPCODE_PK4UB:
1441      return FALSE;
1442      break;
1443
1444   case TGSI_OPCODE_RFL:
1445      return FALSE;
1446      break;
1447
1448   case TGSI_OPCODE_SEQ:
1449      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1450         src0 = emit_fetch( bld, inst, 0, chan_index );
1451         src1 = emit_fetch( bld, inst, 1, chan_index );
1452         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1453         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1454      }
1455      break;
1456
1457   case TGSI_OPCODE_SFL:
1458      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1459         dst0[chan_index] = bld->base.zero;
1460      }
1461      break;
1462
1463   case TGSI_OPCODE_SGT:
1464      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1465         src0 = emit_fetch( bld, inst, 0, chan_index );
1466         src1 = emit_fetch( bld, inst, 1, chan_index );
1467         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1468         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1469      }
1470      break;
1471
1472   case TGSI_OPCODE_SIN:
1473      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1474      tmp0 = lp_build_sin( &bld->base, tmp0 );
1475      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1476         dst0[chan_index] = tmp0;
1477      }
1478      break;
1479
1480   case TGSI_OPCODE_SLE:
1481      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1482         src0 = emit_fetch( bld, inst, 0, chan_index );
1483         src1 = emit_fetch( bld, inst, 1, chan_index );
1484         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1485         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1486      }
1487      break;
1488
1489   case TGSI_OPCODE_SNE:
1490      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1491         src0 = emit_fetch( bld, inst, 0, chan_index );
1492         src1 = emit_fetch( bld, inst, 1, chan_index );
1493         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1494         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1495      }
1496      break;
1497
1498   case TGSI_OPCODE_STR:
1499      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1500         dst0[chan_index] = bld->base.one;
1501      }
1502      break;
1503
1504   case TGSI_OPCODE_TEX:
1505      emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 );
1506      break;
1507
1508   case TGSI_OPCODE_TXD:
1509      emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1510      break;
1511
1512   case TGSI_OPCODE_UP2H:
1513      /* deprecated */
1514      assert (0);
1515      return FALSE;
1516      break;
1517
1518   case TGSI_OPCODE_UP2US:
1519      /* deprecated */
1520      assert(0);
1521      return FALSE;
1522      break;
1523
1524   case TGSI_OPCODE_UP4B:
1525      /* deprecated */
1526      assert(0);
1527      return FALSE;
1528      break;
1529
1530   case TGSI_OPCODE_UP4UB:
1531      /* deprecated */
1532      assert(0);
1533      return FALSE;
1534      break;
1535
1536   case TGSI_OPCODE_X2D:
1537      /* deprecated? */
1538      assert(0);
1539      return FALSE;
1540      break;
1541
1542   case TGSI_OPCODE_ARA:
1543      /* deprecated */
1544      assert(0);
1545      return FALSE;
1546      break;
1547
1548   case TGSI_OPCODE_ARR:
1549      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1550         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1551         tmp0 = lp_build_round(&bld->base, tmp0);
1552         dst0[chan_index] = tmp0;
1553      }
1554      break;
1555
1556   case TGSI_OPCODE_BRA:
1557      /* deprecated */
1558      assert(0);
1559      return FALSE;
1560      break;
1561
1562   case TGSI_OPCODE_CAL:
1563      /* FIXME */
1564      return FALSE;
1565      break;
1566
1567   case TGSI_OPCODE_RET:
1568      /* FIXME */
1569      return FALSE;
1570      break;
1571
1572   case TGSI_OPCODE_END:
1573      break;
1574
1575   case TGSI_OPCODE_SSG:
1576   /* TGSI_OPCODE_SGN */
1577      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1578         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1579         dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1580      }
1581      break;
1582
1583   case TGSI_OPCODE_CMP:
1584      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1585         src0 = emit_fetch( bld, inst, 0, chan_index );
1586         src1 = emit_fetch( bld, inst, 1, chan_index );
1587         src2 = emit_fetch( bld, inst, 2, chan_index );
1588         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1589         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1590      }
1591      break;
1592
1593   case TGSI_OPCODE_SCS:
1594      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1595         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1596         dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1597      }
1598      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1599         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1600         dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1601      }
1602      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1603         dst0[CHAN_Z] = bld->base.zero;
1604      }
1605      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1606         dst0[CHAN_W] = bld->base.one;
1607      }
1608      break;
1609
1610   case TGSI_OPCODE_TXB:
1611      emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 );
1612      break;
1613
1614   case TGSI_OPCODE_NRM:
1615      /* fall-through */
1616   case TGSI_OPCODE_NRM4:
1617      /* 3 or 4-component normalization */
1618      {
1619         uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1620
1621         if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1622             IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1623             IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1624             (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1625
1626            /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1627
1628            /* xmm4 = src.x */
1629            /* xmm0 = src.x * src.x */
1630            tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1631            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1632               tmp4 = tmp0;
1633            }
1634            tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1635
1636            /* xmm5 = src.y */
1637            /* xmm0 = xmm0 + src.y * src.y */
1638            tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1639            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1640               tmp5 = tmp1;
1641            }
1642            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1643            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1644
1645            /* xmm6 = src.z */
1646            /* xmm0 = xmm0 + src.z * src.z */
1647            tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1648            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1649               tmp6 = tmp1;
1650            }
1651            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1652            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1653
1654            if (dims == 4) {
1655               /* xmm7 = src.w */
1656               /* xmm0 = xmm0 + src.w * src.w */
1657               tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1658               if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1659                  tmp7 = tmp1;
1660               }
1661               tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1662               tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1663            }
1664
1665            /* xmm1 = 1 / sqrt(xmm0) */
1666            tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1667
1668            /* dst.x = xmm1 * src.x */
1669            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1670               dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1671            }
1672
1673            /* dst.y = xmm1 * src.y */
1674            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1675               dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1676            }
1677
1678            /* dst.z = xmm1 * src.z */
1679            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1680               dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1681            }
1682
1683            /* dst.w = xmm1 * src.w */
1684            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1685               dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1686            }
1687         }
1688
1689         /* dst.w = 1.0 */
1690         if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1691            dst0[CHAN_W] = bld->base.one;
1692         }
1693      }
1694      break;
1695
1696   case TGSI_OPCODE_DIV:
1697      /* deprecated */
1698      assert( 0 );
1699      return FALSE;
1700      break;
1701
1702   case TGSI_OPCODE_DP2:
1703      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );  /* xmm0 = src[0].x */
1704      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );  /* xmm1 = src[1].x */
1705      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
1706      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );  /* xmm1 = src[0].y */
1707      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );  /* xmm2 = src[1].y */
1708      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
1709      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1710      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1711         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
1712      }
1713      break;
1714
1715   case TGSI_OPCODE_TXL:
1716      emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 );
1717      break;
1718
1719   case TGSI_OPCODE_TXP:
1720      emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 );
1721      break;
1722
1723   case TGSI_OPCODE_BRK:
1724      lp_exec_break(&bld->exec_mask);
1725      break;
1726
1727   case TGSI_OPCODE_IF:
1728      tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1729      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
1730                          tmp0, bld->base.zero);
1731      lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1732      break;
1733
1734   case TGSI_OPCODE_BGNLOOP:
1735      lp_exec_bgnloop(&bld->exec_mask);
1736      break;
1737
1738   case TGSI_OPCODE_ELSE:
1739      lp_exec_mask_cond_invert(&bld->exec_mask);
1740      break;
1741
1742   case TGSI_OPCODE_ENDIF:
1743      lp_exec_mask_cond_pop(&bld->exec_mask);
1744      break;
1745
1746   case TGSI_OPCODE_ENDLOOP:
1747      lp_exec_endloop(&bld->exec_mask);
1748      break;
1749
1750   case TGSI_OPCODE_PUSHA:
1751      /* deprecated? */
1752      assert(0);
1753      return FALSE;
1754      break;
1755
1756   case TGSI_OPCODE_POPA:
1757      /* deprecated? */
1758      assert(0);
1759      return FALSE;
1760      break;
1761
1762   case TGSI_OPCODE_CEIL:
1763      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1764         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1765         dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1766      }
1767      break;
1768
1769   case TGSI_OPCODE_I2F:
1770      /* deprecated? */
1771      assert(0);
1772      return FALSE;
1773      break;
1774
1775   case TGSI_OPCODE_NOT:
1776      /* deprecated? */
1777      assert(0);
1778      return FALSE;
1779      break;
1780
1781   case TGSI_OPCODE_TRUNC:
1782      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1783         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1784         dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1785      }
1786      break;
1787
1788   case TGSI_OPCODE_SHL:
1789      /* deprecated? */
1790      assert(0);
1791      return FALSE;
1792      break;
1793
1794   case TGSI_OPCODE_ISHR:
1795      /* deprecated? */
1796      assert(0);
1797      return FALSE;
1798      break;
1799
1800   case TGSI_OPCODE_AND:
1801      /* deprecated? */
1802      assert(0);
1803      return FALSE;
1804      break;
1805
1806   case TGSI_OPCODE_OR:
1807      /* deprecated? */
1808      assert(0);
1809      return FALSE;
1810      break;
1811
1812   case TGSI_OPCODE_MOD:
1813      /* deprecated? */
1814      assert(0);
1815      return FALSE;
1816      break;
1817
1818   case TGSI_OPCODE_XOR:
1819      /* deprecated? */
1820      assert(0);
1821      return FALSE;
1822      break;
1823
1824   case TGSI_OPCODE_SAD:
1825      /* deprecated? */
1826      assert(0);
1827      return FALSE;
1828      break;
1829
1830   case TGSI_OPCODE_TXF:
1831      /* deprecated? */
1832      assert(0);
1833      return FALSE;
1834      break;
1835
1836   case TGSI_OPCODE_TXQ:
1837      /* deprecated? */
1838      assert(0);
1839      return FALSE;
1840      break;
1841
1842   case TGSI_OPCODE_CONT:
1843      lp_exec_continue(&bld->exec_mask);
1844      break;
1845
1846   case TGSI_OPCODE_EMIT:
1847      return FALSE;
1848      break;
1849
1850   case TGSI_OPCODE_ENDPRIM:
1851      return FALSE;
1852      break;
1853
1854   case TGSI_OPCODE_NOP:
1855      break;
1856
1857   default:
1858      return FALSE;
1859   }
1860
1861   if(info->num_dst) {
1862      LLVMValueRef pred[NUM_CHANNELS];
1863
1864      emit_fetch_predicate( bld, inst, pred );
1865
1866      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1867         emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
1868      }
1869   }
1870
1871   return TRUE;
1872}
1873
1874
1875void
1876lp_build_tgsi_soa(LLVMBuilderRef builder,
1877                  const struct tgsi_token *tokens,
1878                  struct lp_type type,
1879                  struct lp_build_mask_context *mask,
1880                  LLVMValueRef consts_ptr,
1881                  const LLVMValueRef *pos,
1882                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
1883                  LLVMValueRef (*outputs)[NUM_CHANNELS],
1884                  struct lp_build_sampler_soa *sampler,
1885                  struct tgsi_shader_info *info)
1886{
1887   struct lp_build_tgsi_soa_context bld;
1888   struct tgsi_parse_context parse;
1889   uint num_immediates = 0;
1890   unsigned i;
1891
1892   /* Setup build context */
1893   memset(&bld, 0, sizeof bld);
1894   lp_build_context_init(&bld.base, builder, type);
1895   lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
1896   bld.mask = mask;
1897   bld.pos = pos;
1898   bld.inputs = inputs;
1899   bld.outputs = outputs;
1900   bld.consts_ptr = consts_ptr;
1901   bld.sampler = sampler;
1902   bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 ||
1903                                 info->opcode_count[TGSI_OPCODE_ARL] > 0;
1904
1905   lp_exec_mask_init(&bld.exec_mask, &bld.base);
1906
1907   tgsi_parse_init( &parse, tokens );
1908
1909   while( !tgsi_parse_end_of_tokens( &parse ) ) {
1910      tgsi_parse_token( &parse );
1911
1912      switch( parse.FullToken.Token.Type ) {
1913      case TGSI_TOKEN_TYPE_DECLARATION:
1914         /* Inputs already interpolated */
1915         emit_declaration( &bld, &parse.FullToken.FullDeclaration );
1916         break;
1917
1918      case TGSI_TOKEN_TYPE_INSTRUCTION:
1919         {
1920            unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1921            const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(opcode);
1922            if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, opcode_info ))
1923               _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1924                             opcode_info->mnemonic);
1925         }
1926
1927         break;
1928
1929      case TGSI_TOKEN_TYPE_IMMEDIATE:
1930         /* simply copy the immediate values into the next immediates[] slot */
1931         {
1932            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1933            assert(size <= 4);
1934            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
1935            for( i = 0; i < size; ++i )
1936               bld.immediates[num_immediates][i] =
1937                  lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
1938            for( i = size; i < 4; ++i )
1939               bld.immediates[num_immediates][i] = bld.base.undef;
1940            num_immediates++;
1941         }
1942         break;
1943
1944      case TGSI_TOKEN_TYPE_PROPERTY:
1945         break;
1946
1947      default:
1948         assert( 0 );
1949      }
1950   }
1951   if (0) {
1952      LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
1953      LLVMValueRef function = LLVMGetBasicBlockParent(block);
1954      debug_printf("11111111111111111111111111111 \n");
1955      tgsi_dump(tokens, 0);
1956      lp_debug_dump_value(function);
1957      debug_printf("2222222222222222222222222222 \n");
1958   }
1959   tgsi_parse_free( &parse );
1960}
1961
1962