lp_bld_tgsi_soa.c revision 962558daaed43b0111cd062e32821aad106869d7
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29/**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39#include "pipe/p_config.h"
40#include "pipe/p_shader_tokens.h"
41#include "util/u_debug.h"
42#include "util/u_math.h"
43#include "util/u_memory.h"
44#include "tgsi/tgsi_dump.h"
45#include "tgsi/tgsi_info.h"
46#include "tgsi/tgsi_parse.h"
47#include "tgsi/tgsi_util.h"
48#include "tgsi/tgsi_exec.h"
49#include "tgsi/tgsi_scan.h"
50#include "lp_bld_type.h"
51#include "lp_bld_const.h"
52#include "lp_bld_arit.h"
53#include "lp_bld_logic.h"
54#include "lp_bld_swizzle.h"
55#include "lp_bld_flow.h"
56#include "lp_bld_tgsi.h"
57#include "lp_bld_limits.h"
58#include "lp_bld_debug.h"
59
60
/**
 * Iterate CHAN over every channel index, 0 .. NUM_CHANNELS - 1.
 *
 * CHAN must be a modifiable integer lvalue. It is parenthesized on every use
 * so that expressions such as `*p` expand to `(*p)++` rather than `*p++`.
 */
#define FOR_EACH_CHANNEL( CHAN )\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)

/**
 * Non-zero when bit CHAN is set in the write mask of the first destination
 * register of INST.
 */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

/**
 * Expands to a bare `if`, so the statement that follows is its body.
 * Do not follow that body with an `else`: it would bind to this hidden `if`.
 */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/**
 * Iterate CHAN over the channels enabled in the write mask of the first
 * destination register of INST.  The statement that follows is the body.
 */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* Lane indices used by the swizzle tables below. */
#define QUAD_TOP_LEFT     0
#define QUAD_TOP_RIGHT    1
#define QUAD_BOTTOM_LEFT  2
#define QUAD_BOTTOM_RIGHT 3
83
84
/**
 * Execution mask state for TGSI control flow (IF/ELSE/ENDIF, loops, BRK, CONT).
 *
 * The condition, break and continue masks are each kept as a "current" value
 * plus a stack of saved values; lp_exec_mask_update() combines the current
 * values into exec_mask.
 */
struct lp_exec_mask {
   /* build context providing the LLVM builder and the vector type */
   struct lp_build_context *bld;

   /* TRUE while at least one conditional or loop is open; recomputed by
    * lp_exec_mask_update() and checked before masking stores */
   boolean has_mask;

   /* integer vector type obtained from lp_build_int_vec_type(bld->type);
    * condition values are bitcast to it before being used as masks */
   LLVMTypeRef int_vec_type;

   /* saved condition masks of the enclosing conditionals */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   /* condition mask of the innermost open conditional */
   LLVMValueRef cond_mask;

   /* saved break masks of the enclosing loops */
   LLVMValueRef break_stack[LP_MAX_TGSI_NESTING];
   int break_stack_size;
   /* bits are cleared by lp_exec_break() for the channels executing it */
   LLVMValueRef break_mask;

   /* saved continue masks of the enclosing loops */
   LLVMValueRef cont_stack[LP_MAX_TGSI_NESTING];
   int cont_stack_size;
   /* bits are cleared by lp_exec_continue() for the channels executing it */
   LLVMValueRef cont_mask;

   /* saved header blocks of the enclosing loops */
   LLVMBasicBlockRef loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;
   /* header block of the innermost loop; lp_exec_endloop() branches back to it */
   LLVMBasicBlockRef loop_block;


   /* combined mask: cond_mask outside loops,
    * cond_mask & cont_mask & break_mask inside them */
   LLVMValueRef exec_mask;
};
111
/**
 * State carried through the translation of one TGSI shader to SoA LLVM IR.
 */
struct lp_build_tgsi_soa_context
{
   /* build context used for all vector arithmetic */
   struct lp_build_context base;

   /* base pointer for constants; emit_fetch() loads a scalar from it and
    * broadcasts that scalar to a vector */
   LLVMValueRef consts_ptr;
   /* NOTE(review): not referenced in the visible part of this file;
    * presumably the fragment position -- confirm against the caller */
   const LLVMValueRef *pos;
   /* input values, read directly (no load) as inputs[register][channel] */
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   /* output storage as outputs[register][channel]; emit_declaration() fills
    * in an alloca per channel and emit_store() writes through it */
   LLVMValueRef (*outputs)[NUM_CHANNELS];

   /* texture sampling code generator; may be NULL, in which case emit_tex()
    * warns and returns undefined texels */
   struct lp_build_sampler_soa *sampler;

   /* immediate values, read directly as immediates[register][channel] */
   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
   /* one alloca per temporary channel (only without indirect addressing) */
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
   /* one alloca per address register channel */
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];

   /* When the shader uses indirect addressing, temporaries live in this
    * single array, indexed by register*4 + channel, and temps[][] above
    * is unused. */
   LLVMValueRef temps_array;
   boolean has_indirect_addressing;

   /* mask updated by emit_kil() / emit_kilp() through lp_build_mask_update() */
   struct lp_build_mask_context *mask;
   /* control-flow execution mask applied to register stores */
   struct lp_exec_mask exec_mask;
};
135
136static const unsigned char
137swizzle_left[4] = {
138   QUAD_TOP_LEFT,     QUAD_TOP_LEFT,
139   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_LEFT
140};
141
142static const unsigned char
143swizzle_right[4] = {
144   QUAD_TOP_RIGHT,    QUAD_TOP_RIGHT,
145   QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
146};
147
148static const unsigned char
149swizzle_top[4] = {
150   QUAD_TOP_LEFT,     QUAD_TOP_RIGHT,
151   QUAD_TOP_LEFT,     QUAD_TOP_RIGHT
152};
153
154static const unsigned char
155swizzle_bottom[4] = {
156   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_RIGHT,
157   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_RIGHT
158};
159
160static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
161{
162   mask->bld = bld;
163   mask->has_mask = FALSE;
164   mask->cond_stack_size = 0;
165   mask->loop_stack_size = 0;
166   mask->break_stack_size = 0;
167   mask->cont_stack_size = 0;
168
169   mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
170}
171
172static void lp_exec_mask_update(struct lp_exec_mask *mask)
173{
174   if (mask->loop_stack_size) {
175      /*for loops we need to update the entire mask at runtime */
176      LLVMValueRef tmp;
177      assert(mask->break_mask);
178      tmp = LLVMBuildAnd(mask->bld->builder,
179                         mask->cont_mask,
180                         mask->break_mask,
181                         "maskcb");
182      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
183                                     mask->cond_mask,
184                                     tmp,
185                                     "maskfull");
186   } else
187      mask->exec_mask = mask->cond_mask;
188
189
190   mask->has_mask = (mask->cond_stack_size > 0 ||
191                     mask->loop_stack_size > 0);
192}
193
194static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
195                                   LLVMValueRef val)
196{
197   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
198   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
199   mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val,
200                                      mask->int_vec_type, "");
201
202   lp_exec_mask_update(mask);
203}
204
205static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
206{
207   LLVMValueRef prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
208   LLVMValueRef inv_mask = LLVMBuildNot(mask->bld->builder,
209                                        mask->cond_mask, "");
210
211   /* means that we didn't have any mask before and that
212    * we were fully enabled */
213   if (mask->cond_stack_size <= 1) {
214      prev_mask = LLVMConstAllOnes(mask->int_vec_type);
215   }
216
217   mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
218                                  inv_mask,
219                                  prev_mask, "");
220   lp_exec_mask_update(mask);
221}
222
223static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
224{
225   mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
226   lp_exec_mask_update(mask);
227}
228
229static void lp_exec_bgnloop(struct lp_exec_mask *mask)
230{
231
232   if (mask->cont_stack_size == 0)
233      mask->cont_mask = LLVMConstAllOnes(mask->int_vec_type);
234   if (mask->break_stack_size == 0)
235      mask->break_mask = LLVMConstAllOnes(mask->int_vec_type);
236   if (mask->cond_stack_size == 0)
237      mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type);
238
239   assert(mask->break_stack_size < LP_MAX_TGSI_NESTING);
240   assert(mask->cont_stack_size < LP_MAX_TGSI_NESTING);
241   assert(mask->break_stack_size < LP_MAX_TGSI_NESTING);
242
243   mask->break_stack[mask->break_stack_size++] = mask->break_mask;
244   mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask;
245   mask->loop_stack[mask->loop_stack_size++] = mask->loop_block;
246   mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
247   LLVMBuildBr(mask->bld->builder, mask->loop_block);
248   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
249
250   lp_exec_mask_update(mask);
251}
252
253static void lp_exec_break(struct lp_exec_mask *mask)
254{
255   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
256                                         mask->exec_mask,
257                                         "break");
258
259   mask->break_mask = LLVMBuildAnd(mask->bld->builder,
260                                   mask->break_mask,
261                                   exec_mask, "break_full");
262
263   lp_exec_mask_update(mask);
264}
265
266static void lp_exec_continue(struct lp_exec_mask *mask)
267{
268   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
269                                         mask->exec_mask,
270                                         "");
271
272   mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
273                                  mask->cont_mask,
274                                  exec_mask, "");
275
276   lp_exec_mask_update(mask);
277}
278
279
280static void lp_exec_endloop(struct lp_exec_mask *mask)
281{
282   LLVMBasicBlockRef endloop;
283   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
284                                      mask->bld->type.length);
285   LLVMValueRef i1cond;
286
287   assert(mask->break_mask);
288
289   /* i1cond = (mask == 0) */
290   i1cond = LLVMBuildICmp(
291      mask->bld->builder,
292      LLVMIntNE,
293      LLVMBuildBitCast(mask->bld->builder, mask->break_mask, reg_type, ""),
294      LLVMConstNull(reg_type), "");
295
296   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");
297
298   LLVMBuildCondBr(mask->bld->builder,
299                   i1cond, mask->loop_block, endloop);
300
301   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);
302
303   mask->loop_block = mask->loop_stack[--mask->loop_stack_size];
304   /* pop the cont mask */
305   if (mask->cont_stack_size) {
306      mask->cont_mask = mask->cont_stack[--mask->cont_stack_size];
307   }
308   /* pop the break mask */
309   if (mask->break_stack_size) {
310      mask->break_mask = mask->break_stack[--mask->break_stack_size];
311   }
312
313   lp_exec_mask_update(mask);
314}
315
316/* stores val into an address pointed to by dst.
317 * mask->exec_mask is used to figure out which bits of val
318 * should be stored into the address
319 * (0 means don't store this bit, 1 means do store).
320 */
321static void lp_exec_mask_store(struct lp_exec_mask *mask,
322                               LLVMValueRef val,
323                               LLVMValueRef dst)
324{
325   if (mask->has_mask) {
326      LLVMValueRef real_val, dst_val;
327
328      dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
329      real_val = lp_build_select(mask->bld,
330                                 mask->exec_mask,
331                                 val, dst_val);
332
333      LLVMBuildStore(mask->bld->builder, real_val, dst);
334   } else
335      LLVMBuildStore(mask->bld->builder, val, dst);
336}
337
338
339static LLVMValueRef
340emit_ddx(struct lp_build_tgsi_soa_context *bld,
341         LLVMValueRef src)
342{
343   LLVMValueRef src_left  = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
344   LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
345   return lp_build_sub(&bld->base, src_right, src_left);
346}
347
348
349static LLVMValueRef
350emit_ddy(struct lp_build_tgsi_soa_context *bld,
351         LLVMValueRef src)
352{
353   LLVMValueRef src_top    = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
354   LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
355   return lp_build_sub(&bld->base, src_top, src_bottom);
356}
357
358static LLVMValueRef
359get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
360             unsigned index,
361             unsigned swizzle,
362             boolean is_indirect,
363             LLVMValueRef addr)
364{
365   if (!bld->has_indirect_addressing) {
366      return bld->temps[index][swizzle];
367   } else {
368      LLVMValueRef lindex =
369         LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0);
370      if (is_indirect)
371         lindex = lp_build_add(&bld->base, lindex, addr);
372      return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
373   }
374}
375
376/**
377 * Register fetch.
378 */
379static LLVMValueRef
380emit_fetch(
381   struct lp_build_tgsi_soa_context *bld,
382   const struct tgsi_full_instruction *inst,
383   unsigned index,
384   const unsigned chan_index )
385{
386   const struct tgsi_full_src_register *reg = &inst->Src[index];
387   unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
388   LLVMValueRef res;
389   LLVMValueRef addr;
390
391   switch (swizzle) {
392   case TGSI_SWIZZLE_X:
393   case TGSI_SWIZZLE_Y:
394   case TGSI_SWIZZLE_Z:
395   case TGSI_SWIZZLE_W:
396
397      if (reg->Register.Indirect) {
398         LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
399         unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
400         addr = LLVMBuildLoad(bld->base.builder,
401                              bld->addr[reg->Indirect.Index][swizzle],
402                              "");
403         /* for indexing we want integers */
404         addr = LLVMBuildFPToSI(bld->base.builder, addr,
405                                int_vec_type, "");
406         addr = LLVMBuildExtractElement(bld->base.builder,
407                                        addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
408                                        "");
409         addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
410      }
411
412      switch (reg->Register.File) {
413      case TGSI_FILE_CONSTANT: {
414         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
415         LLVMValueRef scalar, scalar_ptr;
416
417         if (reg->Register.Indirect) {
418            /*lp_build_printf(bld->base.builder,
419              "\taddr = %d\n", addr);*/
420            index = lp_build_add(&bld->base, index, addr);
421         }
422         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
423         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
424
425         res = lp_build_broadcast_scalar(&bld->base, scalar);
426         break;
427      }
428
429      case TGSI_FILE_IMMEDIATE:
430         res = bld->immediates[reg->Register.Index][swizzle];
431         assert(res);
432         break;
433
434      case TGSI_FILE_INPUT:
435         res = bld->inputs[reg->Register.Index][swizzle];
436         assert(res);
437         break;
438
439      case TGSI_FILE_TEMPORARY: {
440         LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
441                                              swizzle,
442                                              reg->Register.Indirect,
443                                              addr);
444         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
445         if(!res)
446            return bld->base.undef;
447         break;
448      }
449
450      default:
451         assert( 0 );
452         return bld->base.undef;
453      }
454      break;
455
456   default:
457      assert( 0 );
458      return bld->base.undef;
459   }
460
461   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
462   case TGSI_UTIL_SIGN_CLEAR:
463      res = lp_build_abs( &bld->base, res );
464      break;
465
466   case TGSI_UTIL_SIGN_SET:
467      /* TODO: Use bitwese OR for floating point */
468      res = lp_build_abs( &bld->base, res );
469      res = LLVMBuildNeg( bld->base.builder, res, "" );
470      break;
471
472   case TGSI_UTIL_SIGN_TOGGLE:
473      res = LLVMBuildNeg( bld->base.builder, res, "" );
474      break;
475
476   case TGSI_UTIL_SIGN_KEEP:
477      break;
478   }
479
480   return res;
481}
482
483
484/**
485 * Register fetch with derivatives.
486 */
487static void
488emit_fetch_deriv(
489   struct lp_build_tgsi_soa_context *bld,
490   const struct tgsi_full_instruction *inst,
491   unsigned index,
492   const unsigned chan_index,
493   LLVMValueRef *res,
494   LLVMValueRef *ddx,
495   LLVMValueRef *ddy)
496{
497   LLVMValueRef src;
498
499   src = emit_fetch(bld, inst, index, chan_index);
500
501   if(res)
502      *res = src;
503
504   /* TODO: use interpolation coeffs for inputs */
505
506   if(ddx)
507      *ddx = emit_ddx(bld, src);
508
509   if(ddy)
510      *ddy = emit_ddy(bld, src);
511}
512
513
514/**
515 * Register store.
516 */
517static void
518emit_store(
519   struct lp_build_tgsi_soa_context *bld,
520   const struct tgsi_full_instruction *inst,
521   unsigned index,
522   unsigned chan_index,
523   LLVMValueRef value)
524{
525   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
526   LLVMValueRef addr;
527
528   switch( inst->Instruction.Saturate ) {
529   case TGSI_SAT_NONE:
530      break;
531
532   case TGSI_SAT_ZERO_ONE:
533      value = lp_build_max(&bld->base, value, bld->base.zero);
534      value = lp_build_min(&bld->base, value, bld->base.one);
535      break;
536
537   case TGSI_SAT_MINUS_PLUS_ONE:
538      value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
539      value = lp_build_min(&bld->base, value, bld->base.one);
540      break;
541
542   default:
543      assert(0);
544   }
545
546   if (reg->Register.Indirect) {
547      LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
548      unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
549      addr = LLVMBuildLoad(bld->base.builder,
550                           bld->addr[reg->Indirect.Index][swizzle],
551                           "");
552      /* for indexing we want integers */
553      addr = LLVMBuildFPToSI(bld->base.builder, addr,
554                             int_vec_type, "");
555      addr = LLVMBuildExtractElement(bld->base.builder,
556                                     addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
557                                     "");
558      addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
559   }
560
561   switch( reg->Register.File ) {
562   case TGSI_FILE_OUTPUT:
563      lp_exec_mask_store(&bld->exec_mask, value,
564                         bld->outputs[reg->Register.Index][chan_index]);
565      break;
566
567   case TGSI_FILE_TEMPORARY: {
568      LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
569                                           chan_index,
570                                           reg->Register.Indirect,
571                                           addr);
572      lp_exec_mask_store(&bld->exec_mask, value, temp_ptr);
573      break;
574   }
575
576   case TGSI_FILE_ADDRESS:
577      lp_exec_mask_store(&bld->exec_mask, value,
578                         bld->addr[reg->Indirect.Index][chan_index]);
579      break;
580
581   case TGSI_FILE_PREDICATE:
582      /* FIXME */
583      break;
584
585   default:
586      assert( 0 );
587   }
588}
589
590
591/**
592 * High-level instruction translators.
593 */
594
/** Variants of texture sampling handled by emit_tex(). */
enum tex_modifier {
   /* plain lookup */
   TEX_MODIFIER_NONE           = 0,
   /* coordinates are multiplied by the reciprocal of src0.w */
   TEX_MODIFIER_PROJECTED      = 1,
   /* src0.w is passed to the sampler as LOD bias */
   TEX_MODIFIER_LOD_BIAS       = 2,
   /* src0.w is also passed as LOD bias; see the FIXME in emit_tex() */
   TEX_MODIFIER_EXPLICIT_LOD   = 3,
   /* derivatives come from src1/src2 and the sampler unit from src3 */
   TEX_MODIFIER_EXPLICIT_DERIV = 4
};
602
603static void
604emit_tex( struct lp_build_tgsi_soa_context *bld,
605          const struct tgsi_full_instruction *inst,
606          enum tex_modifier modifier,
607          LLVMValueRef *texel)
608{
609   unsigned unit;
610   LLVMValueRef lodbias;
611   LLVMValueRef oow = NULL;
612   LLVMValueRef coords[3];
613   LLVMValueRef ddx[3];
614   LLVMValueRef ddy[3];
615   unsigned num_coords;
616   unsigned i;
617
618   if (!bld->sampler) {
619      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
620      for (i = 0; i < 4; i++) {
621         texel[i] = bld->base.undef;
622      }
623      return;
624   }
625
626   switch (inst->Texture.Texture) {
627   case TGSI_TEXTURE_1D:
628      num_coords = 1;
629      break;
630   case TGSI_TEXTURE_2D:
631   case TGSI_TEXTURE_RECT:
632      num_coords = 2;
633      break;
634   case TGSI_TEXTURE_SHADOW1D:
635   case TGSI_TEXTURE_SHADOW2D:
636   case TGSI_TEXTURE_SHADOWRECT:
637   case TGSI_TEXTURE_3D:
638   case TGSI_TEXTURE_CUBE:
639      num_coords = 3;
640      break;
641   default:
642      assert(0);
643      return;
644   }
645
646   /* FIXME: Treat TEX_MODIFIER_EXPLICIT_LOD correctly */
647   if (modifier == TEX_MODIFIER_LOD_BIAS || TEX_MODIFIER_EXPLICIT_LOD)
648      lodbias = emit_fetch( bld, inst, 0, 3 );
649   else
650      lodbias = bld->base.zero;
651
652   if (modifier == TEX_MODIFIER_PROJECTED) {
653      oow = emit_fetch( bld, inst, 0, 3 );
654      oow = lp_build_rcp(&bld->base, oow);
655   }
656
657   for (i = 0; i < num_coords; i++) {
658      coords[i] = emit_fetch( bld, inst, 0, i );
659      if (modifier == TEX_MODIFIER_PROJECTED)
660         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
661   }
662   for (i = num_coords; i < 3; i++) {
663      coords[i] = bld->base.undef;
664   }
665
666   if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) {
667      for (i = 0; i < num_coords; i++) {
668         ddx[i] = emit_fetch( bld, inst, 1, i );
669         ddy[i] = emit_fetch( bld, inst, 2, i );
670      }
671      unit = inst->Src[3].Register.Index;
672   }  else {
673      for (i = 0; i < num_coords; i++) {
674         ddx[i] = emit_ddx( bld, coords[i] );
675         ddy[i] = emit_ddy( bld, coords[i] );
676      }
677      unit = inst->Src[1].Register.Index;
678   }
679
680   bld->sampler->emit_fetch_texel(bld->sampler,
681                                  bld->base.builder,
682                                  bld->base.type,
683                                  unit, num_coords, coords,
684                                  ddx, ddy, lodbias,
685                                  texel);
686}
687
688
689/**
690 * Kill fragment if any of the src register values are negative.
691 */
692static void
693emit_kil(
694   struct lp_build_tgsi_soa_context *bld,
695   const struct tgsi_full_instruction *inst )
696{
697   const struct tgsi_full_src_register *reg = &inst->Src[0];
698   LLVMValueRef terms[NUM_CHANNELS];
699   LLVMValueRef mask;
700   unsigned chan_index;
701
702   memset(&terms, 0, sizeof terms);
703
704   FOR_EACH_CHANNEL( chan_index ) {
705      unsigned swizzle;
706
707      /* Unswizzle channel */
708      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
709
710      /* Check if the component has not been already tested. */
711      assert(swizzle < NUM_CHANNELS);
712      if( !terms[swizzle] )
713         /* TODO: change the comparison operator instead of setting the sign */
714         terms[swizzle] =  emit_fetch(bld, inst, 0, chan_index );
715   }
716
717   mask = NULL;
718   FOR_EACH_CHANNEL( chan_index ) {
719      if(terms[chan_index]) {
720         LLVMValueRef chan_mask;
721
722         /*
723          * If term < 0 then mask = 0 else mask = ~0.
724          */
725         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
726
727         if(mask)
728            mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
729         else
730            mask = chan_mask;
731      }
732   }
733
734   if(mask)
735      lp_build_mask_update(bld->mask, mask);
736}
737
738
739/**
740 * Predicated fragment kill.
741 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
742 * The only predication is the execution mask which will apply if
743 * we're inside a loop or conditional.
744 */
745static void
746emit_kilp(struct lp_build_tgsi_soa_context *bld,
747          const struct tgsi_full_instruction *inst)
748{
749   LLVMValueRef mask;
750
751   /* For those channels which are "alive", disable fragment shader
752    * execution.
753    */
754   if (bld->exec_mask.has_mask) {
755      mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
756   }
757   else {
758      mask = bld->base.zero;
759   }
760
761   lp_build_mask_update(bld->mask, mask);
762}
763
764static void
765emit_declaration(
766   struct lp_build_tgsi_soa_context *bld,
767   const struct tgsi_full_declaration *decl)
768{
769   LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);
770
771   unsigned first = decl->Range.First;
772   unsigned last = decl->Range.Last;
773   unsigned idx, i;
774
775   for (idx = first; idx <= last; ++idx) {
776      switch (decl->Declaration.File) {
777      case TGSI_FILE_TEMPORARY:
778         assert(idx < LP_MAX_TGSI_TEMPS);
779         if (bld->has_indirect_addressing) {
780            LLVMValueRef val = LLVMConstInt(LLVMInt32Type(),
781                                            last*4 + 4, 0);
782            bld->temps_array = lp_build_array_alloca(bld->base.builder,
783                                                     vec_type, val, "");
784         } else {
785            for (i = 0; i < NUM_CHANNELS; i++)
786               bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
787                                                    vec_type, "");
788         }
789         break;
790
791      case TGSI_FILE_OUTPUT:
792         for (i = 0; i < NUM_CHANNELS; i++)
793            bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
794                                                   vec_type, "");
795         break;
796
797      case TGSI_FILE_ADDRESS:
798         assert(idx < LP_MAX_TGSI_ADDRS);
799         for (i = 0; i < NUM_CHANNELS; i++)
800            bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
801                                                vec_type, "");
802         break;
803
804      case TGSI_FILE_PREDICATE:
805         _debug_printf("warning: predicate registers not yet implemented\n");
806         break;
807
808      default:
809         /* don't need to declare other vars */
810         break;
811      }
812   }
813}
814
815
816/**
817 * Emit LLVM for one TGSI instruction.
818 * \param return TRUE for success, FALSE otherwise
819 */
820static boolean
821emit_instruction(
822   struct lp_build_tgsi_soa_context *bld,
823   const struct tgsi_full_instruction *inst,
824   const struct tgsi_opcode_info *info)
825{
826   unsigned chan_index;
827   LLVMValueRef src0, src1, src2;
828   LLVMValueRef tmp0, tmp1, tmp2;
829   LLVMValueRef tmp3 = NULL;
830   LLVMValueRef tmp4 = NULL;
831   LLVMValueRef tmp5 = NULL;
832   LLVMValueRef tmp6 = NULL;
833   LLVMValueRef tmp7 = NULL;
834   LLVMValueRef res;
835   LLVMValueRef dst0[NUM_CHANNELS];
836
837   /*
838    * Stores and write masks are handled in a general fashion after the long
839    * instruction opcode switch statement.
840    *
841    * Although not stricitly necessary, we avoid generating instructions for
842    * channels which won't be stored, in cases where's that easy. For some
843    * complex instructions, like texture sampling, it is more convenient to
844    * assume a full writemask and then let LLVM optimization passes eliminate
845    * redundant code.
846    */
847
848   assert(info->num_dst <= 1);
849   if(info->num_dst) {
850      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
851         dst0[chan_index] = bld->base.undef;
852      }
853   }
854
855   switch (inst->Instruction.Opcode) {
856   case TGSI_OPCODE_ARL:
857      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
858         tmp0 = emit_fetch( bld, inst, 0, chan_index );
859         tmp0 = lp_build_floor(&bld->base, tmp0);
860         dst0[chan_index] = tmp0;
861      }
862      break;
863
864   case TGSI_OPCODE_MOV:
865      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
866         dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
867      }
868      break;
869
870   case TGSI_OPCODE_LIT:
871      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
872         dst0[CHAN_X] = bld->base.one;
873      }
874      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
875         src0 = emit_fetch( bld, inst, 0, CHAN_X );
876         dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
877      }
878      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
879         /* XMM[1] = SrcReg[0].yyyy */
880         tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
881         /* XMM[1] = max(XMM[1], 0) */
882         tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
883         /* XMM[2] = SrcReg[0].wwww */
884         tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
885         tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
886         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
887         tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
888         dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
889      }
890      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
891         dst0[CHAN_W] = bld->base.one;
892      }
893      break;
894
895   case TGSI_OPCODE_RCP:
896   /* TGSI_OPCODE_RECIP */
897      src0 = emit_fetch( bld, inst, 0, CHAN_X );
898      res = lp_build_rcp(&bld->base, src0);
899      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
900         dst0[chan_index] = res;
901      }
902      break;
903
904   case TGSI_OPCODE_RSQ:
905   /* TGSI_OPCODE_RECIPSQRT */
906      src0 = emit_fetch( bld, inst, 0, CHAN_X );
907      src0 = lp_build_abs(&bld->base, src0);
908      res = lp_build_rsqrt(&bld->base, src0);
909      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
910         dst0[chan_index] = res;
911      }
912      break;
913
914   case TGSI_OPCODE_EXP:
915      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
916          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
917          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
918         LLVMValueRef *p_exp2_int_part = NULL;
919         LLVMValueRef *p_frac_part = NULL;
920         LLVMValueRef *p_exp2 = NULL;
921
922         src0 = emit_fetch( bld, inst, 0, CHAN_X );
923
924         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
925            p_exp2_int_part = &tmp0;
926         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
927            p_frac_part = &tmp1;
928         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
929            p_exp2 = &tmp2;
930
931         lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
932
933         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
934            dst0[CHAN_X] = tmp0;
935         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
936            dst0[CHAN_Y] = tmp1;
937         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
938            dst0[CHAN_Z] = tmp2;
939      }
940      /* dst.w = 1.0 */
941      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
942         dst0[CHAN_W] = bld->base.one;
943      }
944      break;
945
946   case TGSI_OPCODE_LOG:
947      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
948          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
949          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
950         LLVMValueRef *p_floor_log2 = NULL;
951         LLVMValueRef *p_exp = NULL;
952         LLVMValueRef *p_log2 = NULL;
953
954         src0 = emit_fetch( bld, inst, 0, CHAN_X );
955         src0 = lp_build_abs( &bld->base, src0 );
956
957         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
958            p_floor_log2 = &tmp0;
959         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
960            p_exp = &tmp1;
961         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
962            p_log2 = &tmp2;
963
964         lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
965
966         /* dst.x = floor(lg2(abs(src.x))) */
967         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
968            dst0[CHAN_X] = tmp0;
969         /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
970         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
971            dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
972         }
973         /* dst.z = lg2(abs(src.x)) */
974         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
975            dst0[CHAN_Z] = tmp2;
976      }
977      /* dst.w = 1.0 */
978      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
979         dst0[CHAN_W] = bld->base.one;
980      }
981      break;
982
983   case TGSI_OPCODE_MUL:
984      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
985         src0 = emit_fetch( bld, inst, 0, chan_index );
986         src1 = emit_fetch( bld, inst, 1, chan_index );
987         dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
988      }
989      break;
990
991   case TGSI_OPCODE_ADD:
992      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
993         src0 = emit_fetch( bld, inst, 0, chan_index );
994         src1 = emit_fetch( bld, inst, 1, chan_index );
995         dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
996      }
997      break;
998
999   case TGSI_OPCODE_DP3:
1000   /* TGSI_OPCODE_DOT3 */
1001      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1002      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1003      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1004      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1005      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1006      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1007      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1008      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1009      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1010      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1011      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1012      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1013         dst0[chan_index] = tmp0;
1014      }
1015      break;
1016
1017   case TGSI_OPCODE_DP4:
1018   /* TGSI_OPCODE_DOT4 */
1019      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1020      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1021      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1022      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1023      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1024      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1025      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1026      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1027      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1028      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1029      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1030      tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1031      tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1032      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1033      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1034      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1035         dst0[chan_index] = tmp0;
1036      }
1037      break;
1038
1039   case TGSI_OPCODE_DST:
1040      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1041         dst0[CHAN_X] = bld->base.one;
1042      }
1043      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1044         tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1045         tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1046         dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1047      }
1048      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1049         dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1050      }
1051      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1052         dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1053      }
1054      break;
1055
1056   case TGSI_OPCODE_MIN:
1057      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1058         src0 = emit_fetch( bld, inst, 0, chan_index );
1059         src1 = emit_fetch( bld, inst, 1, chan_index );
1060         dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1061      }
1062      break;
1063
1064   case TGSI_OPCODE_MAX:
1065      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1066         src0 = emit_fetch( bld, inst, 0, chan_index );
1067         src1 = emit_fetch( bld, inst, 1, chan_index );
1068         dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1069      }
1070      break;
1071
1072   case TGSI_OPCODE_SLT:
1073   /* TGSI_OPCODE_SETLT */
1074      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1075         src0 = emit_fetch( bld, inst, 0, chan_index );
1076         src1 = emit_fetch( bld, inst, 1, chan_index );
1077         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1078         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1079      }
1080      break;
1081
1082   case TGSI_OPCODE_SGE:
1083   /* TGSI_OPCODE_SETGE */
1084      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1085         src0 = emit_fetch( bld, inst, 0, chan_index );
1086         src1 = emit_fetch( bld, inst, 1, chan_index );
1087         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1088         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1089      }
1090      break;
1091
1092   case TGSI_OPCODE_MAD:
1093   /* TGSI_OPCODE_MADD */
1094      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1095         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1096         tmp1 = emit_fetch( bld, inst, 1, chan_index );
1097         tmp2 = emit_fetch( bld, inst, 2, chan_index );
1098         tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1099         tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1100         dst0[chan_index] = tmp0;
1101      }
1102      break;
1103
1104   case TGSI_OPCODE_SUB:
1105      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1106         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1107         tmp1 = emit_fetch( bld, inst, 1, chan_index );
1108         dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1109      }
1110      break;
1111
1112   case TGSI_OPCODE_LRP:
1113      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1114         src0 = emit_fetch( bld, inst, 0, chan_index );
1115         src1 = emit_fetch( bld, inst, 1, chan_index );
1116         src2 = emit_fetch( bld, inst, 2, chan_index );
1117         tmp0 = lp_build_sub( &bld->base, src1, src2 );
1118         tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1119         dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1120      }
1121      break;
1122
1123   case TGSI_OPCODE_CND:
1124      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1125         src0 = emit_fetch( bld, inst, 0, chan_index );
1126         src1 = emit_fetch( bld, inst, 1, chan_index );
1127         src2 = emit_fetch( bld, inst, 2, chan_index );
1128         tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1129         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1130         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1131      }
1132      break;
1133
1134   case TGSI_OPCODE_DP2A:
1135      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );  /* xmm0 = src[0].x */
1136      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );  /* xmm1 = src[1].x */
1137      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
1138      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );  /* xmm1 = src[0].y */
1139      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );  /* xmm2 = src[1].y */
1140      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
1141      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1142      tmp1 = emit_fetch( bld, inst, 2, CHAN_X );  /* xmm1 = src[2].x */
1143      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1144      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1145         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
1146      }
1147      break;
1148
1149   case TGSI_OPCODE_FRC:
1150      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1151         src0 = emit_fetch( bld, inst, 0, chan_index );
1152         tmp0 = lp_build_floor(&bld->base, src0);
1153         tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1154         dst0[chan_index] = tmp0;
1155      }
1156      break;
1157
1158   case TGSI_OPCODE_CLAMP:
1159      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1160         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1161         src1 = emit_fetch( bld, inst, 1, chan_index );
1162         src2 = emit_fetch( bld, inst, 2, chan_index );
1163         tmp0 = lp_build_max(&bld->base, tmp0, src1);
1164         tmp0 = lp_build_min(&bld->base, tmp0, src2);
1165         dst0[chan_index] = tmp0;
1166      }
1167      break;
1168
1169   case TGSI_OPCODE_FLR:
1170      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1171         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1172         dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1173      }
1174      break;
1175
1176   case TGSI_OPCODE_ROUND:
1177      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1178         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1179         dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1180      }
1181      break;
1182
1183   case TGSI_OPCODE_EX2: {
1184      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1185      tmp0 = lp_build_exp2( &bld->base, tmp0);
1186      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1187         dst0[chan_index] = tmp0;
1188      }
1189      break;
1190   }
1191
1192   case TGSI_OPCODE_LG2:
1193      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1194      tmp0 = lp_build_log2( &bld->base, tmp0);
1195      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1196         dst0[chan_index] = tmp0;
1197      }
1198      break;
1199
1200   case TGSI_OPCODE_POW:
1201      src0 = emit_fetch( bld, inst, 0, CHAN_X );
1202      src1 = emit_fetch( bld, inst, 1, CHAN_X );
1203      res = lp_build_pow( &bld->base, src0, src1 );
1204      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1205         dst0[chan_index] = res;
1206      }
1207      break;
1208
1209   case TGSI_OPCODE_XPD:
1210      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1211          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1212         tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1213         tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1214      }
1215      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1216          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1217         tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1218         tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1219      }
1220      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1221         tmp2 = tmp0;
1222         tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1223         tmp5 = tmp3;
1224         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1225         tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1226         dst0[CHAN_X] = tmp2;
1227      }
1228      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1229          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1230         tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1231         tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1232      }
1233      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1234         tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1235         tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1236         tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1237         dst0[CHAN_Y] = tmp3;
1238      }
1239      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1240         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1241         tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1242         tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1243         dst0[CHAN_Z] = tmp5;
1244      }
1245      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1246         dst0[CHAN_W] = bld->base.one;
1247      }
1248      break;
1249
1250   case TGSI_OPCODE_ABS:
1251      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1252         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1253         dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1254      }
1255      break;
1256
1257   case TGSI_OPCODE_RCC:
1258      /* deprecated? */
1259      assert(0);
1260      return FALSE;
1261
1262   case TGSI_OPCODE_DPH:
1263      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1264      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1265      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1266      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1267      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1268      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1269      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1270      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1271      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1272      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1273      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1274      tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1275      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1276      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1277         dst0[chan_index] = tmp0;
1278      }
1279      break;
1280
1281   case TGSI_OPCODE_COS:
1282      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1283      tmp0 = lp_build_cos( &bld->base, tmp0 );
1284      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1285         dst0[chan_index] = tmp0;
1286      }
1287      break;
1288
1289   case TGSI_OPCODE_DDX:
1290      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1291         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1292      }
1293      break;
1294
1295   case TGSI_OPCODE_DDY:
1296      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1297         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1298      }
1299      break;
1300
1301   case TGSI_OPCODE_KILP:
1302      /* predicated kill */
1303      emit_kilp( bld, inst );
1304      break;
1305
1306   case TGSI_OPCODE_KIL:
1307      /* conditional kill */
1308      emit_kil( bld, inst );
1309      break;
1310
1311   case TGSI_OPCODE_PK2H:
1312      return FALSE;
1313      break;
1314
1315   case TGSI_OPCODE_PK2US:
1316      return FALSE;
1317      break;
1318
1319   case TGSI_OPCODE_PK4B:
1320      return FALSE;
1321      break;
1322
1323   case TGSI_OPCODE_PK4UB:
1324      return FALSE;
1325      break;
1326
1327   case TGSI_OPCODE_RFL:
1328      return FALSE;
1329      break;
1330
1331   case TGSI_OPCODE_SEQ:
1332      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1333         src0 = emit_fetch( bld, inst, 0, chan_index );
1334         src1 = emit_fetch( bld, inst, 1, chan_index );
1335         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1336         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1337      }
1338      break;
1339
1340   case TGSI_OPCODE_SFL:
1341      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1342         dst0[chan_index] = bld->base.zero;
1343      }
1344      break;
1345
1346   case TGSI_OPCODE_SGT:
1347      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1348         src0 = emit_fetch( bld, inst, 0, chan_index );
1349         src1 = emit_fetch( bld, inst, 1, chan_index );
1350         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1351         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1352      }
1353      break;
1354
1355   case TGSI_OPCODE_SIN:
1356      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1357      tmp0 = lp_build_sin( &bld->base, tmp0 );
1358      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1359         dst0[chan_index] = tmp0;
1360      }
1361      break;
1362
1363   case TGSI_OPCODE_SLE:
1364      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1365         src0 = emit_fetch( bld, inst, 0, chan_index );
1366         src1 = emit_fetch( bld, inst, 1, chan_index );
1367         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1368         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1369      }
1370      break;
1371
1372   case TGSI_OPCODE_SNE:
1373      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1374         src0 = emit_fetch( bld, inst, 0, chan_index );
1375         src1 = emit_fetch( bld, inst, 1, chan_index );
1376         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1377         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1378      }
1379      break;
1380
1381   case TGSI_OPCODE_STR:
1382      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1383         dst0[chan_index] = bld->base.one;
1384      }
1385      break;
1386
1387   case TGSI_OPCODE_TEX:
1388      emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 );
1389      break;
1390
1391   case TGSI_OPCODE_TXD:
1392      emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1393      break;
1394
1395   case TGSI_OPCODE_UP2H:
1396      /* deprecated */
1397      assert (0);
1398      return FALSE;
1399      break;
1400
1401   case TGSI_OPCODE_UP2US:
1402      /* deprecated */
1403      assert(0);
1404      return FALSE;
1405      break;
1406
1407   case TGSI_OPCODE_UP4B:
1408      /* deprecated */
1409      assert(0);
1410      return FALSE;
1411      break;
1412
1413   case TGSI_OPCODE_UP4UB:
1414      /* deprecated */
1415      assert(0);
1416      return FALSE;
1417      break;
1418
1419   case TGSI_OPCODE_X2D:
1420      /* deprecated? */
1421      assert(0);
1422      return FALSE;
1423      break;
1424
1425   case TGSI_OPCODE_ARA:
1426      /* deprecated */
1427      assert(0);
1428      return FALSE;
1429      break;
1430
1431   case TGSI_OPCODE_ARR:
1432      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1433         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1434         tmp0 = lp_build_round(&bld->base, tmp0);
1435         dst0[chan_index] = tmp0;
1436      }
1437      break;
1438
1439   case TGSI_OPCODE_BRA:
1440      /* deprecated */
1441      assert(0);
1442      return FALSE;
1443      break;
1444
1445   case TGSI_OPCODE_CAL:
1446      /* FIXME */
1447      return FALSE;
1448      break;
1449
1450   case TGSI_OPCODE_RET:
1451      /* FIXME */
1452      return FALSE;
1453      break;
1454
1455   case TGSI_OPCODE_END:
1456      break;
1457
1458   case TGSI_OPCODE_SSG:
1459   /* TGSI_OPCODE_SGN */
1460      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1461         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1462         dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1463      }
1464      break;
1465
1466   case TGSI_OPCODE_CMP:
1467      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1468         src0 = emit_fetch( bld, inst, 0, chan_index );
1469         src1 = emit_fetch( bld, inst, 1, chan_index );
1470         src2 = emit_fetch( bld, inst, 2, chan_index );
1471         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1472         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1473      }
1474      break;
1475
1476   case TGSI_OPCODE_SCS:
1477      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1478         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1479         dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1480      }
1481      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1482         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1483         dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1484      }
1485      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1486         dst0[CHAN_Z] = bld->base.zero;
1487      }
1488      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1489         dst0[CHAN_W] = bld->base.one;
1490      }
1491      break;
1492
1493   case TGSI_OPCODE_TXB:
1494      emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 );
1495      break;
1496
1497   case TGSI_OPCODE_NRM:
1498      /* fall-through */
1499   case TGSI_OPCODE_NRM4:
1500      /* 3 or 4-component normalization */
1501      {
1502         uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1503
1504         if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1505             IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1506             IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1507             (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1508
1509            /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1510
1511            /* xmm4 = src.x */
1512            /* xmm0 = src.x * src.x */
1513            tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1514            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1515               tmp4 = tmp0;
1516            }
1517            tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1518
1519            /* xmm5 = src.y */
1520            /* xmm0 = xmm0 + src.y * src.y */
1521            tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1522            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1523               tmp5 = tmp1;
1524            }
1525            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1526            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1527
1528            /* xmm6 = src.z */
1529            /* xmm0 = xmm0 + src.z * src.z */
1530            tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1531            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1532               tmp6 = tmp1;
1533            }
1534            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1535            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1536
1537            if (dims == 4) {
1538               /* xmm7 = src.w */
1539               /* xmm0 = xmm0 + src.w * src.w */
1540               tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1541               if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1542                  tmp7 = tmp1;
1543               }
1544               tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1545               tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1546            }
1547
1548            /* xmm1 = 1 / sqrt(xmm0) */
1549            tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1550
1551            /* dst.x = xmm1 * src.x */
1552            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1553               dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1554            }
1555
1556            /* dst.y = xmm1 * src.y */
1557            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1558               dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1559            }
1560
1561            /* dst.z = xmm1 * src.z */
1562            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1563               dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1564            }
1565
1566            /* dst.w = xmm1 * src.w */
1567            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1568               dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1569            }
1570         }
1571
1572         /* dst.w = 1.0 */
1573         if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1574            dst0[CHAN_W] = bld->base.one;
1575         }
1576      }
1577      break;
1578
1579   case TGSI_OPCODE_DIV:
1580      /* deprecated */
1581      assert( 0 );
1582      return FALSE;
1583      break;
1584
1585   case TGSI_OPCODE_DP2:
1586      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );  /* xmm0 = src[0].x */
1587      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );  /* xmm1 = src[1].x */
1588      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
1589      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );  /* xmm1 = src[0].y */
1590      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );  /* xmm2 = src[1].y */
1591      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
1592      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1593      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1594         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
1595      }
1596      break;
1597
1598   case TGSI_OPCODE_TXL:
1599      emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 );
1600      break;
1601
1602   case TGSI_OPCODE_TXP:
1603      emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 );
1604      break;
1605
1606   case TGSI_OPCODE_BRK:
1607      lp_exec_break(&bld->exec_mask);
1608      break;
1609
1610   case TGSI_OPCODE_IF:
1611      tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1612      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
1613                          tmp0, bld->base.zero);
1614      lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1615      break;
1616
1617   case TGSI_OPCODE_BGNLOOP:
1618      lp_exec_bgnloop(&bld->exec_mask);
1619      break;
1620
1621   case TGSI_OPCODE_ELSE:
1622      lp_exec_mask_cond_invert(&bld->exec_mask);
1623      break;
1624
1625   case TGSI_OPCODE_ENDIF:
1626      lp_exec_mask_cond_pop(&bld->exec_mask);
1627      break;
1628
1629   case TGSI_OPCODE_ENDLOOP:
1630      lp_exec_endloop(&bld->exec_mask);
1631      break;
1632
1633   case TGSI_OPCODE_PUSHA:
1634      /* deprecated? */
1635      assert(0);
1636      return FALSE;
1637      break;
1638
1639   case TGSI_OPCODE_POPA:
1640      /* deprecated? */
1641      assert(0);
1642      return FALSE;
1643      break;
1644
1645   case TGSI_OPCODE_CEIL:
1646      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1647         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1648         dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1649      }
1650      break;
1651
1652   case TGSI_OPCODE_I2F:
1653      /* deprecated? */
1654      assert(0);
1655      return FALSE;
1656      break;
1657
1658   case TGSI_OPCODE_NOT:
1659      /* deprecated? */
1660      assert(0);
1661      return FALSE;
1662      break;
1663
1664   case TGSI_OPCODE_TRUNC:
1665      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1666         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1667         dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1668      }
1669      break;
1670
1671   case TGSI_OPCODE_SHL:
1672      /* deprecated? */
1673      assert(0);
1674      return FALSE;
1675      break;
1676
1677   case TGSI_OPCODE_ISHR:
1678      /* deprecated? */
1679      assert(0);
1680      return FALSE;
1681      break;
1682
1683   case TGSI_OPCODE_AND:
1684      /* deprecated? */
1685      assert(0);
1686      return FALSE;
1687      break;
1688
1689   case TGSI_OPCODE_OR:
1690      /* deprecated? */
1691      assert(0);
1692      return FALSE;
1693      break;
1694
1695   case TGSI_OPCODE_MOD:
1696      /* deprecated? */
1697      assert(0);
1698      return FALSE;
1699      break;
1700
1701   case TGSI_OPCODE_XOR:
1702      /* deprecated? */
1703      assert(0);
1704      return FALSE;
1705      break;
1706
1707   case TGSI_OPCODE_SAD:
1708      /* deprecated? */
1709      assert(0);
1710      return FALSE;
1711      break;
1712
1713   case TGSI_OPCODE_TXF:
1714      /* deprecated? */
1715      assert(0);
1716      return FALSE;
1717      break;
1718
1719   case TGSI_OPCODE_TXQ:
1720      /* deprecated? */
1721      assert(0);
1722      return FALSE;
1723      break;
1724
1725   case TGSI_OPCODE_CONT:
1726      lp_exec_continue(&bld->exec_mask);
1727      break;
1728
1729   case TGSI_OPCODE_EMIT:
1730      return FALSE;
1731      break;
1732
1733   case TGSI_OPCODE_ENDPRIM:
1734      return FALSE;
1735      break;
1736
1737   case TGSI_OPCODE_NOP:
1738      break;
1739
1740   default:
1741      return FALSE;
1742   }
1743
1744   if(info->num_dst) {
1745      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1746         emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
1747      }
1748   }
1749
1750   return TRUE;
1751}
1752
1753
1754void
1755lp_build_tgsi_soa(LLVMBuilderRef builder,
1756                  const struct tgsi_token *tokens,
1757                  struct lp_type type,
1758                  struct lp_build_mask_context *mask,
1759                  LLVMValueRef consts_ptr,
1760                  const LLVMValueRef *pos,
1761                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
1762                  LLVMValueRef (*outputs)[NUM_CHANNELS],
1763                  struct lp_build_sampler_soa *sampler,
1764                  struct tgsi_shader_info *info)
1765{
1766   struct lp_build_tgsi_soa_context bld;
1767   struct tgsi_parse_context parse;
1768   uint num_immediates = 0;
1769   unsigned i;
1770
1771   /* Setup build context */
1772   memset(&bld, 0, sizeof bld);
1773   lp_build_context_init(&bld.base, builder, type);
1774   bld.mask = mask;
1775   bld.pos = pos;
1776   bld.inputs = inputs;
1777   bld.outputs = outputs;
1778   bld.consts_ptr = consts_ptr;
1779   bld.sampler = sampler;
1780   bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 ||
1781                                 info->opcode_count[TGSI_OPCODE_ARL] > 0;
1782
1783   lp_exec_mask_init(&bld.exec_mask, &bld.base);
1784
1785   tgsi_parse_init( &parse, tokens );
1786
1787   while( !tgsi_parse_end_of_tokens( &parse ) ) {
1788      tgsi_parse_token( &parse );
1789
1790      switch( parse.FullToken.Token.Type ) {
1791      case TGSI_TOKEN_TYPE_DECLARATION:
1792         /* Inputs already interpolated */
1793         emit_declaration( &bld, &parse.FullToken.FullDeclaration );
1794         break;
1795
1796      case TGSI_TOKEN_TYPE_INSTRUCTION:
1797         {
1798            unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1799            const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(opcode);
1800            if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, opcode_info ))
1801               _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1802                             opcode_info->mnemonic);
1803         }
1804
1805         break;
1806
1807      case TGSI_TOKEN_TYPE_IMMEDIATE:
1808         /* simply copy the immediate values into the next immediates[] slot */
1809         {
1810            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1811            assert(size <= 4);
1812            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
1813            for( i = 0; i < size; ++i )
1814               bld.immediates[num_immediates][i] =
1815                  lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
1816            for( i = size; i < 4; ++i )
1817               bld.immediates[num_immediates][i] = bld.base.undef;
1818            num_immediates++;
1819         }
1820         break;
1821
1822      case TGSI_TOKEN_TYPE_PROPERTY:
1823         break;
1824
1825      default:
1826         assert( 0 );
1827      }
1828   }
1829   if (0) {
1830      LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
1831      LLVMValueRef function = LLVMGetBasicBlockParent(block);
1832      debug_printf("11111111111111111111111111111 \n");
1833      tgsi_dump(tokens, 0);
1834      LLVMDumpValue(function);
1835      debug_printf("2222222222222222222222222222 \n");
1836   }
1837   tgsi_parse_free( &parse );
1838}
1839
1840