lp_bld_tgsi_soa.c revision 85c7ec70ad41c8ada75a4cbace83d16815d3e2c5
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29/**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39#include "pipe/p_config.h"
40#include "pipe/p_shader_tokens.h"
41#include "util/u_debug.h"
42#include "util/u_math.h"
43#include "util/u_memory.h"
44#include "tgsi/tgsi_info.h"
45#include "tgsi/tgsi_parse.h"
46#include "tgsi/tgsi_util.h"
47#include "tgsi/tgsi_exec.h"
48#include "lp_bld_type.h"
49#include "lp_bld_const.h"
50#include "lp_bld_arit.h"
51#include "lp_bld_logic.h"
52#include "lp_bld_swizzle.h"
53#include "lp_bld_flow.h"
54#include "lp_bld_tgsi.h"
55
56
/*
 * Capacities of the temps[] and immediates[] tables held in
 * struct lp_build_tgsi_soa_context.
 */
#define LP_MAX_TEMPS      256
#define LP_MAX_IMMEDIATES 256
59
60
/*
 * Channel iteration helpers.
 *
 * Each of the statement-like macros below expands to a bare `for` and/or
 * `if` header, so it must be followed by the statement (normally a braced
 * block) that forms its body.
 */

/* Step CHAN through 0 .. NUM_CHANNELS-1. */
#define FOR_EACH_CHANNEL( CHAN ) \
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Non-zero when bit CHAN is set in the write mask of destination 0. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

/* Run the following statement only when channel CHAN of destination 0 is written. */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Step CHAN over all channels, running the body only for the written ones. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN ) \
   FOR_EACH_CHANNEL( CHAN ) \
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
73
/* Channel indices; also the bit positions tested in a destination write mask. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* Element indices used by the swizzle_* tables below. */
#define QUAD_TOP_LEFT     0
#define QUAD_TOP_RIGHT    1
#define QUAD_BOTTOM_LEFT  2
#define QUAD_BOTTOM_RIGHT 3
83
84
85struct lp_build_tgsi_soa_context
86{
87   struct lp_build_context base;
88
89   LLVMValueRef consts_ptr;
90   const LLVMValueRef *pos;
91   const LLVMValueRef (*inputs)[NUM_CHANNELS];
92   LLVMValueRef (*outputs)[NUM_CHANNELS];
93
94   struct lp_build_sampler_soa *sampler;
95
96   LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
97   LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
98
99   struct lp_build_mask_context *mask;
100};
101
102
103static const unsigned char
104swizzle_left[4] = {
105   QUAD_TOP_LEFT,     QUAD_TOP_LEFT,
106   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_LEFT
107};
108
109static const unsigned char
110swizzle_right[4] = {
111   QUAD_TOP_RIGHT,    QUAD_TOP_RIGHT,
112   QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
113};
114
115static const unsigned char
116swizzle_top[4] = {
117   QUAD_TOP_LEFT,     QUAD_TOP_RIGHT,
118   QUAD_TOP_LEFT,     QUAD_TOP_RIGHT
119};
120
121static const unsigned char
122swizzle_bottom[4] = {
123   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_RIGHT,
124   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_RIGHT
125};
126
127
128static LLVMValueRef
129emit_ddx(struct lp_build_tgsi_soa_context *bld,
130         LLVMValueRef src)
131{
132   LLVMValueRef src_left  = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
133   LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
134   return lp_build_sub(&bld->base, src_right, src_left);
135}
136
137
138static LLVMValueRef
139emit_ddy(struct lp_build_tgsi_soa_context *bld,
140         LLVMValueRef src)
141{
142   LLVMValueRef src_top    = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
143   LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
144   return lp_build_sub(&bld->base, src_top, src_bottom);
145}
146
147
148/**
149 * Register fetch.
150 */
151static LLVMValueRef
152emit_fetch(
153   struct lp_build_tgsi_soa_context *bld,
154   const struct tgsi_full_instruction *inst,
155   unsigned index,
156   const unsigned chan_index )
157{
158   const struct tgsi_full_src_register *reg = &inst->Src[index];
159   unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
160   LLVMValueRef res;
161
162   switch (swizzle) {
163   case TGSI_SWIZZLE_X:
164   case TGSI_SWIZZLE_Y:
165   case TGSI_SWIZZLE_Z:
166   case TGSI_SWIZZLE_W:
167
168      switch (reg->Register.File) {
169      case TGSI_FILE_CONSTANT: {
170         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
171         LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
172         LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
173         res = lp_build_broadcast_scalar(&bld->base, scalar);
174         break;
175      }
176
177      case TGSI_FILE_IMMEDIATE:
178         res = bld->immediates[reg->Register.Index][swizzle];
179         assert(res);
180         break;
181
182      case TGSI_FILE_INPUT:
183         res = bld->inputs[reg->Register.Index][swizzle];
184         assert(res);
185         break;
186
187      case TGSI_FILE_TEMPORARY:
188         res = LLVMBuildLoad(bld->base.builder, bld->temps[reg->Register.Index][swizzle], "");
189         if(!res)
190            return bld->base.undef;
191         break;
192
193      default:
194         assert( 0 );
195         return bld->base.undef;
196      }
197      break;
198
199   default:
200      assert( 0 );
201      return bld->base.undef;
202   }
203
204   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
205   case TGSI_UTIL_SIGN_CLEAR:
206      res = lp_build_abs( &bld->base, res );
207      break;
208
209   case TGSI_UTIL_SIGN_SET:
210      /* TODO: Use bitwese OR for floating point */
211      res = lp_build_abs( &bld->base, res );
212      res = LLVMBuildNeg( bld->base.builder, res, "" );
213      break;
214
215   case TGSI_UTIL_SIGN_TOGGLE:
216      res = LLVMBuildNeg( bld->base.builder, res, "" );
217      break;
218
219   case TGSI_UTIL_SIGN_KEEP:
220      break;
221   }
222
223   return res;
224}
225
226
227/**
228 * Register fetch with derivatives.
229 */
230static void
231emit_fetch_deriv(
232   struct lp_build_tgsi_soa_context *bld,
233   const struct tgsi_full_instruction *inst,
234   unsigned index,
235   const unsigned chan_index,
236   LLVMValueRef *res,
237   LLVMValueRef *ddx,
238   LLVMValueRef *ddy)
239{
240   LLVMValueRef src;
241
242   src = emit_fetch(bld, inst, index, chan_index);
243
244   if(res)
245      *res = src;
246
247   /* TODO: use interpolation coeffs for inputs */
248
249   if(ddx)
250      *ddx = emit_ddx(bld, src);
251
252   if(ddy)
253      *ddy = emit_ddy(bld, src);
254}
255
256
257/**
258 * Register store.
259 */
260static void
261emit_store(
262   struct lp_build_tgsi_soa_context *bld,
263   const struct tgsi_full_instruction *inst,
264   unsigned index,
265   unsigned chan_index,
266   LLVMValueRef value)
267{
268   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
269
270   switch( inst->Instruction.Saturate ) {
271   case TGSI_SAT_NONE:
272      break;
273
274   case TGSI_SAT_ZERO_ONE:
275      value = lp_build_max(&bld->base, value, bld->base.zero);
276      value = lp_build_min(&bld->base, value, bld->base.one);
277      break;
278
279   case TGSI_SAT_MINUS_PLUS_ONE:
280      value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0));
281      value = lp_build_min(&bld->base, value, bld->base.one);
282      break;
283
284   default:
285      assert(0);
286   }
287
288   switch( reg->Register.File ) {
289   case TGSI_FILE_OUTPUT:
290      LLVMBuildStore(bld->base.builder, value,
291                     bld->outputs[reg->Register.Index][chan_index]);
292      break;
293
294   case TGSI_FILE_TEMPORARY:
295      LLVMBuildStore(bld->base.builder, value,
296                     bld->temps[reg->Register.Index][chan_index]);
297      break;
298
299   case TGSI_FILE_ADDRESS:
300      /* FIXME */
301      assert(0);
302      break;
303
304   default:
305      assert( 0 );
306   }
307}
308
309
310/**
311 * High-level instruction translators.
312 */
313
314
315static void
316emit_tex( struct lp_build_tgsi_soa_context *bld,
317          const struct tgsi_full_instruction *inst,
318          boolean apply_lodbias,
319          boolean projected,
320          LLVMValueRef *texel)
321{
322   const uint unit = inst->Src[1].Register.Index;
323   LLVMValueRef lodbias;
324   LLVMValueRef oow = NULL;
325   LLVMValueRef coords[3];
326   unsigned num_coords;
327   unsigned i;
328
329   switch (inst->Texture.Texture) {
330   case TGSI_TEXTURE_1D:
331      num_coords = 1;
332      break;
333   case TGSI_TEXTURE_2D:
334   case TGSI_TEXTURE_RECT:
335      num_coords = 2;
336      break;
337   case TGSI_TEXTURE_SHADOW1D:
338   case TGSI_TEXTURE_SHADOW2D:
339   case TGSI_TEXTURE_SHADOWRECT:
340   case TGSI_TEXTURE_3D:
341   case TGSI_TEXTURE_CUBE:
342      num_coords = 3;
343      break;
344   default:
345      assert(0);
346      return;
347   }
348
349   if(apply_lodbias)
350      lodbias = emit_fetch( bld, inst, 0, 3 );
351   else
352      lodbias = bld->base.zero;
353
354   if (projected) {
355      oow = emit_fetch( bld, inst, 0, 3 );
356      oow = lp_build_rcp(&bld->base, oow);
357   }
358
359   for (i = 0; i < num_coords; i++) {
360      coords[i] = emit_fetch( bld, inst, 0, i );
361      if (projected)
362         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
363   }
364   for (i = num_coords; i < 3; i++) {
365      coords[i] = bld->base.undef;
366   }
367
368   bld->sampler->emit_fetch_texel(bld->sampler,
369                                  bld->base.builder,
370                                  bld->base.type,
371                                  unit, num_coords, coords, lodbias,
372                                  texel);
373}
374
375
376static void
377emit_kil(
378   struct lp_build_tgsi_soa_context *bld,
379   const struct tgsi_full_instruction *inst )
380{
381   const struct tgsi_full_src_register *reg = &inst->Src[0];
382   LLVMValueRef terms[NUM_CHANNELS];
383   LLVMValueRef mask;
384   unsigned chan_index;
385
386   memset(&terms, 0, sizeof terms);
387
388   FOR_EACH_CHANNEL( chan_index ) {
389      unsigned swizzle;
390
391      /* Unswizzle channel */
392      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
393
394      /* Check if the component has not been already tested. */
395      assert(swizzle < NUM_CHANNELS);
396      if( !terms[swizzle] )
397         /* TODO: change the comparison operator instead of setting the sign */
398         terms[swizzle] =  emit_fetch(bld, inst, 0, chan_index );
399   }
400
401   mask = NULL;
402   FOR_EACH_CHANNEL( chan_index ) {
403      if(terms[chan_index]) {
404         LLVMValueRef chan_mask;
405
406         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
407
408         if(mask)
409            mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
410         else
411            mask = chan_mask;
412      }
413   }
414
415   if(mask)
416      lp_build_mask_update(bld->mask, mask);
417}
418
419
420/**
421 * Check if inst src/dest regs use indirect addressing into temporary
422 * register file.
423 */
424static boolean
425indirect_temp_reference(const struct tgsi_full_instruction *inst)
426{
427   uint i;
428   for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
429      const struct tgsi_full_src_register *reg = &inst->Src[i];
430      if (reg->Register.File == TGSI_FILE_TEMPORARY &&
431          reg->Register.Indirect)
432         return TRUE;
433   }
434   for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
435      const struct tgsi_full_dst_register *reg = &inst->Dst[i];
436      if (reg->Register.File == TGSI_FILE_TEMPORARY &&
437          reg->Register.Indirect)
438         return TRUE;
439   }
440   return FALSE;
441}
442
443static int
444emit_declaration(
445   struct lp_build_tgsi_soa_context *bld,
446   const struct tgsi_full_declaration *decl)
447{
448   unsigned first = decl->Range.First;
449   unsigned last = decl->Range.Last;
450   unsigned idx, i;
451
452   for (idx = first; idx <= last; ++idx) {
453      boolean ok;
454
455      switch (decl->Declaration.File) {
456      case TGSI_FILE_TEMPORARY:
457         for (i = 0; i < NUM_CHANNELS; i++)
458            bld->temps[idx][i] = lp_build_alloca(&bld->base);
459         ok = TRUE;
460         break;
461
462      case TGSI_FILE_OUTPUT:
463         for (i = 0; i < NUM_CHANNELS; i++)
464            bld->outputs[idx][i] = lp_build_alloca(&bld->base);
465         ok = TRUE;
466         break;
467
468      default:
469         /* don't need to declare other vars */
470         ok = TRUE;
471      }
472
473      if (!ok)
474         return FALSE;
475   }
476
477   return TRUE;
478}
479
480static int
481emit_instruction(
482   struct lp_build_tgsi_soa_context *bld,
483   const struct tgsi_full_instruction *inst,
484   const struct tgsi_opcode_info *info)
485{
486   unsigned chan_index;
487   LLVMValueRef src0, src1, src2;
488   LLVMValueRef tmp0, tmp1, tmp2;
489   LLVMValueRef tmp3 = NULL;
490   LLVMValueRef tmp4 = NULL;
491   LLVMValueRef tmp5 = NULL;
492   LLVMValueRef tmp6 = NULL;
493   LLVMValueRef tmp7 = NULL;
494   LLVMValueRef res;
495   LLVMValueRef dst0[NUM_CHANNELS];
496
497   /* we can't handle indirect addressing into temp register file yet */
498   if (indirect_temp_reference(inst))
499      return FALSE;
500
501   assert(info->num_dst <= 1);
502   if(info->num_dst) {
503      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
504         dst0[chan_index] = bld->base.undef;
505      }
506   }
507
508   switch (inst->Instruction.Opcode) {
509#if 0
510   case TGSI_OPCODE_ARL:
511      /* FIXME */
512      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
513         tmp0 = emit_fetch( bld, inst, 0, chan_index );
514         emit_flr(bld, 0, 0);
515         emit_f2it( bld, 0 );
516         dst0[chan_index] = tmp0;
517      }
518      break;
519#endif
520
521   case TGSI_OPCODE_MOV:
522      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
523         dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
524      }
525      break;
526
527   case TGSI_OPCODE_LIT:
528      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
529         dst0[CHAN_X] = bld->base.one;
530      }
531      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
532         src0 = emit_fetch( bld, inst, 0, CHAN_X );
533         dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
534      }
535      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
536         /* XMM[1] = SrcReg[0].yyyy */
537         tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
538         /* XMM[1] = max(XMM[1], 0) */
539         tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
540         /* XMM[2] = SrcReg[0].wwww */
541         tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
542         tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
543         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
544         tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
545         dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
546      }
547      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
548         dst0[CHAN_W] = bld->base.one;
549      }
550      break;
551
552   case TGSI_OPCODE_RCP:
553   /* TGSI_OPCODE_RECIP */
554      src0 = emit_fetch( bld, inst, 0, CHAN_X );
555      res = lp_build_rcp(&bld->base, src0);
556      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
557         dst0[chan_index] = res;
558      }
559      break;
560
561   case TGSI_OPCODE_RSQ:
562   /* TGSI_OPCODE_RECIPSQRT */
563      src0 = emit_fetch( bld, inst, 0, CHAN_X );
564      src0 = lp_build_abs(&bld->base, src0);
565      res = lp_build_rsqrt(&bld->base, src0);
566      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
567         dst0[chan_index] = res;
568      }
569      break;
570
571   case TGSI_OPCODE_EXP:
572      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
573          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
574          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
575         LLVMValueRef *p_exp2_int_part = NULL;
576         LLVMValueRef *p_frac_part = NULL;
577         LLVMValueRef *p_exp2 = NULL;
578
579         src0 = emit_fetch( bld, inst, 0, CHAN_X );
580
581         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
582            p_exp2_int_part = &tmp0;
583         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
584            p_frac_part = &tmp1;
585         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
586            p_exp2 = &tmp2;
587
588         lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
589
590         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
591            dst0[CHAN_X] = tmp0;
592         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
593            dst0[CHAN_Y] = tmp1;
594         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
595            dst0[CHAN_Z] = tmp2;
596      }
597      /* dst.w = 1.0 */
598      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
599         dst0[CHAN_W] = bld->base.one;
600      }
601      break;
602
603   case TGSI_OPCODE_LOG:
604      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
605          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
606          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
607         LLVMValueRef *p_floor_log2 = NULL;
608         LLVMValueRef *p_exp = NULL;
609         LLVMValueRef *p_log2 = NULL;
610
611         src0 = emit_fetch( bld, inst, 0, CHAN_X );
612         src0 = lp_build_abs( &bld->base, src0 );
613
614         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
615            p_floor_log2 = &tmp0;
616         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
617            p_exp = &tmp1;
618         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
619            p_log2 = &tmp2;
620
621         lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
622
623         /* dst.x = floor(lg2(abs(src.x))) */
624         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
625            dst0[CHAN_X] = tmp0;
626         /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
627         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
628            dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
629         }
630         /* dst.z = lg2(abs(src.x)) */
631         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
632            dst0[CHAN_Z] = tmp2;
633      }
634      /* dst.w = 1.0 */
635      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
636         dst0[CHAN_W] = bld->base.one;
637      }
638      break;
639
640   case TGSI_OPCODE_MUL:
641      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
642         src0 = emit_fetch( bld, inst, 0, chan_index );
643         src1 = emit_fetch( bld, inst, 1, chan_index );
644         dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
645      }
646      break;
647
648   case TGSI_OPCODE_ADD:
649      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
650         src0 = emit_fetch( bld, inst, 0, chan_index );
651         src1 = emit_fetch( bld, inst, 1, chan_index );
652         dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
653      }
654      break;
655
656   case TGSI_OPCODE_DP3:
657   /* TGSI_OPCODE_DOT3 */
658      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
659      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
660      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
661      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
662      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
663      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
664      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
665      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
666      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
667      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
668      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
669      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
670         dst0[chan_index] = tmp0;
671      }
672      break;
673
674   case TGSI_OPCODE_DP4:
675   /* TGSI_OPCODE_DOT4 */
676      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
677      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
678      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
679      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
680      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
681      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
682      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
683      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
684      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
685      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
686      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
687      tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
688      tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
689      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
690      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
691      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
692         dst0[chan_index] = tmp0;
693      }
694      break;
695
696   case TGSI_OPCODE_DST:
697      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
698         dst0[CHAN_X] = bld->base.one;
699      }
700      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
701         tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
702         tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
703         dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
704      }
705      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
706         dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
707      }
708      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
709         dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
710      }
711      break;
712
713   case TGSI_OPCODE_MIN:
714      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
715         src0 = emit_fetch( bld, inst, 0, chan_index );
716         src1 = emit_fetch( bld, inst, 1, chan_index );
717         dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
718      }
719      break;
720
721   case TGSI_OPCODE_MAX:
722      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
723         src0 = emit_fetch( bld, inst, 0, chan_index );
724         src1 = emit_fetch( bld, inst, 1, chan_index );
725         dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
726      }
727      break;
728
729   case TGSI_OPCODE_SLT:
730   /* TGSI_OPCODE_SETLT */
731      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
732         src0 = emit_fetch( bld, inst, 0, chan_index );
733         src1 = emit_fetch( bld, inst, 1, chan_index );
734         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
735         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
736      }
737      break;
738
739   case TGSI_OPCODE_SGE:
740   /* TGSI_OPCODE_SETGE */
741      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
742         src0 = emit_fetch( bld, inst, 0, chan_index );
743         src1 = emit_fetch( bld, inst, 1, chan_index );
744         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
745         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
746      }
747      break;
748
749   case TGSI_OPCODE_MAD:
750   /* TGSI_OPCODE_MADD */
751      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
752         tmp0 = emit_fetch( bld, inst, 0, chan_index );
753         tmp1 = emit_fetch( bld, inst, 1, chan_index );
754         tmp2 = emit_fetch( bld, inst, 2, chan_index );
755         tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
756         tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
757         dst0[chan_index] = tmp0;
758      }
759      break;
760
761   case TGSI_OPCODE_SUB:
762      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
763         tmp0 = emit_fetch( bld, inst, 0, chan_index );
764         tmp1 = emit_fetch( bld, inst, 1, chan_index );
765         dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
766      }
767      break;
768
769   case TGSI_OPCODE_LRP:
770      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
771         src0 = emit_fetch( bld, inst, 0, chan_index );
772         src1 = emit_fetch( bld, inst, 1, chan_index );
773         src2 = emit_fetch( bld, inst, 2, chan_index );
774         tmp0 = lp_build_sub( &bld->base, src1, src2 );
775         tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
776         dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
777      }
778      break;
779
780   case TGSI_OPCODE_CND:
781      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
782         src0 = emit_fetch( bld, inst, 0, chan_index );
783         src1 = emit_fetch( bld, inst, 1, chan_index );
784         src2 = emit_fetch( bld, inst, 2, chan_index );
785         tmp1 = lp_build_const_scalar(bld->base.type, 0.5);
786         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
787         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
788      }
789      break;
790
791   case TGSI_OPCODE_DP2A:
792      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );  /* xmm0 = src[0].x */
793      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );  /* xmm1 = src[1].x */
794      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
795      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );  /* xmm1 = src[0].y */
796      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );  /* xmm2 = src[1].y */
797      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
798      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
799      tmp1 = emit_fetch( bld, inst, 2, CHAN_X );  /* xmm1 = src[2].x */
800      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
801      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
802         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
803      }
804      break;
805
806   case TGSI_OPCODE_FRC:
807      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
808         src0 = emit_fetch( bld, inst, 0, chan_index );
809         tmp0 = lp_build_floor(&bld->base, src0);
810         tmp0 = lp_build_sub(&bld->base, src0, tmp0);
811         dst0[chan_index] = tmp0;
812      }
813      break;
814
815   case TGSI_OPCODE_CLAMP:
816      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
817         tmp0 = emit_fetch( bld, inst, 0, chan_index );
818         src1 = emit_fetch( bld, inst, 1, chan_index );
819         src2 = emit_fetch( bld, inst, 2, chan_index );
820         tmp0 = lp_build_max(&bld->base, tmp0, src1);
821         tmp0 = lp_build_min(&bld->base, tmp0, src2);
822         dst0[chan_index] = tmp0;
823      }
824      break;
825
826   case TGSI_OPCODE_FLR:
827      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
828         tmp0 = emit_fetch( bld, inst, 0, chan_index );
829         dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
830      }
831      break;
832
833   case TGSI_OPCODE_ROUND:
834      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
835         tmp0 = emit_fetch( bld, inst, 0, chan_index );
836         dst0[chan_index] = lp_build_round(&bld->base, tmp0);
837      }
838      break;
839
840   case TGSI_OPCODE_EX2: {
841      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
842      tmp0 = lp_build_exp2( &bld->base, tmp0);
843      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
844         dst0[chan_index] = tmp0;
845      }
846      break;
847   }
848
849   case TGSI_OPCODE_LG2:
850      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
851      tmp0 = lp_build_log2( &bld->base, tmp0);
852      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
853         dst0[chan_index] = tmp0;
854      }
855      break;
856
857   case TGSI_OPCODE_POW:
858      src0 = emit_fetch( bld, inst, 0, CHAN_X );
859      src1 = emit_fetch( bld, inst, 1, CHAN_X );
860      res = lp_build_pow( &bld->base, src0, src1 );
861      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
862         dst0[chan_index] = res;
863      }
864      break;
865
866   case TGSI_OPCODE_XPD:
867      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
868          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
869         tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
870         tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
871      }
872      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
873          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
874         tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
875         tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
876      }
877      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
878         tmp2 = tmp0;
879         tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
880         tmp5 = tmp3;
881         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
882         tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
883         dst0[CHAN_X] = tmp2;
884      }
885      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
886          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
887         tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
888         tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
889      }
890      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
891         tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
892         tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
893         tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
894         dst0[CHAN_Y] = tmp3;
895      }
896      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
897         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
898         tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
899         tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
900         dst0[CHAN_Z] = tmp5;
901      }
902      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
903         dst0[CHAN_W] = bld->base.one;
904      }
905      break;
906
907   case TGSI_OPCODE_ABS:
908      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
909         tmp0 = emit_fetch( bld, inst, 0, chan_index );
910         dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
911      }
912      break;
913
914   case TGSI_OPCODE_RCC:
915      /* deprecated? */
916      assert(0);
917      return 0;
918
919   case TGSI_OPCODE_DPH:
920      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
921      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
922      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
923      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
924      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
925      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
926      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
927      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
928      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
929      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
930      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
931      tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
932      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
933      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
934         dst0[chan_index] = tmp0;
935      }
936      break;
937
938   case TGSI_OPCODE_COS:
939      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
940      tmp0 = lp_build_cos( &bld->base, tmp0 );
941      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
942         dst0[chan_index] = tmp0;
943      }
944      break;
945
946   case TGSI_OPCODE_DDX:
947      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
948         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
949      }
950      break;
951
952   case TGSI_OPCODE_DDY:
953      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
954         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
955      }
956      break;
957
958   case TGSI_OPCODE_KILP:
959      /* predicated kill */
960      /* FIXME */
961      return 0;
962      break;
963
964   case TGSI_OPCODE_KIL:
965      /* conditional kill */
966      emit_kil( bld, inst );
967      break;
968
969   case TGSI_OPCODE_PK2H:
970      return 0;
971      break;
972
973   case TGSI_OPCODE_PK2US:
974      return 0;
975      break;
976
977   case TGSI_OPCODE_PK4B:
978      return 0;
979      break;
980
981   case TGSI_OPCODE_PK4UB:
982      return 0;
983      break;
984
985   case TGSI_OPCODE_RFL:
986      return 0;
987      break;
988
989   case TGSI_OPCODE_SEQ:
990      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
991         src0 = emit_fetch( bld, inst, 0, chan_index );
992         src1 = emit_fetch( bld, inst, 1, chan_index );
993         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
994         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
995      }
996      break;
997
998   case TGSI_OPCODE_SFL:
999      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1000         dst0[chan_index] = bld->base.zero;
1001      }
1002      break;
1003
1004   case TGSI_OPCODE_SGT:
1005      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1006         src0 = emit_fetch( bld, inst, 0, chan_index );
1007         src1 = emit_fetch( bld, inst, 1, chan_index );
1008         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1009         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1010      }
1011      break;
1012
1013   case TGSI_OPCODE_SIN:
1014      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1015      tmp0 = lp_build_sin( &bld->base, tmp0 );
1016      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1017         dst0[chan_index] = tmp0;
1018      }
1019      break;
1020
1021   case TGSI_OPCODE_SLE:
1022      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1023         src0 = emit_fetch( bld, inst, 0, chan_index );
1024         src1 = emit_fetch( bld, inst, 1, chan_index );
1025         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1026         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1027      }
1028      break;
1029
1030   case TGSI_OPCODE_SNE:
1031      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1032         src0 = emit_fetch( bld, inst, 0, chan_index );
1033         src1 = emit_fetch( bld, inst, 1, chan_index );
1034         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1035         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1036      }
1037      break;
1038
1039   case TGSI_OPCODE_STR:
1040      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1041         dst0[chan_index] = bld->base.one;
1042      }
1043      break;
1044
1045   case TGSI_OPCODE_TEX:
1046      emit_tex( bld, inst, FALSE, FALSE, dst0 );
1047      break;
1048
1049   case TGSI_OPCODE_TXD:
1050      /* FIXME */
1051      return 0;
1052      break;
1053
1054   case TGSI_OPCODE_UP2H:
1055      /* deprecated */
1056      assert (0);
1057      return 0;
1058      break;
1059
1060   case TGSI_OPCODE_UP2US:
1061      /* deprecated */
1062      assert(0);
1063      return 0;
1064      break;
1065
1066   case TGSI_OPCODE_UP4B:
1067      /* deprecated */
1068      assert(0);
1069      return 0;
1070      break;
1071
1072   case TGSI_OPCODE_UP4UB:
1073      /* deprecated */
1074      assert(0);
1075      return 0;
1076      break;
1077
1078   case TGSI_OPCODE_X2D:
1079      /* deprecated? */
1080      assert(0);
1081      return 0;
1082      break;
1083
1084   case TGSI_OPCODE_ARA:
1085      /* deprecated */
1086      assert(0);
1087      return 0;
1088      break;
1089
1090#if 0
1091   case TGSI_OPCODE_ARR:
1092      /* FIXME */
1093      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1094         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1095         emit_rnd( bld, 0, 0 );
1096         emit_f2it( bld, 0 );
1097         dst0[chan_index] = tmp0;
1098      }
1099      break;
1100#endif
1101
1102   case TGSI_OPCODE_BRA:
1103      /* deprecated */
1104      assert(0);
1105      return 0;
1106      break;
1107
1108   case TGSI_OPCODE_CAL:
1109      /* FIXME */
1110      return 0;
1111      break;
1112
1113   case TGSI_OPCODE_RET:
1114      /* FIXME */
1115      return 0;
1116      break;
1117
1118   case TGSI_OPCODE_END:
1119      break;
1120
1121   case TGSI_OPCODE_SSG:
1122   /* TGSI_OPCODE_SGN */
1123      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1124         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1125         dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1126      }
1127      break;
1128
1129   case TGSI_OPCODE_CMP:
1130      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1131         src0 = emit_fetch( bld, inst, 0, chan_index );
1132         src1 = emit_fetch( bld, inst, 1, chan_index );
1133         src2 = emit_fetch( bld, inst, 2, chan_index );
1134         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1135         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1136      }
1137      break;
1138
1139   case TGSI_OPCODE_SCS:
1140      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1141         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1142         dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1143      }
1144      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1145         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1146         dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1147      }
1148      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1149         dst0[CHAN_Z] = bld->base.zero;
1150      }
1151      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1152         dst0[CHAN_W] = bld->base.one;
1153      }
1154      break;
1155
1156   case TGSI_OPCODE_TXB:
1157      emit_tex( bld, inst, TRUE, FALSE, dst0 );
1158      break;
1159
1160   case TGSI_OPCODE_NRM:
1161      /* fall-through */
1162   case TGSI_OPCODE_NRM4:
1163      /* 3 or 4-component normalization */
1164      {
1165         uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1166
1167         if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1168             IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1169             IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1170             (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1171
1172            /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1173
1174            /* xmm4 = src.x */
1175            /* xmm0 = src.x * src.x */
1176            tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1177            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1178               tmp4 = tmp0;
1179            }
1180            tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1181
1182            /* xmm5 = src.y */
1183            /* xmm0 = xmm0 + src.y * src.y */
1184            tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1185            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1186               tmp5 = tmp1;
1187            }
1188            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1189            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1190
1191            /* xmm6 = src.z */
1192            /* xmm0 = xmm0 + src.z * src.z */
1193            tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1194            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1195               tmp6 = tmp1;
1196            }
1197            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1198            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1199
1200            if (dims == 4) {
1201               /* xmm7 = src.w */
1202               /* xmm0 = xmm0 + src.w * src.w */
1203               tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1204               if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1205                  tmp7 = tmp1;
1206               }
1207               tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1208               tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1209            }
1210
1211            /* xmm1 = 1 / sqrt(xmm0) */
1212            tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1213
1214            /* dst.x = xmm1 * src.x */
1215            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1216               dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1217            }
1218
1219            /* dst.y = xmm1 * src.y */
1220            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1221               dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1222            }
1223
1224            /* dst.z = xmm1 * src.z */
1225            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1226               dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1227            }
1228
1229            /* dst.w = xmm1 * src.w */
1230            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1231               dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1232            }
1233         }
1234
1235         /* dst.w = 1.0 */
1236         if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1237            dst0[CHAN_W] = bld->base.one;
1238         }
1239      }
1240      break;
1241
1242   case TGSI_OPCODE_DIV:
1243      /* deprecated */
1244      assert( 0 );
1245      return 0;
1246      break;
1247
1248   case TGSI_OPCODE_DP2:
1249      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );  /* xmm0 = src[0].x */
1250      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );  /* xmm1 = src[1].x */
1251      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
1252      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );  /* xmm1 = src[0].y */
1253      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );  /* xmm2 = src[1].y */
1254      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
1255      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1256      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1257         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
1258      }
1259      break;
1260
1261   case TGSI_OPCODE_TXL:
1262      emit_tex( bld, inst, TRUE, FALSE, dst0 );
1263      break;
1264
1265   case TGSI_OPCODE_TXP:
1266      emit_tex( bld, inst, FALSE, TRUE, dst0 );
1267      break;
1268
1269   case TGSI_OPCODE_BRK:
1270      /* FIXME */
1271      return 0;
1272      break;
1273
1274   case TGSI_OPCODE_IF:
1275      /* FIXME */
1276      return 0;
1277      break;
1278
1279   case TGSI_OPCODE_BGNFOR:
1280      /* deprecated */
1281      assert(0);
1282      return 0;
1283      break;
1284
1285   case TGSI_OPCODE_REP:
1286      /* deprecated */
1287      assert(0);
1288      return 0;
1289      break;
1290
1291   case TGSI_OPCODE_ELSE:
1292      /* FIXME */
1293      return 0;
1294      break;
1295
1296   case TGSI_OPCODE_ENDIF:
1297      /* FIXME */
1298      return 0;
1299      break;
1300
1301   case TGSI_OPCODE_ENDFOR:
1302      /* deprecated */
1303      assert(0);
1304      return 0;
1305      break;
1306
1307   case TGSI_OPCODE_ENDREP:
1308      /* deprecated */
1309      assert(0);
1310      return 0;
1311      break;
1312
1313   case TGSI_OPCODE_PUSHA:
1314      /* deprecated? */
1315      assert(0);
1316      return 0;
1317      break;
1318
1319   case TGSI_OPCODE_POPA:
1320      /* deprecated? */
1321      assert(0);
1322      return 0;
1323      break;
1324
1325   case TGSI_OPCODE_CEIL:
1326      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1327         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1328         dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1329      }
1330      break;
1331
1332   case TGSI_OPCODE_I2F:
1333      /* deprecated? */
1334      assert(0);
1335      return 0;
1336      break;
1337
1338   case TGSI_OPCODE_NOT:
1339      /* deprecated? */
1340      assert(0);
1341      return 0;
1342      break;
1343
1344   case TGSI_OPCODE_TRUNC:
1345      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1346         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1347         dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1348      }
1349      break;
1350
1351   case TGSI_OPCODE_SHL:
1352      /* deprecated? */
1353      assert(0);
1354      return 0;
1355      break;
1356
1357   case TGSI_OPCODE_ISHR:
1358      /* deprecated? */
1359      assert(0);
1360      return 0;
1361      break;
1362
1363   case TGSI_OPCODE_AND:
1364      /* deprecated? */
1365      assert(0);
1366      return 0;
1367      break;
1368
1369   case TGSI_OPCODE_OR:
1370      /* deprecated? */
1371      assert(0);
1372      return 0;
1373      break;
1374
1375   case TGSI_OPCODE_MOD:
1376      /* deprecated? */
1377      assert(0);
1378      return 0;
1379      break;
1380
1381   case TGSI_OPCODE_XOR:
1382      /* deprecated? */
1383      assert(0);
1384      return 0;
1385      break;
1386
1387   case TGSI_OPCODE_SAD:
1388      /* deprecated? */
1389      assert(0);
1390      return 0;
1391      break;
1392
1393   case TGSI_OPCODE_TXF:
1394      /* deprecated? */
1395      assert(0);
1396      return 0;
1397      break;
1398
1399   case TGSI_OPCODE_TXQ:
1400      /* deprecated? */
1401      assert(0);
1402      return 0;
1403      break;
1404
1405   case TGSI_OPCODE_CONT:
1406      /* deprecated? */
1407      assert(0);
1408      return 0;
1409      break;
1410
1411   case TGSI_OPCODE_EMIT:
1412      return 0;
1413      break;
1414
1415   case TGSI_OPCODE_ENDPRIM:
1416      return 0;
1417      break;
1418
1419   case TGSI_OPCODE_NOP:
1420      break;
1421
1422   default:
1423      return 0;
1424   }
1425
1426   if(info->num_dst) {
1427      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1428         emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
1429      }
1430   }
1431
1432   return 1;
1433}
1434
1435
1436void
1437lp_build_tgsi_soa(LLVMBuilderRef builder,
1438                  const struct tgsi_token *tokens,
1439                  struct lp_type type,
1440                  struct lp_build_mask_context *mask,
1441                  LLVMValueRef consts_ptr,
1442                  const LLVMValueRef *pos,
1443                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
1444                  LLVMValueRef (*outputs)[NUM_CHANNELS],
1445                  struct lp_build_sampler_soa *sampler)
1446{
1447   struct lp_build_tgsi_soa_context bld;
1448   struct tgsi_parse_context parse;
1449   uint num_immediates = 0;
1450   unsigned i;
1451
1452   /* Setup build context */
1453   memset(&bld, 0, sizeof bld);
1454   lp_build_context_init(&bld.base, builder, type);
1455   bld.mask = mask;
1456   bld.pos = pos;
1457   bld.inputs = inputs;
1458   bld.outputs = outputs;
1459   bld.consts_ptr = consts_ptr;
1460   bld.sampler = sampler;
1461
1462   tgsi_parse_init( &parse, tokens );
1463
1464   while( !tgsi_parse_end_of_tokens( &parse ) ) {
1465      tgsi_parse_token( &parse );
1466
1467      switch( parse.FullToken.Token.Type ) {
1468      case TGSI_TOKEN_TYPE_DECLARATION:
1469         /* Inputs already interpolated */
1470         {
1471            if (!emit_declaration( &bld, &parse.FullToken.FullDeclaration ))
1472               _debug_printf("warning: failed to define LLVM variable\n");
1473         }
1474         break;
1475
1476      case TGSI_TOKEN_TYPE_INSTRUCTION:
1477         {
1478            unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1479            const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1480            if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info ))
1481               _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1482                             info ? info->mnemonic : "<invalid>");
1483         }
1484
1485         break;
1486
1487      case TGSI_TOKEN_TYPE_IMMEDIATE:
1488         /* simply copy the immediate values into the next immediates[] slot */
1489         {
1490            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1491            assert(size <= 4);
1492            assert(num_immediates < LP_MAX_IMMEDIATES);
1493            for( i = 0; i < size; ++i )
1494               bld.immediates[num_immediates][i] =
1495                  lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float);
1496            for( i = size; i < 4; ++i )
1497               bld.immediates[num_immediates][i] = bld.base.undef;
1498            num_immediates++;
1499         }
1500         break;
1501
1502      default:
1503         assert( 0 );
1504      }
1505   }
1506
1507   tgsi_parse_free( &parse );
1508}
1509
1510