lp_bld_tgsi_soa.c revision c61bf363937f40624a5632745630d4f2b9907082
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29/**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39#include "pipe/p_config.h"
40#include "pipe/p_shader_tokens.h"
41#include "util/u_debug.h"
42#include "util/u_math.h"
43#include "util/u_memory.h"
44#include "tgsi/tgsi_info.h"
45#include "tgsi/tgsi_parse.h"
46#include "tgsi/tgsi_util.h"
47#include "tgsi/tgsi_exec.h"
48#include "lp_bld_type.h"
49#include "lp_bld_const.h"
50#include "lp_bld_arit.h"
51#include "lp_bld_logic.h"
52#include "lp_bld_swizzle.h"
53#include "lp_bld_flow.h"
54#include "lp_bld_tgsi.h"
55
56
/* Number of TGSI temporary registers tracked per shader; sizes the
 * temps[] table in struct lp_build_tgsi_soa_context. */
#define LP_MAX_TEMPS 256
/* Number of TGSI immediate registers tracked per shader; sizes the
 * immediates[] table in struct lp_build_tgsi_soa_context. */
#define LP_MAX_IMMEDIATES 256
59
60
/* Loop header that steps CHAN over every channel index, 0 up to
 * (but not including) NUM_CHANNELS.  CHAN must be an assignable lvalue
 * declared by the caller. */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Non-zero when bit CHAN is set in the write mask of the first
 * destination register of INST. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

/* Expands to a bare `if` on IS_DST0_CHANNEL_ENABLED; the statement or
 * block that follows becomes its body.  Beware of a trailing `else`
 * binding to this hidden `if`. */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Loop over all channels, executing the following statement or block
 * only for those enabled in the write mask of INST's first destination. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
73
/* Channel indices used to address the per-channel arrays and the
 * destination write mask bits. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* Lane indices of the four pixels of a 2x2 quad, as used by the
 * swizzle_* tables that feed the derivative helpers. */
#define QUAD_TOP_LEFT     0
#define QUAD_TOP_RIGHT    1
#define QUAD_BOTTOM_LEFT  2
#define QUAD_BOTTOM_RIGHT 3
83
84
/**
 * State carried while translating one TGSI shader to LLVM IR in SoA form.
 *
 * Register files are kept as tables of LLVM values indexed by
 * [register index][channel].
 */
struct lp_build_tgsi_soa_context
{
   /* Generic builder state (LLVM builder, vector type, zero/one/undef). */
   struct lp_build_context base;

   /* Base pointer of the constant buffer; emit_fetch() indexes it as a
    * flat array of scalars at (register index * 4 + channel). */
   LLVMValueRef consts_ptr;
   /* NOTE(review): not referenced in the visible part of this file;
    * presumably the fragment position values -- confirm against the
    * code that fills this structure. */
   const LLVMValueRef *pos;
   /* Shader inputs, read by emit_fetch() for TGSI_FILE_INPUT. */
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   /* Shader outputs, written by emit_store() for TGSI_FILE_OUTPUT. */
   LLVMValueRef (*outputs)[NUM_CHANNELS];

   /* Texture sampling code generator; emit_tex() calls its
    * emit_fetch_texel hook. */
   struct lp_build_sampler_soa *sampler;

   /* Immediate and temporary register files.  A NULL temporary slot is
    * fetched as an undefined value by emit_fetch(). */
   LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
   LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];

   /* Execution mask updated by emit_kil() through lp_build_mask_update(). */
   struct lp_build_mask_context *mask;
};
101
102
/*
 * Quad swizzle tables: entry i names the quad lane whose value is placed
 * in lane i.  They are passed to lp_build_swizzle1_aos() by emit_ddx()
 * and emit_ddy().
 */

/* Every lane takes the left-column pixel of its own row. */
static const unsigned char
swizzle_left[4] = {
   QUAD_TOP_LEFT,     QUAD_TOP_LEFT,
   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_LEFT
};

/* Every lane takes the right-column pixel of its own row. */
static const unsigned char
swizzle_right[4] = {
   QUAD_TOP_RIGHT,    QUAD_TOP_RIGHT,
   QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
};

/* Every lane takes the top-row pixel of its own column. */
static const unsigned char
swizzle_top[4] = {
   QUAD_TOP_LEFT,     QUAD_TOP_RIGHT,
   QUAD_TOP_LEFT,     QUAD_TOP_RIGHT
};

/* Every lane takes the bottom-row pixel of its own column. */
static const unsigned char
swizzle_bottom[4] = {
   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_RIGHT,
   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_RIGHT
};
126
127
128static LLVMValueRef
129emit_ddx(struct lp_build_tgsi_soa_context *bld,
130         LLVMValueRef src)
131{
132   LLVMValueRef src_left  = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
133   LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
134   return lp_build_sub(&bld->base, src_right, src_left);
135}
136
137
138static LLVMValueRef
139emit_ddy(struct lp_build_tgsi_soa_context *bld,
140         LLVMValueRef src)
141{
142   LLVMValueRef src_top    = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
143   LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
144   return lp_build_sub(&bld->base, src_top, src_bottom);
145}
146
147
148/**
149 * Register fetch.
150 */
151static LLVMValueRef
152emit_fetch(
153   struct lp_build_tgsi_soa_context *bld,
154   const struct tgsi_full_instruction *inst,
155   unsigned index,
156   const unsigned chan_index )
157{
158   const struct tgsi_full_src_register *reg = &inst->Src[index];
159   unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
160   LLVMValueRef res;
161
162   switch (swizzle) {
163   case TGSI_SWIZZLE_X:
164   case TGSI_SWIZZLE_Y:
165   case TGSI_SWIZZLE_Z:
166   case TGSI_SWIZZLE_W:
167
168      switch (reg->Register.File) {
169      case TGSI_FILE_CONSTANT: {
170         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
171         LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
172         LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
173         res = lp_build_broadcast_scalar(&bld->base, scalar);
174         break;
175      }
176
177      case TGSI_FILE_IMMEDIATE:
178         res = bld->immediates[reg->Register.Index][swizzle];
179         assert(res);
180         break;
181
182      case TGSI_FILE_INPUT:
183         res = bld->inputs[reg->Register.Index][swizzle];
184         assert(res);
185         break;
186
187      case TGSI_FILE_TEMPORARY:
188         res = bld->temps[reg->Register.Index][swizzle];
189         if(!res)
190            return bld->base.undef;
191         break;
192
193      default:
194         assert( 0 );
195         return bld->base.undef;
196      }
197      break;
198
199   default:
200      assert( 0 );
201      return bld->base.undef;
202   }
203
204   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
205   case TGSI_UTIL_SIGN_CLEAR:
206      res = lp_build_abs( &bld->base, res );
207      break;
208
209   case TGSI_UTIL_SIGN_SET:
210      /* TODO: Use bitwese OR for floating point */
211      res = lp_build_abs( &bld->base, res );
212      res = LLVMBuildNeg( bld->base.builder, res, "" );
213      break;
214
215   case TGSI_UTIL_SIGN_TOGGLE:
216      res = LLVMBuildNeg( bld->base.builder, res, "" );
217      break;
218
219   case TGSI_UTIL_SIGN_KEEP:
220      break;
221   }
222
223   return res;
224}
225
226
227/**
228 * Register fetch with derivatives.
229 */
230static void
231emit_fetch_deriv(
232   struct lp_build_tgsi_soa_context *bld,
233   const struct tgsi_full_instruction *inst,
234   unsigned index,
235   const unsigned chan_index,
236   LLVMValueRef *res,
237   LLVMValueRef *ddx,
238   LLVMValueRef *ddy)
239{
240   LLVMValueRef src;
241
242   src = emit_fetch(bld, inst, index, chan_index);
243
244   if(res)
245      *res = src;
246
247   /* TODO: use interpolation coeffs for inputs */
248
249   if(ddx)
250      *ddx = emit_ddx(bld, src);
251
252   if(ddy)
253      *ddy = emit_ddy(bld, src);
254}
255
256
257/**
258 * Register store.
259 */
260static void
261emit_store(
262   struct lp_build_tgsi_soa_context *bld,
263   const struct tgsi_full_instruction *inst,
264   unsigned index,
265   unsigned chan_index,
266   LLVMValueRef value)
267{
268   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
269
270   switch( inst->Instruction.Saturate ) {
271   case TGSI_SAT_NONE:
272      break;
273
274   case TGSI_SAT_ZERO_ONE:
275      value = lp_build_max(&bld->base, value, bld->base.zero);
276      value = lp_build_min(&bld->base, value, bld->base.one);
277      break;
278
279   case TGSI_SAT_MINUS_PLUS_ONE:
280      value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0));
281      value = lp_build_min(&bld->base, value, bld->base.one);
282      break;
283
284   default:
285      assert(0);
286   }
287
288   switch( reg->Register.File ) {
289   case TGSI_FILE_OUTPUT:
290      bld->outputs[reg->Register.Index][chan_index] = value;
291      break;
292
293   case TGSI_FILE_TEMPORARY:
294      bld->temps[reg->Register.Index][chan_index] = value;
295      break;
296
297   case TGSI_FILE_ADDRESS:
298      /* FIXME */
299      assert(0);
300      break;
301
302   default:
303      assert( 0 );
304   }
305}
306
307
308/**
309 * High-level instruction translators.
310 */
311
312
313static void
314emit_tex( struct lp_build_tgsi_soa_context *bld,
315          const struct tgsi_full_instruction *inst,
316          boolean apply_lodbias,
317          boolean projected,
318          LLVMValueRef *texel)
319{
320   const uint unit = inst->Src[1].Register.Index;
321   LLVMValueRef lodbias;
322   LLVMValueRef oow = NULL;
323   LLVMValueRef coords[3];
324   unsigned num_coords;
325   unsigned i;
326
327   switch (inst->Texture.Texture) {
328   case TGSI_TEXTURE_1D:
329      num_coords = 1;
330      break;
331   case TGSI_TEXTURE_2D:
332   case TGSI_TEXTURE_RECT:
333      num_coords = 2;
334      break;
335   case TGSI_TEXTURE_SHADOW1D:
336   case TGSI_TEXTURE_SHADOW2D:
337   case TGSI_TEXTURE_SHADOWRECT:
338   case TGSI_TEXTURE_3D:
339   case TGSI_TEXTURE_CUBE:
340      num_coords = 3;
341      break;
342   default:
343      assert(0);
344      return;
345   }
346
347   if(apply_lodbias)
348      lodbias = emit_fetch( bld, inst, 0, 3 );
349   else
350      lodbias = bld->base.zero;
351
352   if (projected) {
353      oow = emit_fetch( bld, inst, 0, 3 );
354      oow = lp_build_rcp(&bld->base, oow);
355   }
356
357   for (i = 0; i < num_coords; i++) {
358      coords[i] = emit_fetch( bld, inst, 0, i );
359      if (projected)
360         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
361   }
362   for (i = num_coords; i < 3; i++) {
363      coords[i] = bld->base.undef;
364   }
365
366   bld->sampler->emit_fetch_texel(bld->sampler,
367                                  bld->base.builder,
368                                  bld->base.type,
369                                  unit, num_coords, coords, lodbias,
370                                  texel);
371}
372
373
374static void
375emit_kil(
376   struct lp_build_tgsi_soa_context *bld,
377   const struct tgsi_full_instruction *inst )
378{
379   const struct tgsi_full_src_register *reg = &inst->Src[0];
380   LLVMValueRef terms[NUM_CHANNELS];
381   LLVMValueRef mask;
382   unsigned chan_index;
383
384   memset(&terms, 0, sizeof terms);
385
386   FOR_EACH_CHANNEL( chan_index ) {
387      unsigned swizzle;
388
389      /* Unswizzle channel */
390      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
391
392      /* Check if the component has not been already tested. */
393      assert(swizzle < NUM_CHANNELS);
394      if( !terms[swizzle] )
395         /* TODO: change the comparison operator instead of setting the sign */
396         terms[swizzle] =  emit_fetch(bld, inst, 0, chan_index );
397   }
398
399   mask = NULL;
400   FOR_EACH_CHANNEL( chan_index ) {
401      if(terms[chan_index]) {
402         LLVMValueRef chan_mask;
403
404         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
405
406         if(mask)
407            mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
408         else
409            mask = chan_mask;
410      }
411   }
412
413   if(mask)
414      lp_build_mask_update(bld->mask, mask);
415}
416
417
418/**
419 * Check if inst src/dest regs use indirect addressing into temporary
420 * register file.
421 */
422static boolean
423indirect_temp_reference(const struct tgsi_full_instruction *inst)
424{
425   uint i;
426   for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
427      const struct tgsi_full_src_register *reg = &inst->Src[i];
428      if (reg->Register.File == TGSI_FILE_TEMPORARY &&
429          reg->Register.Indirect)
430         return TRUE;
431   }
432   for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
433      const struct tgsi_full_dst_register *reg = &inst->Dst[i];
434      if (reg->Register.File == TGSI_FILE_TEMPORARY &&
435          reg->Register.Indirect)
436         return TRUE;
437   }
438   return FALSE;
439}
440
441
442static int
443emit_instruction(
444   struct lp_build_tgsi_soa_context *bld,
445   const struct tgsi_full_instruction *inst,
446   const struct tgsi_opcode_info *info)
447{
448   unsigned chan_index;
449   LLVMValueRef src0, src1, src2;
450   LLVMValueRef tmp0, tmp1, tmp2;
451   LLVMValueRef tmp3 = NULL;
452   LLVMValueRef tmp4 = NULL;
453   LLVMValueRef tmp5 = NULL;
454   LLVMValueRef tmp6 = NULL;
455   LLVMValueRef tmp7 = NULL;
456   LLVMValueRef res;
457   LLVMValueRef dst0[NUM_CHANNELS];
458
459   /* we can't handle indirect addressing into temp register file yet */
460   if (indirect_temp_reference(inst))
461      return FALSE;
462
463   assert(info->num_dst <= 1);
464   if(info->num_dst) {
465      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
466         dst0[chan_index] = bld->base.undef;
467      }
468   }
469
470   switch (inst->Instruction.Opcode) {
471#if 0
472   case TGSI_OPCODE_ARL:
473      /* FIXME */
474      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
475         tmp0 = emit_fetch( bld, inst, 0, chan_index );
476         emit_flr(bld, 0, 0);
477         emit_f2it( bld, 0 );
478         dst0[chan_index] = tmp0;
479      }
480      break;
481#endif
482
483   case TGSI_OPCODE_MOV:
484      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
485         dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
486      }
487      break;
488
489   case TGSI_OPCODE_LIT:
490      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
491         dst0[CHAN_X] = bld->base.one;
492      }
493      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
494         src0 = emit_fetch( bld, inst, 0, CHAN_X );
495         dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
496      }
497      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
498         /* XMM[1] = SrcReg[0].yyyy */
499         tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
500         /* XMM[1] = max(XMM[1], 0) */
501         tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
502         /* XMM[2] = SrcReg[0].wwww */
503         tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
504         tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
505         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
506         tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
507         dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
508      }
509      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
510         dst0[CHAN_W] = bld->base.one;
511      }
512      break;
513
514   case TGSI_OPCODE_RCP:
515   /* TGSI_OPCODE_RECIP */
516      src0 = emit_fetch( bld, inst, 0, CHAN_X );
517      res = lp_build_rcp(&bld->base, src0);
518      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
519         dst0[chan_index] = res;
520      }
521      break;
522
523   case TGSI_OPCODE_RSQ:
524   /* TGSI_OPCODE_RECIPSQRT */
525      src0 = emit_fetch( bld, inst, 0, CHAN_X );
526      src0 = lp_build_abs(&bld->base, src0);
527      res = lp_build_rsqrt(&bld->base, src0);
528      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
529         dst0[chan_index] = res;
530      }
531      break;
532
533   case TGSI_OPCODE_EXP:
534      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
535          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
536          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
537         LLVMValueRef *p_exp2_int_part = NULL;
538         LLVMValueRef *p_frac_part = NULL;
539         LLVMValueRef *p_exp2 = NULL;
540
541         src0 = emit_fetch( bld, inst, 0, CHAN_X );
542
543         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
544            p_exp2_int_part = &tmp0;
545         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
546            p_frac_part = &tmp1;
547         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
548            p_exp2 = &tmp2;
549
550         lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
551
552         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
553            dst0[CHAN_X] = tmp0;
554         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
555            dst0[CHAN_Y] = tmp1;
556         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
557            dst0[CHAN_Z] = tmp2;
558      }
559      /* dst.w = 1.0 */
560      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
561         dst0[CHAN_W] = bld->base.one;
562      }
563      break;
564
565   case TGSI_OPCODE_LOG:
566      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
567          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
568          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
569         LLVMValueRef *p_floor_log2 = NULL;
570         LLVMValueRef *p_exp = NULL;
571         LLVMValueRef *p_log2 = NULL;
572
573         src0 = emit_fetch( bld, inst, 0, CHAN_X );
574         src0 = lp_build_abs( &bld->base, src0 );
575
576         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
577            p_floor_log2 = &tmp0;
578         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
579            p_exp = &tmp1;
580         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
581            p_log2 = &tmp2;
582
583         lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
584
585         /* dst.x = floor(lg2(abs(src.x))) */
586         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
587            dst0[CHAN_X] = tmp0;
588         /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
589         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
590            dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
591         }
592         /* dst.z = lg2(abs(src.x)) */
593         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
594            dst0[CHAN_Z] = tmp2;
595      }
596      /* dst.w = 1.0 */
597      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
598         dst0[CHAN_W] = bld->base.one;
599      }
600      break;
601
602   case TGSI_OPCODE_MUL:
603      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
604         src0 = emit_fetch( bld, inst, 0, chan_index );
605         src1 = emit_fetch( bld, inst, 1, chan_index );
606         dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
607      }
608      break;
609
610   case TGSI_OPCODE_ADD:
611      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
612         src0 = emit_fetch( bld, inst, 0, chan_index );
613         src1 = emit_fetch( bld, inst, 1, chan_index );
614         dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
615      }
616      break;
617
618   case TGSI_OPCODE_DP3:
619   /* TGSI_OPCODE_DOT3 */
620      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
621      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
622      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
623      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
624      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
625      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
626      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
627      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
628      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
629      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
630      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
631      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
632         dst0[chan_index] = tmp0;
633      }
634      break;
635
636   case TGSI_OPCODE_DP4:
637   /* TGSI_OPCODE_DOT4 */
638      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
639      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
640      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
641      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
642      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
643      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
644      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
645      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
646      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
647      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
648      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
649      tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
650      tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
651      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
652      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
653      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
654         dst0[chan_index] = tmp0;
655      }
656      break;
657
658   case TGSI_OPCODE_DST:
659      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
660         dst0[CHAN_X] = bld->base.one;
661      }
662      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
663         tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
664         tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
665         dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
666      }
667      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
668         dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
669      }
670      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
671         dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
672      }
673      break;
674
675   case TGSI_OPCODE_MIN:
676      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
677         src0 = emit_fetch( bld, inst, 0, chan_index );
678         src1 = emit_fetch( bld, inst, 1, chan_index );
679         dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
680      }
681      break;
682
683   case TGSI_OPCODE_MAX:
684      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
685         src0 = emit_fetch( bld, inst, 0, chan_index );
686         src1 = emit_fetch( bld, inst, 1, chan_index );
687         dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
688      }
689      break;
690
691   case TGSI_OPCODE_SLT:
692   /* TGSI_OPCODE_SETLT */
693      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
694         src0 = emit_fetch( bld, inst, 0, chan_index );
695         src1 = emit_fetch( bld, inst, 1, chan_index );
696         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
697         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
698      }
699      break;
700
701   case TGSI_OPCODE_SGE:
702   /* TGSI_OPCODE_SETGE */
703      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
704         src0 = emit_fetch( bld, inst, 0, chan_index );
705         src1 = emit_fetch( bld, inst, 1, chan_index );
706         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
707         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
708      }
709      break;
710
711   case TGSI_OPCODE_MAD:
712   /* TGSI_OPCODE_MADD */
713      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
714         tmp0 = emit_fetch( bld, inst, 0, chan_index );
715         tmp1 = emit_fetch( bld, inst, 1, chan_index );
716         tmp2 = emit_fetch( bld, inst, 2, chan_index );
717         tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
718         tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
719         dst0[chan_index] = tmp0;
720      }
721      break;
722
723   case TGSI_OPCODE_SUB:
724      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
725         tmp0 = emit_fetch( bld, inst, 0, chan_index );
726         tmp1 = emit_fetch( bld, inst, 1, chan_index );
727         dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
728      }
729      break;
730
731   case TGSI_OPCODE_LRP:
732      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
733         src0 = emit_fetch( bld, inst, 0, chan_index );
734         src1 = emit_fetch( bld, inst, 1, chan_index );
735         src2 = emit_fetch( bld, inst, 2, chan_index );
736         tmp0 = lp_build_sub( &bld->base, src1, src2 );
737         tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
738         dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
739      }
740      break;
741
742   case TGSI_OPCODE_CND:
743      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
744         src0 = emit_fetch( bld, inst, 0, chan_index );
745         src1 = emit_fetch( bld, inst, 1, chan_index );
746         src2 = emit_fetch( bld, inst, 2, chan_index );
747         tmp1 = lp_build_const_scalar(bld->base.type, 0.5);
748         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
749         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
750      }
751      break;
752
753   case TGSI_OPCODE_DP2A:
754      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );  /* xmm0 = src[0].x */
755      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );  /* xmm1 = src[1].x */
756      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
757      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );  /* xmm1 = src[0].y */
758      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );  /* xmm2 = src[1].y */
759      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
760      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
761      tmp1 = emit_fetch( bld, inst, 2, CHAN_X );  /* xmm1 = src[2].x */
762      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
763      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
764         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
765      }
766      break;
767
768   case TGSI_OPCODE_FRC:
769      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
770         src0 = emit_fetch( bld, inst, 0, chan_index );
771         tmp0 = lp_build_floor(&bld->base, src0);
772         tmp0 = lp_build_sub(&bld->base, src0, tmp0);
773         dst0[chan_index] = tmp0;
774      }
775      break;
776
777   case TGSI_OPCODE_CLAMP:
778      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
779         tmp0 = emit_fetch( bld, inst, 0, chan_index );
780         src1 = emit_fetch( bld, inst, 1, chan_index );
781         src2 = emit_fetch( bld, inst, 2, chan_index );
782         tmp0 = lp_build_max(&bld->base, tmp0, src1);
783         tmp0 = lp_build_min(&bld->base, tmp0, src2);
784         dst0[chan_index] = tmp0;
785      }
786      break;
787
788   case TGSI_OPCODE_FLR:
789      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
790         tmp0 = emit_fetch( bld, inst, 0, chan_index );
791         dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
792      }
793      break;
794
795   case TGSI_OPCODE_ROUND:
796      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
797         tmp0 = emit_fetch( bld, inst, 0, chan_index );
798         dst0[chan_index] = lp_build_round(&bld->base, tmp0);
799      }
800      break;
801
802   case TGSI_OPCODE_EX2: {
803      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
804      tmp0 = lp_build_exp2( &bld->base, tmp0);
805      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
806         dst0[chan_index] = tmp0;
807      }
808      break;
809   }
810
811   case TGSI_OPCODE_LG2:
812      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
813      tmp0 = lp_build_log2( &bld->base, tmp0);
814      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
815         dst0[chan_index] = tmp0;
816      }
817      break;
818
819   case TGSI_OPCODE_POW:
820      src0 = emit_fetch( bld, inst, 0, CHAN_X );
821      src1 = emit_fetch( bld, inst, 1, CHAN_X );
822      res = lp_build_pow( &bld->base, src0, src1 );
823      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
824         dst0[chan_index] = res;
825      }
826      break;
827
828   case TGSI_OPCODE_XPD:
829      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
830          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
831         tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
832         tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
833      }
834      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
835          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
836         tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
837         tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
838      }
839      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
840         tmp2 = tmp0;
841         tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
842         tmp5 = tmp3;
843         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
844         tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
845         dst0[CHAN_X] = tmp2;
846      }
847      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
848          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
849         tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
850         tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
851      }
852      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
853         tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
854         tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
855         tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
856         dst0[CHAN_Y] = tmp3;
857      }
858      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
859         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
860         tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
861         tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
862         dst0[CHAN_Z] = tmp5;
863      }
864      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
865         dst0[CHAN_W] = bld->base.one;
866      }
867      break;
868
869   case TGSI_OPCODE_ABS:
870      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
871         tmp0 = emit_fetch( bld, inst, 0, chan_index );
872         dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
873      }
874      break;
875
876   case TGSI_OPCODE_RCC:
877      /* deprecated? */
878      assert(0);
879      return 0;
880
881   case TGSI_OPCODE_DPH:
882      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
883      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
884      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
885      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
886      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
887      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
888      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
889      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
890      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
891      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
892      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
893      tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
894      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
895      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
896         dst0[chan_index] = tmp0;
897      }
898      break;
899
900   case TGSI_OPCODE_COS:
901      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
902      tmp0 = lp_build_cos( &bld->base, tmp0 );
903      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
904         dst0[chan_index] = tmp0;
905      }
906      break;
907
908   case TGSI_OPCODE_DDX:
909      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
910         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
911      }
912      break;
913
914   case TGSI_OPCODE_DDY:
915      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
916         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
917      }
918      break;
919
920   case TGSI_OPCODE_KILP:
921      /* predicated kill */
922      /* FIXME */
923      return 0;
924      break;
925
926   case TGSI_OPCODE_KIL:
927      /* conditional kill */
928      emit_kil( bld, inst );
929      break;
930
931   case TGSI_OPCODE_PK2H:
932      return 0;
933      break;
934
935   case TGSI_OPCODE_PK2US:
936      return 0;
937      break;
938
939   case TGSI_OPCODE_PK4B:
940      return 0;
941      break;
942
943   case TGSI_OPCODE_PK4UB:
944      return 0;
945      break;
946
947   case TGSI_OPCODE_RFL:
948      return 0;
949      break;
950
951   case TGSI_OPCODE_SEQ:
952      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
953         src0 = emit_fetch( bld, inst, 0, chan_index );
954         src1 = emit_fetch( bld, inst, 1, chan_index );
955         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
956         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
957      }
958      break;
959
960   case TGSI_OPCODE_SFL:
961      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
962         dst0[chan_index] = bld->base.zero;
963      }
964      break;
965
966   case TGSI_OPCODE_SGT:
967      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
968         src0 = emit_fetch( bld, inst, 0, chan_index );
969         src1 = emit_fetch( bld, inst, 1, chan_index );
970         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
971         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
972      }
973      break;
974
975   case TGSI_OPCODE_SIN:
976      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
977      tmp0 = lp_build_sin( &bld->base, tmp0 );
978      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
979         dst0[chan_index] = tmp0;
980      }
981      break;
982
983   case TGSI_OPCODE_SLE:
984      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
985         src0 = emit_fetch( bld, inst, 0, chan_index );
986         src1 = emit_fetch( bld, inst, 1, chan_index );
987         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
988         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
989      }
990      break;
991
992   case TGSI_OPCODE_SNE:
993      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
994         src0 = emit_fetch( bld, inst, 0, chan_index );
995         src1 = emit_fetch( bld, inst, 1, chan_index );
996         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
997         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
998      }
999      break;
1000
1001   case TGSI_OPCODE_STR:
1002      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1003         dst0[chan_index] = bld->base.one;
1004      }
1005      break;
1006
1007   case TGSI_OPCODE_TEX:
1008      emit_tex( bld, inst, FALSE, FALSE, dst0 );
1009      break;
1010
1011   case TGSI_OPCODE_TXD:
1012      /* FIXME */
1013      return 0;
1014      break;
1015
1016   case TGSI_OPCODE_UP2H:
1017      /* deprecated */
1018      assert (0);
1019      return 0;
1020      break;
1021
1022   case TGSI_OPCODE_UP2US:
1023      /* deprecated */
1024      assert(0);
1025      return 0;
1026      break;
1027
1028   case TGSI_OPCODE_UP4B:
1029      /* deprecated */
1030      assert(0);
1031      return 0;
1032      break;
1033
1034   case TGSI_OPCODE_UP4UB:
1035      /* deprecated */
1036      assert(0);
1037      return 0;
1038      break;
1039
1040   case TGSI_OPCODE_X2D:
1041      /* deprecated? */
1042      assert(0);
1043      return 0;
1044      break;
1045
1046   case TGSI_OPCODE_ARA:
1047      /* deprecated */
1048      assert(0);
1049      return 0;
1050      break;
1051
1052#if 0
1053   case TGSI_OPCODE_ARR:
1054      /* FIXME */
1055      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1056         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1057         emit_rnd( bld, 0, 0 );
1058         emit_f2it( bld, 0 );
1059         dst0[chan_index] = tmp0;
1060      }
1061      break;
1062#endif
1063
1064   case TGSI_OPCODE_BRA:
1065      /* deprecated */
1066      assert(0);
1067      return 0;
1068      break;
1069
1070   case TGSI_OPCODE_CAL:
1071      /* FIXME */
1072      return 0;
1073      break;
1074
1075   case TGSI_OPCODE_RET:
1076      /* FIXME */
1077      return 0;
1078      break;
1079
1080   case TGSI_OPCODE_END:
1081      break;
1082
1083   case TGSI_OPCODE_SSG:
1084   /* TGSI_OPCODE_SGN */
1085      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1086         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1087         dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1088      }
1089      break;
1090
1091   case TGSI_OPCODE_CMP:
1092      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1093         src0 = emit_fetch( bld, inst, 0, chan_index );
1094         src1 = emit_fetch( bld, inst, 1, chan_index );
1095         src2 = emit_fetch( bld, inst, 2, chan_index );
1096         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1097         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1098      }
1099      break;
1100
1101   case TGSI_OPCODE_SCS:
1102      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1103         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1104         dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1105      }
1106      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1107         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1108         dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1109      }
1110      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1111         dst0[CHAN_Z] = bld->base.zero;
1112      }
1113      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1114         dst0[CHAN_W] = bld->base.one;
1115      }
1116      break;
1117
1118   case TGSI_OPCODE_TXB:
1119      emit_tex( bld, inst, TRUE, FALSE, dst0 );
1120      break;
1121
1122   case TGSI_OPCODE_NRM:
1123      /* fall-through */
1124   case TGSI_OPCODE_NRM4:
1125      /* 3 or 4-component normalization */
1126      {
1127         uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1128
1129         if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1130             IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1131             IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1132             (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1133
1134            /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1135
1136            /* xmm4 = src.x */
1137            /* xmm0 = src.x * src.x */
1138            tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1139            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1140               tmp4 = tmp0;
1141            }
1142            tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1143
1144            /* xmm5 = src.y */
1145            /* xmm0 = xmm0 + src.y * src.y */
1146            tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1147            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1148               tmp5 = tmp1;
1149            }
1150            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1151            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1152
1153            /* xmm6 = src.z */
1154            /* xmm0 = xmm0 + src.z * src.z */
1155            tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1156            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1157               tmp6 = tmp1;
1158            }
1159            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1160            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1161
1162            if (dims == 4) {
1163               /* xmm7 = src.w */
1164               /* xmm0 = xmm0 + src.w * src.w */
1165               tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1166               if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1167                  tmp7 = tmp1;
1168               }
1169               tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1170               tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1171            }
1172
1173            /* xmm1 = 1 / sqrt(xmm0) */
1174            tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1175
1176            /* dst.x = xmm1 * src.x */
1177            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1178               dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1179            }
1180
1181            /* dst.y = xmm1 * src.y */
1182            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1183               dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1184            }
1185
1186            /* dst.z = xmm1 * src.z */
1187            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1188               dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1189            }
1190
1191            /* dst.w = xmm1 * src.w */
1192            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1193               dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1194            }
1195         }
1196
1197         /* dst.w = 1.0 */
1198         if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1199            dst0[CHAN_W] = bld->base.one;
1200         }
1201      }
1202      break;
1203
1204   case TGSI_OPCODE_DIV:
1205      /* deprecated */
1206      assert( 0 );
1207      return 0;
1208      break;
1209
1210   case TGSI_OPCODE_DP2:
1211      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );  /* xmm0 = src[0].x */
1212      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );  /* xmm1 = src[1].x */
1213      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
1214      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );  /* xmm1 = src[0].y */
1215      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );  /* xmm2 = src[1].y */
1216      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
1217      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1218      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1219         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
1220      }
1221      break;
1222
1223   case TGSI_OPCODE_TXL:
1224      emit_tex( bld, inst, TRUE, FALSE, dst0 );
1225      break;
1226
1227   case TGSI_OPCODE_TXP:
1228      emit_tex( bld, inst, FALSE, TRUE, dst0 );
1229      break;
1230
1231   case TGSI_OPCODE_BRK:
1232      /* FIXME */
1233      return 0;
1234      break;
1235
1236   case TGSI_OPCODE_IF:
1237      /* FIXME */
1238      return 0;
1239      break;
1240
1241   case TGSI_OPCODE_BGNFOR:
1242      /* deprecated */
1243      assert(0);
1244      return 0;
1245      break;
1246
1247   case TGSI_OPCODE_REP:
1248      /* deprecated */
1249      assert(0);
1250      return 0;
1251      break;
1252
1253   case TGSI_OPCODE_ELSE:
1254      /* FIXME */
1255      return 0;
1256      break;
1257
1258   case TGSI_OPCODE_ENDIF:
1259      /* FIXME */
1260      return 0;
1261      break;
1262
1263   case TGSI_OPCODE_ENDFOR:
1264      /* deprecated */
1265      assert(0);
1266      return 0;
1267      break;
1268
1269   case TGSI_OPCODE_ENDREP:
1270      /* deprecated */
1271      assert(0);
1272      return 0;
1273      break;
1274
1275   case TGSI_OPCODE_PUSHA:
1276      /* deprecated? */
1277      assert(0);
1278      return 0;
1279      break;
1280
1281   case TGSI_OPCODE_POPA:
1282      /* deprecated? */
1283      assert(0);
1284      return 0;
1285      break;
1286
1287   case TGSI_OPCODE_CEIL:
1288      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1289         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1290         dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1291      }
1292      break;
1293
1294   case TGSI_OPCODE_I2F:
1295      /* deprecated? */
1296      assert(0);
1297      return 0;
1298      break;
1299
1300   case TGSI_OPCODE_NOT:
1301      /* deprecated? */
1302      assert(0);
1303      return 0;
1304      break;
1305
1306   case TGSI_OPCODE_TRUNC:
1307      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1308         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1309         dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1310      }
1311      break;
1312
1313   case TGSI_OPCODE_SHL:
1314      /* deprecated? */
1315      assert(0);
1316      return 0;
1317      break;
1318
1319   case TGSI_OPCODE_ISHR:
1320      /* deprecated? */
1321      assert(0);
1322      return 0;
1323      break;
1324
1325   case TGSI_OPCODE_AND:
1326      /* deprecated? */
1327      assert(0);
1328      return 0;
1329      break;
1330
1331   case TGSI_OPCODE_OR:
1332      /* deprecated? */
1333      assert(0);
1334      return 0;
1335      break;
1336
1337   case TGSI_OPCODE_MOD:
1338      /* deprecated? */
1339      assert(0);
1340      return 0;
1341      break;
1342
1343   case TGSI_OPCODE_XOR:
1344      /* deprecated? */
1345      assert(0);
1346      return 0;
1347      break;
1348
1349   case TGSI_OPCODE_SAD:
1350      /* deprecated? */
1351      assert(0);
1352      return 0;
1353      break;
1354
1355   case TGSI_OPCODE_TXF:
1356      /* deprecated? */
1357      assert(0);
1358      return 0;
1359      break;
1360
1361   case TGSI_OPCODE_TXQ:
1362      /* deprecated? */
1363      assert(0);
1364      return 0;
1365      break;
1366
1367   case TGSI_OPCODE_CONT:
1368      /* deprecated? */
1369      assert(0);
1370      return 0;
1371      break;
1372
1373   case TGSI_OPCODE_EMIT:
1374      return 0;
1375      break;
1376
1377   case TGSI_OPCODE_ENDPRIM:
1378      return 0;
1379      break;
1380
1381   case TGSI_OPCODE_NOP:
1382      break;
1383
1384   default:
1385      return 0;
1386   }
1387
1388   if(info->num_dst) {
1389      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1390         emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
1391      }
1392   }
1393
1394   return 1;
1395}
1396
1397
1398void
1399lp_build_tgsi_soa(LLVMBuilderRef builder,
1400                  const struct tgsi_token *tokens,
1401                  struct lp_type type,
1402                  struct lp_build_mask_context *mask,
1403                  LLVMValueRef consts_ptr,
1404                  const LLVMValueRef *pos,
1405                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
1406                  LLVMValueRef (*outputs)[NUM_CHANNELS],
1407                  struct lp_build_sampler_soa *sampler)
1408{
1409   struct lp_build_tgsi_soa_context bld;
1410   struct tgsi_parse_context parse;
1411   uint num_immediates = 0;
1412   unsigned i;
1413
1414   /* Setup build context */
1415   memset(&bld, 0, sizeof bld);
1416   lp_build_context_init(&bld.base, builder, type);
1417   bld.mask = mask;
1418   bld.pos = pos;
1419   bld.inputs = inputs;
1420   bld.outputs = outputs;
1421   bld.consts_ptr = consts_ptr;
1422   bld.sampler = sampler;
1423
1424   tgsi_parse_init( &parse, tokens );
1425
1426   while( !tgsi_parse_end_of_tokens( &parse ) ) {
1427      tgsi_parse_token( &parse );
1428
1429      switch( parse.FullToken.Token.Type ) {
1430      case TGSI_TOKEN_TYPE_DECLARATION:
1431         /* Inputs already interpolated */
1432         break;
1433
1434      case TGSI_TOKEN_TYPE_INSTRUCTION:
1435         {
1436            unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1437            const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1438            if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info ))
1439               _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1440                             info ? info->mnemonic : "<invalid>");
1441         }
1442
1443         break;
1444
1445      case TGSI_TOKEN_TYPE_IMMEDIATE:
1446         /* simply copy the immediate values into the next immediates[] slot */
1447         {
1448            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1449            assert(size <= 4);
1450            assert(num_immediates < LP_MAX_IMMEDIATES);
1451            for( i = 0; i < size; ++i )
1452               bld.immediates[num_immediates][i] =
1453                  lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float);
1454            for( i = size; i < 4; ++i )
1455               bld.immediates[num_immediates][i] = bld.base.undef;
1456            num_immediates++;
1457         }
1458         break;
1459
1460      default:
1461         assert( 0 );
1462      }
1463   }
1464
1465   tgsi_parse_free( &parse );
1466}
1467
1468