lp_bld_tgsi_soa.c revision bba6a196bb69afc72a9ec56740a312987e77afc2
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29/**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39#include "pipe/p_config.h"
40#include "pipe/p_shader_tokens.h"
41#include "util/u_debug.h"
42#include "util/u_math.h"
43#include "util/u_memory.h"
44#include "tgsi/tgsi_dump.h"
45#include "tgsi/tgsi_info.h"
46#include "tgsi/tgsi_parse.h"
47#include "tgsi/tgsi_util.h"
48#include "tgsi/tgsi_exec.h"
49#include "lp_bld_type.h"
50#include "lp_bld_const.h"
51#include "lp_bld_arit.h"
52#include "lp_bld_logic.h"
53#include "lp_bld_swizzle.h"
54#include "lp_bld_flow.h"
55#include "lp_bld_tgsi.h"
56#include "lp_bld_debug.h"
57
58
/* Capacity of the temporary and immediate register tables. */
#define LP_MAX_TEMPS 256
#define LP_MAX_IMMEDIATES 256


/*
 * Iterate CHAN over every channel index in [0, NUM_CHANNELS).
 * CHAN must be an assignable integer lvalue; it is parenthesized so that
 * any lvalue expression can be passed safely.
 */
#define FOR_EACH_CHANNEL( CHAN )\
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)

/* Non-zero when bit CHAN is set in the write mask of INST's first destination. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

/*
 * Statement prefix: the following statement runs only when channel CHAN of
 * the first destination is enabled.  This expands to a bare `if`, so an
 * `else` written after the guarded statement binds to it.
 */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Loop prefix: visit only the channels enabled in the first destination. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* Lane index of each pixel within a quad. */
#define QUAD_TOP_LEFT     0
#define QUAD_TOP_RIGHT    1
#define QUAD_BOTTOM_LEFT  2
#define QUAD_BOTTOM_RIGHT 3

/* Depth of each control-flow stack in struct lp_exec_mask. */
#define LP_TGSI_MAX_NESTING 16
87
88struct lp_exec_mask {
89   struct lp_build_context *bld;
90
91   boolean has_mask;
92
93   LLVMTypeRef int_vec_type;
94
95   LLVMValueRef cond_stack[LP_TGSI_MAX_NESTING];
96   int cond_stack_size;
97   LLVMValueRef cond_mask;
98
99   LLVMValueRef break_stack[LP_TGSI_MAX_NESTING];
100   int break_stack_size;
101   LLVMValueRef break_mask;
102
103   LLVMValueRef cont_stack[LP_TGSI_MAX_NESTING];
104   int cont_stack_size;
105   LLVMValueRef cont_mask;
106
107   LLVMBasicBlockRef loop_stack[LP_TGSI_MAX_NESTING];
108   int loop_stack_size;
109   LLVMBasicBlockRef loop_block;
110
111
112   LLVMValueRef exec_mask;
113};
114
115struct lp_build_tgsi_soa_context
116{
117   struct lp_build_context base;
118
119   LLVMValueRef consts_ptr;
120   const LLVMValueRef *pos;
121   const LLVMValueRef (*inputs)[NUM_CHANNELS];
122   LLVMValueRef (*outputs)[NUM_CHANNELS];
123
124   struct lp_build_sampler_soa *sampler;
125
126   LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
127   LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
128
129   struct lp_build_mask_context *mask;
130   struct lp_exec_mask exec_mask;
131};
132
133static const unsigned char
134swizzle_left[4] = {
135   QUAD_TOP_LEFT,     QUAD_TOP_LEFT,
136   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_LEFT
137};
138
139static const unsigned char
140swizzle_right[4] = {
141   QUAD_TOP_RIGHT,    QUAD_TOP_RIGHT,
142   QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
143};
144
145static const unsigned char
146swizzle_top[4] = {
147   QUAD_TOP_LEFT,     QUAD_TOP_RIGHT,
148   QUAD_TOP_LEFT,     QUAD_TOP_RIGHT
149};
150
151static const unsigned char
152swizzle_bottom[4] = {
153   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_RIGHT,
154   QUAD_BOTTOM_LEFT,  QUAD_BOTTOM_RIGHT
155};
156
157static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
158{
159   mask->bld = bld;
160   mask->has_mask = FALSE;
161   mask->cond_stack_size = 0;
162   mask->loop_stack_size = 0;
163   mask->break_stack_size = 0;
164   mask->cont_stack_size = 0;
165
166   mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
167}
168
169static void lp_exec_mask_update(struct lp_exec_mask *mask)
170{
171   if (mask->loop_stack_size) {
172      /*for loops we need to update the entire mask at
173       * runtime */
174      LLVMValueRef tmp;
175      tmp = LLVMBuildAnd(mask->bld->builder,
176                         mask->cont_mask,
177                         mask->break_mask,
178                         "maskcb");
179      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
180                                     mask->cond_mask,
181                                     tmp,
182                                     "maskfull");
183   } else
184      mask->exec_mask = mask->cond_mask;
185
186
187   mask->has_mask = (mask->cond_stack_size > 0 ||
188                     mask->loop_stack_size > 0);
189}
190
191static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
192                                   LLVMValueRef val)
193{
194   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
195   mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val,
196                                      mask->int_vec_type, "");
197
198   lp_exec_mask_update(mask);
199}
200
201static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
202{
203   LLVMValueRef prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
204   LLVMValueRef inv_mask = LLVMBuildNot(mask->bld->builder,
205                                        mask->cond_mask, "");
206
207   /* means that we didn't have any mask before and that
208    * we were fully enabled */
209   if (mask->cond_stack_size <= 1) {
210      prev_mask = LLVMConstAllOnes(mask->int_vec_type);
211   }
212
213   mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
214                                  inv_mask,
215                                  prev_mask, "");
216   lp_exec_mask_update(mask);
217}
218
219static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
220{
221   mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
222   lp_exec_mask_update(mask);
223}
224
225static void lp_exec_bgnloop(struct lp_exec_mask *mask)
226{
227
228   if (mask->cont_stack_size == 0)
229      mask->cont_mask = LLVMConstAllOnes(mask->int_vec_type);
230   if (mask->break_stack_size == 0)
231      mask->break_mask = LLVMConstAllOnes(mask->int_vec_type);
232   if (mask->cond_stack_size == 0)
233      mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type);
234   mask->loop_stack[mask->loop_stack_size++] = mask->loop_block;
235   mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
236   LLVMBuildBr(mask->bld->builder, mask->loop_block);
237   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
238
239   lp_exec_mask_update(mask);
240}
241
242static void lp_exec_break(struct lp_exec_mask *mask)
243{
244   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
245                                         mask->exec_mask,
246                                         "break");
247
248   mask->break_stack[mask->break_stack_size++] = mask->break_mask;
249   if (mask->break_stack_size > 1) {
250      mask->break_mask = LLVMBuildAnd(mask->bld->builder,
251                                      mask->break_mask,
252                                      exec_mask, "break_full");
253   } else
254      mask->break_mask = exec_mask;
255
256   lp_exec_mask_update(mask);
257}
258
259static void lp_exec_continue(struct lp_exec_mask *mask)
260{
261   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
262                                         mask->exec_mask,
263                                         "");
264
265   mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask;
266   if (mask->cont_stack_size > 1) {
267      mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
268                                     mask->cont_mask,
269                                     exec_mask, "");
270   } else
271      mask->cont_mask = exec_mask;
272
273   lp_exec_mask_update(mask);
274}
275
276
277static void lp_exec_endloop(struct lp_exec_mask *mask)
278{
279   LLVMBasicBlockRef endloop;
280   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
281                                      mask->bld->type.length);
282   /* i1cond = (mask == 0) */
283   LLVMValueRef i1cond = LLVMBuildICmp(
284      mask->bld->builder,
285      LLVMIntNE,
286      LLVMBuildBitCast(mask->bld->builder, mask->break_mask, reg_type, ""),
287      LLVMConstNull(reg_type), "");
288
289   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");
290
291   LLVMBuildCondBr(mask->bld->builder,
292                   i1cond, mask->loop_block, endloop);
293
294   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);
295
296   mask->loop_block = mask->loop_stack[--mask->loop_stack_size];
297   /* pop the break mask */
298   if (mask->cont_stack_size) {
299      mask->cont_mask = mask->cont_stack[--mask->cont_stack_size];
300   }
301   if (mask->break_stack_size) {
302      mask->break_mask = mask->cont_stack[--mask->break_stack_size];
303   }
304
305   lp_exec_mask_update(mask);
306}
307
308static void lp_exec_mask_store(struct lp_exec_mask *mask,
309                               LLVMValueRef val,
310                               LLVMValueRef dst)
311{
312   if (mask->has_mask) {
313      LLVMValueRef real_val, dst_val;
314
315      dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
316      real_val = lp_build_select(mask->bld,
317                                 mask->exec_mask,
318                                 val, dst_val);
319
320      LLVMBuildStore(mask->bld->builder, real_val, dst);
321   } else
322      LLVMBuildStore(mask->bld->builder, val, dst);
323}
324
325
326static LLVMValueRef
327emit_ddx(struct lp_build_tgsi_soa_context *bld,
328         LLVMValueRef src)
329{
330   LLVMValueRef src_left  = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
331   LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
332   return lp_build_sub(&bld->base, src_right, src_left);
333}
334
335
336static LLVMValueRef
337emit_ddy(struct lp_build_tgsi_soa_context *bld,
338         LLVMValueRef src)
339{
340   LLVMValueRef src_top    = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
341   LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
342   return lp_build_sub(&bld->base, src_top, src_bottom);
343}
344
345
346/**
347 * Register fetch.
348 */
349static LLVMValueRef
350emit_fetch(
351   struct lp_build_tgsi_soa_context *bld,
352   const struct tgsi_full_instruction *inst,
353   unsigned index,
354   const unsigned chan_index )
355{
356   const struct tgsi_full_src_register *reg = &inst->Src[index];
357   unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
358   LLVMValueRef res;
359
360   switch (swizzle) {
361   case TGSI_SWIZZLE_X:
362   case TGSI_SWIZZLE_Y:
363   case TGSI_SWIZZLE_Z:
364   case TGSI_SWIZZLE_W:
365
366      switch (reg->Register.File) {
367      case TGSI_FILE_CONSTANT: {
368         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
369         LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
370         LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
371         res = lp_build_broadcast_scalar(&bld->base, scalar);
372         break;
373      }
374
375      case TGSI_FILE_IMMEDIATE:
376         res = bld->immediates[reg->Register.Index][swizzle];
377         assert(res);
378         break;
379
380      case TGSI_FILE_INPUT:
381         res = bld->inputs[reg->Register.Index][swizzle];
382         assert(res);
383         break;
384
385      case TGSI_FILE_TEMPORARY:
386         res = LLVMBuildLoad(bld->base.builder, bld->temps[reg->Register.Index][swizzle], "");
387         if(!res)
388            return bld->base.undef;
389         break;
390
391      default:
392         assert( 0 );
393         return bld->base.undef;
394      }
395      break;
396
397   default:
398      assert( 0 );
399      return bld->base.undef;
400   }
401
402   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
403   case TGSI_UTIL_SIGN_CLEAR:
404      res = lp_build_abs( &bld->base, res );
405      break;
406
407   case TGSI_UTIL_SIGN_SET:
408      /* TODO: Use bitwese OR for floating point */
409      res = lp_build_abs( &bld->base, res );
410      res = LLVMBuildNeg( bld->base.builder, res, "" );
411      break;
412
413   case TGSI_UTIL_SIGN_TOGGLE:
414      res = LLVMBuildNeg( bld->base.builder, res, "" );
415      break;
416
417   case TGSI_UTIL_SIGN_KEEP:
418      break;
419   }
420
421   return res;
422}
423
424
425/**
426 * Register fetch with derivatives.
427 */
428static void
429emit_fetch_deriv(
430   struct lp_build_tgsi_soa_context *bld,
431   const struct tgsi_full_instruction *inst,
432   unsigned index,
433   const unsigned chan_index,
434   LLVMValueRef *res,
435   LLVMValueRef *ddx,
436   LLVMValueRef *ddy)
437{
438   LLVMValueRef src;
439
440   src = emit_fetch(bld, inst, index, chan_index);
441
442   if(res)
443      *res = src;
444
445   /* TODO: use interpolation coeffs for inputs */
446
447   if(ddx)
448      *ddx = emit_ddx(bld, src);
449
450   if(ddy)
451      *ddy = emit_ddy(bld, src);
452}
453
454
455/**
456 * Register store.
457 */
458static void
459emit_store(
460   struct lp_build_tgsi_soa_context *bld,
461   const struct tgsi_full_instruction *inst,
462   unsigned index,
463   unsigned chan_index,
464   LLVMValueRef value)
465{
466   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
467
468   switch( inst->Instruction.Saturate ) {
469   case TGSI_SAT_NONE:
470      break;
471
472   case TGSI_SAT_ZERO_ONE:
473      value = lp_build_max(&bld->base, value, bld->base.zero);
474      value = lp_build_min(&bld->base, value, bld->base.one);
475      break;
476
477   case TGSI_SAT_MINUS_PLUS_ONE:
478      value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
479      value = lp_build_min(&bld->base, value, bld->base.one);
480      break;
481
482   default:
483      assert(0);
484   }
485
486   switch( reg->Register.File ) {
487   case TGSI_FILE_OUTPUT:
488      lp_exec_mask_store(&bld->exec_mask, value,
489                         bld->outputs[reg->Register.Index][chan_index]);
490      break;
491
492   case TGSI_FILE_TEMPORARY:
493      lp_exec_mask_store(&bld->exec_mask, value,
494                         bld->temps[reg->Register.Index][chan_index]);
495      break;
496
497   case TGSI_FILE_ADDRESS:
498      /* FIXME */
499      assert(0);
500      break;
501
502   case TGSI_FILE_PREDICATE:
503      /* FIXME */
504      assert(0);
505      break;
506
507   default:
508      assert( 0 );
509   }
510}
511
512
513/**
514 * High-level instruction translators.
515 */
516
517
518static void
519emit_tex( struct lp_build_tgsi_soa_context *bld,
520          const struct tgsi_full_instruction *inst,
521          boolean apply_lodbias,
522          boolean projected,
523          LLVMValueRef *texel)
524{
525   const uint unit = inst->Src[1].Register.Index;
526   LLVMValueRef lodbias;
527   LLVMValueRef oow = NULL;
528   LLVMValueRef coords[3];
529   unsigned num_coords;
530   unsigned i;
531
532   switch (inst->Texture.Texture) {
533   case TGSI_TEXTURE_1D:
534      num_coords = 1;
535      break;
536   case TGSI_TEXTURE_2D:
537   case TGSI_TEXTURE_RECT:
538      num_coords = 2;
539      break;
540   case TGSI_TEXTURE_SHADOW1D:
541   case TGSI_TEXTURE_SHADOW2D:
542   case TGSI_TEXTURE_SHADOWRECT:
543   case TGSI_TEXTURE_3D:
544   case TGSI_TEXTURE_CUBE:
545      num_coords = 3;
546      break;
547   default:
548      assert(0);
549      return;
550   }
551
552   if(apply_lodbias)
553      lodbias = emit_fetch( bld, inst, 0, 3 );
554   else
555      lodbias = bld->base.zero;
556
557   if (projected) {
558      oow = emit_fetch( bld, inst, 0, 3 );
559      oow = lp_build_rcp(&bld->base, oow);
560   }
561
562   for (i = 0; i < num_coords; i++) {
563      coords[i] = emit_fetch( bld, inst, 0, i );
564      if (projected)
565         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
566   }
567   for (i = num_coords; i < 3; i++) {
568      coords[i] = bld->base.undef;
569   }
570
571   bld->sampler->emit_fetch_texel(bld->sampler,
572                                  bld->base.builder,
573                                  bld->base.type,
574                                  unit, num_coords, coords, lodbias,
575                                  texel);
576}
577
578
579/**
580 * Kill fragment if any of the src register values are negative.
581 */
582static void
583emit_kil(
584   struct lp_build_tgsi_soa_context *bld,
585   const struct tgsi_full_instruction *inst )
586{
587   const struct tgsi_full_src_register *reg = &inst->Src[0];
588   LLVMValueRef terms[NUM_CHANNELS];
589   LLVMValueRef mask;
590   unsigned chan_index;
591
592   memset(&terms, 0, sizeof terms);
593
594   FOR_EACH_CHANNEL( chan_index ) {
595      unsigned swizzle;
596
597      /* Unswizzle channel */
598      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
599
600      /* Check if the component has not been already tested. */
601      assert(swizzle < NUM_CHANNELS);
602      if( !terms[swizzle] )
603         /* TODO: change the comparison operator instead of setting the sign */
604         terms[swizzle] =  emit_fetch(bld, inst, 0, chan_index );
605   }
606
607   mask = NULL;
608   FOR_EACH_CHANNEL( chan_index ) {
609      if(terms[chan_index]) {
610         LLVMValueRef chan_mask;
611
612         /*
613          * If term < 0 then mask = 0 else mask = ~0.
614          */
615         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
616
617         if(mask)
618            mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
619         else
620            mask = chan_mask;
621      }
622   }
623
624   if(mask)
625      lp_build_mask_update(bld->mask, mask);
626}
627
628
629/**
630 * Predicated fragment kill.
631 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
632 * The only predication is the execution mask which will apply if
633 * we're inside a loop or conditional.
634 */
635static void
636emit_kilp(struct lp_build_tgsi_soa_context *bld,
637          const struct tgsi_full_instruction *inst)
638{
639   LLVMValueRef mask;
640
641   /* For those channels which are "alive", disable fragment shader
642    * execution.
643    */
644   if (bld->exec_mask.has_mask) {
645      mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
646   }
647   else {
648      mask = bld->base.zero;
649   }
650
651   lp_build_mask_update(bld->mask, mask);
652}
653
654
655/**
656 * Check if inst src/dest regs use indirect addressing into temporary
657 * register file.
658 */
659static boolean
660indirect_temp_reference(const struct tgsi_full_instruction *inst)
661{
662   uint i;
663   for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
664      const struct tgsi_full_src_register *reg = &inst->Src[i];
665      if (reg->Register.File == TGSI_FILE_TEMPORARY &&
666          reg->Register.Indirect)
667         return TRUE;
668   }
669   for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
670      const struct tgsi_full_dst_register *reg = &inst->Dst[i];
671      if (reg->Register.File == TGSI_FILE_TEMPORARY &&
672          reg->Register.Indirect)
673         return TRUE;
674   }
675   return FALSE;
676}
677
678static int
679emit_declaration(
680   struct lp_build_tgsi_soa_context *bld,
681   const struct tgsi_full_declaration *decl)
682{
683   unsigned first = decl->Range.First;
684   unsigned last = decl->Range.Last;
685   unsigned idx, i;
686   LLVMBasicBlockRef current_block =
687      LLVMGetInsertBlock(bld->base.builder);
688   LLVMBasicBlockRef first_block =
689      LLVMGetEntryBasicBlock(
690         LLVMGetBasicBlockParent(current_block));
691   LLVMValueRef first_inst =
692      LLVMGetFirstInstruction(first_block);
693
694   /* we want alloca's to be the first instruction
695    * in the function so we need to rewind the builder
696    * to the very beginning */
697   LLVMPositionBuilderBefore(bld->base.builder,
698                             first_inst);
699
700   for (idx = first; idx <= last; ++idx) {
701      switch (decl->Declaration.File) {
702      case TGSI_FILE_TEMPORARY:
703         for (i = 0; i < NUM_CHANNELS; i++)
704            bld->temps[idx][i] = lp_build_alloca(&bld->base);
705         break;
706
707      case TGSI_FILE_OUTPUT:
708         for (i = 0; i < NUM_CHANNELS; i++)
709            bld->outputs[idx][i] = lp_build_alloca(&bld->base);
710         break;
711
712      default:
713         /* don't need to declare other vars */
714         break;
715      }
716   }
717
718   LLVMPositionBuilderAtEnd(bld->base.builder,
719                            current_block);
720   return TRUE;
721}
722
723
724/**
725 * Emit LLVM for one TGSI instruction.
726 * \param return TRUE for success, FALSE otherwise
727 */
728static boolean
729emit_instruction(
730   struct lp_build_tgsi_soa_context *bld,
731   const struct tgsi_full_instruction *inst,
732   const struct tgsi_opcode_info *info)
733{
734   unsigned chan_index;
735   LLVMValueRef src0, src1, src2;
736   LLVMValueRef tmp0, tmp1, tmp2;
737   LLVMValueRef tmp3 = NULL;
738   LLVMValueRef tmp4 = NULL;
739   LLVMValueRef tmp5 = NULL;
740   LLVMValueRef tmp6 = NULL;
741   LLVMValueRef tmp7 = NULL;
742   LLVMValueRef res;
743   LLVMValueRef dst0[NUM_CHANNELS];
744
745   /* we can't handle indirect addressing into temp register file yet */
746   if (indirect_temp_reference(inst))
747      return FALSE;
748
749   /*
750    * Stores and write masks are handled in a general fashion after the long
751    * instruction opcode switch statement.
752    *
753    * Although not stricitly necessary, we avoid generating instructions for
754    * channels which won't be stored, in cases where's that easy. For some
755    * complex instructions, like texture sampling, it is more convenient to
756    * assume a full writemask and then let LLVM optimization passes eliminate
757    * redundant code.
758    */
759
760   assert(info->num_dst <= 1);
761   if(info->num_dst) {
762      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
763         dst0[chan_index] = bld->base.undef;
764      }
765   }
766
767   switch (inst->Instruction.Opcode) {
768#if 0
769   case TGSI_OPCODE_ARL:
770      /* FIXME */
771      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
772         tmp0 = emit_fetch( bld, inst, 0, chan_index );
773         emit_flr(bld, 0, 0);
774         emit_f2it( bld, 0 );
775         dst0[chan_index] = tmp0;
776      }
777      break;
778#endif
779
780   case TGSI_OPCODE_MOV:
781      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
782         dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
783      }
784      break;
785
786   case TGSI_OPCODE_LIT:
787      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
788         dst0[CHAN_X] = bld->base.one;
789      }
790      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
791         src0 = emit_fetch( bld, inst, 0, CHAN_X );
792         dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
793      }
794      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
795         /* XMM[1] = SrcReg[0].yyyy */
796         tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
797         /* XMM[1] = max(XMM[1], 0) */
798         tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
799         /* XMM[2] = SrcReg[0].wwww */
800         tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
801         tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
802         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
803         tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
804         dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
805      }
806      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
807         dst0[CHAN_W] = bld->base.one;
808      }
809      break;
810
811   case TGSI_OPCODE_RCP:
812   /* TGSI_OPCODE_RECIP */
813      src0 = emit_fetch( bld, inst, 0, CHAN_X );
814      res = lp_build_rcp(&bld->base, src0);
815      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
816         dst0[chan_index] = res;
817      }
818      break;
819
820   case TGSI_OPCODE_RSQ:
821   /* TGSI_OPCODE_RECIPSQRT */
822      src0 = emit_fetch( bld, inst, 0, CHAN_X );
823      src0 = lp_build_abs(&bld->base, src0);
824      res = lp_build_rsqrt(&bld->base, src0);
825      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
826         dst0[chan_index] = res;
827      }
828      break;
829
830   case TGSI_OPCODE_EXP:
831      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
832          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
833          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
834         LLVMValueRef *p_exp2_int_part = NULL;
835         LLVMValueRef *p_frac_part = NULL;
836         LLVMValueRef *p_exp2 = NULL;
837
838         src0 = emit_fetch( bld, inst, 0, CHAN_X );
839
840         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
841            p_exp2_int_part = &tmp0;
842         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
843            p_frac_part = &tmp1;
844         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
845            p_exp2 = &tmp2;
846
847         lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
848
849         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
850            dst0[CHAN_X] = tmp0;
851         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
852            dst0[CHAN_Y] = tmp1;
853         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
854            dst0[CHAN_Z] = tmp2;
855      }
856      /* dst.w = 1.0 */
857      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
858         dst0[CHAN_W] = bld->base.one;
859      }
860      break;
861
862   case TGSI_OPCODE_LOG:
863      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
864          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
865          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
866         LLVMValueRef *p_floor_log2 = NULL;
867         LLVMValueRef *p_exp = NULL;
868         LLVMValueRef *p_log2 = NULL;
869
870         src0 = emit_fetch( bld, inst, 0, CHAN_X );
871         src0 = lp_build_abs( &bld->base, src0 );
872
873         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
874            p_floor_log2 = &tmp0;
875         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
876            p_exp = &tmp1;
877         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
878            p_log2 = &tmp2;
879
880         lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
881
882         /* dst.x = floor(lg2(abs(src.x))) */
883         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
884            dst0[CHAN_X] = tmp0;
885         /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
886         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
887            dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
888         }
889         /* dst.z = lg2(abs(src.x)) */
890         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
891            dst0[CHAN_Z] = tmp2;
892      }
893      /* dst.w = 1.0 */
894      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
895         dst0[CHAN_W] = bld->base.one;
896      }
897      break;
898
899   case TGSI_OPCODE_MUL:
900      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
901         src0 = emit_fetch( bld, inst, 0, chan_index );
902         src1 = emit_fetch( bld, inst, 1, chan_index );
903         dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
904      }
905      break;
906
907   case TGSI_OPCODE_ADD:
908      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
909         src0 = emit_fetch( bld, inst, 0, chan_index );
910         src1 = emit_fetch( bld, inst, 1, chan_index );
911         dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
912      }
913      break;
914
915   case TGSI_OPCODE_DP3:
916   /* TGSI_OPCODE_DOT3 */
917      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
918      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
919      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
920      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
921      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
922      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
923      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
924      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
925      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
926      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
927      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
928      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
929         dst0[chan_index] = tmp0;
930      }
931      break;
932
933   case TGSI_OPCODE_DP4:
934   /* TGSI_OPCODE_DOT4 */
935      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
936      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
937      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
938      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
939      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
940      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
941      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
942      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
943      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
944      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
945      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
946      tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
947      tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
948      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
949      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
950      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
951         dst0[chan_index] = tmp0;
952      }
953      break;
954
955   case TGSI_OPCODE_DST:
956      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
957         dst0[CHAN_X] = bld->base.one;
958      }
959      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
960         tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
961         tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
962         dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
963      }
964      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
965         dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
966      }
967      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
968         dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
969      }
970      break;
971
972   case TGSI_OPCODE_MIN:
973      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
974         src0 = emit_fetch( bld, inst, 0, chan_index );
975         src1 = emit_fetch( bld, inst, 1, chan_index );
976         dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
977      }
978      break;
979
980   case TGSI_OPCODE_MAX:
981      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
982         src0 = emit_fetch( bld, inst, 0, chan_index );
983         src1 = emit_fetch( bld, inst, 1, chan_index );
984         dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
985      }
986      break;
987
988   case TGSI_OPCODE_SLT:
989   /* TGSI_OPCODE_SETLT */
990      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
991         src0 = emit_fetch( bld, inst, 0, chan_index );
992         src1 = emit_fetch( bld, inst, 1, chan_index );
993         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
994         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
995      }
996      break;
997
998   case TGSI_OPCODE_SGE:
999   /* TGSI_OPCODE_SETGE */
1000      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1001         src0 = emit_fetch( bld, inst, 0, chan_index );
1002         src1 = emit_fetch( bld, inst, 1, chan_index );
1003         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1004         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1005      }
1006      break;
1007
1008   case TGSI_OPCODE_MAD:
1009   /* TGSI_OPCODE_MADD */
1010      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1011         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1012         tmp1 = emit_fetch( bld, inst, 1, chan_index );
1013         tmp2 = emit_fetch( bld, inst, 2, chan_index );
1014         tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1015         tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1016         dst0[chan_index] = tmp0;
1017      }
1018      break;
1019
1020   case TGSI_OPCODE_SUB:
1021      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1022         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1023         tmp1 = emit_fetch( bld, inst, 1, chan_index );
1024         dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1025      }
1026      break;
1027
1028   case TGSI_OPCODE_LRP:
1029      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1030         src0 = emit_fetch( bld, inst, 0, chan_index );
1031         src1 = emit_fetch( bld, inst, 1, chan_index );
1032         src2 = emit_fetch( bld, inst, 2, chan_index );
1033         tmp0 = lp_build_sub( &bld->base, src1, src2 );
1034         tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1035         dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1036      }
1037      break;
1038
1039   case TGSI_OPCODE_CND:
1040      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1041         src0 = emit_fetch( bld, inst, 0, chan_index );
1042         src1 = emit_fetch( bld, inst, 1, chan_index );
1043         src2 = emit_fetch( bld, inst, 2, chan_index );
1044         tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1045         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1046         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1047      }
1048      break;
1049
1050   case TGSI_OPCODE_DP2A:
1051      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );  /* xmm0 = src[0].x */
1052      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );  /* xmm1 = src[1].x */
1053      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
1054      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );  /* xmm1 = src[0].y */
1055      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );  /* xmm2 = src[1].y */
1056      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
1057      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1058      tmp1 = emit_fetch( bld, inst, 2, CHAN_X );  /* xmm1 = src[2].x */
1059      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1060      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1061         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
1062      }
1063      break;
1064
1065   case TGSI_OPCODE_FRC:
1066      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1067         src0 = emit_fetch( bld, inst, 0, chan_index );
1068         tmp0 = lp_build_floor(&bld->base, src0);
1069         tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1070         dst0[chan_index] = tmp0;
1071      }
1072      break;
1073
1074   case TGSI_OPCODE_CLAMP:
1075      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1076         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1077         src1 = emit_fetch( bld, inst, 1, chan_index );
1078         src2 = emit_fetch( bld, inst, 2, chan_index );
1079         tmp0 = lp_build_max(&bld->base, tmp0, src1);
1080         tmp0 = lp_build_min(&bld->base, tmp0, src2);
1081         dst0[chan_index] = tmp0;
1082      }
1083      break;
1084
1085   case TGSI_OPCODE_FLR:
1086      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1087         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1088         dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1089      }
1090      break;
1091
1092   case TGSI_OPCODE_ROUND:
1093      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1094         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1095         dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1096      }
1097      break;
1098
1099   case TGSI_OPCODE_EX2: {
1100      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1101      tmp0 = lp_build_exp2( &bld->base, tmp0);
1102      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1103         dst0[chan_index] = tmp0;
1104      }
1105      break;
1106   }
1107
1108   case TGSI_OPCODE_LG2:
1109      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1110      tmp0 = lp_build_log2( &bld->base, tmp0);
1111      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1112         dst0[chan_index] = tmp0;
1113      }
1114      break;
1115
1116   case TGSI_OPCODE_POW:
1117      src0 = emit_fetch( bld, inst, 0, CHAN_X );
1118      src1 = emit_fetch( bld, inst, 1, CHAN_X );
1119      res = lp_build_pow( &bld->base, src0, src1 );
1120      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1121         dst0[chan_index] = res;
1122      }
1123      break;
1124
1125   case TGSI_OPCODE_XPD:
1126      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1127          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1128         tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1129         tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1130      }
1131      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1132          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1133         tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1134         tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1135      }
1136      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1137         tmp2 = tmp0;
1138         tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1139         tmp5 = tmp3;
1140         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1141         tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1142         dst0[CHAN_X] = tmp2;
1143      }
1144      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1145          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1146         tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1147         tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1148      }
1149      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1150         tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1151         tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1152         tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1153         dst0[CHAN_Y] = tmp3;
1154      }
1155      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1156         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1157         tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1158         tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1159         dst0[CHAN_Z] = tmp5;
1160      }
1161      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1162         dst0[CHAN_W] = bld->base.one;
1163      }
1164      break;
1165
1166   case TGSI_OPCODE_ABS:
1167      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1168         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1169         dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1170      }
1171      break;
1172
1173   case TGSI_OPCODE_RCC:
1174      /* deprecated? */
1175      assert(0);
1176      return FALSE;
1177
1178   case TGSI_OPCODE_DPH:
1179      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1180      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1181      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1182      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1183      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1184      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1185      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1186      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1187      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1188      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1189      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1190      tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1191      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1192      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1193         dst0[chan_index] = tmp0;
1194      }
1195      break;
1196
1197   case TGSI_OPCODE_COS:
1198      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1199      tmp0 = lp_build_cos( &bld->base, tmp0 );
1200      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1201         dst0[chan_index] = tmp0;
1202      }
1203      break;
1204
1205   case TGSI_OPCODE_DDX:
1206      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1207         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1208      }
1209      break;
1210
1211   case TGSI_OPCODE_DDY:
1212      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1213         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1214      }
1215      break;
1216
1217   case TGSI_OPCODE_KILP:
1218      /* predicated kill */
1219      emit_kilp( bld, inst );
1220      break;
1221
1222   case TGSI_OPCODE_KIL:
1223      /* conditional kill */
1224      emit_kil( bld, inst );
1225      break;
1226
1227   case TGSI_OPCODE_PK2H:
1228      return FALSE;
1229      break;
1230
1231   case TGSI_OPCODE_PK2US:
1232      return FALSE;
1233      break;
1234
1235   case TGSI_OPCODE_PK4B:
1236      return FALSE;
1237      break;
1238
1239   case TGSI_OPCODE_PK4UB:
1240      return FALSE;
1241      break;
1242
1243   case TGSI_OPCODE_RFL:
1244      return FALSE;
1245      break;
1246
1247   case TGSI_OPCODE_SEQ:
1248      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1249         src0 = emit_fetch( bld, inst, 0, chan_index );
1250         src1 = emit_fetch( bld, inst, 1, chan_index );
1251         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1252         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1253      }
1254      break;
1255
1256   case TGSI_OPCODE_SFL:
1257      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1258         dst0[chan_index] = bld->base.zero;
1259      }
1260      break;
1261
1262   case TGSI_OPCODE_SGT:
1263      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1264         src0 = emit_fetch( bld, inst, 0, chan_index );
1265         src1 = emit_fetch( bld, inst, 1, chan_index );
1266         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1267         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1268      }
1269      break;
1270
1271   case TGSI_OPCODE_SIN:
1272      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1273      tmp0 = lp_build_sin( &bld->base, tmp0 );
1274      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1275         dst0[chan_index] = tmp0;
1276      }
1277      break;
1278
1279   case TGSI_OPCODE_SLE:
1280      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1281         src0 = emit_fetch( bld, inst, 0, chan_index );
1282         src1 = emit_fetch( bld, inst, 1, chan_index );
1283         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1284         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1285      }
1286      break;
1287
1288   case TGSI_OPCODE_SNE:
1289      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1290         src0 = emit_fetch( bld, inst, 0, chan_index );
1291         src1 = emit_fetch( bld, inst, 1, chan_index );
1292         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1293         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1294      }
1295      break;
1296
1297   case TGSI_OPCODE_STR:
1298      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1299         dst0[chan_index] = bld->base.one;
1300      }
1301      break;
1302
1303   case TGSI_OPCODE_TEX:
1304      emit_tex( bld, inst, FALSE, FALSE, dst0 );
1305      break;
1306
1307   case TGSI_OPCODE_TXD:
1308      /* FIXME */
1309      return FALSE;
1310      break;
1311
1312   case TGSI_OPCODE_UP2H:
1313      /* deprecated */
1314      assert (0);
1315      return FALSE;
1316      break;
1317
1318   case TGSI_OPCODE_UP2US:
1319      /* deprecated */
1320      assert(0);
1321      return FALSE;
1322      break;
1323
1324   case TGSI_OPCODE_UP4B:
1325      /* deprecated */
1326      assert(0);
1327      return FALSE;
1328      break;
1329
1330   case TGSI_OPCODE_UP4UB:
1331      /* deprecated */
1332      assert(0);
1333      return FALSE;
1334      break;
1335
1336   case TGSI_OPCODE_X2D:
1337      /* deprecated? */
1338      assert(0);
1339      return FALSE;
1340      break;
1341
1342   case TGSI_OPCODE_ARA:
1343      /* deprecated */
1344      assert(0);
1345      return FALSE;
1346      break;
1347
1348#if 0
1349   case TGSI_OPCODE_ARR:
1350      /* FIXME */
1351      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1352         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1353         emit_rnd( bld, 0, 0 );
1354         emit_f2it( bld, 0 );
1355         dst0[chan_index] = tmp0;
1356      }
1357      break;
1358#endif
1359
1360   case TGSI_OPCODE_BRA:
1361      /* deprecated */
1362      assert(0);
1363      return FALSE;
1364      break;
1365
1366   case TGSI_OPCODE_CAL:
1367      /* FIXME */
1368      return FALSE;
1369      break;
1370
1371   case TGSI_OPCODE_RET:
1372      /* FIXME */
1373      return FALSE;
1374      break;
1375
1376   case TGSI_OPCODE_END:
1377      break;
1378
1379   case TGSI_OPCODE_SSG:
1380   /* TGSI_OPCODE_SGN */
1381      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1382         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1383         dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1384      }
1385      break;
1386
1387   case TGSI_OPCODE_CMP:
1388      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1389         src0 = emit_fetch( bld, inst, 0, chan_index );
1390         src1 = emit_fetch( bld, inst, 1, chan_index );
1391         src2 = emit_fetch( bld, inst, 2, chan_index );
1392         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1393         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1394      }
1395      break;
1396
1397   case TGSI_OPCODE_SCS:
1398      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1399         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1400         dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1401      }
1402      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1403         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1404         dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1405      }
1406      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1407         dst0[CHAN_Z] = bld->base.zero;
1408      }
1409      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1410         dst0[CHAN_W] = bld->base.one;
1411      }
1412      break;
1413
1414   case TGSI_OPCODE_TXB:
1415      emit_tex( bld, inst, TRUE, FALSE, dst0 );
1416      break;
1417
1418   case TGSI_OPCODE_NRM:
1419      /* fall-through */
1420   case TGSI_OPCODE_NRM4:
1421      /* 3 or 4-component normalization */
1422      {
1423         uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1424
1425         if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1426             IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1427             IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1428             (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1429
1430            /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1431
1432            /* xmm4 = src.x */
1433            /* xmm0 = src.x * src.x */
1434            tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1435            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1436               tmp4 = tmp0;
1437            }
1438            tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
1439
1440            /* xmm5 = src.y */
1441            /* xmm0 = xmm0 + src.y * src.y */
1442            tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
1443            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1444               tmp5 = tmp1;
1445            }
1446            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1447            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1448
1449            /* xmm6 = src.z */
1450            /* xmm0 = xmm0 + src.z * src.z */
1451            tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
1452            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1453               tmp6 = tmp1;
1454            }
1455            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1456            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1457
1458            if (dims == 4) {
1459               /* xmm7 = src.w */
1460               /* xmm0 = xmm0 + src.w * src.w */
1461               tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
1462               if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
1463                  tmp7 = tmp1;
1464               }
1465               tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
1466               tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1467            }
1468
1469            /* xmm1 = 1 / sqrt(xmm0) */
1470            tmp1 = lp_build_rsqrt( &bld->base, tmp0);
1471
1472            /* dst.x = xmm1 * src.x */
1473            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
1474               dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
1475            }
1476
1477            /* dst.y = xmm1 * src.y */
1478            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
1479               dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
1480            }
1481
1482            /* dst.z = xmm1 * src.z */
1483            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
1484               dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
1485            }
1486
1487            /* dst.w = xmm1 * src.w */
1488            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
1489               dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
1490            }
1491         }
1492
1493         /* dst.w = 1.0 */
1494         if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
1495            dst0[CHAN_W] = bld->base.one;
1496         }
1497      }
1498      break;
1499
1500   case TGSI_OPCODE_DIV:
1501      /* deprecated */
1502      assert( 0 );
1503      return FALSE;
1504      break;
1505
1506   case TGSI_OPCODE_DP2:
1507      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );  /* xmm0 = src[0].x */
1508      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );  /* xmm1 = src[1].x */
1509      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
1510      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );  /* xmm1 = src[0].y */
1511      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );  /* xmm2 = src[1].y */
1512      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
1513      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1514      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1515         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
1516      }
1517      break;
1518
1519   case TGSI_OPCODE_TXL:
1520      emit_tex( bld, inst, TRUE, FALSE, dst0 );
1521      break;
1522
1523   case TGSI_OPCODE_TXP:
1524      emit_tex( bld, inst, FALSE, TRUE, dst0 );
1525      break;
1526
1527   case TGSI_OPCODE_BRK:
1528      lp_exec_break(&bld->exec_mask);
1529      break;
1530
1531   case TGSI_OPCODE_IF:
1532      tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1533      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
1534                          tmp0, bld->base.zero);
1535      lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
1536      break;
1537
1538   case TGSI_OPCODE_BGNFOR:
1539      /* deprecated */
1540      assert(0);
1541      return FALSE;
1542      break;
1543
1544   case TGSI_OPCODE_BGNLOOP:
1545      lp_exec_bgnloop(&bld->exec_mask);
1546      break;
1547
1548   case TGSI_OPCODE_REP:
1549      /* deprecated */
1550      assert(0);
1551      return FALSE;
1552      break;
1553
1554   case TGSI_OPCODE_ELSE:
1555      lp_exec_mask_cond_invert(&bld->exec_mask);
1556      break;
1557
1558   case TGSI_OPCODE_ENDIF:
1559      lp_exec_mask_cond_pop(&bld->exec_mask);
1560      break;
1561
1562   case TGSI_OPCODE_ENDFOR:
1563      /* deprecated */
1564      assert(0);
1565      return FALSE;
1566      break;
1567
1568   case TGSI_OPCODE_ENDLOOP:
1569      lp_exec_endloop(&bld->exec_mask);
1570      break;
1571
1572   case TGSI_OPCODE_ENDREP:
1573      /* deprecated */
1574      assert(0);
1575      return FALSE;
1576      break;
1577
1578   case TGSI_OPCODE_PUSHA:
1579      /* deprecated? */
1580      assert(0);
1581      return FALSE;
1582      break;
1583
1584   case TGSI_OPCODE_POPA:
1585      /* deprecated? */
1586      assert(0);
1587      return FALSE;
1588      break;
1589
1590   case TGSI_OPCODE_CEIL:
1591      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1592         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1593         dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
1594      }
1595      break;
1596
1597   case TGSI_OPCODE_I2F:
1598      /* deprecated? */
1599      assert(0);
1600      return FALSE;
1601      break;
1602
1603   case TGSI_OPCODE_NOT:
1604      /* deprecated? */
1605      assert(0);
1606      return FALSE;
1607      break;
1608
1609   case TGSI_OPCODE_TRUNC:
1610      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1611         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1612         dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
1613      }
1614      break;
1615
1616   case TGSI_OPCODE_SHL:
1617      /* deprecated? */
1618      assert(0);
1619      return FALSE;
1620      break;
1621
1622   case TGSI_OPCODE_ISHR:
1623      /* deprecated? */
1624      assert(0);
1625      return FALSE;
1626      break;
1627
1628   case TGSI_OPCODE_AND:
1629      /* deprecated? */
1630      assert(0);
1631      return FALSE;
1632      break;
1633
1634   case TGSI_OPCODE_OR:
1635      /* deprecated? */
1636      assert(0);
1637      return FALSE;
1638      break;
1639
1640   case TGSI_OPCODE_MOD:
1641      /* deprecated? */
1642      assert(0);
1643      return FALSE;
1644      break;
1645
1646   case TGSI_OPCODE_XOR:
1647      /* deprecated? */
1648      assert(0);
1649      return FALSE;
1650      break;
1651
1652   case TGSI_OPCODE_SAD:
1653      /* deprecated? */
1654      assert(0);
1655      return FALSE;
1656      break;
1657
1658   case TGSI_OPCODE_TXF:
1659      /* deprecated? */
1660      assert(0);
1661      return FALSE;
1662      break;
1663
1664   case TGSI_OPCODE_TXQ:
1665      /* deprecated? */
1666      assert(0);
1667      return FALSE;
1668      break;
1669
1670   case TGSI_OPCODE_CONT:
1671      lp_exec_continue(&bld->exec_mask);
1672      break;
1673
1674   case TGSI_OPCODE_EMIT:
1675      return FALSE;
1676      break;
1677
1678   case TGSI_OPCODE_ENDPRIM:
1679      return FALSE;
1680      break;
1681
1682   case TGSI_OPCODE_NOP:
1683      break;
1684
1685   default:
1686      return FALSE;
1687   }
1688
1689   if(info->num_dst) {
1690      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1691         emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
1692      }
1693   }
1694
1695   return TRUE;
1696}
1697
1698
1699void
1700lp_build_tgsi_soa(LLVMBuilderRef builder,
1701                  const struct tgsi_token *tokens,
1702                  struct lp_type type,
1703                  struct lp_build_mask_context *mask,
1704                  LLVMValueRef consts_ptr,
1705                  const LLVMValueRef *pos,
1706                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
1707                  LLVMValueRef (*outputs)[NUM_CHANNELS],
1708                  struct lp_build_sampler_soa *sampler)
1709{
1710   struct lp_build_tgsi_soa_context bld;
1711   struct tgsi_parse_context parse;
1712   uint num_immediates = 0;
1713   unsigned i;
1714
1715   /* Setup build context */
1716   memset(&bld, 0, sizeof bld);
1717   lp_build_context_init(&bld.base, builder, type);
1718   bld.mask = mask;
1719   bld.pos = pos;
1720   bld.inputs = inputs;
1721   bld.outputs = outputs;
1722   bld.consts_ptr = consts_ptr;
1723   bld.sampler = sampler;
1724
1725   lp_exec_mask_init(&bld.exec_mask, &bld.base);
1726
1727   tgsi_parse_init( &parse, tokens );
1728
1729   while( !tgsi_parse_end_of_tokens( &parse ) ) {
1730      tgsi_parse_token( &parse );
1731
1732      switch( parse.FullToken.Token.Type ) {
1733      case TGSI_TOKEN_TYPE_DECLARATION:
1734         /* Inputs already interpolated */
1735         {
1736            if (!emit_declaration( &bld, &parse.FullToken.FullDeclaration ))
1737               _debug_printf("warning: failed to define LLVM variable\n");
1738         }
1739         break;
1740
1741      case TGSI_TOKEN_TYPE_INSTRUCTION:
1742         {
1743            unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
1744            const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
1745            if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info ))
1746               _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1747                             info ? info->mnemonic : "<invalid>");
1748         }
1749
1750         break;
1751
1752      case TGSI_TOKEN_TYPE_IMMEDIATE:
1753         /* simply copy the immediate values into the next immediates[] slot */
1754         {
1755            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1756            assert(size <= 4);
1757            assert(num_immediates < LP_MAX_IMMEDIATES);
1758            for( i = 0; i < size; ++i )
1759               bld.immediates[num_immediates][i] =
1760                  lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
1761            for( i = size; i < 4; ++i )
1762               bld.immediates[num_immediates][i] = bld.base.undef;
1763            num_immediates++;
1764         }
1765         break;
1766
1767      case TGSI_TOKEN_TYPE_PROPERTY:
1768         break;
1769
1770      default:
1771         assert( 0 );
1772      }
1773   }
1774   if (0) {
1775      LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
1776      LLVMValueRef function = LLVMGetBasicBlockParent(block);
1777      debug_printf("11111111111111111111111111111 \n");
1778      tgsi_dump(tokens, 0);
1779      LLVMDumpValue(function);
1780      debug_printf("2222222222222222222222222222 \n");
1781   }
1782   tgsi_parse_free( &parse );
1783}
1784
1785