lp_bld_tgsi_soa.c revision 10740acf46e08960dde790005d65a98440f313bc
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29/**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39#include "pipe/p_config.h"
40#include "pipe/p_shader_tokens.h"
41#include "util/u_debug.h"
42#include "util/u_math.h"
43#include "util/u_memory.h"
44#include "tgsi/tgsi_dump.h"
45#include "tgsi/tgsi_info.h"
46#include "tgsi/tgsi_parse.h"
47#include "tgsi/tgsi_util.h"
48#include "tgsi/tgsi_scan.h"
49#include "lp_bld_type.h"
50#include "lp_bld_const.h"
51#include "lp_bld_arit.h"
52#include "lp_bld_bitarit.h"
53#include "lp_bld_gather.h"
54#include "lp_bld_logic.h"
55#include "lp_bld_swizzle.h"
56#include "lp_bld_flow.h"
57#include "lp_bld_quad.h"
58#include "lp_bld_tgsi.h"
59#include "lp_bld_limits.h"
60#include "lp_bld_debug.h"
61#include "lp_bld_printf.h"
62
63
64#define FOR_EACH_CHANNEL( CHAN )\
65   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
66
67#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
68   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))
69
70#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
71   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
72
73#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
74   FOR_EACH_CHANNEL( CHAN )\
75      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
76
77#define CHAN_X 0
78#define CHAN_Y 1
79#define CHAN_Z 2
80#define CHAN_W 3
81#define NUM_CHANNELS 4
82
83#define LP_MAX_INSTRUCTIONS 256
84
85
86struct lp_exec_mask {
87   struct lp_build_context *bld;
88
89   boolean has_mask;
90
91   LLVMTypeRef int_vec_type;
92
93   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
94   int cond_stack_size;
95   LLVMValueRef cond_mask;
96
97   LLVMBasicBlockRef loop_block;
98   LLVMValueRef cont_mask;
99   LLVMValueRef break_mask;
100   LLVMValueRef break_var;
101   struct {
102      LLVMBasicBlockRef loop_block;
103      LLVMValueRef cont_mask;
104      LLVMValueRef break_mask;
105      LLVMValueRef break_var;
106   } loop_stack[LP_MAX_TGSI_NESTING];
107   int loop_stack_size;
108
109   LLVMValueRef ret_mask;
110   struct {
111      int pc;
112      LLVMValueRef ret_mask;
113   } call_stack[LP_MAX_TGSI_NESTING];
114   int call_stack_size;
115
116   LLVMValueRef exec_mask;
117};
118
119struct lp_build_tgsi_soa_context
120{
121   struct lp_build_context base;
122
123   /* Builder for vector integer masks and indices */
124   struct lp_build_context uint_bld;
125
126   /* Builder for scalar elements of shader's data type (float) */
127   struct lp_build_context elem_bld;
128
129   LLVMValueRef consts_ptr;
130   const LLVMValueRef *pos;
131   const LLVMValueRef (*inputs)[NUM_CHANNELS];
132   LLVMValueRef (*outputs)[NUM_CHANNELS];
133
134   const struct lp_build_sampler_soa *sampler;
135
136   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
137   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
138   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
139   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];
140
141   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
142    * set in the indirect_files field.
143    * The temps[] array above is unused then.
144    */
145   LLVMValueRef temps_array;
146
147   /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is
148    * set in the indirect_files field.
149    * The outputs[] array above is unused then.
150    */
151   LLVMValueRef outputs_array;
152
153   const struct tgsi_shader_info *info;
154   /** bitmask indicating which register files are accessed indirectly */
155   unsigned indirect_files;
156
157   struct lp_build_mask_context *mask;
158   struct lp_exec_mask exec_mask;
159
160   struct tgsi_full_instruction *instructions;
161   uint max_instructions;
162};
163
164static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
165{
166   mask->bld = bld;
167   mask->has_mask = FALSE;
168   mask->cond_stack_size = 0;
169   mask->loop_stack_size = 0;
170   mask->call_stack_size = 0;
171
172   mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
173   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
174         LLVMConstAllOnes(mask->int_vec_type);
175}
176
177static void lp_exec_mask_update(struct lp_exec_mask *mask)
178{
179   if (mask->loop_stack_size) {
180      /*for loops we need to update the entire mask at runtime */
181      LLVMValueRef tmp;
182      assert(mask->break_mask);
183      tmp = LLVMBuildAnd(mask->bld->builder,
184                         mask->cont_mask,
185                         mask->break_mask,
186                         "maskcb");
187      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
188                                     mask->cond_mask,
189                                     tmp,
190                                     "maskfull");
191   } else
192      mask->exec_mask = mask->cond_mask;
193
194   if (mask->call_stack_size) {
195      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
196                                     mask->exec_mask,
197                                     mask->ret_mask,
198                                     "callmask");
199   }
200
201   mask->has_mask = (mask->cond_stack_size > 0 ||
202                     mask->loop_stack_size > 0 ||
203                     mask->call_stack_size > 0);
204}
205
206static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
207                                   LLVMValueRef val)
208{
209   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
210   if (mask->cond_stack_size == 0) {
211      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
212   }
213   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
214   assert(LLVMTypeOf(val) == mask->int_vec_type);
215   mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
216                                  mask->cond_mask,
217                                  val,
218                                  "");
219   lp_exec_mask_update(mask);
220}
221
222static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
223{
224   LLVMValueRef prev_mask;
225   LLVMValueRef inv_mask;
226
227   assert(mask->cond_stack_size);
228   prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
229   if (mask->cond_stack_size == 1) {
230      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
231   }
232
233   inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");
234
235   mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
236                                  inv_mask,
237                                  prev_mask, "");
238   lp_exec_mask_update(mask);
239}
240
241static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
242{
243   assert(mask->cond_stack_size);
244   mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
245   lp_exec_mask_update(mask);
246}
247
248static void lp_exec_bgnloop(struct lp_exec_mask *mask)
249{
250   if (mask->loop_stack_size == 0) {
251      assert(mask->loop_block == NULL);
252      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
253      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
254      assert(mask->break_var == NULL);
255   }
256
257   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
258
259   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
260   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
261   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
262   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
263   ++mask->loop_stack_size;
264
265   mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
266   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
267
268   mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
269   LLVMBuildBr(mask->bld->builder, mask->loop_block);
270   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
271
272   mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");
273
274   lp_exec_mask_update(mask);
275}
276
277static void lp_exec_break(struct lp_exec_mask *mask)
278{
279   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
280                                         mask->exec_mask,
281                                         "break");
282
283   mask->break_mask = LLVMBuildAnd(mask->bld->builder,
284                                   mask->break_mask,
285                                   exec_mask, "break_full");
286
287   lp_exec_mask_update(mask);
288}
289
290static void lp_exec_continue(struct lp_exec_mask *mask)
291{
292   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
293                                         mask->exec_mask,
294                                         "");
295
296   mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
297                                  mask->cont_mask,
298                                  exec_mask, "");
299
300   lp_exec_mask_update(mask);
301}
302
303
304static void lp_exec_endloop(struct lp_exec_mask *mask)
305{
306   LLVMBasicBlockRef endloop;
307   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
308                                      mask->bld->type.length);
309   LLVMValueRef i1cond;
310
311   assert(mask->break_mask);
312
313   /*
314    * Restore the cont_mask, but don't pop
315    */
316   assert(mask->loop_stack_size);
317   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
318   lp_exec_mask_update(mask);
319
320   /*
321    * Unlike the continue mask, the break_mask must be preserved across loop
322    * iterations
323    */
324   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
325
326   /* i1cond = (mask == 0) */
327   i1cond = LLVMBuildICmp(
328      mask->bld->builder,
329      LLVMIntNE,
330      LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
331      LLVMConstNull(reg_type), "");
332
333   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");
334
335   LLVMBuildCondBr(mask->bld->builder,
336                   i1cond, mask->loop_block, endloop);
337
338   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);
339
340   assert(mask->loop_stack_size);
341   --mask->loop_stack_size;
342   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
343   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
344   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
345   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
346
347   lp_exec_mask_update(mask);
348}
349
350/* stores val into an address pointed to by dst.
351 * mask->exec_mask is used to figure out which bits of val
352 * should be stored into the address
353 * (0 means don't store this bit, 1 means do store).
354 */
355static void lp_exec_mask_store(struct lp_exec_mask *mask,
356                               LLVMValueRef pred,
357                               LLVMValueRef val,
358                               LLVMValueRef dst)
359{
360   /* Mix the predicate and execution mask */
361   if (mask->has_mask) {
362      if (pred) {
363         pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
364      } else {
365         pred = mask->exec_mask;
366      }
367   }
368
369   if (pred) {
370      LLVMValueRef real_val, dst_val;
371
372      dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
373      real_val = lp_build_select(mask->bld,
374                                 pred,
375                                 val, dst_val);
376
377      LLVMBuildStore(mask->bld->builder, real_val, dst);
378   } else
379      LLVMBuildStore(mask->bld->builder, val, dst);
380}
381
382static void lp_exec_mask_call(struct lp_exec_mask *mask,
383                              int func,
384                              int *pc)
385{
386   assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
387   mask->call_stack[mask->call_stack_size].pc = *pc;
388   mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
389   mask->call_stack_size++;
390   *pc = func;
391}
392
393static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
394{
395   LLVMValueRef exec_mask;
396
397   if (mask->call_stack_size == 0) {
398      /* returning from main() */
399      *pc = -1;
400      return;
401   }
402   exec_mask = LLVMBuildNot(mask->bld->builder,
403                            mask->exec_mask,
404                            "ret");
405
406   mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
407                                 mask->ret_mask,
408                                 exec_mask, "ret_full");
409
410   lp_exec_mask_update(mask);
411}
412
413static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
414{
415}
416
417static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
418{
419   assert(mask->call_stack_size);
420   mask->call_stack_size--;
421   *pc = mask->call_stack[mask->call_stack_size].pc;
422   mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
423   lp_exec_mask_update(mask);
424}
425
426
427/**
428 * Return pointer to a temporary register channel (src or dest).
429 * Note that indirect addressing cannot be handled here.
430 * \param index  which temporary register
431 * \param chan  which channel of the temp register.
432 */
433static LLVMValueRef
434get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
435             unsigned index,
436             unsigned chan)
437{
438   assert(chan < 4);
439   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
440      LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
441      return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
442   }
443   else {
444      return bld->temps[index][chan];
445   }
446}
447
448/**
449 * Return pointer to a output register channel (src or dest).
450 * Note that indirect addressing cannot be handled here.
451 * \param index  which output register
452 * \param chan  which channel of the output register.
453 */
454static LLVMValueRef
455get_output_ptr(struct lp_build_tgsi_soa_context *bld,
456               unsigned index,
457               unsigned chan)
458{
459   assert(chan < 4);
460   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
461      LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
462      return LLVMBuildGEP(bld->base.builder, bld->outputs_array, &lindex, 1, "");
463   }
464   else {
465      return bld->outputs[index][chan];
466   }
467}
468
469
470
471/**
472 * Gather vector.
473 * XXX the lp_build_gather() function should be capable of doing this
474 * with a little work.
475 */
476static LLVMValueRef
477build_gather(struct lp_build_tgsi_soa_context *bld,
478             LLVMValueRef base_ptr,
479             LLVMValueRef indexes)
480{
481   LLVMValueRef res = bld->base.undef;
482   unsigned i;
483
484   /*
485    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
486    */
487   for (i = 0; i < bld->base.type.length; i++) {
488      LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
489      LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder,
490                                                   indexes, ii, "");
491      LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr,
492                                             &index, 1, "gather_ptr");
493      LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
494
495      res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, "");
496   }
497
498   return res;
499}
500
501
502/**
503 * Scatter/store vector.
504 */
505static void
506emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
507                  LLVMValueRef base_ptr,
508                  LLVMValueRef indexes,
509                  LLVMValueRef values,
510                  struct lp_exec_mask *mask,
511                  LLVMValueRef pred)
512{
513   LLVMBuilderRef builder = bld->base.builder;
514   unsigned i;
515
516   /* Mix the predicate and execution mask */
517   if (mask->has_mask) {
518      if (pred) {
519         pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
520      }
521      else {
522         pred = mask->exec_mask;
523      }
524   }
525
526   /*
527    * Loop over elements of index_vec, store scalar value.
528    */
529   for (i = 0; i < bld->base.type.length; i++) {
530      LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
531      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
532      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
533      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
534      LLVMValueRef scalar_pred = pred ?
535         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
536
537      if (0)
538         lp_build_printf(builder, "scatter %d: val %f at %d %p\n",
539                         ii, val, index, scalar_ptr);
540
541      if (scalar_pred) {
542         LLVMValueRef real_val, dst_val;
543         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
544         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
545         LLVMBuildStore(builder, real_val, scalar_ptr);
546      }
547      else {
548         LLVMBuildStore(builder, val, scalar_ptr);
549      }
550   }
551}
552
553
554/**
555 * Read the current value of the ADDR register, convert the floats to
556 * ints, add the base index and return the vector of offsets.
557 * The offsets will be used to index into the constant buffer or
558 * temporary register file.
559 */
560static LLVMValueRef
561get_indirect_index(struct lp_build_tgsi_soa_context *bld,
562                   unsigned reg_file, unsigned reg_index,
563                   const struct tgsi_src_register *indirect_reg)
564{
565   struct lp_build_context *uint_bld = &bld->uint_bld;
566   /* always use X component of address register */
567   unsigned swizzle = indirect_reg->SwizzleX;
568   LLVMValueRef base;
569   LLVMValueRef rel;
570   LLVMValueRef max_index;
571   LLVMValueRef index;
572
573   assert(bld->indirect_files & (1 << reg_file));
574
575   base = lp_build_const_int_vec(uint_bld->type, reg_index);
576
577   assert(swizzle < 4);
578   rel = LLVMBuildLoad(bld->base.builder,
579                        bld->addr[indirect_reg->Index][swizzle],
580                        "load addr reg");
581
582   /* for indexing we want integers */
583   rel = LLVMBuildFPToSI(bld->base.builder,
584                         rel,
585                         uint_bld->vec_type, "");
586
587   index = lp_build_add(uint_bld, base, rel);
588
589   max_index = lp_build_const_int_vec(uint_bld->type,
590                                      bld->info->file_max[reg_file]);
591
592   assert(!uint_bld->type.sign);
593   index = lp_build_min(uint_bld, index, max_index);
594
595   return index;
596}
597
598
599/**
600 * Register fetch.
601 */
602static LLVMValueRef
603emit_fetch(
604   struct lp_build_tgsi_soa_context *bld,
605   const struct tgsi_full_instruction *inst,
606   unsigned src_op,
607   const unsigned chan_index )
608{
609   struct lp_build_context *uint_bld = &bld->uint_bld;
610   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
611   const unsigned swizzle =
612      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
613   LLVMValueRef res;
614   LLVMValueRef indirect_index = NULL;
615
616   if (swizzle > 3) {
617      assert(0 && "invalid swizzle in emit_fetch()");
618      return bld->base.undef;
619   }
620
621   if (reg->Register.Indirect) {
622      indirect_index = get_indirect_index(bld,
623                                          reg->Register.File,
624                                          reg->Register.Index,
625                                          &reg->Indirect);
626   } else {
627      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
628   }
629
630   switch (reg->Register.File) {
631   case TGSI_FILE_CONSTANT:
632      if (reg->Register.Indirect) {
633         LLVMValueRef swizzle_vec =
634            lp_build_const_int_vec(uint_bld->type, swizzle);
635         LLVMValueRef index_vec;  /* index into the const buffer */
636
637         /* index_vec = indirect_index * 4 + swizzle */
638         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
639         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
640
641         /* Gather values from the constant buffer */
642         res = build_gather(bld, bld->consts_ptr, index_vec);
643      }
644      else {
645         LLVMValueRef index;  /* index into the const buffer */
646         LLVMValueRef scalar, scalar_ptr;
647
648         index = lp_build_const_int32(reg->Register.Index*4 + swizzle);
649
650         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
651                                   &index, 1, "");
652         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
653
654         res = lp_build_broadcast_scalar(&bld->base, scalar);
655      }
656      break;
657
658   case TGSI_FILE_IMMEDIATE:
659      res = bld->immediates[reg->Register.Index][swizzle];
660      assert(res);
661      break;
662
663   case TGSI_FILE_INPUT:
664      res = bld->inputs[reg->Register.Index][swizzle];
665      assert(res);
666      break;
667
668   case TGSI_FILE_TEMPORARY:
669      if (reg->Register.Indirect) {
670         LLVMValueRef swizzle_vec =
671            lp_build_const_int_vec(uint_bld->type, swizzle);
672         LLVMValueRef length_vec =
673            lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
674         LLVMValueRef index_vec;  /* index into the const buffer */
675         LLVMValueRef temps_array;
676         LLVMTypeRef float4_ptr_type;
677
678         /* index_vec = (indirect_index * 4 + swizzle) * length */
679         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
680         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
681         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
682
683         /* cast temps_array pointer to float* */
684         float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
685         temps_array = LLVMBuildBitCast(uint_bld->builder, bld->temps_array,
686                                        float4_ptr_type, "");
687
688         /* Gather values from the temporary register array */
689         res = build_gather(bld, temps_array, index_vec);
690      }
691      else {
692         LLVMValueRef temp_ptr;
693         temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
694         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
695         if (!res)
696            return bld->base.undef;
697      }
698      break;
699
700   default:
701      assert(0 && "invalid src register in emit_fetch()");
702      return bld->base.undef;
703   }
704
705   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
706   case TGSI_UTIL_SIGN_CLEAR:
707      res = lp_build_abs( &bld->base, res );
708      break;
709
710   case TGSI_UTIL_SIGN_SET:
711      res = lp_build_abs( &bld->base, res );
712      /* fall through */
713   case TGSI_UTIL_SIGN_TOGGLE:
714      res = lp_build_negate( &bld->base, res );
715      break;
716
717   case TGSI_UTIL_SIGN_KEEP:
718      break;
719   }
720
721   return res;
722}
723
724
725/**
726 * Register fetch with derivatives.
727 */
728static void
729emit_fetch_deriv(
730   struct lp_build_tgsi_soa_context *bld,
731   const struct tgsi_full_instruction *inst,
732   unsigned index,
733   const unsigned chan_index,
734   LLVMValueRef *res,
735   LLVMValueRef *ddx,
736   LLVMValueRef *ddy)
737{
738   LLVMValueRef src;
739
740   src = emit_fetch(bld, inst, index, chan_index);
741
742   if(res)
743      *res = src;
744
745   /* TODO: use interpolation coeffs for inputs */
746
747   if(ddx)
748      *ddx = lp_build_ddx(&bld->base, src);
749
750   if(ddy)
751      *ddy = lp_build_ddy(&bld->base, src);
752}
753
754
755/**
756 * Predicate.
757 */
758static void
759emit_fetch_predicate(
760   struct lp_build_tgsi_soa_context *bld,
761   const struct tgsi_full_instruction *inst,
762   LLVMValueRef *pred)
763{
764   unsigned index;
765   unsigned char swizzles[4];
766   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
767   LLVMValueRef value;
768   unsigned chan;
769
770   if (!inst->Instruction.Predicate) {
771      FOR_EACH_CHANNEL( chan ) {
772         pred[chan] = NULL;
773      }
774      return;
775   }
776
777   swizzles[0] = inst->Predicate.SwizzleX;
778   swizzles[1] = inst->Predicate.SwizzleY;
779   swizzles[2] = inst->Predicate.SwizzleZ;
780   swizzles[3] = inst->Predicate.SwizzleW;
781
782   index = inst->Predicate.Index;
783   assert(index < LP_MAX_TGSI_PREDS);
784
785   FOR_EACH_CHANNEL( chan ) {
786      unsigned swizzle = swizzles[chan];
787
788      /*
789       * Only fetch the predicate register channels that are actually listed
790       * in the swizzles
791       */
792      if (!unswizzled[swizzle]) {
793         value = LLVMBuildLoad(bld->base.builder,
794                               bld->preds[index][swizzle], "");
795
796         /*
797          * Convert the value to an integer mask.
798          *
799          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
800          * is needlessly causing two comparisons due to storing the intermediate
801          * result as float vector instead of an integer mask vector.
802          */
803         value = lp_build_compare(bld->base.builder,
804                                  bld->base.type,
805                                  PIPE_FUNC_NOTEQUAL,
806                                  value,
807                                  bld->base.zero);
808         if (inst->Predicate.Negate) {
809            value = LLVMBuildNot(bld->base.builder, value, "");
810         }
811
812         unswizzled[swizzle] = value;
813      } else {
814         value = unswizzled[swizzle];
815      }
816
817      pred[chan] = value;
818   }
819}
820
821
822/**
823 * Register store.
824 */
825static void
826emit_store(
827   struct lp_build_tgsi_soa_context *bld,
828   const struct tgsi_full_instruction *inst,
829   unsigned index,
830   unsigned chan_index,
831   LLVMValueRef pred,
832   LLVMValueRef value)
833{
834   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
835   struct lp_build_context *uint_bld = &bld->uint_bld;
836   LLVMValueRef indirect_index = NULL;
837
838   switch( inst->Instruction.Saturate ) {
839   case TGSI_SAT_NONE:
840      break;
841
842   case TGSI_SAT_ZERO_ONE:
843      value = lp_build_max(&bld->base, value, bld->base.zero);
844      value = lp_build_min(&bld->base, value, bld->base.one);
845      break;
846
847   case TGSI_SAT_MINUS_PLUS_ONE:
848      value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
849      value = lp_build_min(&bld->base, value, bld->base.one);
850      break;
851
852   default:
853      assert(0);
854   }
855
856   if (reg->Register.Indirect) {
857      indirect_index = get_indirect_index(bld,
858                                          reg->Register.File,
859                                          reg->Register.Index,
860                                          &reg->Indirect);
861   } else {
862      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
863   }
864
865   switch( reg->Register.File ) {
866   case TGSI_FILE_OUTPUT:
867      if (reg->Register.Indirect) {
868         LLVMBuilderRef builder = bld->base.builder;
869         LLVMValueRef chan_vec =
870            lp_build_const_int_vec(uint_bld->type, chan_index);
871         LLVMValueRef length_vec =
872            lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
873         LLVMValueRef index_vec;  /* indexes into the temp registers */
874         LLVMValueRef outputs_array;
875         LLVMValueRef pixel_offsets;
876         LLVMTypeRef float_ptr_type;
877         int i;
878
879         /* build pixel offset vector: {0, 1, 2, 3, ...} */
880         pixel_offsets = uint_bld->undef;
881         for (i = 0; i < bld->base.type.length; i++) {
882            LLVMValueRef ii = lp_build_const_int32(i);
883            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
884                                                   ii, ii, "");
885         }
886
887         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
888         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
889         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
890         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
891         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
892
893         float_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
894         outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
895                                          float_ptr_type, "");
896
897         /* Scatter store values into temp registers */
898         emit_mask_scatter(bld, outputs_array, index_vec, value,
899                           &bld->exec_mask, pred);
900      }
901      else {
902         LLVMValueRef out_ptr = get_output_ptr(bld, reg->Register.Index,
903                                               chan_index);
904         lp_exec_mask_store(&bld->exec_mask, pred, value, out_ptr);
905      }
906      break;
907
908   case TGSI_FILE_TEMPORARY:
909      if (reg->Register.Indirect) {
910         LLVMBuilderRef builder = bld->base.builder;
911         LLVMValueRef chan_vec =
912            lp_build_const_int_vec(uint_bld->type, chan_index);
913         LLVMValueRef length_vec =
914            lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
915         LLVMValueRef index_vec;  /* indexes into the temp registers */
916         LLVMValueRef temps_array;
917         LLVMValueRef pixel_offsets;
918         LLVMTypeRef float_ptr_type;
919         int i;
920
921         /* build pixel offset vector: {0, 1, 2, 3, ...} */
922         pixel_offsets = uint_bld->undef;
923         for (i = 0; i < bld->base.type.length; i++) {
924            LLVMValueRef ii = lp_build_const_int32(i);
925            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
926                                                   ii, ii, "");
927         }
928
929         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
930         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
931         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
932         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
933         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
934
935         float_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
936         temps_array = LLVMBuildBitCast(builder, bld->temps_array,
937                                        float_ptr_type, "");
938
939         /* Scatter store values into temp registers */
940         emit_mask_scatter(bld, temps_array, index_vec, value,
941                           &bld->exec_mask, pred);
942      }
943      else {
944         LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
945                                              chan_index);
946         lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
947      }
948      break;
949
950   case TGSI_FILE_ADDRESS:
951      lp_exec_mask_store(&bld->exec_mask, pred, value,
952                         bld->addr[reg->Indirect.Index][chan_index]);
953      break;
954
955   case TGSI_FILE_PREDICATE:
956      lp_exec_mask_store(&bld->exec_mask, pred, value,
957                         bld->preds[reg->Register.Index][chan_index]);
958      break;
959
960   default:
961      assert( 0 );
962   }
963}
964
965
966/**
967 * High-level instruction translators.
968 */
969
970static void
971emit_tex( struct lp_build_tgsi_soa_context *bld,
972          const struct tgsi_full_instruction *inst,
973          enum lp_build_tex_modifier modifier,
974          LLVMValueRef *texel)
975{
976   unsigned unit;
977   LLVMValueRef lod_bias, explicit_lod;
978   LLVMValueRef oow = NULL;
979   LLVMValueRef coords[3];
980   LLVMValueRef ddx[3];
981   LLVMValueRef ddy[3];
982   unsigned num_coords;
983   unsigned i;
984
985   if (!bld->sampler) {
986      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
987      for (i = 0; i < 4; i++) {
988         texel[i] = bld->base.undef;
989      }
990      return;
991   }
992
993   switch (inst->Texture.Texture) {
994   case TGSI_TEXTURE_1D:
995      num_coords = 1;
996      break;
997   case TGSI_TEXTURE_2D:
998   case TGSI_TEXTURE_RECT:
999      num_coords = 2;
1000      break;
1001   case TGSI_TEXTURE_SHADOW1D:
1002   case TGSI_TEXTURE_SHADOW2D:
1003   case TGSI_TEXTURE_SHADOWRECT:
1004   case TGSI_TEXTURE_3D:
1005   case TGSI_TEXTURE_CUBE:
1006      num_coords = 3;
1007      break;
1008   default:
1009      assert(0);
1010      return;
1011   }
1012
1013   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1014      lod_bias = emit_fetch( bld, inst, 0, 3 );
1015      explicit_lod = NULL;
1016   }
1017   else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1018      lod_bias = NULL;
1019      explicit_lod = emit_fetch( bld, inst, 0, 3 );
1020   }
1021   else {
1022      lod_bias = NULL;
1023      explicit_lod = NULL;
1024   }
1025
1026   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
1027      oow = emit_fetch( bld, inst, 0, 3 );
1028      oow = lp_build_rcp(&bld->base, oow);
1029   }
1030
1031   for (i = 0; i < num_coords; i++) {
1032      coords[i] = emit_fetch( bld, inst, 0, i );
1033      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1034         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
1035   }
1036   for (i = num_coords; i < 3; i++) {
1037      coords[i] = bld->base.undef;
1038   }
1039
1040   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1041      LLVMTypeRef i32t = LLVMInt32Type();
1042      LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
1043      for (i = 0; i < num_coords; i++) {
1044         LLVMValueRef src1 = emit_fetch( bld, inst, 1, i );
1045         LLVMValueRef src2 = emit_fetch( bld, inst, 2, i );
1046         ddx[i] = LLVMBuildExtractElement(bld->base.builder, src1, index0, "");
1047         ddy[i] = LLVMBuildExtractElement(bld->base.builder, src2, index0, "");
1048      }
1049      unit = inst->Src[3].Register.Index;
1050   }  else {
1051      for (i = 0; i < num_coords; i++) {
1052         ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] );
1053         ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] );
1054      }
1055      unit = inst->Src[1].Register.Index;
1056   }
1057   for (i = num_coords; i < 3; i++) {
1058      ddx[i] = LLVMGetUndef(bld->base.elem_type);
1059      ddy[i] = LLVMGetUndef(bld->base.elem_type);
1060   }
1061
1062   bld->sampler->emit_fetch_texel(bld->sampler,
1063                                  bld->base.builder,
1064                                  bld->base.type,
1065                                  unit, num_coords, coords,
1066                                  ddx, ddy,
1067                                  lod_bias, explicit_lod,
1068                                  texel);
1069}
1070
1071static boolean
1072near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
1073		   int pc)
1074{
1075   int i;
1076
1077   for (i = 0; i < 5; i++) {
1078      unsigned opcode;
1079
1080      if (pc + i >= bld->info->num_instructions)
1081	 return TRUE;
1082
1083      opcode = bld->instructions[pc + i].Instruction.Opcode;
1084
1085      if (opcode == TGSI_OPCODE_END)
1086	 return TRUE;
1087
1088      if (opcode == TGSI_OPCODE_TEX ||
1089	  opcode == TGSI_OPCODE_TXP ||
1090	  opcode == TGSI_OPCODE_TXD ||
1091	  opcode == TGSI_OPCODE_TXB ||
1092	  opcode == TGSI_OPCODE_TXL ||
1093	  opcode == TGSI_OPCODE_TXF ||
1094	  opcode == TGSI_OPCODE_TXQ ||
1095	  opcode == TGSI_OPCODE_CAL ||
1096	  opcode == TGSI_OPCODE_CALLNZ ||
1097	  opcode == TGSI_OPCODE_IF ||
1098	  opcode == TGSI_OPCODE_IFC ||
1099	  opcode == TGSI_OPCODE_BGNLOOP ||
1100	  opcode == TGSI_OPCODE_SWITCH)
1101	 return FALSE;
1102   }
1103
1104   return TRUE;
1105}
1106
1107
1108
1109/**
1110 * Kill fragment if any of the src register values are negative.
1111 */
1112static void
1113emit_kil(
1114   struct lp_build_tgsi_soa_context *bld,
1115   const struct tgsi_full_instruction *inst,
1116   int pc)
1117{
1118   const struct tgsi_full_src_register *reg = &inst->Src[0];
1119   LLVMValueRef terms[NUM_CHANNELS];
1120   LLVMValueRef mask;
1121   unsigned chan_index;
1122
1123   memset(&terms, 0, sizeof terms);
1124
1125   FOR_EACH_CHANNEL( chan_index ) {
1126      unsigned swizzle;
1127
1128      /* Unswizzle channel */
1129      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1130
1131      /* Check if the component has not been already tested. */
1132      assert(swizzle < NUM_CHANNELS);
1133      if( !terms[swizzle] )
1134         /* TODO: change the comparison operator instead of setting the sign */
1135         terms[swizzle] =  emit_fetch(bld, inst, 0, chan_index );
1136   }
1137
1138   mask = NULL;
1139   FOR_EACH_CHANNEL( chan_index ) {
1140      if(terms[chan_index]) {
1141         LLVMValueRef chan_mask;
1142
1143         /*
1144          * If term < 0 then mask = 0 else mask = ~0.
1145          */
1146         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
1147
1148         if(mask)
1149            mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
1150         else
1151            mask = chan_mask;
1152      }
1153   }
1154
1155   if(mask) {
1156      lp_build_mask_update(bld->mask, mask);
1157
1158      if (!near_end_of_shader(bld, pc))
1159	 lp_build_mask_check(bld->mask);
1160   }
1161}
1162
1163
1164/**
1165 * Predicated fragment kill.
1166 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
1167 * The only predication is the execution mask which will apply if
1168 * we're inside a loop or conditional.
1169 */
1170static void
1171emit_kilp(struct lp_build_tgsi_soa_context *bld,
1172          const struct tgsi_full_instruction *inst,
1173	  int pc)
1174{
1175   LLVMValueRef mask;
1176
1177   /* For those channels which are "alive", disable fragment shader
1178    * execution.
1179    */
1180   if (bld->exec_mask.has_mask) {
1181      mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
1182   }
1183   else {
1184      LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type);
1185      mask = zero;
1186   }
1187
1188   lp_build_mask_update(bld->mask, mask);
1189
1190   if (!near_end_of_shader(bld, pc))
1191      lp_build_mask_check(bld->mask);
1192}
1193
1194
1195/**
1196 * Emit code which will dump the value of all the temporary registers
1197 * to stdout.
1198 */
1199static void
1200emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
1201{
1202   LLVMBuilderRef builder = bld->base.builder;
1203   LLVMValueRef temp_ptr;
1204   LLVMValueRef i0 = lp_build_const_int32(0);
1205   LLVMValueRef i1 = lp_build_const_int32(1);
1206   LLVMValueRef i2 = lp_build_const_int32(2);
1207   LLVMValueRef i3 = lp_build_const_int32(3);
1208   int index;
1209   int n = bld->info->file_max[TGSI_FILE_TEMPORARY];
1210
1211   for (index = 0; index < n; index++) {
1212      LLVMValueRef idx = lp_build_const_int32(index);
1213      LLVMValueRef v[4][4], res;
1214      int chan;
1215
1216      lp_build_printf(builder, "TEMP[%d]:\n", idx);
1217
1218      for (chan = 0; chan < 4; chan++) {
1219         temp_ptr = get_temp_ptr(bld, index, chan);
1220         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
1221         v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
1222         v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
1223         v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
1224         v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
1225      }
1226
1227      lp_build_printf(builder, "  X: %f %f %f %f\n",
1228                      v[0][0], v[0][1], v[0][2], v[0][3]);
1229      lp_build_printf(builder, "  Y: %f %f %f %f\n",
1230                      v[1][0], v[1][1], v[1][2], v[1][3]);
1231      lp_build_printf(builder, "  Z: %f %f %f %f\n",
1232                      v[2][0], v[2][1], v[2][2], v[2][3]);
1233      lp_build_printf(builder, "  W: %f %f %f %f\n",
1234                      v[3][0], v[3][1], v[3][2], v[3][3]);
1235   }
1236}
1237
1238
1239
1240static void
1241emit_declaration(
1242   struct lp_build_tgsi_soa_context *bld,
1243   const struct tgsi_full_declaration *decl)
1244{
1245   LLVMTypeRef vec_type = bld->base.vec_type;
1246   const unsigned first = decl->Range.First;
1247   const unsigned last = decl->Range.Last;
1248   unsigned idx, i;
1249
1250   for (idx = first; idx <= last; ++idx) {
1251      assert(last <= bld->info->file_max[decl->Declaration.File]);
1252      switch (decl->Declaration.File) {
1253      case TGSI_FILE_TEMPORARY:
1254         assert(idx < LP_MAX_TGSI_TEMPS);
1255         if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
1256            for (i = 0; i < NUM_CHANNELS; i++)
1257               bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
1258                                                    vec_type, "temp");
1259         }
1260         break;
1261
1262      case TGSI_FILE_OUTPUT:
1263         if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
1264            for (i = 0; i < NUM_CHANNELS; i++)
1265               bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
1266                                                      vec_type, "output");
1267         }
1268         break;
1269
1270      case TGSI_FILE_ADDRESS:
1271         assert(idx < LP_MAX_TGSI_ADDRS);
1272         for (i = 0; i < NUM_CHANNELS; i++)
1273            bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
1274                                                vec_type, "addr");
1275         break;
1276
1277      case TGSI_FILE_PREDICATE:
1278         assert(idx < LP_MAX_TGSI_PREDS);
1279         for (i = 0; i < NUM_CHANNELS; i++)
1280            bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
1281                                                 vec_type, "predicate");
1282         break;
1283
1284      default:
1285         /* don't need to declare other vars */
1286         break;
1287      }
1288   }
1289}
1290
1291
1292/**
1293 * Emit LLVM for one TGSI instruction.
1294 * \param return TRUE for success, FALSE otherwise
1295 */
1296static boolean
1297emit_instruction(
1298   struct lp_build_tgsi_soa_context *bld,
1299   const struct tgsi_full_instruction *inst,
1300   const struct tgsi_opcode_info *info,
1301   int *pc)
1302{
1303   unsigned chan_index;
1304   LLVMValueRef src0, src1, src2;
1305   LLVMValueRef tmp0, tmp1, tmp2;
1306   LLVMValueRef tmp3 = NULL;
1307   LLVMValueRef tmp4 = NULL;
1308   LLVMValueRef tmp5 = NULL;
1309   LLVMValueRef tmp6 = NULL;
1310   LLVMValueRef tmp7 = NULL;
1311   LLVMValueRef res;
1312   LLVMValueRef dst0[NUM_CHANNELS];
1313
1314   /*
1315    * Stores and write masks are handled in a general fashion after the long
1316    * instruction opcode switch statement.
1317    *
1318    * Although not stricitly necessary, we avoid generating instructions for
1319    * channels which won't be stored, in cases where's that easy. For some
1320    * complex instructions, like texture sampling, it is more convenient to
1321    * assume a full writemask and then let LLVM optimization passes eliminate
1322    * redundant code.
1323    */
1324
1325   (*pc)++;
1326
1327   assert(info->num_dst <= 1);
1328   if (info->num_dst) {
1329      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1330         dst0[chan_index] = bld->base.undef;
1331      }
1332   }
1333
1334   switch (inst->Instruction.Opcode) {
1335   case TGSI_OPCODE_ARL:
1336      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1337         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1338         tmp0 = lp_build_floor(&bld->base, tmp0);
1339         dst0[chan_index] = tmp0;
1340      }
1341      break;
1342
1343   case TGSI_OPCODE_MOV:
1344      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1345         dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
1346      }
1347      break;
1348
1349   case TGSI_OPCODE_LIT:
1350      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
1351         dst0[CHAN_X] = bld->base.one;
1352      }
1353      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1354         src0 = emit_fetch( bld, inst, 0, CHAN_X );
1355         dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
1356      }
1357      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1358         /* XMM[1] = SrcReg[0].yyyy */
1359         tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1360         /* XMM[1] = max(XMM[1], 0) */
1361         tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1362         /* XMM[2] = SrcReg[0].wwww */
1363         tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1364         tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1365         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1366         tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1367         dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1368      }
1369      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1370         dst0[CHAN_W] = bld->base.one;
1371      }
1372      break;
1373
1374   case TGSI_OPCODE_RCP:
1375   /* TGSI_OPCODE_RECIP */
1376      src0 = emit_fetch( bld, inst, 0, CHAN_X );
1377      res = lp_build_rcp(&bld->base, src0);
1378      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1379         dst0[chan_index] = res;
1380      }
1381      break;
1382
1383   case TGSI_OPCODE_RSQ:
1384   /* TGSI_OPCODE_RECIPSQRT */
1385      src0 = emit_fetch( bld, inst, 0, CHAN_X );
1386      src0 = lp_build_abs(&bld->base, src0);
1387      res = lp_build_rsqrt(&bld->base, src0);
1388      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1389         dst0[chan_index] = res;
1390      }
1391      break;
1392
1393   case TGSI_OPCODE_EXP:
1394      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1395          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1396          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1397         LLVMValueRef *p_exp2_int_part = NULL;
1398         LLVMValueRef *p_frac_part = NULL;
1399         LLVMValueRef *p_exp2 = NULL;
1400
1401         src0 = emit_fetch( bld, inst, 0, CHAN_X );
1402
1403         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1404            p_exp2_int_part = &tmp0;
1405         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1406            p_frac_part = &tmp1;
1407         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1408            p_exp2 = &tmp2;
1409
1410         lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1411
1412         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1413            dst0[CHAN_X] = tmp0;
1414         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1415            dst0[CHAN_Y] = tmp1;
1416         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1417            dst0[CHAN_Z] = tmp2;
1418      }
1419      /* dst.w = 1.0 */
1420      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1421         dst0[CHAN_W] = bld->base.one;
1422      }
1423      break;
1424
1425   case TGSI_OPCODE_LOG:
1426      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1427          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1428          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1429         LLVMValueRef *p_floor_log2 = NULL;
1430         LLVMValueRef *p_exp = NULL;
1431         LLVMValueRef *p_log2 = NULL;
1432
1433         src0 = emit_fetch( bld, inst, 0, CHAN_X );
1434         src0 = lp_build_abs( &bld->base, src0 );
1435
1436         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1437            p_floor_log2 = &tmp0;
1438         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1439            p_exp = &tmp1;
1440         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1441            p_log2 = &tmp2;
1442
1443         lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1444
1445         /* dst.x = floor(lg2(abs(src.x))) */
1446         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1447            dst0[CHAN_X] = tmp0;
1448         /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1449         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1450            dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1451         }
1452         /* dst.z = lg2(abs(src.x)) */
1453         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1454            dst0[CHAN_Z] = tmp2;
1455      }
1456      /* dst.w = 1.0 */
1457      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1458         dst0[CHAN_W] = bld->base.one;
1459      }
1460      break;
1461
1462   case TGSI_OPCODE_MUL:
1463      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1464         src0 = emit_fetch( bld, inst, 0, chan_index );
1465         src1 = emit_fetch( bld, inst, 1, chan_index );
1466         dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1467      }
1468      break;
1469
1470   case TGSI_OPCODE_ADD:
1471      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1472         src0 = emit_fetch( bld, inst, 0, chan_index );
1473         src1 = emit_fetch( bld, inst, 1, chan_index );
1474         dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1475      }
1476      break;
1477
1478   case TGSI_OPCODE_DP3:
1479   /* TGSI_OPCODE_DOT3 */
1480      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1481      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1482      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1483      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1484      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1485      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1486      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1487      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1488      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1489      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1490      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1491      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1492         dst0[chan_index] = tmp0;
1493      }
1494      break;
1495
1496   case TGSI_OPCODE_DP4:
1497   /* TGSI_OPCODE_DOT4 */
1498      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1499      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1500      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1501      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1502      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1503      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1504      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1505      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1506      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1507      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1508      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1509      tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1510      tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1511      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1512      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1513      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1514         dst0[chan_index] = tmp0;
1515      }
1516      break;
1517
1518   case TGSI_OPCODE_DST:
1519      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1520         dst0[CHAN_X] = bld->base.one;
1521      }
1522      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1523         tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1524         tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1525         dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1526      }
1527      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1528         dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1529      }
1530      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1531         dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1532      }
1533      break;
1534
1535   case TGSI_OPCODE_MIN:
1536      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1537         src0 = emit_fetch( bld, inst, 0, chan_index );
1538         src1 = emit_fetch( bld, inst, 1, chan_index );
1539         dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1540      }
1541      break;
1542
1543   case TGSI_OPCODE_MAX:
1544      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1545         src0 = emit_fetch( bld, inst, 0, chan_index );
1546         src1 = emit_fetch( bld, inst, 1, chan_index );
1547         dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1548      }
1549      break;
1550
1551   case TGSI_OPCODE_SLT:
1552   /* TGSI_OPCODE_SETLT */
1553      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1554         src0 = emit_fetch( bld, inst, 0, chan_index );
1555         src1 = emit_fetch( bld, inst, 1, chan_index );
1556         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1557         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1558      }
1559      break;
1560
1561   case TGSI_OPCODE_SGE:
1562   /* TGSI_OPCODE_SETGE */
1563      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1564         src0 = emit_fetch( bld, inst, 0, chan_index );
1565         src1 = emit_fetch( bld, inst, 1, chan_index );
1566         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1567         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1568      }
1569      break;
1570
1571   case TGSI_OPCODE_MAD:
1572   /* TGSI_OPCODE_MADD */
1573      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1574         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1575         tmp1 = emit_fetch( bld, inst, 1, chan_index );
1576         tmp2 = emit_fetch( bld, inst, 2, chan_index );
1577         tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1578         tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1579         dst0[chan_index] = tmp0;
1580      }
1581      break;
1582
1583   case TGSI_OPCODE_SUB:
1584      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1585         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1586         tmp1 = emit_fetch( bld, inst, 1, chan_index );
1587         dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1588      }
1589      break;
1590
1591   case TGSI_OPCODE_LRP:
1592      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1593         src0 = emit_fetch( bld, inst, 0, chan_index );
1594         src1 = emit_fetch( bld, inst, 1, chan_index );
1595         src2 = emit_fetch( bld, inst, 2, chan_index );
1596         tmp0 = lp_build_sub( &bld->base, src1, src2 );
1597         tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1598         dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1599      }
1600      break;
1601
1602   case TGSI_OPCODE_CND:
1603      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1604         src0 = emit_fetch( bld, inst, 0, chan_index );
1605         src1 = emit_fetch( bld, inst, 1, chan_index );
1606         src2 = emit_fetch( bld, inst, 2, chan_index );
1607         tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1608         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1609         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1610      }
1611      break;
1612
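   /*
    * DP2A: two-component dot product with add,
    * dst = src0.x * src1.x + src0.y * src1.y + src2.x,
    * with the scalar result broadcast to every enabled channel.
    */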
1613   case TGSI_OPCODE_DP2A:
1614      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );  /* xmm0 = src[0].x */
1615      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );  /* xmm1 = src[1].x */
1616      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
1617      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );  /* xmm1 = src[0].y */
1618      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );  /* xmm2 = src[1].y */
1619      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
1620      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1621      tmp1 = emit_fetch( bld, inst, 2, CHAN_X );  /* xmm1 = src[2].x */
1622      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1623      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1624         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
1625      }
1626      break;
1627
1628   case TGSI_OPCODE_FRC:
1629      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1630         src0 = emit_fetch( bld, inst, 0, chan_index );
1631         tmp0 = lp_build_floor(&bld->base, src0);
1632         tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1633         dst0[chan_index] = tmp0;
1634      }
1635      break;
1636
1637   case TGSI_OPCODE_CLAMP:
1638      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1639         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1640         src1 = emit_fetch( bld, inst, 1, chan_index );
1641         src2 = emit_fetch( bld, inst, 2, chan_index );
1642         tmp0 = lp_build_max(&bld->base, tmp0, src1);
1643         tmp0 = lp_build_min(&bld->base, tmp0, src2);
1644         dst0[chan_index] = tmp0;
1645      }
1646      break;
1647
1648   case TGSI_OPCODE_FLR:
1649      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1650         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1651         dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1652      }
1653      break;
1654
1655   case TGSI_OPCODE_ROUND:
1656      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1657         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1658         dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1659      }
1660      break;
1661
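   /*
    * EX2, LG2, POW, COS and SIN below are scalar ops: they read only the
    * .x component of their operands and broadcast the result to all
    * enabled destination channels.
    */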
1662   case TGSI_OPCODE_EX2: {
1663      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1664      tmp0 = lp_build_exp2( &bld->base, tmp0);
1665      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1666         dst0[chan_index] = tmp0;
1667      }
1668      break;
1669   }
1670
1671   case TGSI_OPCODE_LG2:
1672      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1673      tmp0 = lp_build_log2( &bld->base, tmp0);
1674      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1675         dst0[chan_index] = tmp0;
1676      }
1677      break;
1678
1679   case TGSI_OPCODE_POW:
1680      src0 = emit_fetch( bld, inst, 0, CHAN_X );
1681      src1 = emit_fetch( bld, inst, 1, CHAN_X );
1682      res = lp_build_pow( &bld->base, src0, src1 );
1683      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1684         dst0[chan_index] = res;
1685      }
1686      break;
1687
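   /*
    * XPD: cross product,
    *    dst.x = src0.y * src1.z - src0.z * src1.y
    *    dst.y = src0.z * src1.x - src0.x * src1.z
    *    dst.z = src0.x * src1.y - src0.y * src1.x
    *    dst.w = 1.0
    * Operands are only fetched when a channel that needs them is enabled,
    * hence the combined writemask tests below.
    */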
1688   case TGSI_OPCODE_XPD:
1689      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1690          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1691         tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1692         tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1693      }
1694      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1695          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1696         tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1697         tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1698      }
1699      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1700         tmp2 = tmp0;
1701         tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1702         tmp5 = tmp3;
1703         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1704         tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1705         dst0[CHAN_X] = tmp2;
1706      }
1707      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1708          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1709         tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1710         tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1711      }
1712      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1713         tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1714         tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1715         tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1716         dst0[CHAN_Y] = tmp3;
1717      }
1718      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1719         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1720         tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1721         tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1722         dst0[CHAN_Z] = tmp5;
1723      }
1724      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1725         dst0[CHAN_W] = bld->base.one;
1726      }
1727      break;
1728
1729   case TGSI_OPCODE_ABS:
1730      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1731         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1732         dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1733      }
1734      break;
1735
1736   case TGSI_OPCODE_RCC:
1737      /* deprecated? */
1738      assert(0);
1739      return FALSE;
1740
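   /*
    * DPH: homogeneous dot product,
    * dst = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w,
    * broadcast to every enabled channel.
    */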
1741   case TGSI_OPCODE_DPH:
1742      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1743      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1744      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1745      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1746      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1747      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1748      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1749      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1750      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1751      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1752      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1753      tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1754      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1755      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1756         dst0[chan_index] = tmp0;
1757      }
1758      break;
1759
1760   case TGSI_OPCODE_COS:
1761      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1762      tmp0 = lp_build_cos( &bld->base, tmp0 );
1763      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1764         dst0[chan_index] = tmp0;
1765      }
1766      break;
1767
1768   case TGSI_OPCODE_DDX:
1769      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1770         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1771      }
1772      break;
1773
1774   case TGSI_OPCODE_DDY:
1775      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1776         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1777      }
1778      break;
1779
1780   case TGSI_OPCODE_KILP:
1781      /* predicated kill */
1782      emit_kilp( bld, inst, (*pc)-1 );
1783      break;
1784
1785   case TGSI_OPCODE_KIL:
1786      /* conditional kill */
1787      emit_kil( bld, inst, (*pc)-1 );
1788      break;
1789
1790   case TGSI_OPCODE_PK2H:
1791      return FALSE;
1792      break;
1793
1794   case TGSI_OPCODE_PK2US:
1795      return FALSE;
1796      break;
1797
1798   case TGSI_OPCODE_PK4B:
1799      return FALSE;
1800      break;
1801
1802   case TGSI_OPCODE_PK4UB:
1803      return FALSE;
1804      break;
1805
1806   case TGSI_OPCODE_RFL:
1807      return FALSE;
1808      break;
1809
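   /*
    * SEQ, SGT, SLE and SNE (like SLT/SGE above) are per-channel
    * set-on-compare ops: dst = (src0 OP src1) ? 1.0 : 0.0.
    * SFL and STR are the constant "set false" / "set true" variants.
    */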
1810   case TGSI_OPCODE_SEQ:
1811      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1812         src0 = emit_fetch( bld, inst, 0, chan_index );
1813         src1 = emit_fetch( bld, inst, 1, chan_index );
1814         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1815         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1816      }
1817      break;
1818
1819   case TGSI_OPCODE_SFL:
1820      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1821         dst0[chan_index] = bld->base.zero;
1822      }
1823      break;
1824
1825   case TGSI_OPCODE_SGT:
1826      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1827         src0 = emit_fetch( bld, inst, 0, chan_index );
1828         src1 = emit_fetch( bld, inst, 1, chan_index );
1829         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1830         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1831      }
1832      break;
1833
1834   case TGSI_OPCODE_SIN:
1835      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1836      tmp0 = lp_build_sin( &bld->base, tmp0 );
1837      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1838         dst0[chan_index] = tmp0;
1839      }
1840      break;
1841
1842   case TGSI_OPCODE_SLE:
1843      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1844         src0 = emit_fetch( bld, inst, 0, chan_index );
1845         src1 = emit_fetch( bld, inst, 1, chan_index );
1846         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1847         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1848      }
1849      break;
1850
1851   case TGSI_OPCODE_SNE:
1852      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1853         src0 = emit_fetch( bld, inst, 0, chan_index );
1854         src1 = emit_fetch( bld, inst, 1, chan_index );
1855         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1856         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1857      }
1858      break;
1859
1860   case TGSI_OPCODE_STR:
1861      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1862         dst0[chan_index] = bld->base.one;
1863      }
1864      break;
1865
1866   case TGSI_OPCODE_TEX:
1867      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 );
1868      break;
1869
1870   case TGSI_OPCODE_TXD:
1871      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1872      break;
1873
1874   case TGSI_OPCODE_UP2H:
1875      /* deprecated */
1876      assert(0);
1877      return FALSE;
1878      break;
1879
1880   case TGSI_OPCODE_UP2US:
1881      /* deprecated */
1882      assert(0);
1883      return FALSE;
1884      break;
1885
1886   case TGSI_OPCODE_UP4B:
1887      /* deprecated */
1888      assert(0);
1889      return FALSE;
1890      break;
1891
1892   case TGSI_OPCODE_UP4UB:
1893      /* deprecated */
1894      assert(0);
1895      return FALSE;
1896      break;
1897
1898   case TGSI_OPCODE_X2D:
1899      /* deprecated? */
1900      assert(0);
1901      return FALSE;
1902      break;
1903
1904   case TGSI_OPCODE_ARA:
1905      /* deprecated */
1906      assert(0);
1907      return FALSE;
1908      break;
1909
1910   case TGSI_OPCODE_ARR:
1911      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1912         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1913         tmp0 = lp_build_round(&bld->base, tmp0);
1914         dst0[chan_index] = tmp0;
1915      }
1916      break;
1917
1918   case TGSI_OPCODE_BRA:
1919      /* deprecated */
1920      assert(0);
1921      return FALSE;
1922      break;
1923
1924   case TGSI_OPCODE_CAL:
1925      lp_exec_mask_call(&bld->exec_mask,
1926                        inst->Label.Label,
1927                        pc);
1928
1929      break;
1930
1931   case TGSI_OPCODE_RET:
1932      lp_exec_mask_ret(&bld->exec_mask, pc);
1933      break;
1934
1935   case TGSI_OPCODE_END:
1936      if (0) {
1937         /* for debugging */
1938         emit_dump_temps(bld);
1939      }
1940      *pc = -1;
1941      break;
1942
1943   case TGSI_OPCODE_SSG:
1944   /* TGSI_OPCODE_SGN */
1945      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1946         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1947         dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
1948      }
1949      break;
1950
1951   case TGSI_OPCODE_CMP:
1952      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1953         src0 = emit_fetch( bld, inst, 0, chan_index );
1954         src1 = emit_fetch( bld, inst, 1, chan_index );
1955         src2 = emit_fetch( bld, inst, 2, chan_index );
1956         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
1957         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
1958      }
1959      break;
1960
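   /* SCS: sine/cosine, dst = (cos(src.x), sin(src.x), 0.0, 1.0). */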
1961   case TGSI_OPCODE_SCS:
1962      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1963         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1964         dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
1965      }
1966      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1967         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1968         dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
1969      }
1970      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1971         dst0[CHAN_Z] = bld->base.zero;
1972      }
1973      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1974         dst0[CHAN_W] = bld->base.one;
1975      }
1976      break;
1977
1978   case TGSI_OPCODE_TXB:
1979      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 );
1980      break;
1981
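   /*
    * NRM/NRM4: 3- or 4-component normalization,
    * dst = src * rsqrt(dot(src, src)), with dst.w forced to 1.0 in the
    * 3-component case.  The scalar sum of squares is accumulated first,
    * then each enabled channel is scaled by its reciprocal square root.
    */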
1982   case TGSI_OPCODE_NRM:
1983      /* fall-through */
1984   case TGSI_OPCODE_NRM4:
1985      /* 3 or 4-component normalization */
1986      {
1987         uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1988
1989         if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
1990             IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
1991             IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
1992             (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
1993
1994            /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1995
1996            /* xmm4 = src.x */
1997            /* xmm0 = src.x * src.x */
1998            tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
1999            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
2000               tmp4 = tmp0;
2001            }
2002            tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
2003
2004            /* xmm5 = src.y */
2005            /* xmm0 = xmm0 + src.y * src.y */
2006            tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
2007            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
2008               tmp5 = tmp1;
2009            }
2010            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2011            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2012
2013            /* xmm6 = src.z */
2014            /* xmm0 = xmm0 + src.z * src.z */
2015            tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
2016            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
2017               tmp6 = tmp1;
2018            }
2019            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2020            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2021
2022            if (dims == 4) {
2023               /* xmm7 = src.w */
2024               /* xmm0 = xmm0 + src.w * src.w */
2025               tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
2026               if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
2027                  tmp7 = tmp1;
2028               }
2029               tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2030               tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2031            }
2032
2033            /* xmm1 = 1 / sqrt(xmm0) */
2034            tmp1 = lp_build_rsqrt( &bld->base, tmp0);
2035
2036            /* dst.x = xmm1 * src.x */
2037            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
2038               dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
2039            }
2040
2041            /* dst.y = xmm1 * src.y */
2042            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
2043               dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
2044            }
2045
2046            /* dst.z = xmm1 * src.z */
2047            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
2048               dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
2049            }
2050
2051            /* dst.w = xmm1 * src.w */
2052            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4) {
2053               dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
2054            }
2055         }
2056
2057         /* dst.w = 1.0 */
2058         if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
2059            dst0[CHAN_W] = bld->base.one;
2060         }
2061      }
2062      break;
2063
2064   case TGSI_OPCODE_DIV:
2065      /* deprecated */
2066      assert( 0 );
2067      return FALSE;
2068      break;
2069
2070   case TGSI_OPCODE_DP2:
2071      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );  /* xmm0 = src[0].x */
2072      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );  /* xmm1 = src[1].x */
2073      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
2074      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );  /* xmm1 = src[0].y */
2075      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );  /* xmm2 = src[1].y */
2076      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
2077      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
2078      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2079         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
2080      }
2081      break;
2082
2083   case TGSI_OPCODE_TXL:
2084      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 );
2085      break;
2086
2087   case TGSI_OPCODE_TXP:
2088      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 );
2089      break;
2090
2091   case TGSI_OPCODE_BRK:
2092      lp_exec_break(&bld->exec_mask);
2093      break;
2094
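   /*
    * Structured control flow is handled with execution masks: IF pushes a
    * per-channel condition mask (src.x != 0), ELSE inverts it and ENDIF
    * pops it, so both sides of a conditional execute with write masking.
    * Loops and subroutines likewise go through the lp_exec_* helpers,
    * which track break/continue masks and call/return targets.
    */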
2095   case TGSI_OPCODE_IF:
2096      tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
2097      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
2098                          tmp0, bld->base.zero);
2099      lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
2100      break;
2101
2102   case TGSI_OPCODE_BGNLOOP:
2103      lp_exec_bgnloop(&bld->exec_mask);
2104      break;
2105
2106   case TGSI_OPCODE_BGNSUB:
2107      lp_exec_mask_bgnsub(&bld->exec_mask);
2108      break;
2109
2110   case TGSI_OPCODE_ELSE:
2111      lp_exec_mask_cond_invert(&bld->exec_mask);
2112      break;
2113
2114   case TGSI_OPCODE_ENDIF:
2115      lp_exec_mask_cond_pop(&bld->exec_mask);
2116      break;
2117
2118   case TGSI_OPCODE_ENDLOOP:
2119      lp_exec_endloop(&bld->exec_mask);
2120      break;
2121
2122   case TGSI_OPCODE_ENDSUB:
2123      lp_exec_mask_endsub(&bld->exec_mask, pc);
2124      break;
2125
2126   case TGSI_OPCODE_PUSHA:
2127      /* deprecated? */
2128      assert(0);
2129      return FALSE;
2130      break;
2131
2132   case TGSI_OPCODE_POPA:
2133      /* deprecated? */
2134      assert(0);
2135      return FALSE;
2136      break;
2137
2138   case TGSI_OPCODE_CEIL:
2139      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2140         tmp0 = emit_fetch( bld, inst, 0, chan_index );
2141         dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
2142      }
2143      break;
2144
2145   case TGSI_OPCODE_I2F:
2146      /* deprecated? */
2147      assert(0);
2148      return FALSE;
2149      break;
2150
2151   case TGSI_OPCODE_NOT:
2152      /* deprecated? */
2153      assert(0);
2154      return FALSE;
2155      break;
2156
2157   case TGSI_OPCODE_TRUNC:
2158      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2159         tmp0 = emit_fetch( bld, inst, 0, chan_index );
2160         dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
2161      }
2162      break;
2163
2164   case TGSI_OPCODE_SHL:
2165      /* deprecated? */
2166      assert(0);
2167      return FALSE;
2168      break;
2169
2170   case TGSI_OPCODE_ISHR:
2171      /* deprecated? */
2172      assert(0);
2173      return FALSE;
2174      break;
2175
2176   case TGSI_OPCODE_AND:
2177      /* deprecated? */
2178      assert(0);
2179      return FALSE;
2180      break;
2181
2182   case TGSI_OPCODE_OR:
2183      /* deprecated? */
2184      assert(0);
2185      return FALSE;
2186      break;
2187
2188   case TGSI_OPCODE_MOD:
2189      /* deprecated? */
2190      assert(0);
2191      return FALSE;
2192      break;
2193
2194   case TGSI_OPCODE_XOR:
2195      /* deprecated? */
2196      assert(0);
2197      return FALSE;
2198      break;
2199
2200   case TGSI_OPCODE_SAD:
2201      /* deprecated? */
2202      assert(0);
2203      return FALSE;
2204      break;
2205
2206   case TGSI_OPCODE_TXF:
2207      /* deprecated? */
2208      assert(0);
2209      return FALSE;
2210      break;
2211
2212   case TGSI_OPCODE_TXQ:
2213      /* deprecated? */
2214      assert(0);
2215      return FALSE;
2216      break;
2217
2218   case TGSI_OPCODE_CONT:
2219      lp_exec_continue(&bld->exec_mask);
2220      break;
2221
2222   case TGSI_OPCODE_EMIT:
2223      return FALSE;
2224      break;
2225
2226   case TGSI_OPCODE_ENDPRIM:
2227      return FALSE;
2228      break;
2229
2230   case TGSI_OPCODE_NOP:
2231      break;
2232
2233   default:
2234      return FALSE;
2235   }
2236
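   /*
    * Finally store the per-channel results computed above.  The destination
    * predicate is fetched here and passed to emit_store() so that writes
    * can be masked per channel.
    */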
2237   if (info->num_dst) {
2238      LLVMValueRef pred[NUM_CHANNELS];
2239
2240      emit_fetch_predicate( bld, inst, pred );
2241
2242      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2243         emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
2244      }
2245   }
2246
2247   return TRUE;
2248}
2249
2250
2251void
2252lp_build_tgsi_soa(LLVMBuilderRef builder,
2253                  const struct tgsi_token *tokens,
2254                  struct lp_type type,
2255                  struct lp_build_mask_context *mask,
2256                  LLVMValueRef consts_ptr,
2257                  const LLVMValueRef *pos,
2258                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
2259                  LLVMValueRef (*outputs)[NUM_CHANNELS],
2260                  struct lp_build_sampler_soa *sampler,
2261                  const struct tgsi_shader_info *info)
2262{
2263   struct lp_build_tgsi_soa_context bld;
2264   struct tgsi_parse_context parse;
2265   uint num_immediates = 0;
2266   uint num_instructions = 0;
2267   unsigned i;
2268   int pc = 0;
2269
2270   struct lp_type res_type;
2271
2272   assert(type.length <= LP_MAX_VECTOR_LENGTH);
2273   memset(&res_type, 0, sizeof res_type);
2274   res_type.width = type.width;
2275   res_type.length = type.length;
2276   res_type.sign = 1;
2277
2278   /* Setup build context */
2279   memset(&bld, 0, sizeof bld);
2280   lp_build_context_init(&bld.base, builder, type);
2281   lp_build_context_init(&bld.uint_bld, builder, lp_uint_type(type));
2282   lp_build_context_init(&bld.elem_bld, builder, lp_elem_type(type));
2283   bld.mask = mask;
2284   bld.pos = pos;
2285   bld.inputs = inputs;
2286   bld.outputs = outputs;
2287   bld.consts_ptr = consts_ptr;
2288   bld.sampler = sampler;
2289   bld.info = info;
2290   bld.indirect_files = info->indirect_files;
2291   bld.instructions = (struct tgsi_full_instruction *)
2292                      MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
2293   bld.max_instructions = LP_MAX_INSTRUCTIONS;
2294
2295   if (!bld.instructions) {
2296      return;
2297   }
2298
2299   lp_exec_mask_init(&bld.exec_mask, &bld.base);
2300
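   /*
    * Register files accessed with indirect (relative) addressing cannot be
    * kept as plain SSA values; allocate a memory array of
    * (file_max + 1) * 4 channel vectors instead, so they can be indexed
    * dynamically.
    */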
2301   if (bld.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
2302      LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
2303                                             info->file_max[TGSI_FILE_TEMPORARY]*4 + 4, 0);
2304      bld.temps_array = lp_build_array_alloca(bld.base.builder,
2305                                              bld.base.vec_type, array_size,
2306                                              "temp_array");
2307   }
2308
2309   if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2310      LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
2311                                             info->file_max[TGSI_FILE_OUTPUT]*4 + 4, 0);
2312      bld.outputs_array = lp_build_array_alloca(bld.base.builder,
2313                                                bld.base.vec_type, array_size,
2314                                                "output_array");
2315   }
2316
2317   tgsi_parse_init( &parse, tokens );
2318
2319   while( !tgsi_parse_end_of_tokens( &parse ) ) {
2320      tgsi_parse_token( &parse );
2321
2322      switch( parse.FullToken.Token.Type ) {
2323      case TGSI_TOKEN_TYPE_DECLARATION:
2324         /* Inputs already interpolated */
2325         emit_declaration( &bld, &parse.FullToken.FullDeclaration );
2326         break;
2327
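      /*
       * Instructions are only buffered here; they are emitted in a second
       * pass below, driven by 'pc', which lets CAL/RET/END redirect
       * execution by instruction index.
       */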
2328      case TGSI_TOKEN_TYPE_INSTRUCTION:
2329         {
2330            /* save expanded instruction */
2331            if (num_instructions == bld.max_instructions) {
2332               struct tgsi_full_instruction *instructions;
2333               instructions = REALLOC(bld.instructions,
2334                                      bld.max_instructions
2335                                      * sizeof(struct tgsi_full_instruction),
2336                                      (bld.max_instructions + LP_MAX_INSTRUCTIONS)
2337                                      * sizeof(struct tgsi_full_instruction));
2338               if (!instructions) {
2339                  break;
2340               }
2341               bld.instructions = instructions;
2342               bld.max_instructions += LP_MAX_INSTRUCTIONS;
2343            }
2344
2345            memcpy(bld.instructions + num_instructions,
2346                   &parse.FullToken.FullInstruction,
2347                   sizeof(bld.instructions[0]));
2348
2349            num_instructions++;
2350         }
2351
2352         break;
2353
2354      case TGSI_TOKEN_TYPE_IMMEDIATE:
2355         /* simply copy the immediate values into the next immediates[] slot */
2356         {
2357            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
2358            assert(size <= 4);
2359            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
2360            for( i = 0; i < size; ++i )
2361               bld.immediates[num_immediates][i] =
2362                  lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
2363            for( i = size; i < 4; ++i )
2364               bld.immediates[num_immediates][i] = bld.base.undef;
2365            num_immediates++;
2366         }
2367         break;
2368
2369      case TGSI_TOKEN_TYPE_PROPERTY:
2370         break;
2371
2372      default:
2373         assert( 0 );
2374      }
2375   }
2376
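   /*
    * Second pass: translate the buffered instructions.  'pc' advances as
    * each instruction is emitted; flow-control opcodes adjust it and END
    * sets it to -1 to terminate the loop.
    */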
2377   while (pc != -1) {
2378      struct tgsi_full_instruction *instr = bld.instructions + pc;
2379      const struct tgsi_opcode_info *opcode_info =
2380         tgsi_get_opcode_info(instr->Instruction.Opcode);
2381      if (!emit_instruction( &bld, instr, opcode_info, &pc ))
2382         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
2383                       opcode_info->mnemonic);
2384   }
2385
2386   /* If we have indirect addressing of the outputs we need to copy the
2387    * values from our alloca array to the output slots specified by the caller */
2388   if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2389      unsigned index, chan;
2390      assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] + 1);
2391      for (index = 0; index < info->num_outputs; ++index) {
2392         for (chan = 0; chan < NUM_CHANNELS; ++chan) {
2393            bld.outputs[index][chan] = get_output_ptr(&bld, index, chan);
2394         }
2395      }
2396   }
2397
2398   if (0) {
2399      LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
2400      LLVMValueRef function = LLVMGetBasicBlockParent(block);
2401      debug_printf("11111111111111111111111111111 \n");
2402      tgsi_dump(tokens, 0);
2403      lp_debug_dump_value(function);
2404      debug_printf("2222222222222222222222222222 \n");
2405   }
2406   tgsi_parse_free( &parse );
2407
2408   if (0) {
2409      LLVMModuleRef module = LLVMGetGlobalParent(
2410         LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
2411      LLVMDumpModule(module);
2412
2413   }
2414
2415   FREE( bld.instructions );
2416}
2417
2418