lp_bld_tgsi_soa.c revision 1d6f3543a063ab9e740fd0c149dcce26c282d773
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29/**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39#include "pipe/p_config.h"
40#include "pipe/p_shader_tokens.h"
41#include "util/u_debug.h"
42#include "util/u_math.h"
43#include "util/u_memory.h"
44#include "tgsi/tgsi_dump.h"
45#include "tgsi/tgsi_info.h"
46#include "tgsi/tgsi_parse.h"
47#include "tgsi/tgsi_util.h"
48#include "tgsi/tgsi_scan.h"
49#include "lp_bld_type.h"
50#include "lp_bld_const.h"
51#include "lp_bld_arit.h"
52#include "lp_bld_bitarit.h"
53#include "lp_bld_gather.h"
54#include "lp_bld_logic.h"
55#include "lp_bld_swizzle.h"
56#include "lp_bld_flow.h"
57#include "lp_bld_quad.h"
58#include "lp_bld_tgsi.h"
59#include "lp_bld_limits.h"
60#include "lp_bld_debug.h"
61#include "lp_bld_printf.h"
62
63
/** Iterate CHAN over every channel index, 0 .. NUM_CHANNELS - 1. */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/** Non-zero when bit CHAN is set in the write mask of INST's destination 0. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

/**
 * Expands to an un-braced `if': the statement that follows the macro becomes
 * its body, so a trailing `else' would bind to this hidden `if'.
 */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/** Iterate CHAN over only the channels enabled in destination 0's write mask. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Channel indices of a register, and the number of channels per register. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
#define NUM_CHANNELS 4

/*
 * NOTE(review): not referenced in the visible part of this file; presumably
 * the initial capacity of the `instructions' array -- confirm at its use.
 */
#define LP_MAX_INSTRUCTIONS 256
84
85
/**
 * Per-lane execution mask state used to emulate TGSI control flow
 * (conditionals, loops and subroutine calls).
 *
 * All masks are LLVM values of type int_vec_type.  lp_exec_mask_update()
 * combines the individual masks into exec_mask.
 */
struct lp_exec_mask {
   /** Build context whose builder emits the mask instructions */
   struct lp_build_context *bld;

   /** TRUE while at least one conditional, loop or call is open */
   boolean has_mask;

   /** Integer vector type derived from bld->type */
   LLVMTypeRef int_vec_type;

   /** cond_mask values saved by lp_exec_mask_cond_push() */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   /** Mask of the innermost open conditional; all ones outside of any */
   LLVMValueRef cond_mask;

   /** Block that lp_exec_endloop() branches back to */
   LLVMBasicBlockRef loop_block;
   /** Lanes are cleared here by lp_exec_continue() */
   LLVMValueRef cont_mask;
   /** Lanes are cleared here by lp_exec_break() */
   LLVMValueRef break_mask;
   /** Variable that carries break_mask from one loop iteration to the next */
   LLVMValueRef break_var;
   /** State of the enclosing loops, saved by lp_exec_bgnloop() */
   struct {
      LLVMBasicBlockRef loop_block;
      LLVMValueRef cont_mask;
      LLVMValueRef break_mask;
      LLVMValueRef break_var;
   } loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;

   /** Lanes are cleared here by lp_exec_mask_ret() */
   LLVMValueRef ret_mask;
   /** Return pc and caller's ret_mask, saved by lp_exec_mask_call() */
   struct {
      int pc;
      LLVMValueRef ret_mask;
   } call_stack[LP_MAX_TGSI_NESTING];
   int call_stack_size;

   /** Combined mask, recomputed by lp_exec_mask_update() */
   LLVMValueRef exec_mask;
};
118
/**
 * State shared by the TGSI -> LLVM IR (SoA) translation helpers in this file.
 */
struct lp_build_tgsi_soa_context
{
   /* Builder for the shader's vector data type */
   struct lp_build_context base;

   /* Builder for vector integer masks and indices */
   struct lp_build_context uint_bld;

   /* Builder for scalar elements of shader's data type (float) */
   struct lp_build_context elem_bld;

   /* Constant buffer; indexed as scalars with (register * 4 + channel) */
   LLVMValueRef consts_ptr;
   /* NOTE(review): not used in the visible part of the file; presumably the
    * fragment position -- confirm. */
   const LLVMValueRef *pos;
   /* Per-register, per-channel input values (used directly, not loaded) */
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   /* Per-register, per-channel pointers to the output variables */
   LLVMValueRef (*outputs)[NUM_CHANNELS];

   const struct lp_build_sampler_soa *sampler;

   /* Immediates are stored as values; the other files below as pointers
    * that are loaded from / stored to. */
   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];

   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
    * set in the indirect_files field.
    * The temps[] array above is unused then.
    * Element (register * 4 + channel) holds one whole vector.
    */
   LLVMValueRef temps_array;

   /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is
    * set in the indirect_files field.
    * The outputs[] array above is unused then.
    */
   LLVMValueRef outputs_array;

   /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is
    * set in the indirect_files field.
    * The inputs[] array above is unused then.
    */
   LLVMValueRef inputs_array;

   /* System values; indexed as scalars with (register * 4 + channel) */
   LLVMValueRef system_values_array;

   /* Shader info; file_max[] is used to bound register indices */
   const struct tgsi_shader_info *info;
   /** bitmask indicating which register files are accessed indirectly */
   unsigned indirect_files;

   /* NOTE(review): not used in the visible part of the file; presumably the
    * externally supplied live-pixel mask -- confirm. */
   struct lp_build_mask_context *mask;
   /* Control-flow execution mask applied to register stores */
   struct lp_exec_mask exec_mask;

   /* NOTE(review): presumably the parsed instruction list and its allocated
    * capacity; their use is outside the visible part of the file. */
   struct tgsi_full_instruction *instructions;
   uint max_instructions;
};
171
172static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
173{
174   mask->bld = bld;
175   mask->has_mask = FALSE;
176   mask->cond_stack_size = 0;
177   mask->loop_stack_size = 0;
178   mask->call_stack_size = 0;
179
180   mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
181   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
182         LLVMConstAllOnes(mask->int_vec_type);
183}
184
185static void lp_exec_mask_update(struct lp_exec_mask *mask)
186{
187   if (mask->loop_stack_size) {
188      /*for loops we need to update the entire mask at runtime */
189      LLVMValueRef tmp;
190      assert(mask->break_mask);
191      tmp = LLVMBuildAnd(mask->bld->builder,
192                         mask->cont_mask,
193                         mask->break_mask,
194                         "maskcb");
195      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
196                                     mask->cond_mask,
197                                     tmp,
198                                     "maskfull");
199   } else
200      mask->exec_mask = mask->cond_mask;
201
202   if (mask->call_stack_size) {
203      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
204                                     mask->exec_mask,
205                                     mask->ret_mask,
206                                     "callmask");
207   }
208
209   mask->has_mask = (mask->cond_stack_size > 0 ||
210                     mask->loop_stack_size > 0 ||
211                     mask->call_stack_size > 0);
212}
213
214static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
215                                   LLVMValueRef val)
216{
217   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
218   if (mask->cond_stack_size == 0) {
219      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
220   }
221   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
222   assert(LLVMTypeOf(val) == mask->int_vec_type);
223   mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
224                                  mask->cond_mask,
225                                  val,
226                                  "");
227   lp_exec_mask_update(mask);
228}
229
230static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
231{
232   LLVMValueRef prev_mask;
233   LLVMValueRef inv_mask;
234
235   assert(mask->cond_stack_size);
236   prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
237   if (mask->cond_stack_size == 1) {
238      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
239   }
240
241   inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");
242
243   mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
244                                  inv_mask,
245                                  prev_mask, "");
246   lp_exec_mask_update(mask);
247}
248
249static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
250{
251   assert(mask->cond_stack_size);
252   mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
253   lp_exec_mask_update(mask);
254}
255
256static void lp_exec_bgnloop(struct lp_exec_mask *mask)
257{
258   if (mask->loop_stack_size == 0) {
259      assert(mask->loop_block == NULL);
260      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
261      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
262      assert(mask->break_var == NULL);
263   }
264
265   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
266
267   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
268   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
269   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
270   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
271   ++mask->loop_stack_size;
272
273   mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
274   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
275
276   mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
277   LLVMBuildBr(mask->bld->builder, mask->loop_block);
278   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
279
280   mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");
281
282   lp_exec_mask_update(mask);
283}
284
285static void lp_exec_break(struct lp_exec_mask *mask)
286{
287   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
288                                         mask->exec_mask,
289                                         "break");
290
291   mask->break_mask = LLVMBuildAnd(mask->bld->builder,
292                                   mask->break_mask,
293                                   exec_mask, "break_full");
294
295   lp_exec_mask_update(mask);
296}
297
298static void lp_exec_continue(struct lp_exec_mask *mask)
299{
300   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
301                                         mask->exec_mask,
302                                         "");
303
304   mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
305                                  mask->cont_mask,
306                                  exec_mask, "");
307
308   lp_exec_mask_update(mask);
309}
310
311
312static void lp_exec_endloop(struct lp_exec_mask *mask)
313{
314   LLVMBasicBlockRef endloop;
315   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
316                                      mask->bld->type.length);
317   LLVMValueRef i1cond;
318
319   assert(mask->break_mask);
320
321   /*
322    * Restore the cont_mask, but don't pop
323    */
324   assert(mask->loop_stack_size);
325   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
326   lp_exec_mask_update(mask);
327
328   /*
329    * Unlike the continue mask, the break_mask must be preserved across loop
330    * iterations
331    */
332   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);
333
334   /* i1cond = (mask == 0) */
335   i1cond = LLVMBuildICmp(
336      mask->bld->builder,
337      LLVMIntNE,
338      LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
339      LLVMConstNull(reg_type), "");
340
341   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");
342
343   LLVMBuildCondBr(mask->bld->builder,
344                   i1cond, mask->loop_block, endloop);
345
346   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);
347
348   assert(mask->loop_stack_size);
349   --mask->loop_stack_size;
350   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
351   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
352   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
353   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
354
355   lp_exec_mask_update(mask);
356}
357
358/* stores val into an address pointed to by dst.
359 * mask->exec_mask is used to figure out which bits of val
360 * should be stored into the address
361 * (0 means don't store this bit, 1 means do store).
362 */
363static void lp_exec_mask_store(struct lp_exec_mask *mask,
364                               LLVMValueRef pred,
365                               LLVMValueRef val,
366                               LLVMValueRef dst)
367{
368   /* Mix the predicate and execution mask */
369   if (mask->has_mask) {
370      if (pred) {
371         pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
372      } else {
373         pred = mask->exec_mask;
374      }
375   }
376
377   if (pred) {
378      LLVMValueRef real_val, dst_val;
379
380      dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
381      real_val = lp_build_select(mask->bld,
382                                 pred,
383                                 val, dst_val);
384
385      LLVMBuildStore(mask->bld->builder, real_val, dst);
386   } else
387      LLVMBuildStore(mask->bld->builder, val, dst);
388}
389
390static void lp_exec_mask_call(struct lp_exec_mask *mask,
391                              int func,
392                              int *pc)
393{
394   assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
395   mask->call_stack[mask->call_stack_size].pc = *pc;
396   mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
397   mask->call_stack_size++;
398   *pc = func;
399}
400
401static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
402{
403   LLVMValueRef exec_mask;
404
405   if (mask->call_stack_size == 0) {
406      /* returning from main() */
407      *pc = -1;
408      return;
409   }
410   exec_mask = LLVMBuildNot(mask->bld->builder,
411                            mask->exec_mask,
412                            "ret");
413
414   mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
415                                 mask->ret_mask,
416                                 exec_mask, "ret_full");
417
418   lp_exec_mask_update(mask);
419}
420
/**
 * BGNSUB: nothing to do, the mask state is set up by lp_exec_mask_call().
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
   (void) mask;
}
424
425static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
426{
427   assert(mask->call_stack_size);
428   mask->call_stack_size--;
429   *pc = mask->call_stack[mask->call_stack_size].pc;
430   mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
431   lp_exec_mask_update(mask);
432}
433
434
435/**
436 * Return pointer to a temporary register channel (src or dest).
437 * Note that indirect addressing cannot be handled here.
438 * \param index  which temporary register
439 * \param chan  which channel of the temp register.
440 */
441static LLVMValueRef
442get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
443             unsigned index,
444             unsigned chan)
445{
446   assert(chan < 4);
447   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
448      LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
449      return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
450   }
451   else {
452      return bld->temps[index][chan];
453   }
454}
455
456/**
457 * Return pointer to a output register channel (src or dest).
458 * Note that indirect addressing cannot be handled here.
459 * \param index  which output register
460 * \param chan  which channel of the output register.
461 */
462static LLVMValueRef
463get_output_ptr(struct lp_build_tgsi_soa_context *bld,
464               unsigned index,
465               unsigned chan)
466{
467   assert(chan < 4);
468   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
469      LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
470      return LLVMBuildGEP(bld->base.builder, bld->outputs_array, &lindex, 1, "");
471   }
472   else {
473      return bld->outputs[index][chan];
474   }
475}
476
477/**
478 * Gather vector.
479 * XXX the lp_build_gather() function should be capable of doing this
480 * with a little work.
481 */
482static LLVMValueRef
483build_gather(struct lp_build_tgsi_soa_context *bld,
484             LLVMValueRef base_ptr,
485             LLVMValueRef indexes)
486{
487   LLVMValueRef res = bld->base.undef;
488   unsigned i;
489
490   /*
491    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
492    */
493   for (i = 0; i < bld->base.type.length; i++) {
494      LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
495      LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder,
496                                                   indexes, ii, "");
497      LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr,
498                                             &index, 1, "gather_ptr");
499      LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
500
501      res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, "");
502   }
503
504   return res;
505}
506
507
508/**
509 * Scatter/store vector.
510 */
511static void
512emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
513                  LLVMValueRef base_ptr,
514                  LLVMValueRef indexes,
515                  LLVMValueRef values,
516                  struct lp_exec_mask *mask,
517                  LLVMValueRef pred)
518{
519   LLVMBuilderRef builder = bld->base.builder;
520   unsigned i;
521
522   /* Mix the predicate and execution mask */
523   if (mask->has_mask) {
524      if (pred) {
525         pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
526      }
527      else {
528         pred = mask->exec_mask;
529      }
530   }
531
532   /*
533    * Loop over elements of index_vec, store scalar value.
534    */
535   for (i = 0; i < bld->base.type.length; i++) {
536      LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
537      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
538      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
539      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
540      LLVMValueRef scalar_pred = pred ?
541         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
542
543      if (0)
544         lp_build_printf(builder, "scatter %d: val %f at %d %p\n",
545                         ii, val, index, scalar_ptr);
546
547      if (scalar_pred) {
548         LLVMValueRef real_val, dst_val;
549         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
550         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
551         LLVMBuildStore(builder, real_val, scalar_ptr);
552      }
553      else {
554         LLVMBuildStore(builder, val, scalar_ptr);
555      }
556   }
557}
558
559
560/**
561 * Read the current value of the ADDR register, convert the floats to
562 * ints, add the base index and return the vector of offsets.
563 * The offsets will be used to index into the constant buffer or
564 * temporary register file.
565 */
566static LLVMValueRef
567get_indirect_index(struct lp_build_tgsi_soa_context *bld,
568                   unsigned reg_file, unsigned reg_index,
569                   const struct tgsi_src_register *indirect_reg)
570{
571   struct lp_build_context *uint_bld = &bld->uint_bld;
572   /* always use X component of address register */
573   unsigned swizzle = indirect_reg->SwizzleX;
574   LLVMValueRef base;
575   LLVMValueRef rel;
576   LLVMValueRef max_index;
577   LLVMValueRef index;
578
579   assert(bld->indirect_files & (1 << reg_file));
580
581   base = lp_build_const_int_vec(uint_bld->type, reg_index);
582
583   assert(swizzle < 4);
584   rel = LLVMBuildLoad(bld->base.builder,
585                        bld->addr[indirect_reg->Index][swizzle],
586                        "load addr reg");
587
588   /* for indexing we want integers */
589   rel = LLVMBuildFPToSI(bld->base.builder,
590                         rel,
591                         uint_bld->vec_type, "");
592
593   index = lp_build_add(uint_bld, base, rel);
594
595   max_index = lp_build_const_int_vec(uint_bld->type,
596                                      bld->info->file_max[reg_file]);
597
598   assert(!uint_bld->type.sign);
599   index = lp_build_min(uint_bld, index, max_index);
600
601   return index;
602}
603
604
605/**
606 * Register fetch.
607 */
608static LLVMValueRef
609emit_fetch(
610   struct lp_build_tgsi_soa_context *bld,
611   const struct tgsi_full_instruction *inst,
612   unsigned src_op,
613   const unsigned chan_index )
614{
615   struct lp_build_context *uint_bld = &bld->uint_bld;
616   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
617   const unsigned swizzle =
618      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
619   LLVMValueRef res;
620   LLVMValueRef indirect_index = NULL;
621
622   if (swizzle > 3) {
623      assert(0 && "invalid swizzle in emit_fetch()");
624      return bld->base.undef;
625   }
626
627   if (reg->Register.Indirect) {
628      indirect_index = get_indirect_index(bld,
629                                          reg->Register.File,
630                                          reg->Register.Index,
631                                          &reg->Indirect);
632   } else {
633      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
634   }
635
636   switch (reg->Register.File) {
637   case TGSI_FILE_CONSTANT:
638      if (reg->Register.Indirect) {
639         LLVMValueRef swizzle_vec =
640            lp_build_const_int_vec(uint_bld->type, swizzle);
641         LLVMValueRef index_vec;  /* index into the const buffer */
642
643         /* index_vec = indirect_index * 4 + swizzle */
644         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
645         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
646
647         /* Gather values from the constant buffer */
648         res = build_gather(bld, bld->consts_ptr, index_vec);
649      }
650      else {
651         LLVMValueRef index;  /* index into the const buffer */
652         LLVMValueRef scalar, scalar_ptr;
653
654         index = lp_build_const_int32(reg->Register.Index*4 + swizzle);
655
656         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
657                                   &index, 1, "");
658         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
659
660         res = lp_build_broadcast_scalar(&bld->base, scalar);
661      }
662      break;
663
664   case TGSI_FILE_IMMEDIATE:
665      res = bld->immediates[reg->Register.Index][swizzle];
666      assert(res);
667      break;
668
669   case TGSI_FILE_INPUT:
670      if (reg->Register.Indirect) {
671         LLVMValueRef swizzle_vec =
672            lp_build_const_int_vec(uint_bld->type, swizzle);
673         LLVMValueRef length_vec =
674            lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
675         LLVMValueRef index_vec;  /* index into the const buffer */
676         LLVMValueRef inputs_array;
677         LLVMTypeRef float4_ptr_type;
678
679         /* index_vec = (indirect_index * 4 + swizzle) * length */
680         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
681         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
682         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
683
684         /* cast inputs_array pointer to float* */
685         float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
686         inputs_array = LLVMBuildBitCast(uint_bld->builder, bld->inputs_array,
687                                        float4_ptr_type, "");
688
689         /* Gather values from the temporary register array */
690         res = build_gather(bld, inputs_array, index_vec);
691      } else {
692         if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
693            LLVMValueRef lindex = lp_build_const_int32(reg->Register.Index * 4 + swizzle);
694            LLVMValueRef input_ptr =  LLVMBuildGEP(bld->base.builder,
695                                                   bld->inputs_array, &lindex, 1, "");
696            res = LLVMBuildLoad(bld->base.builder, input_ptr, "");
697         }
698         else {
699            res = bld->inputs[reg->Register.Index][swizzle];
700         }
701      }
702      assert(res);
703      break;
704
705   case TGSI_FILE_TEMPORARY:
706      if (reg->Register.Indirect) {
707         LLVMValueRef swizzle_vec =
708            lp_build_const_int_vec(uint_bld->type, swizzle);
709         LLVMValueRef length_vec =
710            lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
711         LLVMValueRef index_vec;  /* index into the const buffer */
712         LLVMValueRef temps_array;
713         LLVMTypeRef float4_ptr_type;
714
715         /* index_vec = (indirect_index * 4 + swizzle) * length */
716         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
717         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
718         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
719
720         /* cast temps_array pointer to float* */
721         float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
722         temps_array = LLVMBuildBitCast(uint_bld->builder, bld->temps_array,
723                                        float4_ptr_type, "");
724
725         /* Gather values from the temporary register array */
726         res = build_gather(bld, temps_array, index_vec);
727      }
728      else {
729         LLVMValueRef temp_ptr;
730         temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
731         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
732         if (!res)
733            return bld->base.undef;
734      }
735      break;
736
737   case TGSI_FILE_SYSTEM_VALUE:
738      assert(!reg->Register.Indirect);
739      {
740         LLVMValueRef index;  /* index into the system value array */
741         LLVMValueRef scalar, scalar_ptr;
742
743         index = lp_build_const_int32(reg->Register.Index * 4 + swizzle);
744
745         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->system_values_array,
746                                   &index, 1, "");
747         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
748
749         res = lp_build_broadcast_scalar(&bld->base, scalar);
750      }
751      break;
752
753   default:
754      assert(0 && "invalid src register in emit_fetch()");
755      return bld->base.undef;
756   }
757
758   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
759   case TGSI_UTIL_SIGN_CLEAR:
760      res = lp_build_abs( &bld->base, res );
761      break;
762
763   case TGSI_UTIL_SIGN_SET:
764      res = lp_build_abs( &bld->base, res );
765      /* fall through */
766   case TGSI_UTIL_SIGN_TOGGLE:
767      res = lp_build_negate( &bld->base, res );
768      break;
769
770   case TGSI_UTIL_SIGN_KEEP:
771      break;
772   }
773
774   return res;
775}
776
777
778/**
779 * Register fetch with derivatives.
780 */
781static void
782emit_fetch_deriv(
783   struct lp_build_tgsi_soa_context *bld,
784   const struct tgsi_full_instruction *inst,
785   unsigned index,
786   const unsigned chan_index,
787   LLVMValueRef *res,
788   LLVMValueRef *ddx,
789   LLVMValueRef *ddy)
790{
791   LLVMValueRef src;
792
793   src = emit_fetch(bld, inst, index, chan_index);
794
795   if(res)
796      *res = src;
797
798   /* TODO: use interpolation coeffs for inputs */
799
800   if(ddx)
801      *ddx = lp_build_ddx(&bld->base, src);
802
803   if(ddy)
804      *ddy = lp_build_ddy(&bld->base, src);
805}
806
807
808/**
809 * Predicate.
810 */
811static void
812emit_fetch_predicate(
813   struct lp_build_tgsi_soa_context *bld,
814   const struct tgsi_full_instruction *inst,
815   LLVMValueRef *pred)
816{
817   unsigned index;
818   unsigned char swizzles[4];
819   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
820   LLVMValueRef value;
821   unsigned chan;
822
823   if (!inst->Instruction.Predicate) {
824      FOR_EACH_CHANNEL( chan ) {
825         pred[chan] = NULL;
826      }
827      return;
828   }
829
830   swizzles[0] = inst->Predicate.SwizzleX;
831   swizzles[1] = inst->Predicate.SwizzleY;
832   swizzles[2] = inst->Predicate.SwizzleZ;
833   swizzles[3] = inst->Predicate.SwizzleW;
834
835   index = inst->Predicate.Index;
836   assert(index < LP_MAX_TGSI_PREDS);
837
838   FOR_EACH_CHANNEL( chan ) {
839      unsigned swizzle = swizzles[chan];
840
841      /*
842       * Only fetch the predicate register channels that are actually listed
843       * in the swizzles
844       */
845      if (!unswizzled[swizzle]) {
846         value = LLVMBuildLoad(bld->base.builder,
847                               bld->preds[index][swizzle], "");
848
849         /*
850          * Convert the value to an integer mask.
851          *
852          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
853          * is needlessly causing two comparisons due to storing the intermediate
854          * result as float vector instead of an integer mask vector.
855          */
856         value = lp_build_compare(bld->base.builder,
857                                  bld->base.type,
858                                  PIPE_FUNC_NOTEQUAL,
859                                  value,
860                                  bld->base.zero);
861         if (inst->Predicate.Negate) {
862            value = LLVMBuildNot(bld->base.builder, value, "");
863         }
864
865         unswizzled[swizzle] = value;
866      } else {
867         value = unswizzled[swizzle];
868      }
869
870      pred[chan] = value;
871   }
872}
873
874
875/**
876 * Register store.
877 */
878static void
879emit_store(
880   struct lp_build_tgsi_soa_context *bld,
881   const struct tgsi_full_instruction *inst,
882   unsigned index,
883   unsigned chan_index,
884   LLVMValueRef pred,
885   LLVMValueRef value)
886{
887   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
888   struct lp_build_context *uint_bld = &bld->uint_bld;
889   LLVMValueRef indirect_index = NULL;
890
891   switch( inst->Instruction.Saturate ) {
892   case TGSI_SAT_NONE:
893      break;
894
895   case TGSI_SAT_ZERO_ONE:
896      value = lp_build_max(&bld->base, value, bld->base.zero);
897      value = lp_build_min(&bld->base, value, bld->base.one);
898      break;
899
900   case TGSI_SAT_MINUS_PLUS_ONE:
901      value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
902      value = lp_build_min(&bld->base, value, bld->base.one);
903      break;
904
905   default:
906      assert(0);
907   }
908
909   if (reg->Register.Indirect) {
910      indirect_index = get_indirect_index(bld,
911                                          reg->Register.File,
912                                          reg->Register.Index,
913                                          &reg->Indirect);
914   } else {
915      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
916   }
917
918   switch( reg->Register.File ) {
919   case TGSI_FILE_OUTPUT:
920      if (reg->Register.Indirect) {
921         LLVMBuilderRef builder = bld->base.builder;
922         LLVMValueRef chan_vec =
923            lp_build_const_int_vec(uint_bld->type, chan_index);
924         LLVMValueRef length_vec =
925            lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
926         LLVMValueRef index_vec;  /* indexes into the temp registers */
927         LLVMValueRef outputs_array;
928         LLVMValueRef pixel_offsets;
929         LLVMTypeRef float_ptr_type;
930         int i;
931
932         /* build pixel offset vector: {0, 1, 2, 3, ...} */
933         pixel_offsets = uint_bld->undef;
934         for (i = 0; i < bld->base.type.length; i++) {
935            LLVMValueRef ii = lp_build_const_int32(i);
936            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
937                                                   ii, ii, "");
938         }
939
940         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
941         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
942         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
943         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
944         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
945
946         float_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
947         outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
948                                          float_ptr_type, "");
949
950         /* Scatter store values into temp registers */
951         emit_mask_scatter(bld, outputs_array, index_vec, value,
952                           &bld->exec_mask, pred);
953      }
954      else {
955         LLVMValueRef out_ptr = get_output_ptr(bld, reg->Register.Index,
956                                               chan_index);
957         lp_exec_mask_store(&bld->exec_mask, pred, value, out_ptr);
958      }
959      break;
960
961   case TGSI_FILE_TEMPORARY:
962      if (reg->Register.Indirect) {
963         LLVMBuilderRef builder = bld->base.builder;
964         LLVMValueRef chan_vec =
965            lp_build_const_int_vec(uint_bld->type, chan_index);
966         LLVMValueRef length_vec =
967            lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
968         LLVMValueRef index_vec;  /* indexes into the temp registers */
969         LLVMValueRef temps_array;
970         LLVMValueRef pixel_offsets;
971         LLVMTypeRef float_ptr_type;
972         int i;
973
974         /* build pixel offset vector: {0, 1, 2, 3, ...} */
975         pixel_offsets = uint_bld->undef;
976         for (i = 0; i < bld->base.type.length; i++) {
977            LLVMValueRef ii = lp_build_const_int32(i);
978            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
979                                                   ii, ii, "");
980         }
981
982         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
983         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
984         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
985         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
986         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
987
988         float_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
989         temps_array = LLVMBuildBitCast(builder, bld->temps_array,
990                                        float_ptr_type, "");
991
992         /* Scatter store values into temp registers */
993         emit_mask_scatter(bld, temps_array, index_vec, value,
994                           &bld->exec_mask, pred);
995      }
996      else {
997         LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
998                                              chan_index);
999         lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
1000      }
1001      break;
1002
1003   case TGSI_FILE_ADDRESS:
1004      lp_exec_mask_store(&bld->exec_mask, pred, value,
1005                         bld->addr[reg->Indirect.Index][chan_index]);
1006      break;
1007
1008   case TGSI_FILE_PREDICATE:
1009      lp_exec_mask_store(&bld->exec_mask, pred, value,
1010                         bld->preds[reg->Register.Index][chan_index]);
1011      break;
1012
1013   default:
1014      assert( 0 );
1015   }
1016}
1017
1018
1019/**
1020 * High-level instruction translators.
1021 */
1022
1023static void
1024emit_tex( struct lp_build_tgsi_soa_context *bld,
1025          const struct tgsi_full_instruction *inst,
1026          enum lp_build_tex_modifier modifier,
1027          LLVMValueRef *texel)
1028{
1029   unsigned unit;
1030   LLVMValueRef lod_bias, explicit_lod;
1031   LLVMValueRef oow = NULL;
1032   LLVMValueRef coords[3];
1033   LLVMValueRef ddx[3];
1034   LLVMValueRef ddy[3];
1035   unsigned num_coords;
1036   unsigned i;
1037
1038   if (!bld->sampler) {
1039      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1040      for (i = 0; i < 4; i++) {
1041         texel[i] = bld->base.undef;
1042      }
1043      return;
1044   }
1045
1046   switch (inst->Texture.Texture) {
1047   case TGSI_TEXTURE_1D:
1048      num_coords = 1;
1049      break;
1050   case TGSI_TEXTURE_2D:
1051   case TGSI_TEXTURE_RECT:
1052      num_coords = 2;
1053      break;
1054   case TGSI_TEXTURE_SHADOW1D:
1055   case TGSI_TEXTURE_SHADOW2D:
1056   case TGSI_TEXTURE_SHADOWRECT:
1057   case TGSI_TEXTURE_3D:
1058   case TGSI_TEXTURE_CUBE:
1059      num_coords = 3;
1060      break;
1061   default:
1062      assert(0);
1063      return;
1064   }
1065
1066   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1067      lod_bias = emit_fetch( bld, inst, 0, 3 );
1068      explicit_lod = NULL;
1069   }
1070   else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1071      lod_bias = NULL;
1072      explicit_lod = emit_fetch( bld, inst, 0, 3 );
1073   }
1074   else {
1075      lod_bias = NULL;
1076      explicit_lod = NULL;
1077   }
1078
1079   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
1080      oow = emit_fetch( bld, inst, 0, 3 );
1081      oow = lp_build_rcp(&bld->base, oow);
1082   }
1083
1084   for (i = 0; i < num_coords; i++) {
1085      coords[i] = emit_fetch( bld, inst, 0, i );
1086      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1087         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
1088   }
1089   for (i = num_coords; i < 3; i++) {
1090      coords[i] = bld->base.undef;
1091   }
1092
1093   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1094      LLVMTypeRef i32t = LLVMInt32Type();
1095      LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
1096      for (i = 0; i < num_coords; i++) {
1097         LLVMValueRef src1 = emit_fetch( bld, inst, 1, i );
1098         LLVMValueRef src2 = emit_fetch( bld, inst, 2, i );
1099         ddx[i] = LLVMBuildExtractElement(bld->base.builder, src1, index0, "");
1100         ddy[i] = LLVMBuildExtractElement(bld->base.builder, src2, index0, "");
1101      }
1102      unit = inst->Src[3].Register.Index;
1103   }  else {
1104      for (i = 0; i < num_coords; i++) {
1105         ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] );
1106         ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] );
1107      }
1108      unit = inst->Src[1].Register.Index;
1109   }
1110   for (i = num_coords; i < 3; i++) {
1111      ddx[i] = LLVMGetUndef(bld->base.elem_type);
1112      ddy[i] = LLVMGetUndef(bld->base.elem_type);
1113   }
1114
1115   bld->sampler->emit_fetch_texel(bld->sampler,
1116                                  bld->base.builder,
1117                                  bld->base.type,
1118                                  unit, num_coords, coords,
1119                                  ddx, ddy,
1120                                  lod_bias, explicit_lod,
1121                                  texel);
1122}
1123
1124static boolean
1125near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
1126		   int pc)
1127{
1128   int i;
1129
1130   for (i = 0; i < 5; i++) {
1131      unsigned opcode;
1132
1133      if (pc + i >= bld->info->num_instructions)
1134	 return TRUE;
1135
1136      opcode = bld->instructions[pc + i].Instruction.Opcode;
1137
1138      if (opcode == TGSI_OPCODE_END)
1139	 return TRUE;
1140
1141      if (opcode == TGSI_OPCODE_TEX ||
1142	  opcode == TGSI_OPCODE_TXP ||
1143	  opcode == TGSI_OPCODE_TXD ||
1144	  opcode == TGSI_OPCODE_TXB ||
1145	  opcode == TGSI_OPCODE_TXL ||
1146	  opcode == TGSI_OPCODE_TXF ||
1147	  opcode == TGSI_OPCODE_TXQ ||
1148	  opcode == TGSI_OPCODE_CAL ||
1149	  opcode == TGSI_OPCODE_CALLNZ ||
1150	  opcode == TGSI_OPCODE_IF ||
1151	  opcode == TGSI_OPCODE_IFC ||
1152	  opcode == TGSI_OPCODE_BGNLOOP ||
1153	  opcode == TGSI_OPCODE_SWITCH)
1154	 return FALSE;
1155   }
1156
1157   return TRUE;
1158}
1159
1160
1161
1162/**
1163 * Kill fragment if any of the src register values are negative.
1164 */
1165static void
1166emit_kil(
1167   struct lp_build_tgsi_soa_context *bld,
1168   const struct tgsi_full_instruction *inst,
1169   int pc)
1170{
1171   const struct tgsi_full_src_register *reg = &inst->Src[0];
1172   LLVMValueRef terms[NUM_CHANNELS];
1173   LLVMValueRef mask;
1174   unsigned chan_index;
1175
1176   memset(&terms, 0, sizeof terms);
1177
1178   FOR_EACH_CHANNEL( chan_index ) {
1179      unsigned swizzle;
1180
1181      /* Unswizzle channel */
1182      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1183
1184      /* Check if the component has not been already tested. */
1185      assert(swizzle < NUM_CHANNELS);
1186      if( !terms[swizzle] )
1187         /* TODO: change the comparison operator instead of setting the sign */
1188         terms[swizzle] =  emit_fetch(bld, inst, 0, chan_index );
1189   }
1190
1191   mask = NULL;
1192   FOR_EACH_CHANNEL( chan_index ) {
1193      if(terms[chan_index]) {
1194         LLVMValueRef chan_mask;
1195
1196         /*
1197          * If term < 0 then mask = 0 else mask = ~0.
1198          */
1199         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
1200
1201         if(mask)
1202            mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
1203         else
1204            mask = chan_mask;
1205      }
1206   }
1207
1208   if(mask) {
1209      lp_build_mask_update(bld->mask, mask);
1210
1211      if (!near_end_of_shader(bld, pc))
1212	 lp_build_mask_check(bld->mask);
1213   }
1214}
1215
1216
1217/**
1218 * Predicated fragment kill.
1219 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
1220 * The only predication is the execution mask which will apply if
1221 * we're inside a loop or conditional.
1222 */
1223static void
1224emit_kilp(struct lp_build_tgsi_soa_context *bld,
1225          const struct tgsi_full_instruction *inst,
1226	  int pc)
1227{
1228   LLVMValueRef mask;
1229
1230   /* For those channels which are "alive", disable fragment shader
1231    * execution.
1232    */
1233   if (bld->exec_mask.has_mask) {
1234      mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
1235   }
1236   else {
1237      LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type);
1238      mask = zero;
1239   }
1240
1241   lp_build_mask_update(bld->mask, mask);
1242
1243   if (!near_end_of_shader(bld, pc))
1244      lp_build_mask_check(bld->mask);
1245}
1246
1247
1248/**
1249 * Emit code which will dump the value of all the temporary registers
1250 * to stdout.
1251 */
1252static void
1253emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
1254{
1255   LLVMBuilderRef builder = bld->base.builder;
1256   LLVMValueRef temp_ptr;
1257   LLVMValueRef i0 = lp_build_const_int32(0);
1258   LLVMValueRef i1 = lp_build_const_int32(1);
1259   LLVMValueRef i2 = lp_build_const_int32(2);
1260   LLVMValueRef i3 = lp_build_const_int32(3);
1261   int index;
1262   int n = bld->info->file_max[TGSI_FILE_TEMPORARY];
1263
1264   for (index = 0; index < n; index++) {
1265      LLVMValueRef idx = lp_build_const_int32(index);
1266      LLVMValueRef v[4][4], res;
1267      int chan;
1268
1269      lp_build_printf(builder, "TEMP[%d]:\n", idx);
1270
1271      for (chan = 0; chan < 4; chan++) {
1272         temp_ptr = get_temp_ptr(bld, index, chan);
1273         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
1274         v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
1275         v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
1276         v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
1277         v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
1278      }
1279
1280      lp_build_printf(builder, "  X: %f %f %f %f\n",
1281                      v[0][0], v[0][1], v[0][2], v[0][3]);
1282      lp_build_printf(builder, "  Y: %f %f %f %f\n",
1283                      v[1][0], v[1][1], v[1][2], v[1][3]);
1284      lp_build_printf(builder, "  Z: %f %f %f %f\n",
1285                      v[2][0], v[2][1], v[2][2], v[2][3]);
1286      lp_build_printf(builder, "  W: %f %f %f %f\n",
1287                      v[3][0], v[3][1], v[3][2], v[3][3]);
1288   }
1289}
1290
1291
1292
1293static void
1294emit_declaration(
1295   struct lp_build_tgsi_soa_context *bld,
1296   const struct tgsi_full_declaration *decl)
1297{
1298   LLVMTypeRef vec_type = bld->base.vec_type;
1299   const unsigned first = decl->Range.First;
1300   const unsigned last = decl->Range.Last;
1301   unsigned idx, i;
1302
1303   for (idx = first; idx <= last; ++idx) {
1304      assert(last <= bld->info->file_max[decl->Declaration.File]);
1305      switch (decl->Declaration.File) {
1306      case TGSI_FILE_TEMPORARY:
1307         assert(idx < LP_MAX_TGSI_TEMPS);
1308         if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
1309            for (i = 0; i < NUM_CHANNELS; i++)
1310               bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
1311                                                    vec_type, "temp");
1312         }
1313         break;
1314
1315      case TGSI_FILE_OUTPUT:
1316         if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
1317            for (i = 0; i < NUM_CHANNELS; i++)
1318               bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
1319                                                      vec_type, "output");
1320         }
1321         break;
1322
1323      case TGSI_FILE_ADDRESS:
1324         assert(idx < LP_MAX_TGSI_ADDRS);
1325         for (i = 0; i < NUM_CHANNELS; i++)
1326            bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
1327                                                vec_type, "addr");
1328         break;
1329
1330      case TGSI_FILE_PREDICATE:
1331         assert(idx < LP_MAX_TGSI_PREDS);
1332         for (i = 0; i < NUM_CHANNELS; i++)
1333            bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
1334                                                 vec_type, "predicate");
1335         break;
1336
1337      default:
1338         /* don't need to declare other vars */
1339         break;
1340      }
1341   }
1342}
1343
1344
1345/**
1346 * Emit LLVM for one TGSI instruction.
1347 * \param return TRUE for success, FALSE otherwise
1348 */
1349static boolean
1350emit_instruction(
1351   struct lp_build_tgsi_soa_context *bld,
1352   const struct tgsi_full_instruction *inst,
1353   const struct tgsi_opcode_info *info,
1354   int *pc)
1355{
1356   unsigned chan_index;
1357   LLVMValueRef src0, src1, src2;
1358   LLVMValueRef tmp0, tmp1, tmp2;
1359   LLVMValueRef tmp3 = NULL;
1360   LLVMValueRef tmp4 = NULL;
1361   LLVMValueRef tmp5 = NULL;
1362   LLVMValueRef tmp6 = NULL;
1363   LLVMValueRef tmp7 = NULL;
1364   LLVMValueRef res;
1365   LLVMValueRef dst0[NUM_CHANNELS];
1366
1367   /*
1368    * Stores and write masks are handled in a general fashion after the long
1369    * instruction opcode switch statement.
1370    *
1371    * Although not stricitly necessary, we avoid generating instructions for
1372    * channels which won't be stored, in cases where's that easy. For some
1373    * complex instructions, like texture sampling, it is more convenient to
1374    * assume a full writemask and then let LLVM optimization passes eliminate
1375    * redundant code.
1376    */
1377
1378   (*pc)++;
1379
1380   assert(info->num_dst <= 1);
1381   if (info->num_dst) {
1382      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1383         dst0[chan_index] = bld->base.undef;
1384      }
1385   }
1386
1387   switch (inst->Instruction.Opcode) {
1388   case TGSI_OPCODE_ARL:
1389      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1390         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1391         tmp0 = lp_build_floor(&bld->base, tmp0);
1392         dst0[chan_index] = tmp0;
1393      }
1394      break;
1395
1396   case TGSI_OPCODE_MOV:
1397      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1398         dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
1399      }
1400      break;
1401
1402   case TGSI_OPCODE_LIT:
1403      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
1404         dst0[CHAN_X] = bld->base.one;
1405      }
1406      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1407         src0 = emit_fetch( bld, inst, 0, CHAN_X );
1408         dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
1409      }
1410      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1411         /* XMM[1] = SrcReg[0].yyyy */
1412         tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1413         /* XMM[1] = max(XMM[1], 0) */
1414         tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero);
1415         /* XMM[2] = SrcReg[0].wwww */
1416         tmp2 = emit_fetch( bld, inst, 0, CHAN_W );
1417         tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
1418         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1419         tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
1420         dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
1421      }
1422      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
1423         dst0[CHAN_W] = bld->base.one;
1424      }
1425      break;
1426
1427   case TGSI_OPCODE_RCP:
1428   /* TGSI_OPCODE_RECIP */
1429      src0 = emit_fetch( bld, inst, 0, CHAN_X );
1430      res = lp_build_rcp(&bld->base, src0);
1431      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1432         dst0[chan_index] = res;
1433      }
1434      break;
1435
1436   case TGSI_OPCODE_RSQ:
1437   /* TGSI_OPCODE_RECIPSQRT */
1438      src0 = emit_fetch( bld, inst, 0, CHAN_X );
1439      src0 = lp_build_abs(&bld->base, src0);
1440      res = lp_build_rsqrt(&bld->base, src0);
1441      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1442         dst0[chan_index] = res;
1443      }
1444      break;
1445
1446   case TGSI_OPCODE_EXP:
1447      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1448          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1449          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1450         LLVMValueRef *p_exp2_int_part = NULL;
1451         LLVMValueRef *p_frac_part = NULL;
1452         LLVMValueRef *p_exp2 = NULL;
1453
1454         src0 = emit_fetch( bld, inst, 0, CHAN_X );
1455
1456         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1457            p_exp2_int_part = &tmp0;
1458         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1459            p_frac_part = &tmp1;
1460         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1461            p_exp2 = &tmp2;
1462
1463         lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
1464
1465         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1466            dst0[CHAN_X] = tmp0;
1467         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1468            dst0[CHAN_Y] = tmp1;
1469         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1470            dst0[CHAN_Z] = tmp2;
1471      }
1472      /* dst.w = 1.0 */
1473      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1474         dst0[CHAN_W] = bld->base.one;
1475      }
1476      break;
1477
1478   case TGSI_OPCODE_LOG:
1479      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1480          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1481          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
1482         LLVMValueRef *p_floor_log2 = NULL;
1483         LLVMValueRef *p_exp = NULL;
1484         LLVMValueRef *p_log2 = NULL;
1485
1486         src0 = emit_fetch( bld, inst, 0, CHAN_X );
1487         src0 = lp_build_abs( &bld->base, src0 );
1488
1489         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1490            p_floor_log2 = &tmp0;
1491         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
1492            p_exp = &tmp1;
1493         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1494            p_log2 = &tmp2;
1495
1496         lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
1497
1498         /* dst.x = floor(lg2(abs(src.x))) */
1499         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
1500            dst0[CHAN_X] = tmp0;
1501         /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
1502         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
1503            dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
1504         }
1505         /* dst.z = lg2(abs(src.x)) */
1506         if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
1507            dst0[CHAN_Z] = tmp2;
1508      }
1509      /* dst.w = 1.0 */
1510      if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
1511         dst0[CHAN_W] = bld->base.one;
1512      }
1513      break;
1514
1515   case TGSI_OPCODE_MUL:
1516      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1517         src0 = emit_fetch( bld, inst, 0, chan_index );
1518         src1 = emit_fetch( bld, inst, 1, chan_index );
1519         dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
1520      }
1521      break;
1522
1523   case TGSI_OPCODE_ADD:
1524      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1525         src0 = emit_fetch( bld, inst, 0, chan_index );
1526         src1 = emit_fetch( bld, inst, 1, chan_index );
1527         dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
1528      }
1529      break;
1530
1531   case TGSI_OPCODE_DP3:
1532   /* TGSI_OPCODE_DOT3 */
1533      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1534      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1535      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1536      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1537      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1538      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1539      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1540      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1541      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1542      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1543      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1544      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1545         dst0[chan_index] = tmp0;
1546      }
1547      break;
1548
1549   case TGSI_OPCODE_DP4:
1550   /* TGSI_OPCODE_DOT4 */
1551      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1552      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1553      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1554      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1555      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1556      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1557      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1558      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1559      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1560      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1561      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1562      tmp1 = emit_fetch( bld, inst, 0, CHAN_W );
1563      tmp2 = emit_fetch( bld, inst, 1, CHAN_W );
1564      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1565      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1566      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1567         dst0[chan_index] = tmp0;
1568      }
1569      break;
1570
1571   case TGSI_OPCODE_DST:
1572      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1573         dst0[CHAN_X] = bld->base.one;
1574      }
1575      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1576         tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1577         tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
1578         dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
1579      }
1580      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1581         dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
1582      }
1583      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1584         dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
1585      }
1586      break;
1587
1588   case TGSI_OPCODE_MIN:
1589      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1590         src0 = emit_fetch( bld, inst, 0, chan_index );
1591         src1 = emit_fetch( bld, inst, 1, chan_index );
1592         dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
1593      }
1594      break;
1595
1596   case TGSI_OPCODE_MAX:
1597      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1598         src0 = emit_fetch( bld, inst, 0, chan_index );
1599         src1 = emit_fetch( bld, inst, 1, chan_index );
1600         dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
1601      }
1602      break;
1603
1604   case TGSI_OPCODE_SLT:
1605   /* TGSI_OPCODE_SETLT */
1606      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1607         src0 = emit_fetch( bld, inst, 0, chan_index );
1608         src1 = emit_fetch( bld, inst, 1, chan_index );
1609         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
1610         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1611      }
1612      break;
1613
1614   case TGSI_OPCODE_SGE:
1615   /* TGSI_OPCODE_SETGE */
1616      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1617         src0 = emit_fetch( bld, inst, 0, chan_index );
1618         src1 = emit_fetch( bld, inst, 1, chan_index );
1619         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
1620         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1621      }
1622      break;
1623
1624   case TGSI_OPCODE_MAD:
1625   /* TGSI_OPCODE_MADD */
1626      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1627         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1628         tmp1 = emit_fetch( bld, inst, 1, chan_index );
1629         tmp2 = emit_fetch( bld, inst, 2, chan_index );
1630         tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1631         tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
1632         dst0[chan_index] = tmp0;
1633      }
1634      break;
1635
1636   case TGSI_OPCODE_SUB:
1637      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1638         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1639         tmp1 = emit_fetch( bld, inst, 1, chan_index );
1640         dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
1641      }
1642      break;
1643
1644   case TGSI_OPCODE_LRP:
1645      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1646         src0 = emit_fetch( bld, inst, 0, chan_index );
1647         src1 = emit_fetch( bld, inst, 1, chan_index );
1648         src2 = emit_fetch( bld, inst, 2, chan_index );
1649         tmp0 = lp_build_sub( &bld->base, src1, src2 );
1650         tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
1651         dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
1652      }
1653      break;
1654
1655   case TGSI_OPCODE_CND:
1656      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1657         src0 = emit_fetch( bld, inst, 0, chan_index );
1658         src1 = emit_fetch( bld, inst, 1, chan_index );
1659         src2 = emit_fetch( bld, inst, 2, chan_index );
1660         tmp1 = lp_build_const_vec(bld->base.type, 0.5);
1661         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
1662         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
1663      }
1664      break;
1665
1666   case TGSI_OPCODE_DP2A:
1667      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );  /* xmm0 = src[0].x */
1668      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );  /* xmm1 = src[1].x */
1669      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
1670      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );  /* xmm1 = src[0].y */
1671      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );  /* xmm2 = src[1].y */
1672      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
1673      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1674      tmp1 = emit_fetch( bld, inst, 2, CHAN_X );  /* xmm1 = src[2].x */
1675      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
1676      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1677         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
1678      }
1679      break;
1680
1681   case TGSI_OPCODE_FRC:
1682      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1683         src0 = emit_fetch( bld, inst, 0, chan_index );
1684         tmp0 = lp_build_floor(&bld->base, src0);
1685         tmp0 = lp_build_sub(&bld->base, src0, tmp0);
1686         dst0[chan_index] = tmp0;
1687      }
1688      break;
1689
1690   case TGSI_OPCODE_CLAMP:
1691      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1692         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1693         src1 = emit_fetch( bld, inst, 1, chan_index );
1694         src2 = emit_fetch( bld, inst, 2, chan_index );
1695         tmp0 = lp_build_max(&bld->base, tmp0, src1);
1696         tmp0 = lp_build_min(&bld->base, tmp0, src2);
1697         dst0[chan_index] = tmp0;
1698      }
1699      break;
1700
1701   case TGSI_OPCODE_FLR:
1702      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1703         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1704         dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
1705      }
1706      break;
1707
1708   case TGSI_OPCODE_ROUND:
1709      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1710         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1711         dst0[chan_index] = lp_build_round(&bld->base, tmp0);
1712      }
1713      break;
1714
1715   case TGSI_OPCODE_EX2: {
1716      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1717      tmp0 = lp_build_exp2( &bld->base, tmp0);
1718      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1719         dst0[chan_index] = tmp0;
1720      }
1721      break;
1722   }
1723
1724   case TGSI_OPCODE_LG2:
1725      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1726      tmp0 = lp_build_log2( &bld->base, tmp0);
1727      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1728         dst0[chan_index] = tmp0;
1729      }
1730      break;
1731
1732   case TGSI_OPCODE_POW:
1733      src0 = emit_fetch( bld, inst, 0, CHAN_X );
1734      src1 = emit_fetch( bld, inst, 1, CHAN_X );
1735      res = lp_build_pow( &bld->base, src0, src1 );
1736      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1737         dst0[chan_index] = res;
1738      }
1739      break;
1740
1741   case TGSI_OPCODE_XPD:
1742      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1743          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
1744         tmp1 = emit_fetch( bld, inst, 1, CHAN_Z );
1745         tmp3 = emit_fetch( bld, inst, 0, CHAN_Z );
1746      }
1747      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
1748          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1749         tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
1750         tmp4 = emit_fetch( bld, inst, 1, CHAN_Y );
1751      }
1752      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
1753         tmp2 = tmp0;
1754         tmp2 = lp_build_mul( &bld->base, tmp2, tmp1);
1755         tmp5 = tmp3;
1756         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1757         tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
1758         dst0[CHAN_X] = tmp2;
1759      }
1760      if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
1761          IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
1762         tmp2 = emit_fetch( bld, inst, 1, CHAN_X );
1763         tmp5 = emit_fetch( bld, inst, 0, CHAN_X );
1764      }
1765      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
1766         tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
1767         tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
1768         tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
1769         dst0[CHAN_Y] = tmp3;
1770      }
1771      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
1772         tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
1773         tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
1774         tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
1775         dst0[CHAN_Z] = tmp5;
1776      }
1777      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
1778         dst0[CHAN_W] = bld->base.one;
1779      }
1780      break;
1781
1782   case TGSI_OPCODE_ABS:
1783      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1784         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1785         dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
1786      }
1787      break;
1788
1789   case TGSI_OPCODE_RCC:
1790      /* deprecated? */
1791      assert(0);
1792      return FALSE;
1793
1794   case TGSI_OPCODE_DPH:
1795      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1796      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );
1797      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
1798      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );
1799      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );
1800      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1801      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1802      tmp1 = emit_fetch( bld, inst, 0, CHAN_Z );
1803      tmp2 = emit_fetch( bld, inst, 1, CHAN_Z );
1804      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
1805      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1806      tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
1807      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
1808      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1809         dst0[chan_index] = tmp0;
1810      }
1811      break;
1812
1813   case TGSI_OPCODE_COS:
1814      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1815      tmp0 = lp_build_cos( &bld->base, tmp0 );
1816      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1817         dst0[chan_index] = tmp0;
1818      }
1819      break;
1820
1821   case TGSI_OPCODE_DDX:
1822      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1823         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
1824      }
1825      break;
1826
1827   case TGSI_OPCODE_DDY:
1828      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1829         emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
1830      }
1831      break;
1832
1833   case TGSI_OPCODE_KILP:
1834      /* predicated kill */
1835      emit_kilp( bld, inst, (*pc)-1 );
1836      break;
1837
1838   case TGSI_OPCODE_KIL:
1839      /* conditional kill */
1840      emit_kil( bld, inst, (*pc)-1 );
1841      break;
1842
1843   case TGSI_OPCODE_PK2H:
1844      return FALSE;
1845      break;
1846
1847   case TGSI_OPCODE_PK2US:
1848      return FALSE;
1849      break;
1850
1851   case TGSI_OPCODE_PK4B:
1852      return FALSE;
1853      break;
1854
1855   case TGSI_OPCODE_PK4UB:
1856      return FALSE;
1857      break;
1858
1859   case TGSI_OPCODE_RFL:
1860      return FALSE;
1861      break;
1862
1863   case TGSI_OPCODE_SEQ:
1864      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1865         src0 = emit_fetch( bld, inst, 0, chan_index );
1866         src1 = emit_fetch( bld, inst, 1, chan_index );
1867         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
1868         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1869      }
1870      break;
1871
1872   case TGSI_OPCODE_SFL:
1873      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1874         dst0[chan_index] = bld->base.zero;
1875      }
1876      break;
1877
1878   case TGSI_OPCODE_SGT:
1879      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1880         src0 = emit_fetch( bld, inst, 0, chan_index );
1881         src1 = emit_fetch( bld, inst, 1, chan_index );
1882         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
1883         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1884      }
1885      break;
1886
1887   case TGSI_OPCODE_SIN:
1888      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
1889      tmp0 = lp_build_sin( &bld->base, tmp0 );
1890      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1891         dst0[chan_index] = tmp0;
1892      }
1893      break;
1894
1895   case TGSI_OPCODE_SLE:
1896      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1897         src0 = emit_fetch( bld, inst, 0, chan_index );
1898         src1 = emit_fetch( bld, inst, 1, chan_index );
1899         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
1900         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1901      }
1902      break;
1903
1904   case TGSI_OPCODE_SNE:
1905      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1906         src0 = emit_fetch( bld, inst, 0, chan_index );
1907         src1 = emit_fetch( bld, inst, 1, chan_index );
1908         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
1909         dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
1910      }
1911      break;
1912
1913   case TGSI_OPCODE_STR:
1914      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1915         dst0[chan_index] = bld->base.one;
1916      }
1917      break;
1918
1919   case TGSI_OPCODE_TEX:
1920      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 );
1921      break;
1922
1923   case TGSI_OPCODE_TXD:
1924      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 );
1925      break;
1926
1927   case TGSI_OPCODE_UP2H:
1928      /* deprecated */
1929      assert (0);
1930      return FALSE;
1931      break;
1932
1933   case TGSI_OPCODE_UP2US:
1934      /* deprecated */
1935      assert(0);
1936      return FALSE;
1937      break;
1938
1939   case TGSI_OPCODE_UP4B:
1940      /* deprecated */
1941      assert(0);
1942      return FALSE;
1943      break;
1944
1945   case TGSI_OPCODE_UP4UB:
1946      /* deprecated */
1947      assert(0);
1948      return FALSE;
1949      break;
1950
1951   case TGSI_OPCODE_X2D:
1952      /* deprecated? */
1953      assert(0);
1954      return FALSE;
1955      break;
1956
1957   case TGSI_OPCODE_ARA:
1958      /* deprecated */
1959      assert(0);
1960      return FALSE;
1961      break;
1962
1963   case TGSI_OPCODE_ARR:
1964      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1965         tmp0 = emit_fetch( bld, inst, 0, chan_index );
1966         tmp0 = lp_build_round(&bld->base, tmp0);
1967         dst0[chan_index] = tmp0;
1968      }
1969      break;
1970
1971   case TGSI_OPCODE_BRA:
1972      /* deprecated */
1973      assert(0);
1974      return FALSE;
1975      break;
1976
1977   case TGSI_OPCODE_CAL:
1978      lp_exec_mask_call(&bld->exec_mask,
1979                        inst->Label.Label,
1980                        pc);
1981
1982      break;
1983
1984   case TGSI_OPCODE_RET:
1985      lp_exec_mask_ret(&bld->exec_mask, pc);
1986      break;
1987
1988   case TGSI_OPCODE_END:
1989      if (0) {
1990         /* for debugging */
1991         emit_dump_temps(bld);
1992      }
1993      *pc = -1;
1994      break;
1995
1996   case TGSI_OPCODE_SSG:
1997   /* TGSI_OPCODE_SGN */
1998      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1999         tmp0 = emit_fetch( bld, inst, 0, chan_index );
2000         dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
2001      }
2002      break;
2003
2004   case TGSI_OPCODE_CMP:
2005      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2006         src0 = emit_fetch( bld, inst, 0, chan_index );
2007         src1 = emit_fetch( bld, inst, 1, chan_index );
2008         src2 = emit_fetch( bld, inst, 2, chan_index );
2009         tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
2010         dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
2011      }
2012      break;
2013
2014   case TGSI_OPCODE_SCS:
2015      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
2016         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
2017         dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
2018      }
2019      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
2020         tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
2021         dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
2022      }
2023      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
2024         dst0[CHAN_Z] = bld->base.zero;
2025      }
2026      IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
2027         dst0[CHAN_W] = bld->base.one;
2028      }
2029      break;
2030
2031   case TGSI_OPCODE_TXB:
2032      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 );
2033      break;
2034
2035   case TGSI_OPCODE_NRM:
2036      /* fall-through */
2037   case TGSI_OPCODE_NRM4:
2038      /* 3 or 4-component normalization */
2039      {
2040         uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
2041
2042         if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) ||
2043             IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) ||
2044             IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) ||
2045             (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) {
2046
2047            /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
2048
2049            /* xmm4 = src.x */
2050            /* xmm0 = src.x * src.x */
2051            tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
2052            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
2053               tmp4 = tmp0;
2054            }
2055            tmp0 = lp_build_mul( &bld->base, tmp0, tmp0);
2056
2057            /* xmm5 = src.y */
2058            /* xmm0 = xmm0 + src.y * src.y */
2059            tmp1 = emit_fetch(bld, inst, 0, CHAN_Y);
2060            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
2061               tmp5 = tmp1;
2062            }
2063            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2064            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2065
2066            /* xmm6 = src.z */
2067            /* xmm0 = xmm0 + src.z * src.z */
2068            tmp1 = emit_fetch(bld, inst, 0, CHAN_Z);
2069            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
2070               tmp6 = tmp1;
2071            }
2072            tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2073            tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2074
2075            if (dims == 4) {
2076               /* xmm7 = src.w */
2077               /* xmm0 = xmm0 + src.w * src.w */
2078               tmp1 = emit_fetch(bld, inst, 0, CHAN_W);
2079               if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) {
2080                  tmp7 = tmp1;
2081               }
2082               tmp1 = lp_build_mul( &bld->base, tmp1, tmp1);
2083               tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
2084            }
2085
2086            /* xmm1 = 1 / sqrt(xmm0) */
2087            tmp1 = lp_build_rsqrt( &bld->base, tmp0);
2088
2089            /* dst.x = xmm1 * src.x */
2090            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
2091               dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
2092            }
2093
2094            /* dst.y = xmm1 * src.y */
2095            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
2096               dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
2097            }
2098
2099            /* dst.z = xmm1 * src.z */
2100            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
2101               dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
2102            }
2103
2104            /* dst.w = xmm1 * src.w */
2105            if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
2106               dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
2107            }
2108         }
2109
2110         /* dst.w = 1.0 */
2111         if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
2112            dst0[CHAN_W] = bld->base.one;
2113         }
2114      }
2115      break;
2116
2117   case TGSI_OPCODE_DIV:
2118      /* deprecated */
2119      assert( 0 );
2120      return FALSE;
2121      break;
2122
2123   case TGSI_OPCODE_DP2:
2124      tmp0 = emit_fetch( bld, inst, 0, CHAN_X );  /* xmm0 = src[0].x */
2125      tmp1 = emit_fetch( bld, inst, 1, CHAN_X );  /* xmm1 = src[1].x */
2126      tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 * xmm1 */
2127      tmp1 = emit_fetch( bld, inst, 0, CHAN_Y );  /* xmm1 = src[0].y */
2128      tmp2 = emit_fetch( bld, inst, 1, CHAN_Y );  /* xmm2 = src[1].y */
2129      tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);              /* xmm1 = xmm1 * xmm2 */
2130      tmp0 = lp_build_add( &bld->base, tmp0, tmp1);              /* xmm0 = xmm0 + xmm1 */
2131      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2132         dst0[chan_index] = tmp0;  /* dest[ch] = xmm0 */
2133      }
2134      break;
2135
2136   case TGSI_OPCODE_TXL:
2137      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 );
2138      break;
2139
2140   case TGSI_OPCODE_TXP:
2141      emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 );
2142      break;
2143
2144   case TGSI_OPCODE_BRK:
2145      lp_exec_break(&bld->exec_mask);
2146      break;
2147
2148   case TGSI_OPCODE_IF:
2149      tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
2150      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
2151                          tmp0, bld->base.zero);
2152      lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
2153      break;
2154
2155   case TGSI_OPCODE_BGNLOOP:
2156      lp_exec_bgnloop(&bld->exec_mask);
2157      break;
2158
2159   case TGSI_OPCODE_BGNSUB:
2160      lp_exec_mask_bgnsub(&bld->exec_mask);
2161      break;
2162
2163   case TGSI_OPCODE_ELSE:
2164      lp_exec_mask_cond_invert(&bld->exec_mask);
2165      break;
2166
2167   case TGSI_OPCODE_ENDIF:
2168      lp_exec_mask_cond_pop(&bld->exec_mask);
2169      break;
2170
2171   case TGSI_OPCODE_ENDLOOP:
2172      lp_exec_endloop(&bld->exec_mask);
2173      break;
2174
2175   case TGSI_OPCODE_ENDSUB:
2176      lp_exec_mask_endsub(&bld->exec_mask, pc);
2177      break;
2178
2179   case TGSI_OPCODE_PUSHA:
2180      /* deprecated? */
2181      assert(0);
2182      return FALSE;
2183      break;
2184
2185   case TGSI_OPCODE_POPA:
2186      /* deprecated? */
2187      assert(0);
2188      return FALSE;
2189      break;
2190
2191   case TGSI_OPCODE_CEIL:
2192      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2193         tmp0 = emit_fetch( bld, inst, 0, chan_index );
2194         dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
2195      }
2196      break;
2197
2198   case TGSI_OPCODE_I2F:
2199      /* deprecated? */
2200      assert(0);
2201      return FALSE;
2202      break;
2203
2204   case TGSI_OPCODE_NOT:
2205      /* deprecated? */
2206      assert(0);
2207      return FALSE;
2208      break;
2209
2210   case TGSI_OPCODE_TRUNC:
2211      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2212         tmp0 = emit_fetch( bld, inst, 0, chan_index );
2213         dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
2214      }
2215      break;
2216
2217   case TGSI_OPCODE_SHL:
2218      /* deprecated? */
2219      assert(0);
2220      return FALSE;
2221      break;
2222
2223   case TGSI_OPCODE_ISHR:
2224      /* deprecated? */
2225      assert(0);
2226      return FALSE;
2227      break;
2228
2229   case TGSI_OPCODE_AND:
2230      /* deprecated? */
2231      assert(0);
2232      return FALSE;
2233      break;
2234
2235   case TGSI_OPCODE_OR:
2236      /* deprecated? */
2237      assert(0);
2238      return FALSE;
2239      break;
2240
2241   case TGSI_OPCODE_MOD:
2242      /* deprecated? */
2243      assert(0);
2244      return FALSE;
2245      break;
2246
2247   case TGSI_OPCODE_XOR:
2248      /* deprecated? */
2249      assert(0);
2250      return FALSE;
2251      break;
2252
2253   case TGSI_OPCODE_SAD:
2254      /* deprecated? */
2255      assert(0);
2256      return FALSE;
2257      break;
2258
2259   case TGSI_OPCODE_TXF:
2260      /* deprecated? */
2261      assert(0);
2262      return FALSE;
2263      break;
2264
2265   case TGSI_OPCODE_TXQ:
2266      /* deprecated? */
2267      assert(0);
2268      return FALSE;
2269      break;
2270
2271   case TGSI_OPCODE_CONT:
2272      lp_exec_continue(&bld->exec_mask);
2273      break;
2274
2275   case TGSI_OPCODE_EMIT:
2276      return FALSE;
2277      break;
2278
2279   case TGSI_OPCODE_ENDPRIM:
2280      return FALSE;
2281      break;
2282
2283   case TGSI_OPCODE_NOP:
2284      break;
2285
2286   default:
2287      return FALSE;
2288   }
2289
2290   if(info->num_dst) {
2291      LLVMValueRef pred[NUM_CHANNELS];
2292
2293      emit_fetch_predicate( bld, inst, pred );
2294
2295      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
2296         emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
2297      }
2298   }
2299
2300   return TRUE;
2301}
2302
2303
2304void
2305lp_build_tgsi_soa(LLVMBuilderRef builder,
2306                  const struct tgsi_token *tokens,
2307                  struct lp_type type,
2308                  struct lp_build_mask_context *mask,
2309                  LLVMValueRef consts_ptr,
2310                  LLVMValueRef system_values_array,
2311                  const LLVMValueRef *pos,
2312                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
2313                  LLVMValueRef (*outputs)[NUM_CHANNELS],
2314                  struct lp_build_sampler_soa *sampler,
2315                  const struct tgsi_shader_info *info)
2316{
2317   struct lp_build_tgsi_soa_context bld;
2318   struct tgsi_parse_context parse;
2319   uint num_immediates = 0;
2320   uint num_instructions = 0;
2321   unsigned i;
2322   int pc = 0;
2323
2324   struct lp_type res_type;
2325
2326   assert(type.length <= LP_MAX_VECTOR_LENGTH);
2327   memset(&res_type, 0, sizeof res_type);
2328   res_type.width = type.width;
2329   res_type.length = type.length;
2330   res_type.sign = 1;
2331
2332   /* Setup build context */
2333   memset(&bld, 0, sizeof bld);
2334   lp_build_context_init(&bld.base, builder, type);
2335   lp_build_context_init(&bld.uint_bld, builder, lp_uint_type(type));
2336   lp_build_context_init(&bld.elem_bld, builder, lp_elem_type(type));
2337   bld.mask = mask;
2338   bld.pos = pos;
2339   bld.inputs = inputs;
2340   bld.outputs = outputs;
2341   bld.consts_ptr = consts_ptr;
2342   bld.sampler = sampler;
2343   bld.info = info;
2344   bld.indirect_files = info->indirect_files;
2345   bld.instructions = (struct tgsi_full_instruction *)
2346                      MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
2347   bld.max_instructions = LP_MAX_INSTRUCTIONS;
2348
2349   if (!bld.instructions) {
2350      return;
2351   }
2352
2353   lp_exec_mask_init(&bld.exec_mask, &bld.base);
2354
2355   if (bld.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
2356      LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
2357                                             info->file_max[TGSI_FILE_TEMPORARY]*4 + 4, 0);
2358      bld.temps_array = lp_build_array_alloca(bld.base.builder,
2359                                              bld.base.vec_type, array_size,
2360                                              "temp_array");
2361   }
2362
2363   if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2364      LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
2365                                             info->file_max[TGSI_FILE_OUTPUT]*4 + 4, 0);
2366      bld.outputs_array = lp_build_array_alloca(bld.base.builder,
2367                                                bld.base.vec_type, array_size,
2368                                                "output_array");
2369   }
2370
2371   /* If we have indirect addressing in inputs we need to copy them into
2372    * our alloca array to be able to iterate over them */
2373   if (bld.indirect_files & (1 << TGSI_FILE_INPUT)) {
2374      unsigned index, chan;
2375      LLVMTypeRef vec_type = bld.base.vec_type;
2376      LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
2377                                             info->file_max[TGSI_FILE_INPUT]*4 + 4, 0);
2378      bld.inputs_array = lp_build_array_alloca(bld.base.builder,
2379                                               vec_type, array_size,
2380                                               "input_array");
2381
2382      assert(info->num_inputs <= info->file_max[TGSI_FILE_INPUT] + 1);
2383
2384      for (index = 0; index < info->num_inputs; ++index) {
2385         for (chan = 0; chan < NUM_CHANNELS; ++chan) {
2386            LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
2387            LLVMValueRef input_ptr =
2388               LLVMBuildGEP(bld.base.builder, bld.inputs_array,
2389                            &lindex, 1, "");
2390            LLVMValueRef value = bld.inputs[index][chan];
2391            if (value)
2392               LLVMBuildStore(bld.base.builder, value, input_ptr);
2393         }
2394      }
2395   }
2396
2397   bld.system_values_array = system_values_array;
2398
2399   tgsi_parse_init( &parse, tokens );
2400
2401   while( !tgsi_parse_end_of_tokens( &parse ) ) {
2402      tgsi_parse_token( &parse );
2403
2404      switch( parse.FullToken.Token.Type ) {
2405      case TGSI_TOKEN_TYPE_DECLARATION:
2406         /* Inputs already interpolated */
2407         emit_declaration( &bld, &parse.FullToken.FullDeclaration );
2408         break;
2409
2410      case TGSI_TOKEN_TYPE_INSTRUCTION:
2411         {
2412            /* save expanded instruction */
2413            if (num_instructions == bld.max_instructions) {
2414               struct tgsi_full_instruction *instructions;
2415               instructions = REALLOC(bld.instructions,
2416                                      bld.max_instructions
2417                                      * sizeof(struct tgsi_full_instruction),
2418                                      (bld.max_instructions + LP_MAX_INSTRUCTIONS)
2419                                      * sizeof(struct tgsi_full_instruction));
2420               if (!instructions) {
2421                  break;
2422               }
2423               bld.instructions = instructions;
2424               bld.max_instructions += LP_MAX_INSTRUCTIONS;
2425            }
2426
2427            memcpy(bld.instructions + num_instructions,
2428                   &parse.FullToken.FullInstruction,
2429                   sizeof(bld.instructions[0]));
2430
2431            num_instructions++;
2432         }
2433
2434         break;
2435
2436      case TGSI_TOKEN_TYPE_IMMEDIATE:
2437         /* simply copy the immediate values into the next immediates[] slot */
2438         {
2439            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
2440            assert(size <= 4);
2441            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
2442            for( i = 0; i < size; ++i )
2443               bld.immediates[num_immediates][i] =
2444                  lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
2445            for( i = size; i < 4; ++i )
2446               bld.immediates[num_immediates][i] = bld.base.undef;
2447            num_immediates++;
2448         }
2449         break;
2450
2451      case TGSI_TOKEN_TYPE_PROPERTY:
2452         break;
2453
2454      default:
2455         assert( 0 );
2456      }
2457   }
2458
2459   while (pc != -1) {
2460      struct tgsi_full_instruction *instr = bld.instructions + pc;
2461      const struct tgsi_opcode_info *opcode_info =
2462         tgsi_get_opcode_info(instr->Instruction.Opcode);
2463      if (!emit_instruction( &bld, instr, opcode_info, &pc ))
2464         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
2465                       opcode_info->mnemonic);
2466   }
2467
2468   /* If we have indirect addressing in outputs we need to copy our alloca array
2469    * to the outputs slots specified by the called */
2470   if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2471      unsigned index, chan;
2472      assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] + 1);
2473      for (index = 0; index < info->num_outputs; ++index) {
2474         for (chan = 0; chan < NUM_CHANNELS; ++chan) {
2475            bld.outputs[index][chan] = get_output_ptr(&bld, index, chan);
2476         }
2477      }
2478   }
2479
2480   if (0) {
2481      LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
2482      LLVMValueRef function = LLVMGetBasicBlockParent(block);
2483      debug_printf("11111111111111111111111111111 \n");
2484      tgsi_dump(tokens, 0);
2485      lp_debug_dump_value(function);
2486      debug_printf("2222222222222222222222222222 \n");
2487   }
2488   tgsi_parse_free( &parse );
2489
2490   if (0) {
2491      LLVMModuleRef module = LLVMGetGlobalParent(
2492         LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
2493      LLVMDumpModule(module);
2494
2495   }
2496
2497   FREE( bld.instructions );
2498}
2499
2500
2501/**
2502 * Build up the system values array out of individual values such as
2503 * the instance ID, front-face, primitive ID, etc.  The shader info is
2504 * used to determine which system values are needed and where to put
2505 * them in the system values array.
2506 *
2507 * XXX only instance ID is implemented at this time.
2508 *
2509 * The system values register file is similar to the constants buffer.
2510 * Example declaration:
2511 *    DCL SV[0], INSTANCEID
2512 * Example instruction:
2513 *    MOVE foo, SV[0].xxxx;
2514 *
2515 * \return  LLVM float array (interpreted as float [][4])
2516 */
2517LLVMValueRef
2518lp_build_system_values_array(LLVMBuilderRef builder,
2519                             const struct tgsi_shader_info *info,
2520                             LLVMValueRef instance_id,
2521                             LLVMValueRef facing)
2522{
2523   LLVMValueRef size = lp_build_const_int32(4 * info->num_system_values);
2524   LLVMValueRef array = lp_build_array_alloca(builder, LLVMFloatType(),
2525                                              size, "sysvals_array");
2526   unsigned i;
2527
2528   for (i = 0; i < info->num_system_values; i++) {
2529      LLVMValueRef index = lp_build_const_int32(i * 4);
2530      LLVMValueRef ptr, value;
2531
2532      switch (info->system_value_semantic_name[i]) {
2533      case TGSI_SEMANTIC_INSTANCEID:
2534         /* convert instance ID from int to float */
2535         value = LLVMBuildSIToFP(builder, instance_id, LLVMFloatType(),
2536                                 "sysval_instanceid");
2537         break;
2538      case TGSI_SEMANTIC_FACE:
2539         /* fall-through */
2540      default:
2541         assert(0 && "unexpected semantic in build_system_values_array()");
2542      }
2543
2544      ptr = LLVMBuildGEP(builder, array, &index, 1, "");
2545      LLVMBuildStore(builder, value, ptr);
2546   }
2547
2548   return array;
2549}
2550