lp_bld_tgsi_soa.c revision 5d10d757276a599a60a68b88b21087b5824a8df7
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29/**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39#include "pipe/p_config.h"
40#include "pipe/p_shader_tokens.h"
41#include "util/u_debug.h"
42#include "util/u_math.h"
43#include "util/u_memory.h"
44#include "tgsi/tgsi_dump.h"
45#include "tgsi/tgsi_exec.h"
46#include "tgsi/tgsi_info.h"
47#include "tgsi/tgsi_parse.h"
48#include "tgsi/tgsi_util.h"
49#include "tgsi/tgsi_scan.h"
50#include "lp_bld_tgsi_action.h"
51#include "lp_bld_type.h"
52#include "lp_bld_const.h"
53#include "lp_bld_arit.h"
54#include "lp_bld_bitarit.h"
55#include "lp_bld_gather.h"
56#include "lp_bld_init.h"
57#include "lp_bld_logic.h"
58#include "lp_bld_swizzle.h"
59#include "lp_bld_flow.h"
60#include "lp_bld_quad.h"
61#include "lp_bld_tgsi.h"
62#include "lp_bld_limits.h"
63#include "lp_bld_debug.h"
64#include "lp_bld_printf.h"
65
66
67static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
68{
69   LLVMTypeRef int_type = LLVMInt32TypeInContext(bld->gallivm->context);
70   LLVMBuilderRef builder = bld->gallivm->builder;
71
72   mask->bld = bld;
73   mask->has_mask = FALSE;
74   mask->cond_stack_size = 0;
75   mask->loop_stack_size = 0;
76   mask->call_stack_size = 0;
77
78   mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
79   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
80         LLVMConstAllOnes(mask->int_vec_type);
81
82   mask->loop_limiter = lp_build_alloca(bld->gallivm, int_type, "looplimiter");
83
84   LLVMBuildStore(
85      builder,
86      LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
87      mask->loop_limiter);
88}
89
90static void lp_exec_mask_update(struct lp_exec_mask *mask)
91{
92   LLVMBuilderRef builder = mask->bld->gallivm->builder;
93
94   if (mask->loop_stack_size) {
95      /*for loops we need to update the entire mask at runtime */
96      LLVMValueRef tmp;
97      assert(mask->break_mask);
98      tmp = LLVMBuildAnd(builder,
99                         mask->cont_mask,
100                         mask->break_mask,
101                         "maskcb");
102      mask->exec_mask = LLVMBuildAnd(builder,
103                                     mask->cond_mask,
104                                     tmp,
105                                     "maskfull");
106   } else
107      mask->exec_mask = mask->cond_mask;
108
109   if (mask->call_stack_size) {
110      mask->exec_mask = LLVMBuildAnd(builder,
111                                     mask->exec_mask,
112                                     mask->ret_mask,
113                                     "callmask");
114   }
115
116   mask->has_mask = (mask->cond_stack_size > 0 ||
117                     mask->loop_stack_size > 0 ||
118                     mask->call_stack_size > 0);
119}
120
121static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
122                                   LLVMValueRef val)
123{
124   LLVMBuilderRef builder = mask->bld->gallivm->builder;
125
126   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
127   if (mask->cond_stack_size == 0) {
128      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
129   }
130   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
131   assert(LLVMTypeOf(val) == mask->int_vec_type);
132   mask->cond_mask = LLVMBuildAnd(builder,
133                                  mask->cond_mask,
134                                  val,
135                                  "");
136   lp_exec_mask_update(mask);
137}
138
139static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
140{
141   LLVMBuilderRef builder = mask->bld->gallivm->builder;
142   LLVMValueRef prev_mask;
143   LLVMValueRef inv_mask;
144
145   assert(mask->cond_stack_size);
146   prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
147   if (mask->cond_stack_size == 1) {
148      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
149   }
150
151   inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
152
153   mask->cond_mask = LLVMBuildAnd(builder,
154                                  inv_mask,
155                                  prev_mask, "");
156   lp_exec_mask_update(mask);
157}
158
159static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
160{
161   assert(mask->cond_stack_size);
162   mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
163   lp_exec_mask_update(mask);
164}
165
166static void lp_exec_bgnloop(struct lp_exec_mask *mask)
167{
168   LLVMBuilderRef builder = mask->bld->gallivm->builder;
169
170   if (mask->loop_stack_size == 0) {
171      assert(mask->loop_block == NULL);
172      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
173      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
174      assert(mask->break_var == NULL);
175   }
176
177   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
178
179   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
180   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
181   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
182   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
183   ++mask->loop_stack_size;
184
185   mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
186   LLVMBuildStore(builder, mask->break_mask, mask->break_var);
187
188   mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
189
190   LLVMBuildBr(builder, mask->loop_block);
191   LLVMPositionBuilderAtEnd(builder, mask->loop_block);
192
193   mask->break_mask = LLVMBuildLoad(builder, mask->break_var, "");
194
195   lp_exec_mask_update(mask);
196}
197
198static void lp_exec_break(struct lp_exec_mask *mask)
199{
200   LLVMBuilderRef builder = mask->bld->gallivm->builder;
201   LLVMValueRef exec_mask = LLVMBuildNot(builder,
202                                         mask->exec_mask,
203                                         "break");
204
205   mask->break_mask = LLVMBuildAnd(builder,
206                                   mask->break_mask,
207                                   exec_mask, "break_full");
208
209   lp_exec_mask_update(mask);
210}
211
212static void lp_exec_continue(struct lp_exec_mask *mask)
213{
214   LLVMBuilderRef builder = mask->bld->gallivm->builder;
215   LLVMValueRef exec_mask = LLVMBuildNot(builder,
216                                         mask->exec_mask,
217                                         "");
218
219   mask->cont_mask = LLVMBuildAnd(builder,
220                                  mask->cont_mask,
221                                  exec_mask, "");
222
223   lp_exec_mask_update(mask);
224}
225
226
227static void lp_exec_endloop(struct gallivm_state *gallivm,
228                            struct lp_exec_mask *mask)
229{
230   LLVMBuilderRef builder = mask->bld->gallivm->builder;
231   LLVMBasicBlockRef endloop;
232   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
233   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
234                                               mask->bld->type.width *
235                                               mask->bld->type.length);
236   LLVMValueRef i1cond, i2cond, icond, limiter;
237
238   assert(mask->break_mask);
239
240   /*
241    * Restore the cont_mask, but don't pop
242    */
243   assert(mask->loop_stack_size);
244   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
245   lp_exec_mask_update(mask);
246
247   /*
248    * Unlike the continue mask, the break_mask must be preserved across loop
249    * iterations
250    */
251   LLVMBuildStore(builder, mask->break_mask, mask->break_var);
252
253   /* Decrement the loop limiter */
254   limiter = LLVMBuildLoad(builder, mask->loop_limiter, "");
255
256   limiter = LLVMBuildSub(
257      builder,
258      limiter,
259      LLVMConstInt(int_type, 1, false),
260      "");
261
262   LLVMBuildStore(builder, limiter, mask->loop_limiter);
263
264   /* i1cond = (mask != 0) */
265   i1cond = LLVMBuildICmp(
266      builder,
267      LLVMIntNE,
268      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
269      LLVMConstNull(reg_type), "");
270
271   /* i2cond = (looplimiter > 0) */
272   i2cond = LLVMBuildICmp(
273      builder,
274      LLVMIntSGT,
275      limiter,
276      LLVMConstNull(int_type), "");
277
278   /* if( i1cond && i2cond ) */
279   icond = LLVMBuildAnd(builder, i1cond, i2cond, "");
280
281   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
282
283   LLVMBuildCondBr(builder,
284                   icond, mask->loop_block, endloop);
285
286   LLVMPositionBuilderAtEnd(builder, endloop);
287
288   assert(mask->loop_stack_size);
289   --mask->loop_stack_size;
290   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
291   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
292   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
293   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
294
295   lp_exec_mask_update(mask);
296}
297
298/* stores val into an address pointed to by dst.
299 * mask->exec_mask is used to figure out which bits of val
300 * should be stored into the address
301 * (0 means don't store this bit, 1 means do store).
302 */
303static void lp_exec_mask_store(struct lp_exec_mask *mask,
304                               struct lp_build_context *bld_store,
305                               LLVMValueRef pred,
306                               LLVMValueRef val,
307                               LLVMValueRef dst)
308{
309   LLVMBuilderRef builder = mask->bld->gallivm->builder;
310
311   /* Mix the predicate and execution mask */
312   if (mask->has_mask) {
313      if (pred) {
314         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
315      } else {
316         pred = mask->exec_mask;
317      }
318   }
319
320   if (pred) {
321      LLVMValueRef real_val, dst_val;
322
323      dst_val = LLVMBuildLoad(builder, dst, "");
324      real_val = lp_build_select(bld_store,
325                                 pred,
326                                 val, dst_val);
327
328      LLVMBuildStore(builder, real_val, dst);
329   } else
330      LLVMBuildStore(builder, val, dst);
331}
332
333static void lp_exec_mask_call(struct lp_exec_mask *mask,
334                              int func,
335                              int *pc)
336{
337   assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
338   mask->call_stack[mask->call_stack_size].pc = *pc;
339   mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
340   mask->call_stack_size++;
341   *pc = func;
342}
343
344static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
345{
346   LLVMBuilderRef builder = mask->bld->gallivm->builder;
347   LLVMValueRef exec_mask;
348
349   if (mask->call_stack_size == 0) {
350      /* returning from main() */
351      *pc = -1;
352      return;
353   }
354   exec_mask = LLVMBuildNot(builder,
355                            mask->exec_mask,
356                            "ret");
357
358   mask->ret_mask = LLVMBuildAnd(builder,
359                                 mask->ret_mask,
360                                 exec_mask, "ret_full");
361
362   lp_exec_mask_update(mask);
363}
364
/**
 * Hook invoked at the start of a subroutine body.  Intentionally does
 * nothing.
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
   (void) mask;
}
368
369static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
370{
371   assert(mask->call_stack_size);
372   mask->call_stack_size--;
373   *pc = mask->call_stack[mask->call_stack_size].pc;
374   mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
375   lp_exec_mask_update(mask);
376}
377
378
379/**
380 * Return pointer to a temporary register channel (src or dest).
381 * Note that indirect addressing cannot be handled here.
382 * \param index  which temporary register
383 * \param chan  which channel of the temp register.
384 */
385LLVMValueRef
386lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
387             unsigned index,
388             unsigned chan)
389{
390   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
391   assert(chan < 4);
392   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
393      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
394      return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, "");
395   }
396   else {
397      return bld->temps[index][chan];
398   }
399}
400
401/**
402 * Return pointer to a output register channel (src or dest).
403 * Note that indirect addressing cannot be handled here.
404 * \param index  which output register
405 * \param chan  which channel of the output register.
406 */
407LLVMValueRef
408lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
409               unsigned index,
410               unsigned chan)
411{
412   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
413   assert(chan < 4);
414   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
415      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm,
416                                                 index * 4 + chan);
417      return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, "");
418   }
419   else {
420      return bld->outputs[index][chan];
421   }
422}
423
424/**
425 * Gather vector.
426 * XXX the lp_build_gather() function should be capable of doing this
427 * with a little work.
428 */
429static LLVMValueRef
430build_gather(struct lp_build_context *bld,
431             LLVMValueRef base_ptr,
432             LLVMValueRef indexes)
433{
434   LLVMBuilderRef builder = bld->gallivm->builder;
435   LLVMValueRef res = bld->undef;
436   unsigned i;
437
438   /*
439    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
440    */
441   for (i = 0; i < bld->type.length; i++) {
442      LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
443      LLVMValueRef index = LLVMBuildExtractElement(builder,
444                                                   indexes, ii, "");
445      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
446                                             &index, 1, "gather_ptr");
447      LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, "");
448
449      res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
450   }
451
452   return res;
453}
454
455
456/**
457 * Scatter/store vector.
458 */
459static void
460emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
461                  LLVMValueRef base_ptr,
462                  LLVMValueRef indexes,
463                  LLVMValueRef values,
464                  struct lp_exec_mask *mask,
465                  LLVMValueRef pred)
466{
467   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
468   LLVMBuilderRef builder = gallivm->builder;
469   unsigned i;
470
471   /* Mix the predicate and execution mask */
472   if (mask->has_mask) {
473      if (pred) {
474         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
475      }
476      else {
477         pred = mask->exec_mask;
478      }
479   }
480
481   /*
482    * Loop over elements of index_vec, store scalar value.
483    */
484   for (i = 0; i < bld->bld_base.base.type.length; i++) {
485      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
486      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
487      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
488      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
489      LLVMValueRef scalar_pred = pred ?
490         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
491
492      if (0)
493         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
494                         ii, val, index, scalar_ptr);
495
496      if (scalar_pred) {
497         LLVMValueRef real_val, dst_val;
498         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
499         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
500         LLVMBuildStore(builder, real_val, scalar_ptr);
501      }
502      else {
503         LLVMBuildStore(builder, val, scalar_ptr);
504      }
505   }
506}
507
508
509/**
510 * Read the current value of the ADDR register, convert the floats to
511 * ints, add the base index and return the vector of offsets.
512 * The offsets will be used to index into the constant buffer or
513 * temporary register file.
514 */
515static LLVMValueRef
516get_indirect_index(struct lp_build_tgsi_soa_context *bld,
517                   unsigned reg_file, unsigned reg_index,
518                   const struct tgsi_src_register *indirect_reg)
519{
520   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
521   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
522   /* always use X component of address register */
523   unsigned swizzle = indirect_reg->SwizzleX;
524   LLVMValueRef base;
525   LLVMValueRef rel;
526   LLVMValueRef max_index;
527   LLVMValueRef index;
528
529   assert(bld->indirect_files & (1 << reg_file));
530
531   base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
532
533   assert(swizzle < 4);
534   rel = LLVMBuildLoad(builder,
535                        bld->addr[indirect_reg->Index][swizzle],
536                        "load addr reg");
537
538   index = lp_build_add(uint_bld, base, rel);
539
540   max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
541                                      uint_bld->type,
542                                      bld->bld_base.info->file_max[reg_file]);
543
544   assert(!uint_bld->type.sign);
545   index = lp_build_min(uint_bld, index, max_index);
546
547   return index;
548}
549
550static struct lp_build_context *
551stype_to_fetch(struct lp_build_tgsi_context * bld_base,
552	       enum tgsi_opcode_type stype)
553{
554   struct lp_build_context *bld_fetch;
555
556   switch (stype) {
557   case TGSI_TYPE_FLOAT:
558   case TGSI_TYPE_UNTYPED:
559      bld_fetch = &bld_base->base;
560      break;
561   case TGSI_TYPE_UNSIGNED:
562      bld_fetch = &bld_base->uint_bld;
563      break;
564   case TGSI_TYPE_SIGNED:
565      bld_fetch = &bld_base->int_bld;
566      break;
567   case TGSI_TYPE_VOID:
568   case TGSI_TYPE_DOUBLE:
569   default:
570      assert(0);
571      bld_fetch = NULL;
572      break;
573   }
574   return bld_fetch;
575}
576
577static LLVMValueRef
578emit_fetch_constant(
579   struct lp_build_tgsi_context * bld_base,
580   const struct tgsi_full_src_register * reg,
581   enum tgsi_opcode_type stype,
582   unsigned swizzle)
583{
584   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
585   struct gallivm_state *gallivm = bld_base->base.gallivm;
586   LLVMBuilderRef builder = gallivm->builder;
587   struct lp_build_context *uint_bld = &bld_base->uint_bld;
588   LLVMValueRef indirect_index = NULL;
589   struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
590
591   /* XXX: Handle fetching xyzw components as a vector */
592   assert(swizzle != ~0);
593
594   if (reg->Register.Indirect) {
595      indirect_index = get_indirect_index(bld,
596                                          reg->Register.File,
597                                          reg->Register.Index,
598                                          &reg->Indirect);
599   }
600
601   if (reg->Register.Indirect) {
602      LLVMValueRef swizzle_vec =
603         lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
604      LLVMValueRef index_vec;  /* index into the const buffer */
605
606      /* index_vec = indirect_index * 4 + swizzle */
607      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
608      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
609
610      /* Gather values from the constant buffer */
611      return build_gather(bld_fetch, bld->consts_ptr, index_vec);
612   }
613   else {
614      LLVMValueRef index;  /* index into the const buffer */
615      LLVMValueRef scalar, scalar_ptr;
616
617      index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);
618
619      scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
620                                   &index, 1, "");
621
622      if (stype != TGSI_TYPE_FLOAT && stype != TGSI_TYPE_UNTYPED) {
623         LLVMTypeRef ivtype = LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0);
624         LLVMValueRef temp_ptr;
625         temp_ptr = LLVMBuildBitCast(builder, scalar_ptr, ivtype, "");
626         scalar = LLVMBuildLoad(builder, temp_ptr, "");
627      } else
628         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
629
630      return lp_build_broadcast_scalar(bld_fetch, scalar);
631   }
632}
633
634static LLVMValueRef
635emit_fetch_immediate(
636   struct lp_build_tgsi_context * bld_base,
637   const struct tgsi_full_src_register * reg,
638   enum tgsi_opcode_type stype,
639   unsigned swizzle)
640{
641   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
642   LLVMValueRef res = bld->immediates[reg->Register.Index][swizzle];
643   assert(res);
644
645   if (stype == TGSI_TYPE_UNSIGNED) {
646      res = LLVMConstBitCast(res, bld_base->uint_bld.vec_type);
647   } else if (stype == TGSI_TYPE_SIGNED) {
648      res = LLVMConstBitCast(res, bld_base->int_bld.vec_type);
649   }
650   return res;
651}
652
653static LLVMValueRef
654emit_fetch_input(
655   struct lp_build_tgsi_context * bld_base,
656   const struct tgsi_full_src_register * reg,
657   enum tgsi_opcode_type stype,
658   unsigned swizzle)
659{
660   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
661   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
662   LLVMBuilderRef builder = gallivm->builder;
663   struct lp_build_context *uint_bld = &bld_base->uint_bld;
664   LLVMValueRef indirect_index = NULL;
665   LLVMValueRef res;
666
667   if (reg->Register.Indirect) {
668      indirect_index = get_indirect_index(bld,
669                                          reg->Register.File,
670                                          reg->Register.Index,
671                                          &reg->Indirect);
672   }
673
674   if (reg->Register.Indirect) {
675      LLVMValueRef swizzle_vec =
676         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
677      LLVMValueRef length_vec =
678         lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
679      LLVMValueRef index_vec;  /* index into the const buffer */
680      LLVMValueRef inputs_array;
681      LLVMTypeRef float4_ptr_type;
682
683      /* index_vec = (indirect_index * 4 + swizzle) * length */
684      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
685      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
686      index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
687
688      /* cast inputs_array pointer to float* */
689      float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
690      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
691                                         float4_ptr_type, "");
692
693      /* Gather values from the temporary register array */
694      res = build_gather(&bld_base->base, inputs_array, index_vec);
695   } else {
696      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
697         LLVMValueRef lindex = lp_build_const_int32(gallivm,
698                                        reg->Register.Index * 4 + swizzle);
699         LLVMValueRef input_ptr =  LLVMBuildGEP(builder,
700                                                bld->inputs_array, &lindex, 1, "");
701         res = LLVMBuildLoad(builder, input_ptr, "");
702      }
703      else {
704         res = bld->inputs[reg->Register.Index][swizzle];
705      }
706   }
707   assert(res);
708   return res;
709}
710
711static LLVMValueRef
712emit_fetch_temporary(
713   struct lp_build_tgsi_context * bld_base,
714   const struct tgsi_full_src_register * reg,
715   enum tgsi_opcode_type stype,
716   unsigned swizzle)
717{
718   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
719   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
720   LLVMBuilderRef builder = gallivm->builder;
721   struct lp_build_context *uint_bld = &bld_base->uint_bld;
722   LLVMValueRef indirect_index = NULL;
723   LLVMValueRef res;
724
725   if (reg->Register.Indirect) {
726      indirect_index = get_indirect_index(bld,
727                                          reg->Register.File,
728                                          reg->Register.Index,
729                                          &reg->Indirect);
730   }
731
732   if (reg->Register.Indirect) {
733      LLVMValueRef swizzle_vec =
734         lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
735      LLVMValueRef length_vec =
736         lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type,
737                                bld->bld_base.base.type.length);
738      LLVMValueRef index_vec;  /* index into the const buffer */
739      LLVMValueRef temps_array;
740      LLVMTypeRef float4_ptr_type;
741
742      /* index_vec = (indirect_index * 4 + swizzle) * length */
743      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
744      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
745      index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
746
747      /* cast temps_array pointer to float* */
748      float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context), 0);
749      temps_array = LLVMBuildBitCast(builder, bld->temps_array,
750                                     float4_ptr_type, "");
751
752      /* Gather values from the temporary register array */
753      res = build_gather(&bld_base->base, temps_array, index_vec);
754   }
755   else {
756      LLVMValueRef temp_ptr;
757      if (stype != TGSI_TYPE_FLOAT && stype != TGSI_TYPE_UNTYPED) {
758         LLVMTypeRef itype = LLVMPointerType(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), 0);
759         LLVMValueRef tint_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
760                                                     swizzle);
761         temp_ptr = LLVMBuildBitCast(builder, tint_ptr, itype, "");
762      } else
763         temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
764      res = LLVMBuildLoad(builder, temp_ptr, "");
765      if (!res)
766         return bld->bld_base.base.undef;
767   }
768
769   return res;
770}
771
772static LLVMValueRef
773emit_fetch_system_value(
774   struct lp_build_tgsi_context * bld_base,
775   const struct tgsi_full_src_register * reg,
776   enum tgsi_opcode_type stype,
777   unsigned swizzle)
778{
779   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
780   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
781   LLVMBuilderRef builder = gallivm->builder;
782   LLVMValueRef index;  /* index into the system value array */
783   LLVMValueRef scalar, scalar_ptr;
784
785   assert(!reg->Register.Indirect);
786
787   index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
788
789   scalar_ptr = LLVMBuildGEP(builder, bld->system_values_array, &index, 1, "");
790   scalar = LLVMBuildLoad(builder, scalar_ptr, "");
791
792   return lp_build_broadcast_scalar(&bld->bld_base.base, scalar);
793}
794
795/**
796 * Register fetch with derivatives.
797 */
798static void
799emit_fetch_deriv(
800   struct lp_build_tgsi_soa_context *bld,
801   LLVMValueRef src,
802   LLVMValueRef *res,
803   LLVMValueRef *ddx,
804   LLVMValueRef *ddy)
805{
806   if(res)
807      *res = src;
808
809   /* TODO: use interpolation coeffs for inputs */
810
811   if(ddx)
812      *ddx = lp_build_ddx(&bld->bld_base.base, src);
813
814   if(ddy)
815      *ddy = lp_build_ddy(&bld->bld_base.base, src);
816}
817
818
819/**
820 * Predicate.
821 */
822static void
823emit_fetch_predicate(
824   struct lp_build_tgsi_soa_context *bld,
825   const struct tgsi_full_instruction *inst,
826   LLVMValueRef *pred)
827{
828   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
829   unsigned index;
830   unsigned char swizzles[4];
831   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
832   LLVMValueRef value;
833   unsigned chan;
834
835   if (!inst->Instruction.Predicate) {
836      TGSI_FOR_EACH_CHANNEL( chan ) {
837         pred[chan] = NULL;
838      }
839      return;
840   }
841
842   swizzles[0] = inst->Predicate.SwizzleX;
843   swizzles[1] = inst->Predicate.SwizzleY;
844   swizzles[2] = inst->Predicate.SwizzleZ;
845   swizzles[3] = inst->Predicate.SwizzleW;
846
847   index = inst->Predicate.Index;
848   assert(index < LP_MAX_TGSI_PREDS);
849
850   TGSI_FOR_EACH_CHANNEL( chan ) {
851      unsigned swizzle = swizzles[chan];
852
853      /*
854       * Only fetch the predicate register channels that are actually listed
855       * in the swizzles
856       */
857      if (!unswizzled[swizzle]) {
858         value = LLVMBuildLoad(builder,
859                               bld->preds[index][swizzle], "");
860
861         /*
862          * Convert the value to an integer mask.
863          *
864          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
865          * is needlessly causing two comparisons due to storing the intermediate
866          * result as float vector instead of an integer mask vector.
867          */
868         value = lp_build_compare(bld->bld_base.base.gallivm,
869                                  bld->bld_base.base.type,
870                                  PIPE_FUNC_NOTEQUAL,
871                                  value,
872                                  bld->bld_base.base.zero);
873         if (inst->Predicate.Negate) {
874            value = LLVMBuildNot(builder, value, "");
875         }
876
877         unswizzled[swizzle] = value;
878      } else {
879         value = unswizzled[swizzle];
880      }
881
882      pred[chan] = value;
883   }
884}
885
886/**
887 * Register store.
888 */
889static void
890emit_store_chan(
891   struct lp_build_tgsi_context *bld_base,
892   const struct tgsi_full_instruction *inst,
893   unsigned index,
894   unsigned chan_index,
895   LLVMValueRef pred,
896   LLVMValueRef value)
897{
898   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
899   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
900   LLVMBuilderRef builder = gallivm->builder;
901   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
902   struct lp_build_context *uint_bld = &bld_base->uint_bld;
903   LLVMValueRef indirect_index = NULL;
904   struct lp_build_context *bld_store;
905   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
906
907   switch (dtype) {
908   default:
909   case TGSI_TYPE_FLOAT:
910   case TGSI_TYPE_UNTYPED:
911      bld_store = &bld_base->base;
912      break;
913   case TGSI_TYPE_UNSIGNED:
914      bld_store = &bld_base->uint_bld;
915      break;
916   case TGSI_TYPE_SIGNED:
917      bld_store = &bld_base->int_bld;
918      break;
919   case TGSI_TYPE_DOUBLE:
920   case TGSI_TYPE_VOID:
921      assert(0);
922      bld_store = NULL;
923      break;
924   }
925
926   switch( inst->Instruction.Saturate ) {
927   case TGSI_SAT_NONE:
928      break;
929
930   case TGSI_SAT_ZERO_ONE:
931      value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
932      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
933      break;
934
935   case TGSI_SAT_MINUS_PLUS_ONE:
936      value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
937      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
938      break;
939
940   default:
941      assert(0);
942   }
943
944   if (reg->Register.Indirect) {
945      indirect_index = get_indirect_index(bld,
946                                          reg->Register.File,
947                                          reg->Register.Index,
948                                          &reg->Indirect);
949   } else {
950      assert(reg->Register.Index <=
951                             bld->bld_base.info->file_max[reg->Register.File]);
952   }
953
954   switch( reg->Register.File ) {
955   case TGSI_FILE_OUTPUT:
956      if (reg->Register.Indirect) {
957         LLVMValueRef chan_vec =
958            lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
959         LLVMValueRef length_vec =
960            lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
961         LLVMValueRef index_vec;  /* indexes into the temp registers */
962         LLVMValueRef outputs_array;
963         LLVMValueRef pixel_offsets;
964         LLVMTypeRef float_ptr_type;
965         int i;
966
967         /* build pixel offset vector: {0, 1, 2, 3, ...} */
968         pixel_offsets = uint_bld->undef;
969         for (i = 0; i < bld->bld_base.base.type.length; i++) {
970            LLVMValueRef ii = lp_build_const_int32(gallivm, i);
971            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
972                                                   ii, ii, "");
973         }
974
975         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
976         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
977         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
978         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
979         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
980
981         float_ptr_type =
982            LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
983         outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
984                                          float_ptr_type, "");
985
986         /* Scatter store values into temp registers */
987         emit_mask_scatter(bld, outputs_array, index_vec, value,
988                           &bld->exec_mask, pred);
989      }
990      else {
991         LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
992                                               chan_index);
993         lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, out_ptr);
994      }
995      break;
996
997   case TGSI_FILE_TEMPORARY:
998      if (reg->Register.Indirect) {
999         LLVMValueRef chan_vec =
1000            lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
1001         LLVMValueRef length_vec =
1002            lp_build_const_int_vec(gallivm, uint_bld->type,
1003                                   bld->bld_base.base.type.length);
1004         LLVMValueRef index_vec;  /* indexes into the temp registers */
1005         LLVMValueRef temps_array;
1006         LLVMValueRef pixel_offsets;
1007         LLVMTypeRef float_ptr_type;
1008         int i;
1009
1010         /* build pixel offset vector: {0, 1, 2, 3, ...} */
1011         pixel_offsets = uint_bld->undef;
1012         for (i = 0; i < bld->bld_base.base.type.length; i++) {
1013            LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1014            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
1015                                                   ii, ii, "");
1016         }
1017
1018         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1019         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1020         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1021         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1022         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1023
1024         float_ptr_type =
1025            LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1026         temps_array = LLVMBuildBitCast(builder, bld->temps_array,
1027                                        float_ptr_type, "");
1028
1029         /* Scatter store values into temp registers */
1030         emit_mask_scatter(bld, temps_array, index_vec, value,
1031                           &bld->exec_mask, pred);
1032      }
1033      else {
1034         LLVMValueRef temp_ptr;
1035
1036         switch (dtype) {
1037         case TGSI_TYPE_UNSIGNED:
1038         case TGSI_TYPE_SIGNED: {
1039            LLVMTypeRef itype = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4);
1040            LLVMTypeRef ivtype = LLVMPointerType(itype, 0);
1041            LLVMValueRef tint_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
1042                                                        chan_index);
1043            LLVMValueRef temp_value_ptr;
1044
1045            temp_ptr = LLVMBuildBitCast(builder, tint_ptr, ivtype, "");
1046            temp_value_ptr = LLVMBuildBitCast(builder, value, itype, "");
1047            value = temp_value_ptr;
1048            break;
1049         }
1050         default:
1051         case TGSI_TYPE_FLOAT:
1052         case TGSI_TYPE_UNTYPED:
1053            temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
1054                                           chan_index);
1055            break;
1056         }
1057
1058         lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, temp_ptr);
1059      }
1060      break;
1061
1062   case TGSI_FILE_ADDRESS:
1063      assert(dtype == TGSI_TYPE_SIGNED);
1064      assert(LLVMTypeOf(value) == bld_base->base.int_vec_type);
1065      lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value,
1066                         bld->addr[reg->Register.Index][chan_index]);
1067      break;
1068
1069   case TGSI_FILE_PREDICATE:
1070      lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value,
1071                         bld->preds[reg->Register.Index][chan_index]);
1072      break;
1073
1074   default:
1075      assert( 0 );
1076   }
1077}
1078
1079static void
1080emit_store(
1081   struct lp_build_tgsi_context * bld_base,
1082   const struct tgsi_full_instruction * inst,
1083   const struct tgsi_opcode_info * info,
1084   LLVMValueRef dst[4])
1085
1086{
1087   unsigned chan_index;
1088   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1089
1090   if(info->num_dst) {
1091      LLVMValueRef pred[TGSI_NUM_CHANNELS];
1092
1093      emit_fetch_predicate( bld, inst, pred );
1094
1095      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1096         emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
1097      }
1098   }
1099}
1100
1101/**
1102 * High-level instruction translators.
1103 */
1104
1105static void
1106emit_tex( struct lp_build_tgsi_soa_context *bld,
1107          const struct tgsi_full_instruction *inst,
1108          enum lp_build_tex_modifier modifier,
1109          LLVMValueRef *texel)
1110{
1111   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1112   unsigned unit;
1113   LLVMValueRef lod_bias, explicit_lod;
1114   LLVMValueRef oow = NULL;
1115   LLVMValueRef coords[3];
1116   LLVMValueRef ddx[3];
1117   LLVMValueRef ddy[3];
1118   unsigned num_coords;
1119   unsigned i;
1120
1121   if (!bld->sampler) {
1122      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1123      for (i = 0; i < 4; i++) {
1124         texel[i] = bld->bld_base.base.undef;
1125      }
1126      return;
1127   }
1128
1129   switch (inst->Texture.Texture) {
1130   case TGSI_TEXTURE_1D:
1131      num_coords = 1;
1132      break;
1133   case TGSI_TEXTURE_1D_ARRAY:
1134   case TGSI_TEXTURE_2D:
1135   case TGSI_TEXTURE_RECT:
1136      num_coords = 2;
1137      break;
1138   case TGSI_TEXTURE_SHADOW1D:
1139   case TGSI_TEXTURE_SHADOW1D_ARRAY:
1140   case TGSI_TEXTURE_SHADOW2D:
1141   case TGSI_TEXTURE_SHADOWRECT:
1142   case TGSI_TEXTURE_2D_ARRAY:
1143   case TGSI_TEXTURE_3D:
1144   case TGSI_TEXTURE_CUBE:
1145      num_coords = 3;
1146      break;
1147   case TGSI_TEXTURE_SHADOW2D_ARRAY:
1148      num_coords = 4;
1149      break;
1150   default:
1151      assert(0);
1152      return;
1153   }
1154
1155   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1156      lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1157      explicit_lod = NULL;
1158   }
1159   else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1160      lod_bias = NULL;
1161      explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1162   }
1163   else {
1164      lod_bias = NULL;
1165      explicit_lod = NULL;
1166   }
1167
1168   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
1169      oow = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1170      oow = lp_build_rcp(&bld->bld_base.base, oow);
1171   }
1172
1173   for (i = 0; i < num_coords; i++) {
1174      coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
1175      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1176         coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
1177   }
1178   for (i = num_coords; i < 3; i++) {
1179      coords[i] = bld->bld_base.base.undef;
1180   }
1181
1182   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1183      LLVMValueRef index0 = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
1184      for (i = 0; i < num_coords; i++) {
1185         LLVMValueRef src1 = lp_build_emit_fetch( &bld->bld_base, inst, 1, i );
1186         LLVMValueRef src2 = lp_build_emit_fetch( &bld->bld_base, inst, 2, i );
1187         ddx[i] = LLVMBuildExtractElement(builder, src1, index0, "");
1188         ddy[i] = LLVMBuildExtractElement(builder, src2, index0, "");
1189      }
1190      unit = inst->Src[3].Register.Index;
1191   }  else {
1192      for (i = 0; i < num_coords; i++) {
1193         ddx[i] = lp_build_scalar_ddx( &bld->bld_base.base, coords[i] );
1194         ddy[i] = lp_build_scalar_ddy( &bld->bld_base.base, coords[i] );
1195      }
1196      unit = inst->Src[1].Register.Index;
1197   }
1198   for (i = num_coords; i < 3; i++) {
1199      ddx[i] = LLVMGetUndef(bld->bld_base.base.elem_type);
1200      ddy[i] = LLVMGetUndef(bld->bld_base.base.elem_type);
1201   }
1202
1203   bld->sampler->emit_fetch_texel(bld->sampler,
1204                                  bld->bld_base.base.gallivm,
1205                                  bld->bld_base.base.type,
1206                                  unit, num_coords, coords,
1207                                  ddx, ddy,
1208                                  lod_bias, explicit_lod,
1209                                  texel);
1210}
1211
1212static void
1213emit_txq( struct lp_build_tgsi_soa_context *bld,
1214          const struct tgsi_full_instruction *inst,
1215          LLVMValueRef *sizes_out)
1216{
1217   LLVMValueRef explicit_lod;
1218   unsigned num_coords, has_lod;
1219   unsigned i;
1220
1221   switch (inst->Texture.Texture) {
1222   case TGSI_TEXTURE_1D:
1223   case TGSI_TEXTURE_SHADOW1D:
1224   case TGSI_TEXTURE_SHADOW2D:
1225   case TGSI_TEXTURE_SHADOWCUBE:
1226      num_coords = 1;
1227      has_lod = 1;
1228      break;
1229   case TGSI_TEXTURE_2D:
1230   case TGSI_TEXTURE_CUBE:
1231   case TGSI_TEXTURE_1D_ARRAY:
1232   case TGSI_TEXTURE_SHADOW1D_ARRAY:
1233      num_coords = 2;
1234      has_lod = 1;
1235      break;
1236   case TGSI_TEXTURE_3D:
1237// case TGSI_TEXTURE_CUBE_ARRAY:
1238// case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1239   case TGSI_TEXTURE_2D_ARRAY:
1240   case TGSI_TEXTURE_SHADOW2D_ARRAY:
1241      num_coords = 3;
1242      has_lod = 1;
1243      break;
1244
1245   case TGSI_TEXTURE_BUFFER:
1246      num_coords = 1;
1247      has_lod = 0;
1248      break;
1249
1250   case TGSI_TEXTURE_RECT:
1251   case TGSI_TEXTURE_SHADOWRECT:
1252// case TGSI_TEXTURE_2D_MS:
1253      num_coords = 2;
1254      has_lod = 0;
1255      break;
1256
1257// case TGSI_TEXTURE_2D_MS_ARRAY:
1258//    num_coords = 3;
1259//    has_lod = 0;
1260//    break;
1261
1262   default:
1263      assert(0);
1264      return;
1265   }
1266
1267   if (!bld->sampler) {
1268      _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
1269      for (i = 0; i < num_coords; i++)
1270         sizes_out[i] = bld->bld_base.base.undef;
1271      return;
1272   }
1273
1274   if (has_lod)
1275      explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 2 );
1276   else
1277      explicit_lod = NULL;
1278
1279   bld->sampler->emit_size_query(bld->sampler,
1280                                 bld->bld_base.base.gallivm,
1281                                 inst->Src[1].Register.Index,
1282                                 explicit_lod,
1283                                 sizes_out);
1284}
1285
1286static boolean
1287near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
1288		   int pc)
1289{
1290   int i;
1291
1292   for (i = 0; i < 5; i++) {
1293      unsigned opcode;
1294
1295      if (pc + i >= bld->bld_base.info->num_instructions)
1296	 return TRUE;
1297
1298      opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
1299
1300      if (opcode == TGSI_OPCODE_END)
1301	 return TRUE;
1302
1303      if (opcode == TGSI_OPCODE_TEX ||
1304	  opcode == TGSI_OPCODE_TXP ||
1305	  opcode == TGSI_OPCODE_TXD ||
1306	  opcode == TGSI_OPCODE_TXB ||
1307	  opcode == TGSI_OPCODE_TXL ||
1308	  opcode == TGSI_OPCODE_TXF ||
1309	  opcode == TGSI_OPCODE_TXQ ||
1310	  opcode == TGSI_OPCODE_CAL ||
1311	  opcode == TGSI_OPCODE_CALLNZ ||
1312	  opcode == TGSI_OPCODE_IF ||
1313	  opcode == TGSI_OPCODE_IFC ||
1314	  opcode == TGSI_OPCODE_BGNLOOP ||
1315	  opcode == TGSI_OPCODE_SWITCH)
1316	 return FALSE;
1317   }
1318
1319   return TRUE;
1320}
1321
1322
1323
1324/**
1325 * Kill fragment if any of the src register values are negative.
1326 */
1327static void
1328emit_kil(
1329   struct lp_build_tgsi_soa_context *bld,
1330   const struct tgsi_full_instruction *inst,
1331   int pc)
1332{
1333   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1334   const struct tgsi_full_src_register *reg = &inst->Src[0];
1335   LLVMValueRef terms[TGSI_NUM_CHANNELS];
1336   LLVMValueRef mask;
1337   unsigned chan_index;
1338
1339   memset(&terms, 0, sizeof terms);
1340
1341   TGSI_FOR_EACH_CHANNEL( chan_index ) {
1342      unsigned swizzle;
1343
1344      /* Unswizzle channel */
1345      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1346
1347      /* Check if the component has not been already tested. */
1348      assert(swizzle < TGSI_NUM_CHANNELS);
1349      if( !terms[swizzle] )
1350         /* TODO: change the comparison operator instead of setting the sign */
1351         terms[swizzle] =  lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
1352   }
1353
1354   mask = NULL;
1355   TGSI_FOR_EACH_CHANNEL( chan_index ) {
1356      if(terms[chan_index]) {
1357         LLVMValueRef chan_mask;
1358
1359         /*
1360          * If term < 0 then mask = 0 else mask = ~0.
1361          */
1362         chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
1363
1364         if(mask)
1365            mask = LLVMBuildAnd(builder, mask, chan_mask, "");
1366         else
1367            mask = chan_mask;
1368      }
1369   }
1370
1371   if(mask) {
1372      lp_build_mask_update(bld->mask, mask);
1373
1374      if (!near_end_of_shader(bld, pc))
1375	 lp_build_mask_check(bld->mask);
1376   }
1377}
1378
1379
1380/**
1381 * Predicated fragment kill.
1382 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
1383 * The only predication is the execution mask which will apply if
1384 * we're inside a loop or conditional.
1385 */
1386static void
1387emit_kilp(struct lp_build_tgsi_soa_context *bld,
1388          int pc)
1389{
1390   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1391   LLVMValueRef mask;
1392
1393   /* For those channels which are "alive", disable fragment shader
1394    * execution.
1395    */
1396   if (bld->exec_mask.has_mask) {
1397      mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
1398   }
1399   else {
1400      LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
1401      mask = zero;
1402   }
1403
1404   lp_build_mask_update(bld->mask, mask);
1405
1406   if (!near_end_of_shader(bld, pc))
1407      lp_build_mask_check(bld->mask);
1408}
1409
1410
1411/**
1412 * Emit code which will dump the value of all the temporary registers
1413 * to stdout.
1414 */
1415static void
1416emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
1417{
1418   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1419   LLVMBuilderRef builder = gallivm->builder;
1420   LLVMValueRef temp_ptr;
1421   LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
1422   LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
1423   LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
1424   LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
1425   int index;
1426   int n = bld->bld_base.info->file_max[TGSI_FILE_TEMPORARY];
1427
1428   for (index = 0; index < n; index++) {
1429      LLVMValueRef idx = lp_build_const_int32(gallivm, index);
1430      LLVMValueRef v[4][4], res;
1431      int chan;
1432
1433      lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
1434
1435      for (chan = 0; chan < 4; chan++) {
1436         temp_ptr = lp_get_temp_ptr_soa(bld, index, chan);
1437         res = LLVMBuildLoad(builder, temp_ptr, "");
1438         v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
1439         v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
1440         v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
1441         v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
1442      }
1443
1444      lp_build_printf(gallivm, "  X: %f %f %f %f\n",
1445                      v[0][0], v[0][1], v[0][2], v[0][3]);
1446      lp_build_printf(gallivm, "  Y: %f %f %f %f\n",
1447                      v[1][0], v[1][1], v[1][2], v[1][3]);
1448      lp_build_printf(gallivm, "  Z: %f %f %f %f\n",
1449                      v[2][0], v[2][1], v[2][2], v[2][3]);
1450      lp_build_printf(gallivm, "  W: %f %f %f %f\n",
1451                      v[3][0], v[3][1], v[3][2], v[3][3]);
1452   }
1453}
1454
1455
1456
1457void
1458lp_emit_declaration_soa(
1459   struct lp_build_tgsi_context *bld_base,
1460   const struct tgsi_full_declaration *decl)
1461{
1462   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
1463   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1464   LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
1465   const unsigned first = decl->Range.First;
1466   const unsigned last = decl->Range.Last;
1467   unsigned idx, i;
1468
1469   for (idx = first; idx <= last; ++idx) {
1470      assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
1471      switch (decl->Declaration.File) {
1472      case TGSI_FILE_TEMPORARY:
1473         assert(idx < LP_MAX_TGSI_TEMPS);
1474         if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
1475            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1476               bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
1477         }
1478         break;
1479
1480      case TGSI_FILE_OUTPUT:
1481         if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
1482            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1483               bld->outputs[idx][i] = lp_build_alloca(gallivm,
1484                                                      vec_type, "output");
1485         }
1486         break;
1487
1488      case TGSI_FILE_ADDRESS:
1489	 /* ADDR registers are the only allocated with an integer LLVM IR type,
1490	  * as they are guaranteed to always have integers.
1491	  * XXX: Not sure if this exception is worthwhile (or the whole idea of
1492	  * an ADDR register for that matter).
1493	  */
1494         assert(idx < LP_MAX_TGSI_ADDRS);
1495         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1496            bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
1497         break;
1498
1499      case TGSI_FILE_PREDICATE:
1500         assert(idx < LP_MAX_TGSI_PREDS);
1501         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1502            bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
1503                                                 "predicate");
1504         break;
1505
1506      default:
1507         /* don't need to declare other vars */
1508         break;
1509      }
1510   }
1511}
1512
1513
1514void lp_emit_immediate_soa(
1515   struct lp_build_tgsi_context *bld_base,
1516   const struct tgsi_full_immediate *imm)
1517{
1518   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
1519   struct gallivm_state * gallivm = bld_base->base.gallivm;
1520
1521   /* simply copy the immediate values into the next immediates[] slot */
1522   unsigned i;
1523   const uint size = imm->Immediate.NrTokens - 1;
1524   assert(size <= 4);
1525   assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES);
1526   switch (imm->Immediate.DataType) {
1527   case TGSI_IMM_FLOAT32:
1528      for( i = 0; i < size; ++i )
1529         bld->immediates[bld->num_immediates][i] =
1530            lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
1531
1532      break;
1533   case TGSI_IMM_UINT32:
1534      for( i = 0; i < size; ++i ) {
1535         LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
1536         bld->immediates[bld->num_immediates][i] =
1537            LLVMConstBitCast(tmp, bld_base->base.vec_type);
1538      }
1539
1540      break;
1541   case TGSI_IMM_INT32:
1542      for( i = 0; i < size; ++i ) {
1543         LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
1544         bld->immediates[bld->num_immediates][i] =
1545            LLVMConstBitCast(tmp, bld_base->base.vec_type);
1546      }
1547
1548      break;
1549   }
1550   for( i = size; i < 4; ++i )
1551      bld->immediates[bld->num_immediates][i] = bld_base->base.undef;
1552
1553   bld->num_immediates++;
1554}
1555
1556static void
1557ddx_emit(
1558   const struct lp_build_tgsi_action * action,
1559   struct lp_build_tgsi_context * bld_base,
1560   struct lp_build_emit_data * emit_data)
1561{
1562   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1563
1564   emit_fetch_deriv(bld, emit_data->args[0], NULL,
1565                    &emit_data->output[emit_data->chan], NULL);
1566}
1567
1568static void
1569ddy_emit(
1570   const struct lp_build_tgsi_action * action,
1571   struct lp_build_tgsi_context * bld_base,
1572   struct lp_build_emit_data * emit_data)
1573{
1574   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1575
1576   emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
1577                    &emit_data->output[emit_data->chan]);
1578}
1579
1580static void
1581kilp_emit(
1582   const struct lp_build_tgsi_action * action,
1583   struct lp_build_tgsi_context * bld_base,
1584   struct lp_build_emit_data * emit_data)
1585{
1586   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1587
1588   emit_kilp(bld, bld_base->pc - 1);
1589}
1590
1591static void
1592kil_emit(
1593   const struct lp_build_tgsi_action * action,
1594   struct lp_build_tgsi_context * bld_base,
1595   struct lp_build_emit_data * emit_data)
1596{
1597   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1598
1599   emit_kil(bld, emit_data->inst, bld_base->pc - 1);
1600}
1601
1602static void
1603tex_emit(
1604   const struct lp_build_tgsi_action * action,
1605   struct lp_build_tgsi_context * bld_base,
1606   struct lp_build_emit_data * emit_data)
1607{
1608   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1609
1610   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, emit_data->output);
1611}
1612
1613static void
1614txb_emit(
1615   const struct lp_build_tgsi_action * action,
1616   struct lp_build_tgsi_context * bld_base,
1617   struct lp_build_emit_data * emit_data)
1618{
1619   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1620
1621   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
1622            emit_data->output);
1623}
1624
1625static void
1626txd_emit(
1627   const struct lp_build_tgsi_action * action,
1628   struct lp_build_tgsi_context * bld_base,
1629   struct lp_build_emit_data * emit_data)
1630{
1631   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1632
1633   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
1634            emit_data->output);
1635}
1636
1637static void
1638txl_emit(
1639   const struct lp_build_tgsi_action * action,
1640   struct lp_build_tgsi_context * bld_base,
1641   struct lp_build_emit_data * emit_data)
1642{
1643   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1644
1645   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
1646            emit_data->output);
1647}
1648
1649static void
1650txp_emit(
1651   const struct lp_build_tgsi_action * action,
1652   struct lp_build_tgsi_context * bld_base,
1653   struct lp_build_emit_data * emit_data)
1654{
1655   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1656
1657   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
1658            emit_data->output);
1659}
1660
1661static void
1662txq_emit(
1663   const struct lp_build_tgsi_action * action,
1664   struct lp_build_tgsi_context * bld_base,
1665   struct lp_build_emit_data * emit_data)
1666{
1667   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1668
1669   emit_txq(bld, emit_data->inst, emit_data->output);
1670}
1671
1672static void
1673cal_emit(
1674   const struct lp_build_tgsi_action * action,
1675   struct lp_build_tgsi_context * bld_base,
1676   struct lp_build_emit_data * emit_data)
1677{
1678   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1679
1680   lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
1681                     &bld_base->pc);
1682}
1683
1684static void
1685ret_emit(
1686   const struct lp_build_tgsi_action * action,
1687   struct lp_build_tgsi_context * bld_base,
1688   struct lp_build_emit_data * emit_data)
1689{
1690   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1691
1692   lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
1693}
1694
1695static void
1696brk_emit(
1697   const struct lp_build_tgsi_action * action,
1698   struct lp_build_tgsi_context * bld_base,
1699   struct lp_build_emit_data * emit_data)
1700{
1701   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1702
1703   lp_exec_break(&bld->exec_mask);
1704}
1705
1706static void
1707if_emit(
1708   const struct lp_build_tgsi_action * action,
1709   struct lp_build_tgsi_context * bld_base,
1710   struct lp_build_emit_data * emit_data)
1711{
1712   LLVMValueRef tmp;
1713   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1714
1715   tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
1716                      emit_data->args[0], bld->bld_base.base.zero);
1717   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
1718}
1719
1720static void
1721bgnloop_emit(
1722   const struct lp_build_tgsi_action * action,
1723   struct lp_build_tgsi_context * bld_base,
1724   struct lp_build_emit_data * emit_data)
1725{
1726   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1727
1728   lp_exec_bgnloop(&bld->exec_mask);
1729}
1730
1731static void
1732bgnsub_emit(
1733   const struct lp_build_tgsi_action * action,
1734   struct lp_build_tgsi_context * bld_base,
1735   struct lp_build_emit_data * emit_data)
1736{
1737   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1738
1739   lp_exec_mask_bgnsub(&bld->exec_mask);
1740}
1741
1742static void
1743else_emit(
1744   const struct lp_build_tgsi_action * action,
1745   struct lp_build_tgsi_context * bld_base,
1746   struct lp_build_emit_data * emit_data)
1747{
1748   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1749
1750   lp_exec_mask_cond_invert(&bld->exec_mask);
1751}
1752
1753static void
1754endif_emit(
1755   const struct lp_build_tgsi_action * action,
1756   struct lp_build_tgsi_context * bld_base,
1757   struct lp_build_emit_data * emit_data)
1758{
1759   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1760
1761   lp_exec_mask_cond_pop(&bld->exec_mask);
1762}
1763
1764static void
1765endloop_emit(
1766   const struct lp_build_tgsi_action * action,
1767   struct lp_build_tgsi_context * bld_base,
1768   struct lp_build_emit_data * emit_data)
1769{
1770   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1771
1772   lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
1773}
1774
1775static void
1776endsub_emit(
1777   const struct lp_build_tgsi_action * action,
1778   struct lp_build_tgsi_context * bld_base,
1779   struct lp_build_emit_data * emit_data)
1780{
1781   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1782
1783   lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
1784}
1785
1786static void
1787cont_emit(
1788   const struct lp_build_tgsi_action * action,
1789   struct lp_build_tgsi_context * bld_base,
1790   struct lp_build_emit_data * emit_data)
1791{
1792   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1793
1794   lp_exec_continue(&bld->exec_mask);
1795}
1796
1797/* XXX: Refactor and move it to lp_bld_tgsi_action.c
1798 *
1799 * XXX: What do the comments about xmm registers mean?  Maybe they are left over
1800 * from old code, but there is no guarantee that LLVM will use those registers
1801 * for this code.
1802 *
1803 * XXX: There should be no calls to lp_build_emit_fetch in this function.  This
1804 * should be handled by the emit_data->fetch_args function. */
1805static void
1806nrm_emit(
1807   const struct lp_build_tgsi_action * action,
1808   struct lp_build_tgsi_context * bld_base,
1809   struct lp_build_emit_data * emit_data)
1810{
1811   LLVMValueRef tmp0, tmp1;
1812   LLVMValueRef tmp4 = NULL;
1813   LLVMValueRef tmp5 = NULL;
1814   LLVMValueRef tmp6 = NULL;
1815   LLVMValueRef tmp7 = NULL;
1816   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1817
1818   uint dims = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1819
1820  if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) ||
1821      TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y) ||
1822      TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z) ||
1823      (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4)) {
1824
1825      /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1826
1827      /* xmm4 = src.x */
1828      /* xmm0 = src.x * src.x */
1829      tmp0 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_X);
1830      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
1831         tmp4 = tmp0;
1832      }
1833      tmp0 = lp_build_mul( &bld->bld_base.base, tmp0, tmp0);
1834
1835      /* xmm5 = src.y */
1836      /* xmm0 = xmm0 + src.y * src.y */
1837      tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
1838      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
1839         tmp5 = tmp1;
1840      }
1841      tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
1842      tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
1843
1844      /* xmm6 = src.z */
1845      /* xmm0 = xmm0 + src.z * src.z */
1846      tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Z);
1847      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
1848         tmp6 = tmp1;
1849      }
1850      tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
1851      tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
1852
1853      if (dims == 4) {
1854         /* xmm7 = src.w */
1855         /* xmm0 = xmm0 + src.w * src.w */
1856         tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_W);
1857         if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W)) {
1858            tmp7 = tmp1;
1859         }
1860         tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
1861         tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
1862      }
1863      /* xmm1 = 1 / sqrt(xmm0) */
1864      tmp1 = lp_build_rsqrt( &bld->bld_base.base, tmp0);
1865       /* dst.x = xmm1 * src.x */
1866      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
1867         emit_data->output[TGSI_CHAN_X] = lp_build_mul( &bld->bld_base.base, tmp4, tmp1);
1868      }
1869      /* dst.y = xmm1 * src.y */
1870      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
1871         emit_data->output[TGSI_CHAN_Y] = lp_build_mul( &bld->bld_base.base, tmp5, tmp1);
1872      }
1873
1874      /* dst.z = xmm1 * src.z */
1875      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
1876         emit_data->output[TGSI_CHAN_Z] = lp_build_mul( &bld->bld_base.base, tmp6, tmp1);
1877      }
1878      /* dst.w = xmm1 * src.w */
1879      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) && dims == 4) {
1880         emit_data->output[TGSI_CHAN_W] = lp_build_mul( &bld->bld_base.base, tmp7, tmp1);
1881      }
1882   }
1883
1884   /* dst.w = 1.0 */
1885   if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 3) {
1886       emit_data->output[TGSI_CHAN_W] = bld->bld_base.base.one;
1887   }
1888}
1889
1890static void emit_prologue(struct lp_build_tgsi_context * bld_base)
1891{
1892   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1893   struct gallivm_state * gallivm = bld_base->base.gallivm;
1894
1895   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
1896      LLVMValueRef array_size =
1897         lp_build_const_int32(gallivm,
1898                         bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
1899      bld->temps_array = lp_build_array_alloca(gallivm,
1900                                              bld_base->base.vec_type, array_size,
1901                                              "temp_array");
1902   }
1903
1904   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
1905      LLVMValueRef array_size =
1906         lp_build_const_int32(gallivm,
1907                            bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
1908      bld->outputs_array = lp_build_array_alloca(gallivm,
1909                                                bld_base->base.vec_type, array_size,
1910                                                "output_array");
1911   }
1912
1913   /* If we have indirect addressing in inputs we need to copy them into
1914    * our alloca array to be able to iterate over them */
1915   if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
1916      unsigned index, chan;
1917      LLVMTypeRef vec_type = bld_base->base.vec_type;
1918      LLVMValueRef array_size = lp_build_const_int32(gallivm,
1919            bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
1920      bld->inputs_array = lp_build_array_alloca(gallivm,
1921                                               vec_type, array_size,
1922                                               "input_array");
1923
1924      assert(bld_base->info->num_inputs
1925                        <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
1926
1927      for (index = 0; index < bld_base->info->num_inputs; ++index) {
1928         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
1929            LLVMValueRef lindex =
1930               lp_build_const_int32(gallivm, index * 4 + chan);
1931            LLVMValueRef input_ptr =
1932               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
1933                            &lindex, 1, "");
1934            LLVMValueRef value = bld->inputs[index][chan];
1935            if (value)
1936               LLVMBuildStore(gallivm->builder, value, input_ptr);
1937         }
1938      }
1939   }
1940}
1941
1942static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
1943{
1944   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1945
1946   if (0) {
1947      /* for debugging */
1948      emit_dump_temps(bld);
1949   }
1950
1951   /* If we have indirect addressing in outputs we need to copy our alloca array
1952    * to the outputs slots specified by the called */
1953   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
1954      unsigned index, chan;
1955      assert(bld_base->info->num_outputs <=
1956                        bld_base->info->file_max[TGSI_FILE_OUTPUT] + 1);
1957      for (index = 0; index < bld_base->info->num_outputs; ++index) {
1958         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
1959            bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
1960         }
1961      }
1962   }
1963}
1964
1965void
1966lp_build_tgsi_soa(struct gallivm_state *gallivm,
1967                  const struct tgsi_token *tokens,
1968                  struct lp_type type,
1969                  struct lp_build_mask_context *mask,
1970                  LLVMValueRef consts_ptr,
1971                  LLVMValueRef system_values_array,
1972                  const LLVMValueRef *pos,
1973                  const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
1974                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1975                  struct lp_build_sampler_soa *sampler,
1976                  const struct tgsi_shader_info *info)
1977{
1978   struct lp_build_tgsi_soa_context bld;
1979
1980   struct lp_type res_type;
1981
1982   assert(type.length <= LP_MAX_VECTOR_LENGTH);
1983   memset(&res_type, 0, sizeof res_type);
1984   res_type.width = type.width;
1985   res_type.length = type.length;
1986   res_type.sign = 1;
1987
1988   /* Setup build context */
1989   memset(&bld, 0, sizeof bld);
1990   lp_build_context_init(&bld.bld_base.base, gallivm, type);
1991   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
1992   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
1993   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
1994   bld.mask = mask;
1995   bld.pos = pos;
1996   bld.inputs = inputs;
1997   bld.outputs = outputs;
1998   bld.consts_ptr = consts_ptr;
1999   bld.sampler = sampler;
2000   bld.bld_base.info = info;
2001   bld.indirect_files = info->indirect_files;
2002
2003   bld.bld_base.soa = TRUE;
2004   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
2005   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
2006   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
2007   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
2008   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
2009   bld.bld_base.emit_store = emit_store;
2010
2011   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
2012   bld.bld_base.emit_immediate = lp_emit_immediate_soa;
2013
2014   bld.bld_base.emit_prologue = emit_prologue;
2015   bld.bld_base.emit_epilogue = emit_epilogue;
2016
2017   /* Set opcode actions */
2018   lp_set_default_actions_cpu(&bld.bld_base);
2019
2020   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
2021   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
2022   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
2023   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
2024   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
2025   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
2026   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
2027   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
2028   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
2029   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
2030   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
2031   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
2032   bld.bld_base.op_actions[TGSI_OPCODE_KIL].emit = kil_emit;
2033   bld.bld_base.op_actions[TGSI_OPCODE_KILP].emit = kilp_emit;
2034   bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit;
2035   bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit;
2036   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
2037   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
2038   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
2039   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
2040   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
2041   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
2042   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
2043
2044   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base);
2045
2046
2047   bld.system_values_array = system_values_array;
2048
2049   lp_build_tgsi_llvm(&bld.bld_base, tokens);
2050
2051   if (0) {
2052      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
2053      LLVMValueRef function = LLVMGetBasicBlockParent(block);
2054      debug_printf("11111111111111111111111111111 \n");
2055      tgsi_dump(tokens, 0);
2056      lp_debug_dump_value(function);
2057      debug_printf("2222222222222222222222222222 \n");
2058   }
2059
2060   if (0) {
2061      LLVMModuleRef module = LLVMGetGlobalParent(
2062         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
2063      LLVMDumpModule(module);
2064
2065   }
2066}
2067
2068
2069/**
2070 * Build up the system values array out of individual values such as
2071 * the instance ID, front-face, primitive ID, etc.  The shader info is
2072 * used to determine which system values are needed and where to put
2073 * them in the system values array.
2074 *
2075 * XXX only instance ID is implemented at this time.
2076 *
2077 * The system values register file is similar to the constants buffer.
2078 * Example declaration:
2079 *    DCL SV[0], INSTANCEID
2080 * Example instruction:
2081 *    MOVE foo, SV[0].xxxx;
2082 *
2083 * \return  LLVM float array (interpreted as float [][4])
2084 */
2085LLVMValueRef
2086lp_build_system_values_array(struct gallivm_state *gallivm,
2087                             const struct tgsi_shader_info *info,
2088                             LLVMValueRef instance_id,
2089                             LLVMValueRef facing)
2090{
2091   LLVMValueRef size = lp_build_const_int32(gallivm, 4 * info->num_system_values);
2092   LLVMTypeRef float_t = LLVMFloatTypeInContext(gallivm->context);
2093   LLVMValueRef array = lp_build_array_alloca(gallivm, float_t,
2094                                              size, "sysvals_array");
2095   unsigned i;
2096
2097   for (i = 0; i < info->num_system_values; i++) {
2098      LLVMValueRef index = lp_build_const_int32(gallivm, i * 4);
2099      LLVMValueRef ptr, value = 0;
2100
2101      switch (info->system_value_semantic_name[i]) {
2102      case TGSI_SEMANTIC_INSTANCEID:
2103         /* convert instance ID from int to float */
2104         value = LLVMBuildSIToFP(gallivm->builder, instance_id, float_t,
2105                                 "sysval_instanceid");
2106         break;
2107      case TGSI_SEMANTIC_FACE:
2108         /* fall-through */
2109      default:
2110         assert(0 && "unexpected semantic in build_system_values_array()");
2111      }
2112
2113      ptr = LLVMBuildGEP(gallivm->builder, array, &index, 1, "");
2114      LLVMBuildStore(gallivm->builder, value, ptr);
2115   }
2116
2117   return array;
2118}
2119