lp_bld_tgsi_soa.c revision 00eb74b275e21d567a0ab8a6731181e005208634
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29/**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39#include "pipe/p_config.h"
40#include "pipe/p_shader_tokens.h"
41#include "util/u_debug.h"
42#include "util/u_math.h"
43#include "util/u_memory.h"
44#include "tgsi/tgsi_dump.h"
45#include "tgsi/tgsi_exec.h"
46#include "tgsi/tgsi_info.h"
47#include "tgsi/tgsi_parse.h"
48#include "tgsi/tgsi_util.h"
49#include "tgsi/tgsi_scan.h"
50#include "lp_bld_tgsi_action.h"
51#include "lp_bld_type.h"
52#include "lp_bld_const.h"
53#include "lp_bld_arit.h"
54#include "lp_bld_bitarit.h"
55#include "lp_bld_gather.h"
56#include "lp_bld_init.h"
57#include "lp_bld_logic.h"
58#include "lp_bld_swizzle.h"
59#include "lp_bld_flow.h"
60#include "lp_bld_quad.h"
61#include "lp_bld_tgsi.h"
62#include "lp_bld_limits.h"
63#include "lp_bld_debug.h"
64#include "lp_bld_printf.h"
65
66
67static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
68{
69   LLVMTypeRef int_type = LLVMInt32TypeInContext(bld->gallivm->context);
70   LLVMBuilderRef builder = bld->gallivm->builder;
71
72   mask->bld = bld;
73   mask->has_mask = FALSE;
74   mask->cond_stack_size = 0;
75   mask->loop_stack_size = 0;
76   mask->call_stack_size = 0;
77
78   mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
79   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
80         LLVMConstAllOnes(mask->int_vec_type);
81
82   mask->loop_limiter = lp_build_alloca(bld->gallivm, int_type, "looplimiter");
83
84   LLVMBuildStore(
85      builder,
86      LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
87      mask->loop_limiter);
88}
89
90static void lp_exec_mask_update(struct lp_exec_mask *mask)
91{
92   LLVMBuilderRef builder = mask->bld->gallivm->builder;
93
94   if (mask->loop_stack_size) {
95      /*for loops we need to update the entire mask at runtime */
96      LLVMValueRef tmp;
97      assert(mask->break_mask);
98      tmp = LLVMBuildAnd(builder,
99                         mask->cont_mask,
100                         mask->break_mask,
101                         "maskcb");
102      mask->exec_mask = LLVMBuildAnd(builder,
103                                     mask->cond_mask,
104                                     tmp,
105                                     "maskfull");
106   } else
107      mask->exec_mask = mask->cond_mask;
108
109   if (mask->call_stack_size) {
110      mask->exec_mask = LLVMBuildAnd(builder,
111                                     mask->exec_mask,
112                                     mask->ret_mask,
113                                     "callmask");
114   }
115
116   mask->has_mask = (mask->cond_stack_size > 0 ||
117                     mask->loop_stack_size > 0 ||
118                     mask->call_stack_size > 0);
119}
120
121static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
122                                   LLVMValueRef val)
123{
124   LLVMBuilderRef builder = mask->bld->gallivm->builder;
125
126   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
127   if (mask->cond_stack_size == 0) {
128      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
129   }
130   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
131   assert(LLVMTypeOf(val) == mask->int_vec_type);
132   mask->cond_mask = LLVMBuildAnd(builder,
133                                  mask->cond_mask,
134                                  val,
135                                  "");
136   lp_exec_mask_update(mask);
137}
138
139static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
140{
141   LLVMBuilderRef builder = mask->bld->gallivm->builder;
142   LLVMValueRef prev_mask;
143   LLVMValueRef inv_mask;
144
145   assert(mask->cond_stack_size);
146   prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
147   if (mask->cond_stack_size == 1) {
148      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
149   }
150
151   inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
152
153   mask->cond_mask = LLVMBuildAnd(builder,
154                                  inv_mask,
155                                  prev_mask, "");
156   lp_exec_mask_update(mask);
157}
158
159static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
160{
161   assert(mask->cond_stack_size);
162   mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
163   lp_exec_mask_update(mask);
164}
165
166static void lp_exec_bgnloop(struct lp_exec_mask *mask)
167{
168   LLVMBuilderRef builder = mask->bld->gallivm->builder;
169
170   if (mask->loop_stack_size == 0) {
171      assert(mask->loop_block == NULL);
172      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
173      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
174      assert(mask->break_var == NULL);
175   }
176
177   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
178
179   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
180   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
181   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
182   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
183   ++mask->loop_stack_size;
184
185   mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
186   LLVMBuildStore(builder, mask->break_mask, mask->break_var);
187
188   mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
189
190   LLVMBuildBr(builder, mask->loop_block);
191   LLVMPositionBuilderAtEnd(builder, mask->loop_block);
192
193   mask->break_mask = LLVMBuildLoad(builder, mask->break_var, "");
194
195   lp_exec_mask_update(mask);
196}
197
198static void lp_exec_break(struct lp_exec_mask *mask)
199{
200   LLVMBuilderRef builder = mask->bld->gallivm->builder;
201   LLVMValueRef exec_mask = LLVMBuildNot(builder,
202                                         mask->exec_mask,
203                                         "break");
204
205   mask->break_mask = LLVMBuildAnd(builder,
206                                   mask->break_mask,
207                                   exec_mask, "break_full");
208
209   lp_exec_mask_update(mask);
210}
211
212static void lp_exec_continue(struct lp_exec_mask *mask)
213{
214   LLVMBuilderRef builder = mask->bld->gallivm->builder;
215   LLVMValueRef exec_mask = LLVMBuildNot(builder,
216                                         mask->exec_mask,
217                                         "");
218
219   mask->cont_mask = LLVMBuildAnd(builder,
220                                  mask->cont_mask,
221                                  exec_mask, "");
222
223   lp_exec_mask_update(mask);
224}
225
226
227static void lp_exec_endloop(struct gallivm_state *gallivm,
228                            struct lp_exec_mask *mask)
229{
230   LLVMBuilderRef builder = mask->bld->gallivm->builder;
231   LLVMBasicBlockRef endloop;
232   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
233   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
234                                               mask->bld->type.width *
235                                               mask->bld->type.length);
236   LLVMValueRef i1cond, i2cond, icond, limiter;
237
238   assert(mask->break_mask);
239
240   /*
241    * Restore the cont_mask, but don't pop
242    */
243   assert(mask->loop_stack_size);
244   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
245   lp_exec_mask_update(mask);
246
247   /*
248    * Unlike the continue mask, the break_mask must be preserved across loop
249    * iterations
250    */
251   LLVMBuildStore(builder, mask->break_mask, mask->break_var);
252
253   /* Decrement the loop limiter */
254   limiter = LLVMBuildLoad(builder, mask->loop_limiter, "");
255
256   limiter = LLVMBuildSub(
257      builder,
258      limiter,
259      LLVMConstInt(int_type, 1, false),
260      "");
261
262   LLVMBuildStore(builder, limiter, mask->loop_limiter);
263
264   /* i1cond = (mask != 0) */
265   i1cond = LLVMBuildICmp(
266      builder,
267      LLVMIntNE,
268      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
269      LLVMConstNull(reg_type), "");
270
271   /* i2cond = (looplimiter > 0) */
272   i2cond = LLVMBuildICmp(
273      builder,
274      LLVMIntSGT,
275      limiter,
276      LLVMConstNull(int_type), "");
277
278   /* if( i1cond && i2cond ) */
279   icond = LLVMBuildAnd(builder, i1cond, i2cond, "");
280
281   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
282
283   LLVMBuildCondBr(builder,
284                   icond, mask->loop_block, endloop);
285
286   LLVMPositionBuilderAtEnd(builder, endloop);
287
288   assert(mask->loop_stack_size);
289   --mask->loop_stack_size;
290   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
291   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
292   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
293   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
294
295   lp_exec_mask_update(mask);
296}
297
298/* stores val into an address pointed to by dst.
299 * mask->exec_mask is used to figure out which bits of val
300 * should be stored into the address
301 * (0 means don't store this bit, 1 means do store).
302 */
303static void lp_exec_mask_store(struct lp_exec_mask *mask,
304                               struct lp_build_context *bld_store,
305                               LLVMValueRef pred,
306                               LLVMValueRef val,
307                               LLVMValueRef dst)
308{
309   LLVMBuilderRef builder = mask->bld->gallivm->builder;
310
311   /* Mix the predicate and execution mask */
312   if (mask->has_mask) {
313      if (pred) {
314         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
315      } else {
316         pred = mask->exec_mask;
317      }
318   }
319
320   if (pred) {
321      LLVMValueRef real_val, dst_val;
322
323      dst_val = LLVMBuildLoad(builder, dst, "");
324      real_val = lp_build_select(bld_store,
325                                 pred,
326                                 val, dst_val);
327
328      LLVMBuildStore(builder, real_val, dst);
329   } else
330      LLVMBuildStore(builder, val, dst);
331}
332
333static void lp_exec_mask_call(struct lp_exec_mask *mask,
334                              int func,
335                              int *pc)
336{
337   assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
338   mask->call_stack[mask->call_stack_size].pc = *pc;
339   mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
340   mask->call_stack_size++;
341   *pc = func;
342}
343
344static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
345{
346   LLVMBuilderRef builder = mask->bld->gallivm->builder;
347   LLVMValueRef exec_mask;
348
349   if (mask->call_stack_size == 0) {
350      /* returning from main() */
351      *pc = -1;
352      return;
353   }
354   exec_mask = LLVMBuildNot(builder,
355                            mask->exec_mask,
356                            "ret");
357
358   mask->ret_mask = LLVMBuildAnd(builder,
359                                 mask->ret_mask,
360                                 exec_mask, "ret_full");
361
362   lp_exec_mask_update(mask);
363}
364
/**
 * Called at the start of a subroutine body.  There is currently nothing to
 * do here; the mask state is left untouched.
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
   (void) mask;
}
368
369static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
370{
371   assert(mask->call_stack_size);
372   mask->call_stack_size--;
373   *pc = mask->call_stack[mask->call_stack_size].pc;
374   mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
375   lp_exec_mask_update(mask);
376}
377
378
379/**
380 * Return pointer to a temporary register channel (src or dest).
381 * Note that indirect addressing cannot be handled here.
382 * \param index  which temporary register
383 * \param chan  which channel of the temp register.
384 */
385LLVMValueRef
386lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
387             unsigned index,
388             unsigned chan)
389{
390   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
391   assert(chan < 4);
392   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
393      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
394      return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, "");
395   }
396   else {
397      return bld->temps[index][chan];
398   }
399}
400
401/**
402 * Return pointer to a output register channel (src or dest).
403 * Note that indirect addressing cannot be handled here.
404 * \param index  which output register
405 * \param chan  which channel of the output register.
406 */
407LLVMValueRef
408lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
409               unsigned index,
410               unsigned chan)
411{
412   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
413   assert(chan < 4);
414   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
415      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm,
416                                                 index * 4 + chan);
417      return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, "");
418   }
419   else {
420      return bld->outputs[index][chan];
421   }
422}
423
424/**
425 * Gather vector.
426 * XXX the lp_build_gather() function should be capable of doing this
427 * with a little work.
428 */
429static LLVMValueRef
430build_gather(struct lp_build_context *bld,
431             LLVMValueRef base_ptr,
432             LLVMValueRef indexes)
433{
434   LLVMBuilderRef builder = bld->gallivm->builder;
435   LLVMValueRef res = bld->undef;
436   unsigned i;
437
438   /*
439    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
440    */
441   for (i = 0; i < bld->type.length; i++) {
442      LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
443      LLVMValueRef index = LLVMBuildExtractElement(builder,
444                                                   indexes, ii, "");
445      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
446                                             &index, 1, "gather_ptr");
447      LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, "");
448
449      res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
450   }
451
452   return res;
453}
454
455
456/**
457 * Scatter/store vector.
458 */
459static void
460emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
461                  LLVMValueRef base_ptr,
462                  LLVMValueRef indexes,
463                  LLVMValueRef values,
464                  struct lp_exec_mask *mask,
465                  LLVMValueRef pred)
466{
467   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
468   LLVMBuilderRef builder = gallivm->builder;
469   unsigned i;
470
471   /* Mix the predicate and execution mask */
472   if (mask->has_mask) {
473      if (pred) {
474         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
475      }
476      else {
477         pred = mask->exec_mask;
478      }
479   }
480
481   /*
482    * Loop over elements of index_vec, store scalar value.
483    */
484   for (i = 0; i < bld->bld_base.base.type.length; i++) {
485      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
486      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
487      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
488      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
489      LLVMValueRef scalar_pred = pred ?
490         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
491
492      if (0)
493         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
494                         ii, val, index, scalar_ptr);
495
496      if (scalar_pred) {
497         LLVMValueRef real_val, dst_val;
498         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
499         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
500         LLVMBuildStore(builder, real_val, scalar_ptr);
501      }
502      else {
503         LLVMBuildStore(builder, val, scalar_ptr);
504      }
505   }
506}
507
508
509/**
510 * Read the current value of the ADDR register, convert the floats to
511 * ints, add the base index and return the vector of offsets.
512 * The offsets will be used to index into the constant buffer or
513 * temporary register file.
514 */
515static LLVMValueRef
516get_indirect_index(struct lp_build_tgsi_soa_context *bld,
517                   unsigned reg_file, unsigned reg_index,
518                   const struct tgsi_src_register *indirect_reg)
519{
520   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
521   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
522   /* always use X component of address register */
523   unsigned swizzle = indirect_reg->SwizzleX;
524   LLVMValueRef base;
525   LLVMValueRef rel;
526   LLVMValueRef max_index;
527   LLVMValueRef index;
528
529   assert(bld->indirect_files & (1 << reg_file));
530
531   base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
532
533   assert(swizzle < 4);
534   rel = LLVMBuildLoad(builder,
535                        bld->addr[indirect_reg->Index][swizzle],
536                        "load addr reg");
537
538   index = lp_build_add(uint_bld, base, rel);
539
540   max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
541                                      uint_bld->type,
542                                      bld->bld_base.info->file_max[reg_file]);
543
544   assert(!uint_bld->type.sign);
545   index = lp_build_min(uint_bld, index, max_index);
546
547   return index;
548}
549
550static struct lp_build_context *
551stype_to_fetch(struct lp_build_tgsi_context * bld_base,
552	       enum tgsi_opcode_type stype)
553{
554   struct lp_build_context *bld_fetch;
555
556   switch (stype) {
557   case TGSI_TYPE_FLOAT:
558   case TGSI_TYPE_UNTYPED:
559      bld_fetch = &bld_base->base;
560      break;
561   case TGSI_TYPE_UNSIGNED:
562      bld_fetch = &bld_base->uint_bld;
563      break;
564   case TGSI_TYPE_SIGNED:
565      bld_fetch = &bld_base->int_bld;
566      break;
567   case TGSI_TYPE_VOID:
568   case TGSI_TYPE_DOUBLE:
569   default:
570      assert(0);
571      bld_fetch = NULL;
572      break;
573   }
574   return bld_fetch;
575}
576
577static LLVMValueRef
578emit_fetch_constant(
579   struct lp_build_tgsi_context * bld_base,
580   const struct tgsi_full_src_register * reg,
581   enum tgsi_opcode_type stype,
582   unsigned swizzle)
583{
584   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
585   struct gallivm_state *gallivm = bld_base->base.gallivm;
586   LLVMBuilderRef builder = gallivm->builder;
587   struct lp_build_context *uint_bld = &bld_base->uint_bld;
588   LLVMValueRef indirect_index = NULL;
589   struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
590
591   /* XXX: Handle fetching xyzw components as a vector */
592   assert(swizzle != ~0);
593
594   if (reg->Register.Indirect) {
595      indirect_index = get_indirect_index(bld,
596                                          reg->Register.File,
597                                          reg->Register.Index,
598                                          &reg->Indirect);
599   }
600
601   if (reg->Register.Indirect) {
602      LLVMValueRef swizzle_vec =
603         lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
604      LLVMValueRef index_vec;  /* index into the const buffer */
605
606      /* index_vec = indirect_index * 4 + swizzle */
607      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
608      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
609
610      /* Gather values from the constant buffer */
611      return build_gather(bld_fetch, bld->consts_ptr, index_vec);
612   }
613   else {
614      LLVMValueRef index;  /* index into the const buffer */
615      LLVMValueRef scalar, scalar_ptr;
616
617      index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);
618
619      scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
620                                   &index, 1, "");
621
622      if (stype != TGSI_TYPE_FLOAT && stype != TGSI_TYPE_UNTYPED) {
623         LLVMTypeRef ivtype = LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0);
624         LLVMValueRef temp_ptr;
625         temp_ptr = LLVMBuildBitCast(builder, scalar_ptr, ivtype, "");
626         scalar = LLVMBuildLoad(builder, temp_ptr, "");
627      } else
628         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
629
630      return lp_build_broadcast_scalar(bld_fetch, scalar);
631   }
632}
633
634static LLVMValueRef
635emit_fetch_immediate(
636   struct lp_build_tgsi_context * bld_base,
637   const struct tgsi_full_src_register * reg,
638   enum tgsi_opcode_type stype,
639   unsigned swizzle)
640{
641   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
642   LLVMValueRef res = bld->immediates[reg->Register.Index][swizzle];
643   assert(res);
644
645   if (stype == TGSI_TYPE_UNSIGNED) {
646      res = LLVMConstBitCast(res, bld_base->uint_bld.vec_type);
647   } else if (stype == TGSI_TYPE_SIGNED) {
648      res = LLVMConstBitCast(res, bld_base->int_bld.vec_type);
649   }
650   return res;
651}
652
653static LLVMValueRef
654emit_fetch_input(
655   struct lp_build_tgsi_context * bld_base,
656   const struct tgsi_full_src_register * reg,
657   enum tgsi_opcode_type stype,
658   unsigned swizzle)
659{
660   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
661   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
662   LLVMBuilderRef builder = gallivm->builder;
663   struct lp_build_context *uint_bld = &bld_base->uint_bld;
664   LLVMValueRef indirect_index = NULL;
665   LLVMValueRef res;
666
667   if (reg->Register.Indirect) {
668      indirect_index = get_indirect_index(bld,
669                                          reg->Register.File,
670                                          reg->Register.Index,
671                                          &reg->Indirect);
672   }
673
674   if (reg->Register.Indirect) {
675      LLVMValueRef swizzle_vec =
676         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
677      LLVMValueRef length_vec =
678         lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
679      LLVMValueRef index_vec;  /* index into the const buffer */
680      LLVMValueRef inputs_array;
681      LLVMTypeRef float4_ptr_type;
682
683      /* index_vec = (indirect_index * 4 + swizzle) * length */
684      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
685      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
686      index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
687
688      /* cast inputs_array pointer to float* */
689      float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
690      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
691                                         float4_ptr_type, "");
692
693      /* Gather values from the temporary register array */
694      res = build_gather(&bld_base->base, inputs_array, index_vec);
695   } else {
696      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
697         LLVMValueRef lindex = lp_build_const_int32(gallivm,
698                                        reg->Register.Index * 4 + swizzle);
699         LLVMValueRef input_ptr =  LLVMBuildGEP(builder,
700                                                bld->inputs_array, &lindex, 1, "");
701         res = LLVMBuildLoad(builder, input_ptr, "");
702      }
703      else {
704         res = bld->inputs[reg->Register.Index][swizzle];
705      }
706   }
707
708   assert(res);
709
710   if (stype == TGSI_TYPE_UNSIGNED) {
711      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
712   } else if (stype == TGSI_TYPE_SIGNED) {
713      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
714   }
715
716   return res;
717}
718
719static LLVMValueRef
720emit_fetch_temporary(
721   struct lp_build_tgsi_context * bld_base,
722   const struct tgsi_full_src_register * reg,
723   enum tgsi_opcode_type stype,
724   unsigned swizzle)
725{
726   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
727   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
728   LLVMBuilderRef builder = gallivm->builder;
729   struct lp_build_context *uint_bld = &bld_base->uint_bld;
730   LLVMValueRef indirect_index = NULL;
731   LLVMValueRef res;
732
733   if (reg->Register.Indirect) {
734      indirect_index = get_indirect_index(bld,
735                                          reg->Register.File,
736                                          reg->Register.Index,
737                                          &reg->Indirect);
738   }
739
740   if (reg->Register.Indirect) {
741      LLVMValueRef swizzle_vec =
742         lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
743      LLVMValueRef length_vec =
744         lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type,
745                                bld->bld_base.base.type.length);
746      LLVMValueRef index_vec;  /* index into the const buffer */
747      LLVMValueRef temps_array;
748      LLVMTypeRef float4_ptr_type;
749
750      /* index_vec = (indirect_index * 4 + swizzle) * length */
751      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
752      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
753      index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
754
755      /* cast temps_array pointer to float* */
756      float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context), 0);
757      temps_array = LLVMBuildBitCast(builder, bld->temps_array,
758                                     float4_ptr_type, "");
759
760      /* Gather values from the temporary register array */
761      res = build_gather(&bld_base->base, temps_array, index_vec);
762   }
763   else {
764      LLVMValueRef temp_ptr;
765      if (stype != TGSI_TYPE_FLOAT && stype != TGSI_TYPE_UNTYPED) {
766         LLVMTypeRef itype = LLVMPointerType(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), 0);
767         LLVMValueRef tint_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
768                                                     swizzle);
769         temp_ptr = LLVMBuildBitCast(builder, tint_ptr, itype, "");
770      } else
771         temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
772      res = LLVMBuildLoad(builder, temp_ptr, "");
773      if (!res)
774         return bld->bld_base.base.undef;
775   }
776
777   return res;
778}
779
780static LLVMValueRef
781emit_fetch_system_value(
782   struct lp_build_tgsi_context * bld_base,
783   const struct tgsi_full_src_register * reg,
784   enum tgsi_opcode_type stype,
785   unsigned swizzle)
786{
787   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
788   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
789   LLVMBuilderRef builder = gallivm->builder;
790   LLVMValueRef index;  /* index into the system value array */
791   LLVMValueRef scalar, scalar_ptr;
792
793   assert(!reg->Register.Indirect);
794
795   index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
796
797   scalar_ptr = LLVMBuildGEP(builder, bld->system_values_array, &index, 1, "");
798   scalar = LLVMBuildLoad(builder, scalar_ptr, "");
799
800   return lp_build_broadcast_scalar(&bld->bld_base.base, scalar);
801}
802
803/**
804 * Register fetch with derivatives.
805 */
806static void
807emit_fetch_deriv(
808   struct lp_build_tgsi_soa_context *bld,
809   LLVMValueRef src,
810   LLVMValueRef *res,
811   LLVMValueRef *ddx,
812   LLVMValueRef *ddy)
813{
814   if(res)
815      *res = src;
816
817   /* TODO: use interpolation coeffs for inputs */
818
819   if(ddx)
820      *ddx = lp_build_ddx(&bld->bld_base.base, src);
821
822   if(ddy)
823      *ddy = lp_build_ddy(&bld->bld_base.base, src);
824}
825
826
827/**
828 * Predicate.
829 */
830static void
831emit_fetch_predicate(
832   struct lp_build_tgsi_soa_context *bld,
833   const struct tgsi_full_instruction *inst,
834   LLVMValueRef *pred)
835{
836   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
837   unsigned index;
838   unsigned char swizzles[4];
839   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
840   LLVMValueRef value;
841   unsigned chan;
842
843   if (!inst->Instruction.Predicate) {
844      TGSI_FOR_EACH_CHANNEL( chan ) {
845         pred[chan] = NULL;
846      }
847      return;
848   }
849
850   swizzles[0] = inst->Predicate.SwizzleX;
851   swizzles[1] = inst->Predicate.SwizzleY;
852   swizzles[2] = inst->Predicate.SwizzleZ;
853   swizzles[3] = inst->Predicate.SwizzleW;
854
855   index = inst->Predicate.Index;
856   assert(index < LP_MAX_TGSI_PREDS);
857
858   TGSI_FOR_EACH_CHANNEL( chan ) {
859      unsigned swizzle = swizzles[chan];
860
861      /*
862       * Only fetch the predicate register channels that are actually listed
863       * in the swizzles
864       */
865      if (!unswizzled[swizzle]) {
866         value = LLVMBuildLoad(builder,
867                               bld->preds[index][swizzle], "");
868
869         /*
870          * Convert the value to an integer mask.
871          *
872          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
873          * is needlessly causing two comparisons due to storing the intermediate
874          * result as float vector instead of an integer mask vector.
875          */
876         value = lp_build_compare(bld->bld_base.base.gallivm,
877                                  bld->bld_base.base.type,
878                                  PIPE_FUNC_NOTEQUAL,
879                                  value,
880                                  bld->bld_base.base.zero);
881         if (inst->Predicate.Negate) {
882            value = LLVMBuildNot(builder, value, "");
883         }
884
885         unswizzled[swizzle] = value;
886      } else {
887         value = unswizzled[swizzle];
888      }
889
890      pred[chan] = value;
891   }
892}
893
894/**
895 * Register store.
896 */
897static void
898emit_store_chan(
899   struct lp_build_tgsi_context *bld_base,
900   const struct tgsi_full_instruction *inst,
901   unsigned index,
902   unsigned chan_index,
903   LLVMValueRef pred,
904   LLVMValueRef value)
905{
906   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
907   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
908   LLVMBuilderRef builder = gallivm->builder;
909   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
910   struct lp_build_context *uint_bld = &bld_base->uint_bld;
911   LLVMValueRef indirect_index = NULL;
912   struct lp_build_context *bld_store;
913   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
914
915   switch (dtype) {
916   default:
917   case TGSI_TYPE_FLOAT:
918   case TGSI_TYPE_UNTYPED:
919      bld_store = &bld_base->base;
920      break;
921   case TGSI_TYPE_UNSIGNED:
922      bld_store = &bld_base->uint_bld;
923      break;
924   case TGSI_TYPE_SIGNED:
925      bld_store = &bld_base->int_bld;
926      break;
927   case TGSI_TYPE_DOUBLE:
928   case TGSI_TYPE_VOID:
929      assert(0);
930      bld_store = NULL;
931      break;
932   }
933
934   switch( inst->Instruction.Saturate ) {
935   case TGSI_SAT_NONE:
936      break;
937
938   case TGSI_SAT_ZERO_ONE:
939      value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
940      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
941      break;
942
943   case TGSI_SAT_MINUS_PLUS_ONE:
944      value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
945      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
946      break;
947
948   default:
949      assert(0);
950   }
951
952   if (reg->Register.Indirect) {
953      indirect_index = get_indirect_index(bld,
954                                          reg->Register.File,
955                                          reg->Register.Index,
956                                          &reg->Indirect);
957   } else {
958      assert(reg->Register.Index <=
959                             bld->bld_base.info->file_max[reg->Register.File]);
960   }
961
962   switch( reg->Register.File ) {
963   case TGSI_FILE_OUTPUT:
964      if (reg->Register.Indirect) {
965         LLVMValueRef chan_vec =
966            lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
967         LLVMValueRef length_vec =
968            lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
969         LLVMValueRef index_vec;  /* indexes into the temp registers */
970         LLVMValueRef outputs_array;
971         LLVMValueRef pixel_offsets;
972         LLVMTypeRef float_ptr_type;
973         int i;
974
975         /* build pixel offset vector: {0, 1, 2, 3, ...} */
976         pixel_offsets = uint_bld->undef;
977         for (i = 0; i < bld->bld_base.base.type.length; i++) {
978            LLVMValueRef ii = lp_build_const_int32(gallivm, i);
979            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
980                                                   ii, ii, "");
981         }
982
983         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
984         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
985         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
986         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
987         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
988
989         float_ptr_type =
990            LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
991         outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
992                                          float_ptr_type, "");
993
994         /* Scatter store values into temp registers */
995         emit_mask_scatter(bld, outputs_array, index_vec, value,
996                           &bld->exec_mask, pred);
997      }
998      else {
999         LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1000                                               chan_index);
1001         lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, out_ptr);
1002      }
1003      break;
1004
1005   case TGSI_FILE_TEMPORARY:
1006      if (reg->Register.Indirect) {
1007         LLVMValueRef chan_vec =
1008            lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
1009         LLVMValueRef length_vec =
1010            lp_build_const_int_vec(gallivm, uint_bld->type,
1011                                   bld->bld_base.base.type.length);
1012         LLVMValueRef index_vec;  /* indexes into the temp registers */
1013         LLVMValueRef temps_array;
1014         LLVMValueRef pixel_offsets;
1015         LLVMTypeRef float_ptr_type;
1016         int i;
1017
1018         /* build pixel offset vector: {0, 1, 2, 3, ...} */
1019         pixel_offsets = uint_bld->undef;
1020         for (i = 0; i < bld->bld_base.base.type.length; i++) {
1021            LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1022            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
1023                                                   ii, ii, "");
1024         }
1025
1026         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1027         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1028         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1029         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1030         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1031
1032         float_ptr_type =
1033            LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1034         temps_array = LLVMBuildBitCast(builder, bld->temps_array,
1035                                        float_ptr_type, "");
1036
1037         /* Scatter store values into temp registers */
1038         emit_mask_scatter(bld, temps_array, index_vec, value,
1039                           &bld->exec_mask, pred);
1040      }
1041      else {
1042         LLVMValueRef temp_ptr;
1043
1044         switch (dtype) {
1045         case TGSI_TYPE_UNSIGNED:
1046         case TGSI_TYPE_SIGNED: {
1047            LLVMTypeRef itype = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4);
1048            LLVMTypeRef ivtype = LLVMPointerType(itype, 0);
1049            LLVMValueRef tint_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
1050                                                        chan_index);
1051            LLVMValueRef temp_value_ptr;
1052
1053            temp_ptr = LLVMBuildBitCast(builder, tint_ptr, ivtype, "");
1054            temp_value_ptr = LLVMBuildBitCast(builder, value, itype, "");
1055            value = temp_value_ptr;
1056            break;
1057         }
1058         default:
1059         case TGSI_TYPE_FLOAT:
1060         case TGSI_TYPE_UNTYPED:
1061            temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
1062                                           chan_index);
1063            break;
1064         }
1065
1066         lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, temp_ptr);
1067      }
1068      break;
1069
1070   case TGSI_FILE_ADDRESS:
1071      assert(dtype == TGSI_TYPE_SIGNED);
1072      assert(LLVMTypeOf(value) == bld_base->base.int_vec_type);
1073      lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value,
1074                         bld->addr[reg->Register.Index][chan_index]);
1075      break;
1076
1077   case TGSI_FILE_PREDICATE:
1078      lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value,
1079                         bld->preds[reg->Register.Index][chan_index]);
1080      break;
1081
1082   default:
1083      assert( 0 );
1084   }
1085}
1086
1087static void
1088emit_store(
1089   struct lp_build_tgsi_context * bld_base,
1090   const struct tgsi_full_instruction * inst,
1091   const struct tgsi_opcode_info * info,
1092   LLVMValueRef dst[4])
1093
1094{
1095   unsigned chan_index;
1096   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1097
1098   if(info->num_dst) {
1099      LLVMValueRef pred[TGSI_NUM_CHANNELS];
1100
1101      emit_fetch_predicate( bld, inst, pred );
1102
1103      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1104         emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
1105      }
1106   }
1107}
1108
1109/**
1110 * High-level instruction translators.
1111 */
1112
1113static void
1114emit_tex( struct lp_build_tgsi_soa_context *bld,
1115          const struct tgsi_full_instruction *inst,
1116          enum lp_build_tex_modifier modifier,
1117          LLVMValueRef *texel)
1118{
1119   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1120   unsigned unit;
1121   LLVMValueRef lod_bias, explicit_lod;
1122   LLVMValueRef oow = NULL;
1123   LLVMValueRef coords[3];
1124   LLVMValueRef ddx[3];
1125   LLVMValueRef ddy[3];
1126   unsigned num_coords;
1127   unsigned i;
1128
1129   if (!bld->sampler) {
1130      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1131      for (i = 0; i < 4; i++) {
1132         texel[i] = bld->bld_base.base.undef;
1133      }
1134      return;
1135   }
1136
1137   switch (inst->Texture.Texture) {
1138   case TGSI_TEXTURE_1D:
1139      num_coords = 1;
1140      break;
1141   case TGSI_TEXTURE_1D_ARRAY:
1142   case TGSI_TEXTURE_2D:
1143   case TGSI_TEXTURE_RECT:
1144      num_coords = 2;
1145      break;
1146   case TGSI_TEXTURE_SHADOW1D:
1147   case TGSI_TEXTURE_SHADOW1D_ARRAY:
1148   case TGSI_TEXTURE_SHADOW2D:
1149   case TGSI_TEXTURE_SHADOWRECT:
1150   case TGSI_TEXTURE_2D_ARRAY:
1151   case TGSI_TEXTURE_3D:
1152   case TGSI_TEXTURE_CUBE:
1153      num_coords = 3;
1154      break;
1155   case TGSI_TEXTURE_SHADOW2D_ARRAY:
1156      num_coords = 4;
1157      break;
1158   default:
1159      assert(0);
1160      return;
1161   }
1162
1163   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1164      lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1165      explicit_lod = NULL;
1166   }
1167   else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1168      lod_bias = NULL;
1169      explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1170   }
1171   else {
1172      lod_bias = NULL;
1173      explicit_lod = NULL;
1174   }
1175
1176   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
1177      oow = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1178      oow = lp_build_rcp(&bld->bld_base.base, oow);
1179   }
1180
1181   for (i = 0; i < num_coords; i++) {
1182      coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
1183      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1184         coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
1185   }
1186   for (i = num_coords; i < 3; i++) {
1187      coords[i] = bld->bld_base.base.undef;
1188   }
1189
1190   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1191      LLVMValueRef index0 = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
1192      for (i = 0; i < num_coords; i++) {
1193         LLVMValueRef src1 = lp_build_emit_fetch( &bld->bld_base, inst, 1, i );
1194         LLVMValueRef src2 = lp_build_emit_fetch( &bld->bld_base, inst, 2, i );
1195         ddx[i] = LLVMBuildExtractElement(builder, src1, index0, "");
1196         ddy[i] = LLVMBuildExtractElement(builder, src2, index0, "");
1197      }
1198      unit = inst->Src[3].Register.Index;
1199   }  else {
1200      for (i = 0; i < num_coords; i++) {
1201         ddx[i] = lp_build_scalar_ddx( &bld->bld_base.base, coords[i] );
1202         ddy[i] = lp_build_scalar_ddy( &bld->bld_base.base, coords[i] );
1203      }
1204      unit = inst->Src[1].Register.Index;
1205   }
1206   for (i = num_coords; i < 3; i++) {
1207      ddx[i] = LLVMGetUndef(bld->bld_base.base.elem_type);
1208      ddy[i] = LLVMGetUndef(bld->bld_base.base.elem_type);
1209   }
1210
1211   bld->sampler->emit_fetch_texel(bld->sampler,
1212                                  bld->bld_base.base.gallivm,
1213                                  bld->bld_base.base.type,
1214                                  unit, num_coords, coords,
1215                                  ddx, ddy,
1216                                  lod_bias, explicit_lod,
1217                                  texel);
1218}
1219
1220static void
1221emit_txq( struct lp_build_tgsi_soa_context *bld,
1222          const struct tgsi_full_instruction *inst,
1223          LLVMValueRef *sizes_out)
1224{
1225   LLVMValueRef explicit_lod;
1226   unsigned num_coords, has_lod;
1227   unsigned i;
1228
1229   switch (inst->Texture.Texture) {
1230   case TGSI_TEXTURE_1D:
1231   case TGSI_TEXTURE_SHADOW1D:
1232   case TGSI_TEXTURE_SHADOW2D:
1233   case TGSI_TEXTURE_SHADOWCUBE:
1234      num_coords = 1;
1235      has_lod = 1;
1236      break;
1237   case TGSI_TEXTURE_2D:
1238   case TGSI_TEXTURE_CUBE:
1239   case TGSI_TEXTURE_1D_ARRAY:
1240   case TGSI_TEXTURE_SHADOW1D_ARRAY:
1241      num_coords = 2;
1242      has_lod = 1;
1243      break;
1244   case TGSI_TEXTURE_3D:
1245// case TGSI_TEXTURE_CUBE_ARRAY:
1246// case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1247   case TGSI_TEXTURE_2D_ARRAY:
1248   case TGSI_TEXTURE_SHADOW2D_ARRAY:
1249      num_coords = 3;
1250      has_lod = 1;
1251      break;
1252
1253   case TGSI_TEXTURE_BUFFER:
1254      num_coords = 1;
1255      has_lod = 0;
1256      break;
1257
1258   case TGSI_TEXTURE_RECT:
1259   case TGSI_TEXTURE_SHADOWRECT:
1260// case TGSI_TEXTURE_2D_MS:
1261      num_coords = 2;
1262      has_lod = 0;
1263      break;
1264
1265// case TGSI_TEXTURE_2D_MS_ARRAY:
1266//    num_coords = 3;
1267//    has_lod = 0;
1268//    break;
1269
1270   default:
1271      assert(0);
1272      return;
1273   }
1274
1275   if (!bld->sampler) {
1276      _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
1277      for (i = 0; i < num_coords; i++)
1278         sizes_out[i] = bld->bld_base.base.undef;
1279      return;
1280   }
1281
1282   if (has_lod)
1283      explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 2 );
1284   else
1285      explicit_lod = NULL;
1286
1287   bld->sampler->emit_size_query(bld->sampler,
1288                                 bld->bld_base.base.gallivm,
1289                                 inst->Src[1].Register.Index,
1290                                 explicit_lod,
1291                                 sizes_out);
1292}
1293
1294static boolean
1295near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
1296		   int pc)
1297{
1298   int i;
1299
1300   for (i = 0; i < 5; i++) {
1301      unsigned opcode;
1302
1303      if (pc + i >= bld->bld_base.info->num_instructions)
1304	 return TRUE;
1305
1306      opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
1307
1308      if (opcode == TGSI_OPCODE_END)
1309	 return TRUE;
1310
1311      if (opcode == TGSI_OPCODE_TEX ||
1312	  opcode == TGSI_OPCODE_TXP ||
1313	  opcode == TGSI_OPCODE_TXD ||
1314	  opcode == TGSI_OPCODE_TXB ||
1315	  opcode == TGSI_OPCODE_TXL ||
1316	  opcode == TGSI_OPCODE_TXF ||
1317	  opcode == TGSI_OPCODE_TXQ ||
1318	  opcode == TGSI_OPCODE_CAL ||
1319	  opcode == TGSI_OPCODE_CALLNZ ||
1320	  opcode == TGSI_OPCODE_IF ||
1321	  opcode == TGSI_OPCODE_IFC ||
1322	  opcode == TGSI_OPCODE_BGNLOOP ||
1323	  opcode == TGSI_OPCODE_SWITCH)
1324	 return FALSE;
1325   }
1326
1327   return TRUE;
1328}
1329
1330
1331
1332/**
1333 * Kill fragment if any of the src register values are negative.
1334 */
1335static void
1336emit_kil(
1337   struct lp_build_tgsi_soa_context *bld,
1338   const struct tgsi_full_instruction *inst,
1339   int pc)
1340{
1341   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1342   const struct tgsi_full_src_register *reg = &inst->Src[0];
1343   LLVMValueRef terms[TGSI_NUM_CHANNELS];
1344   LLVMValueRef mask;
1345   unsigned chan_index;
1346
1347   memset(&terms, 0, sizeof terms);
1348
1349   TGSI_FOR_EACH_CHANNEL( chan_index ) {
1350      unsigned swizzle;
1351
1352      /* Unswizzle channel */
1353      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1354
1355      /* Check if the component has not been already tested. */
1356      assert(swizzle < TGSI_NUM_CHANNELS);
1357      if( !terms[swizzle] )
1358         /* TODO: change the comparison operator instead of setting the sign */
1359         terms[swizzle] =  lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
1360   }
1361
1362   mask = NULL;
1363   TGSI_FOR_EACH_CHANNEL( chan_index ) {
1364      if(terms[chan_index]) {
1365         LLVMValueRef chan_mask;
1366
1367         /*
1368          * If term < 0 then mask = 0 else mask = ~0.
1369          */
1370         chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
1371
1372         if(mask)
1373            mask = LLVMBuildAnd(builder, mask, chan_mask, "");
1374         else
1375            mask = chan_mask;
1376      }
1377   }
1378
1379   if(mask) {
1380      lp_build_mask_update(bld->mask, mask);
1381
1382      if (!near_end_of_shader(bld, pc))
1383	 lp_build_mask_check(bld->mask);
1384   }
1385}
1386
1387
1388/**
1389 * Predicated fragment kill.
1390 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
1391 * The only predication is the execution mask which will apply if
1392 * we're inside a loop or conditional.
1393 */
1394static void
1395emit_kilp(struct lp_build_tgsi_soa_context *bld,
1396          int pc)
1397{
1398   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1399   LLVMValueRef mask;
1400
1401   /* For those channels which are "alive", disable fragment shader
1402    * execution.
1403    */
1404   if (bld->exec_mask.has_mask) {
1405      mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
1406   }
1407   else {
1408      LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
1409      mask = zero;
1410   }
1411
1412   lp_build_mask_update(bld->mask, mask);
1413
1414   if (!near_end_of_shader(bld, pc))
1415      lp_build_mask_check(bld->mask);
1416}
1417
1418
1419/**
1420 * Emit code which will dump the value of all the temporary registers
1421 * to stdout.
1422 */
1423static void
1424emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
1425{
1426   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1427   LLVMBuilderRef builder = gallivm->builder;
1428   LLVMValueRef temp_ptr;
1429   LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
1430   LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
1431   LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
1432   LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
1433   int index;
1434   int n = bld->bld_base.info->file_max[TGSI_FILE_TEMPORARY];
1435
1436   for (index = 0; index < n; index++) {
1437      LLVMValueRef idx = lp_build_const_int32(gallivm, index);
1438      LLVMValueRef v[4][4], res;
1439      int chan;
1440
1441      lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
1442
1443      for (chan = 0; chan < 4; chan++) {
1444         temp_ptr = lp_get_temp_ptr_soa(bld, index, chan);
1445         res = LLVMBuildLoad(builder, temp_ptr, "");
1446         v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
1447         v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
1448         v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
1449         v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
1450      }
1451
1452      lp_build_printf(gallivm, "  X: %f %f %f %f\n",
1453                      v[0][0], v[0][1], v[0][2], v[0][3]);
1454      lp_build_printf(gallivm, "  Y: %f %f %f %f\n",
1455                      v[1][0], v[1][1], v[1][2], v[1][3]);
1456      lp_build_printf(gallivm, "  Z: %f %f %f %f\n",
1457                      v[2][0], v[2][1], v[2][2], v[2][3]);
1458      lp_build_printf(gallivm, "  W: %f %f %f %f\n",
1459                      v[3][0], v[3][1], v[3][2], v[3][3]);
1460   }
1461}
1462
1463
1464
1465void
1466lp_emit_declaration_soa(
1467   struct lp_build_tgsi_context *bld_base,
1468   const struct tgsi_full_declaration *decl)
1469{
1470   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
1471   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1472   LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
1473   const unsigned first = decl->Range.First;
1474   const unsigned last = decl->Range.Last;
1475   unsigned idx, i;
1476
1477   for (idx = first; idx <= last; ++idx) {
1478      assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
1479      switch (decl->Declaration.File) {
1480      case TGSI_FILE_TEMPORARY:
1481         assert(idx < LP_MAX_TGSI_TEMPS);
1482         if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
1483            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1484               bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
1485         }
1486         break;
1487
1488      case TGSI_FILE_OUTPUT:
1489         if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
1490            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1491               bld->outputs[idx][i] = lp_build_alloca(gallivm,
1492                                                      vec_type, "output");
1493         }
1494         break;
1495
1496      case TGSI_FILE_ADDRESS:
1497	 /* ADDR registers are the only allocated with an integer LLVM IR type,
1498	  * as they are guaranteed to always have integers.
1499	  * XXX: Not sure if this exception is worthwhile (or the whole idea of
1500	  * an ADDR register for that matter).
1501	  */
1502         assert(idx < LP_MAX_TGSI_ADDRS);
1503         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1504            bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
1505         break;
1506
1507      case TGSI_FILE_PREDICATE:
1508         assert(idx < LP_MAX_TGSI_PREDS);
1509         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1510            bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
1511                                                 "predicate");
1512         break;
1513
1514      default:
1515         /* don't need to declare other vars */
1516         break;
1517      }
1518   }
1519}
1520
1521
1522void lp_emit_immediate_soa(
1523   struct lp_build_tgsi_context *bld_base,
1524   const struct tgsi_full_immediate *imm)
1525{
1526   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
1527   struct gallivm_state * gallivm = bld_base->base.gallivm;
1528
1529   /* simply copy the immediate values into the next immediates[] slot */
1530   unsigned i;
1531   const uint size = imm->Immediate.NrTokens - 1;
1532   assert(size <= 4);
1533   assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES);
1534   switch (imm->Immediate.DataType) {
1535   case TGSI_IMM_FLOAT32:
1536      for( i = 0; i < size; ++i )
1537         bld->immediates[bld->num_immediates][i] =
1538            lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
1539
1540      break;
1541   case TGSI_IMM_UINT32:
1542      for( i = 0; i < size; ++i ) {
1543         LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
1544         bld->immediates[bld->num_immediates][i] =
1545            LLVMConstBitCast(tmp, bld_base->base.vec_type);
1546      }
1547
1548      break;
1549   case TGSI_IMM_INT32:
1550      for( i = 0; i < size; ++i ) {
1551         LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
1552         bld->immediates[bld->num_immediates][i] =
1553            LLVMConstBitCast(tmp, bld_base->base.vec_type);
1554      }
1555
1556      break;
1557   }
1558   for( i = size; i < 4; ++i )
1559      bld->immediates[bld->num_immediates][i] = bld_base->base.undef;
1560
1561   bld->num_immediates++;
1562}
1563
1564static void
1565ddx_emit(
1566   const struct lp_build_tgsi_action * action,
1567   struct lp_build_tgsi_context * bld_base,
1568   struct lp_build_emit_data * emit_data)
1569{
1570   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1571
1572   emit_fetch_deriv(bld, emit_data->args[0], NULL,
1573                    &emit_data->output[emit_data->chan], NULL);
1574}
1575
1576static void
1577ddy_emit(
1578   const struct lp_build_tgsi_action * action,
1579   struct lp_build_tgsi_context * bld_base,
1580   struct lp_build_emit_data * emit_data)
1581{
1582   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1583
1584   emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
1585                    &emit_data->output[emit_data->chan]);
1586}
1587
1588static void
1589kilp_emit(
1590   const struct lp_build_tgsi_action * action,
1591   struct lp_build_tgsi_context * bld_base,
1592   struct lp_build_emit_data * emit_data)
1593{
1594   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1595
1596   emit_kilp(bld, bld_base->pc - 1);
1597}
1598
1599static void
1600kil_emit(
1601   const struct lp_build_tgsi_action * action,
1602   struct lp_build_tgsi_context * bld_base,
1603   struct lp_build_emit_data * emit_data)
1604{
1605   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1606
1607   emit_kil(bld, emit_data->inst, bld_base->pc - 1);
1608}
1609
1610static void
1611tex_emit(
1612   const struct lp_build_tgsi_action * action,
1613   struct lp_build_tgsi_context * bld_base,
1614   struct lp_build_emit_data * emit_data)
1615{
1616   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1617
1618   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, emit_data->output);
1619}
1620
1621static void
1622txb_emit(
1623   const struct lp_build_tgsi_action * action,
1624   struct lp_build_tgsi_context * bld_base,
1625   struct lp_build_emit_data * emit_data)
1626{
1627   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1628
1629   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
1630            emit_data->output);
1631}
1632
1633static void
1634txd_emit(
1635   const struct lp_build_tgsi_action * action,
1636   struct lp_build_tgsi_context * bld_base,
1637   struct lp_build_emit_data * emit_data)
1638{
1639   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1640
1641   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
1642            emit_data->output);
1643}
1644
1645static void
1646txl_emit(
1647   const struct lp_build_tgsi_action * action,
1648   struct lp_build_tgsi_context * bld_base,
1649   struct lp_build_emit_data * emit_data)
1650{
1651   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1652
1653   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
1654            emit_data->output);
1655}
1656
1657static void
1658txp_emit(
1659   const struct lp_build_tgsi_action * action,
1660   struct lp_build_tgsi_context * bld_base,
1661   struct lp_build_emit_data * emit_data)
1662{
1663   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1664
1665   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
1666            emit_data->output);
1667}
1668
1669static void
1670txq_emit(
1671   const struct lp_build_tgsi_action * action,
1672   struct lp_build_tgsi_context * bld_base,
1673   struct lp_build_emit_data * emit_data)
1674{
1675   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1676
1677   emit_txq(bld, emit_data->inst, emit_data->output);
1678}
1679
1680static void
1681cal_emit(
1682   const struct lp_build_tgsi_action * action,
1683   struct lp_build_tgsi_context * bld_base,
1684   struct lp_build_emit_data * emit_data)
1685{
1686   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1687
1688   lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
1689                     &bld_base->pc);
1690}
1691
1692static void
1693ret_emit(
1694   const struct lp_build_tgsi_action * action,
1695   struct lp_build_tgsi_context * bld_base,
1696   struct lp_build_emit_data * emit_data)
1697{
1698   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1699
1700   lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
1701}
1702
1703static void
1704brk_emit(
1705   const struct lp_build_tgsi_action * action,
1706   struct lp_build_tgsi_context * bld_base,
1707   struct lp_build_emit_data * emit_data)
1708{
1709   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1710
1711   lp_exec_break(&bld->exec_mask);
1712}
1713
1714static void
1715if_emit(
1716   const struct lp_build_tgsi_action * action,
1717   struct lp_build_tgsi_context * bld_base,
1718   struct lp_build_emit_data * emit_data)
1719{
1720   LLVMValueRef tmp;
1721   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1722
1723   tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
1724                      emit_data->args[0], bld->bld_base.base.zero);
1725   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
1726}
1727
1728static void
1729bgnloop_emit(
1730   const struct lp_build_tgsi_action * action,
1731   struct lp_build_tgsi_context * bld_base,
1732   struct lp_build_emit_data * emit_data)
1733{
1734   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1735
1736   lp_exec_bgnloop(&bld->exec_mask);
1737}
1738
1739static void
1740bgnsub_emit(
1741   const struct lp_build_tgsi_action * action,
1742   struct lp_build_tgsi_context * bld_base,
1743   struct lp_build_emit_data * emit_data)
1744{
1745   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1746
1747   lp_exec_mask_bgnsub(&bld->exec_mask);
1748}
1749
1750static void
1751else_emit(
1752   const struct lp_build_tgsi_action * action,
1753   struct lp_build_tgsi_context * bld_base,
1754   struct lp_build_emit_data * emit_data)
1755{
1756   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1757
1758   lp_exec_mask_cond_invert(&bld->exec_mask);
1759}
1760
1761static void
1762endif_emit(
1763   const struct lp_build_tgsi_action * action,
1764   struct lp_build_tgsi_context * bld_base,
1765   struct lp_build_emit_data * emit_data)
1766{
1767   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1768
1769   lp_exec_mask_cond_pop(&bld->exec_mask);
1770}
1771
1772static void
1773endloop_emit(
1774   const struct lp_build_tgsi_action * action,
1775   struct lp_build_tgsi_context * bld_base,
1776   struct lp_build_emit_data * emit_data)
1777{
1778   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1779
1780   lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
1781}
1782
1783static void
1784endsub_emit(
1785   const struct lp_build_tgsi_action * action,
1786   struct lp_build_tgsi_context * bld_base,
1787   struct lp_build_emit_data * emit_data)
1788{
1789   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1790
1791   lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
1792}
1793
1794static void
1795cont_emit(
1796   const struct lp_build_tgsi_action * action,
1797   struct lp_build_tgsi_context * bld_base,
1798   struct lp_build_emit_data * emit_data)
1799{
1800   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1801
1802   lp_exec_continue(&bld->exec_mask);
1803}
1804
/* XXX: Refactor and move it to lp_bld_tgsi_action.c
 *
 * XXX: What do the comments about xmm registers mean?  Maybe they are left over
 * from old code, but there is no guarantee that LLVM will use those registers
 * for this code.
 *
 * XXX: There should be no calls to lp_build_emit_fetch in this function.  This
 * should be handled by the emit_data->fetch_args function. */
1813static void
1814nrm_emit(
1815   const struct lp_build_tgsi_action * action,
1816   struct lp_build_tgsi_context * bld_base,
1817   struct lp_build_emit_data * emit_data)
1818{
1819   LLVMValueRef tmp0, tmp1;
1820   LLVMValueRef tmp4 = NULL;
1821   LLVMValueRef tmp5 = NULL;
1822   LLVMValueRef tmp6 = NULL;
1823   LLVMValueRef tmp7 = NULL;
1824   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1825
1826   uint dims = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1827
1828  if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) ||
1829      TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y) ||
1830      TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z) ||
1831      (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4)) {
1832
1833      /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1834
1835      /* xmm4 = src.x */
1836      /* xmm0 = src.x * src.x */
1837      tmp0 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_X);
1838      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
1839         tmp4 = tmp0;
1840      }
1841      tmp0 = lp_build_mul( &bld->bld_base.base, tmp0, tmp0);
1842
1843      /* xmm5 = src.y */
1844      /* xmm0 = xmm0 + src.y * src.y */
1845      tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
1846      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
1847         tmp5 = tmp1;
1848      }
1849      tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
1850      tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
1851
1852      /* xmm6 = src.z */
1853      /* xmm0 = xmm0 + src.z * src.z */
1854      tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Z);
1855      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
1856         tmp6 = tmp1;
1857      }
1858      tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
1859      tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
1860
1861      if (dims == 4) {
1862         /* xmm7 = src.w */
1863         /* xmm0 = xmm0 + src.w * src.w */
1864         tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_W);
1865         if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W)) {
1866            tmp7 = tmp1;
1867         }
1868         tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
1869         tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
1870      }
1871      /* xmm1 = 1 / sqrt(xmm0) */
1872      tmp1 = lp_build_rsqrt( &bld->bld_base.base, tmp0);
1873       /* dst.x = xmm1 * src.x */
1874      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
1875         emit_data->output[TGSI_CHAN_X] = lp_build_mul( &bld->bld_base.base, tmp4, tmp1);
1876      }
1877      /* dst.y = xmm1 * src.y */
1878      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
1879         emit_data->output[TGSI_CHAN_Y] = lp_build_mul( &bld->bld_base.base, tmp5, tmp1);
1880      }
1881
1882      /* dst.z = xmm1 * src.z */
1883      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
1884         emit_data->output[TGSI_CHAN_Z] = lp_build_mul( &bld->bld_base.base, tmp6, tmp1);
1885      }
1886      /* dst.w = xmm1 * src.w */
1887      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) && dims == 4) {
1888         emit_data->output[TGSI_CHAN_W] = lp_build_mul( &bld->bld_base.base, tmp7, tmp1);
1889      }
1890   }
1891
1892   /* dst.w = 1.0 */
1893   if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 3) {
1894       emit_data->output[TGSI_CHAN_W] = bld->bld_base.base.one;
1895   }
1896}
1897
1898static void emit_prologue(struct lp_build_tgsi_context * bld_base)
1899{
1900   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1901   struct gallivm_state * gallivm = bld_base->base.gallivm;
1902
1903   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
1904      LLVMValueRef array_size =
1905         lp_build_const_int32(gallivm,
1906                         bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
1907      bld->temps_array = lp_build_array_alloca(gallivm,
1908                                              bld_base->base.vec_type, array_size,
1909                                              "temp_array");
1910   }
1911
1912   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
1913      LLVMValueRef array_size =
1914         lp_build_const_int32(gallivm,
1915                            bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
1916      bld->outputs_array = lp_build_array_alloca(gallivm,
1917                                                bld_base->base.vec_type, array_size,
1918                                                "output_array");
1919   }
1920
1921   /* If we have indirect addressing in inputs we need to copy them into
1922    * our alloca array to be able to iterate over them */
1923   if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
1924      unsigned index, chan;
1925      LLVMTypeRef vec_type = bld_base->base.vec_type;
1926      LLVMValueRef array_size = lp_build_const_int32(gallivm,
1927            bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
1928      bld->inputs_array = lp_build_array_alloca(gallivm,
1929                                               vec_type, array_size,
1930                                               "input_array");
1931
1932      assert(bld_base->info->num_inputs
1933                        <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
1934
1935      for (index = 0; index < bld_base->info->num_inputs; ++index) {
1936         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
1937            LLVMValueRef lindex =
1938               lp_build_const_int32(gallivm, index * 4 + chan);
1939            LLVMValueRef input_ptr =
1940               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
1941                            &lindex, 1, "");
1942            LLVMValueRef value = bld->inputs[index][chan];
1943            if (value)
1944               LLVMBuildStore(gallivm->builder, value, input_ptr);
1945         }
1946      }
1947   }
1948}
1949
1950static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
1951{
1952   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1953
1954   if (0) {
1955      /* for debugging */
1956      emit_dump_temps(bld);
1957   }
1958
1959   /* If we have indirect addressing in outputs we need to copy our alloca array
1960    * to the outputs slots specified by the called */
1961   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
1962      unsigned index, chan;
1963      assert(bld_base->info->num_outputs <=
1964                        bld_base->info->file_max[TGSI_FILE_OUTPUT] + 1);
1965      for (index = 0; index < bld_base->info->num_outputs; ++index) {
1966         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
1967            bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
1968         }
1969      }
1970   }
1971}
1972
1973void
1974lp_build_tgsi_soa(struct gallivm_state *gallivm,
1975                  const struct tgsi_token *tokens,
1976                  struct lp_type type,
1977                  struct lp_build_mask_context *mask,
1978                  LLVMValueRef consts_ptr,
1979                  LLVMValueRef system_values_array,
1980                  const LLVMValueRef *pos,
1981                  const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
1982                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1983                  struct lp_build_sampler_soa *sampler,
1984                  const struct tgsi_shader_info *info)
1985{
1986   struct lp_build_tgsi_soa_context bld;
1987
1988   struct lp_type res_type;
1989
1990   assert(type.length <= LP_MAX_VECTOR_LENGTH);
1991   memset(&res_type, 0, sizeof res_type);
1992   res_type.width = type.width;
1993   res_type.length = type.length;
1994   res_type.sign = 1;
1995
1996   /* Setup build context */
1997   memset(&bld, 0, sizeof bld);
1998   lp_build_context_init(&bld.bld_base.base, gallivm, type);
1999   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
2000   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
2001   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
2002   bld.mask = mask;
2003   bld.pos = pos;
2004   bld.inputs = inputs;
2005   bld.outputs = outputs;
2006   bld.consts_ptr = consts_ptr;
2007   bld.sampler = sampler;
2008   bld.bld_base.info = info;
2009   bld.indirect_files = info->indirect_files;
2010
2011   bld.bld_base.soa = TRUE;
2012   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
2013   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
2014   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
2015   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
2016   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
2017   bld.bld_base.emit_store = emit_store;
2018
2019   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
2020   bld.bld_base.emit_immediate = lp_emit_immediate_soa;
2021
2022   bld.bld_base.emit_prologue = emit_prologue;
2023   bld.bld_base.emit_epilogue = emit_epilogue;
2024
2025   /* Set opcode actions */
2026   lp_set_default_actions_cpu(&bld.bld_base);
2027
2028   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
2029   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
2030   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
2031   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
2032   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
2033   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
2034   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
2035   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
2036   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
2037   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
2038   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
2039   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
2040   bld.bld_base.op_actions[TGSI_OPCODE_KIL].emit = kil_emit;
2041   bld.bld_base.op_actions[TGSI_OPCODE_KILP].emit = kilp_emit;
2042   bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit;
2043   bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit;
2044   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
2045   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
2046   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
2047   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
2048   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
2049   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
2050   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
2051
2052   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base);
2053
2054
2055   bld.system_values_array = system_values_array;
2056
2057   lp_build_tgsi_llvm(&bld.bld_base, tokens);
2058
2059   if (0) {
2060      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
2061      LLVMValueRef function = LLVMGetBasicBlockParent(block);
2062      debug_printf("11111111111111111111111111111 \n");
2063      tgsi_dump(tokens, 0);
2064      lp_debug_dump_value(function);
2065      debug_printf("2222222222222222222222222222 \n");
2066   }
2067
2068   if (0) {
2069      LLVMModuleRef module = LLVMGetGlobalParent(
2070         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
2071      LLVMDumpModule(module);
2072
2073   }
2074}
2075
2076
2077/**
2078 * Build up the system values array out of individual values such as
2079 * the instance ID, front-face, primitive ID, etc.  The shader info is
2080 * used to determine which system values are needed and where to put
2081 * them in the system values array.
2082 *
2083 * XXX only instance ID is implemented at this time.
2084 *
2085 * The system values register file is similar to the constants buffer.
2086 * Example declaration:
2087 *    DCL SV[0], INSTANCEID
2088 * Example instruction:
2089 *    MOVE foo, SV[0].xxxx;
2090 *
2091 * \return  LLVM float array (interpreted as float [][4])
2092 */
2093LLVMValueRef
2094lp_build_system_values_array(struct gallivm_state *gallivm,
2095                             const struct tgsi_shader_info *info,
2096                             LLVMValueRef instance_id,
2097                             LLVMValueRef facing)
2098{
2099   LLVMValueRef size = lp_build_const_int32(gallivm, 4 * info->num_system_values);
2100   LLVMTypeRef float_t = LLVMFloatTypeInContext(gallivm->context);
2101   LLVMValueRef array = lp_build_array_alloca(gallivm, float_t,
2102                                              size, "sysvals_array");
2103   unsigned i;
2104
2105   for (i = 0; i < info->num_system_values; i++) {
2106      LLVMValueRef index = lp_build_const_int32(gallivm, i * 4);
2107      LLVMValueRef ptr, value = 0;
2108
2109      switch (info->system_value_semantic_name[i]) {
2110      case TGSI_SEMANTIC_INSTANCEID:
2111         /* convert instance ID from int to float */
2112         value = LLVMBuildSIToFP(gallivm->builder, instance_id, float_t,
2113                                 "sysval_instanceid");
2114         break;
2115      case TGSI_SEMANTIC_FACE:
2116         /* fall-through */
2117      default:
2118         assert(0 && "unexpected semantic in build_system_values_array()");
2119      }
2120
2121      ptr = LLVMBuildGEP(gallivm->builder, array, &index, 1, "");
2122      LLVMBuildStore(gallivm->builder, value, ptr);
2123   }
2124
2125   return array;
2126}
2127