lp_bld_tgsi_soa.c revision 3469715a8a171512cf9b528702e70393f01c6041
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29/**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39#include "pipe/p_config.h"
40#include "pipe/p_shader_tokens.h"
41#include "util/u_debug.h"
42#include "util/u_math.h"
43#include "util/u_memory.h"
44#include "tgsi/tgsi_dump.h"
45#include "tgsi/tgsi_exec.h"
46#include "tgsi/tgsi_info.h"
47#include "tgsi/tgsi_parse.h"
48#include "tgsi/tgsi_util.h"
49#include "tgsi/tgsi_scan.h"
50#include "lp_bld_tgsi_action.h"
51#include "lp_bld_type.h"
52#include "lp_bld_const.h"
53#include "lp_bld_arit.h"
54#include "lp_bld_bitarit.h"
55#include "lp_bld_gather.h"
56#include "lp_bld_init.h"
57#include "lp_bld_logic.h"
58#include "lp_bld_swizzle.h"
59#include "lp_bld_flow.h"
60#include "lp_bld_quad.h"
61#include "lp_bld_tgsi.h"
62#include "lp_bld_limits.h"
63#include "lp_bld_debug.h"
64#include "lp_bld_printf.h"
65#include "lp_bld_sample.h"
66
67
68static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
69{
70   LLVMTypeRef int_type = LLVMInt32TypeInContext(bld->gallivm->context);
71   LLVMBuilderRef builder = bld->gallivm->builder;
72
73   mask->bld = bld;
74   mask->has_mask = FALSE;
75   mask->cond_stack_size = 0;
76   mask->loop_stack_size = 0;
77   mask->call_stack_size = 0;
78
79   mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
80   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
81         LLVMConstAllOnes(mask->int_vec_type);
82
83   mask->loop_limiter = lp_build_alloca(bld->gallivm, int_type, "looplimiter");
84
85   LLVMBuildStore(
86      builder,
87      LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
88      mask->loop_limiter);
89}
90
91static void lp_exec_mask_update(struct lp_exec_mask *mask)
92{
93   LLVMBuilderRef builder = mask->bld->gallivm->builder;
94
95   if (mask->loop_stack_size) {
96      /*for loops we need to update the entire mask at runtime */
97      LLVMValueRef tmp;
98      assert(mask->break_mask);
99      tmp = LLVMBuildAnd(builder,
100                         mask->cont_mask,
101                         mask->break_mask,
102                         "maskcb");
103      mask->exec_mask = LLVMBuildAnd(builder,
104                                     mask->cond_mask,
105                                     tmp,
106                                     "maskfull");
107   } else
108      mask->exec_mask = mask->cond_mask;
109
110   if (mask->call_stack_size) {
111      mask->exec_mask = LLVMBuildAnd(builder,
112                                     mask->exec_mask,
113                                     mask->ret_mask,
114                                     "callmask");
115   }
116
117   mask->has_mask = (mask->cond_stack_size > 0 ||
118                     mask->loop_stack_size > 0 ||
119                     mask->call_stack_size > 0);
120}
121
122static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
123                                   LLVMValueRef val)
124{
125   LLVMBuilderRef builder = mask->bld->gallivm->builder;
126
127   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
128   if (mask->cond_stack_size == 0) {
129      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
130   }
131   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
132   assert(LLVMTypeOf(val) == mask->int_vec_type);
133   mask->cond_mask = LLVMBuildAnd(builder,
134                                  mask->cond_mask,
135                                  val,
136                                  "");
137   lp_exec_mask_update(mask);
138}
139
140static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
141{
142   LLVMBuilderRef builder = mask->bld->gallivm->builder;
143   LLVMValueRef prev_mask;
144   LLVMValueRef inv_mask;
145
146   assert(mask->cond_stack_size);
147   prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
148   if (mask->cond_stack_size == 1) {
149      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
150   }
151
152   inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
153
154   mask->cond_mask = LLVMBuildAnd(builder,
155                                  inv_mask,
156                                  prev_mask, "");
157   lp_exec_mask_update(mask);
158}
159
160static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
161{
162   assert(mask->cond_stack_size);
163   mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
164   lp_exec_mask_update(mask);
165}
166
167static void lp_exec_bgnloop(struct lp_exec_mask *mask)
168{
169   LLVMBuilderRef builder = mask->bld->gallivm->builder;
170
171   if (mask->loop_stack_size == 0) {
172      assert(mask->loop_block == NULL);
173      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
174      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
175      assert(mask->break_var == NULL);
176   }
177
178   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
179
180   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
181   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
182   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
183   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
184   ++mask->loop_stack_size;
185
186   mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
187   LLVMBuildStore(builder, mask->break_mask, mask->break_var);
188
189   mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
190
191   LLVMBuildBr(builder, mask->loop_block);
192   LLVMPositionBuilderAtEnd(builder, mask->loop_block);
193
194   mask->break_mask = LLVMBuildLoad(builder, mask->break_var, "");
195
196   lp_exec_mask_update(mask);
197}
198
199static void lp_exec_break(struct lp_exec_mask *mask)
200{
201   LLVMBuilderRef builder = mask->bld->gallivm->builder;
202   LLVMValueRef exec_mask = LLVMBuildNot(builder,
203                                         mask->exec_mask,
204                                         "break");
205
206   mask->break_mask = LLVMBuildAnd(builder,
207                                   mask->break_mask,
208                                   exec_mask, "break_full");
209
210   lp_exec_mask_update(mask);
211}
212
213static void lp_exec_continue(struct lp_exec_mask *mask)
214{
215   LLVMBuilderRef builder = mask->bld->gallivm->builder;
216   LLVMValueRef exec_mask = LLVMBuildNot(builder,
217                                         mask->exec_mask,
218                                         "");
219
220   mask->cont_mask = LLVMBuildAnd(builder,
221                                  mask->cont_mask,
222                                  exec_mask, "");
223
224   lp_exec_mask_update(mask);
225}
226
227
228static void lp_exec_endloop(struct gallivm_state *gallivm,
229                            struct lp_exec_mask *mask)
230{
231   LLVMBuilderRef builder = mask->bld->gallivm->builder;
232   LLVMBasicBlockRef endloop;
233   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
234   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
235                                               mask->bld->type.width *
236                                               mask->bld->type.length);
237   LLVMValueRef i1cond, i2cond, icond, limiter;
238
239   assert(mask->break_mask);
240
241   /*
242    * Restore the cont_mask, but don't pop
243    */
244   assert(mask->loop_stack_size);
245   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
246   lp_exec_mask_update(mask);
247
248   /*
249    * Unlike the continue mask, the break_mask must be preserved across loop
250    * iterations
251    */
252   LLVMBuildStore(builder, mask->break_mask, mask->break_var);
253
254   /* Decrement the loop limiter */
255   limiter = LLVMBuildLoad(builder, mask->loop_limiter, "");
256
257   limiter = LLVMBuildSub(
258      builder,
259      limiter,
260      LLVMConstInt(int_type, 1, false),
261      "");
262
263   LLVMBuildStore(builder, limiter, mask->loop_limiter);
264
265   /* i1cond = (mask != 0) */
266   i1cond = LLVMBuildICmp(
267      builder,
268      LLVMIntNE,
269      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
270      LLVMConstNull(reg_type), "");
271
272   /* i2cond = (looplimiter > 0) */
273   i2cond = LLVMBuildICmp(
274      builder,
275      LLVMIntSGT,
276      limiter,
277      LLVMConstNull(int_type), "");
278
279   /* if( i1cond && i2cond ) */
280   icond = LLVMBuildAnd(builder, i1cond, i2cond, "");
281
282   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
283
284   LLVMBuildCondBr(builder,
285                   icond, mask->loop_block, endloop);
286
287   LLVMPositionBuilderAtEnd(builder, endloop);
288
289   assert(mask->loop_stack_size);
290   --mask->loop_stack_size;
291   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
292   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
293   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
294   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
295
296   lp_exec_mask_update(mask);
297}
298
299/* stores val into an address pointed to by dst.
300 * mask->exec_mask is used to figure out which bits of val
301 * should be stored into the address
302 * (0 means don't store this bit, 1 means do store).
303 */
304static void lp_exec_mask_store(struct lp_exec_mask *mask,
305                               struct lp_build_context *bld_store,
306                               LLVMValueRef pred,
307                               LLVMValueRef val,
308                               LLVMValueRef dst)
309{
310   LLVMBuilderRef builder = mask->bld->gallivm->builder;
311
312   /* Mix the predicate and execution mask */
313   if (mask->has_mask) {
314      if (pred) {
315         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
316      } else {
317         pred = mask->exec_mask;
318      }
319   }
320
321   if (pred) {
322      LLVMValueRef real_val, dst_val;
323
324      dst_val = LLVMBuildLoad(builder, dst, "");
325      real_val = lp_build_select(bld_store,
326                                 pred,
327                                 val, dst_val);
328
329      LLVMBuildStore(builder, real_val, dst);
330   } else
331      LLVMBuildStore(builder, val, dst);
332}
333
334static void lp_exec_mask_call(struct lp_exec_mask *mask,
335                              int func,
336                              int *pc)
337{
338   assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
339   mask->call_stack[mask->call_stack_size].pc = *pc;
340   mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
341   mask->call_stack_size++;
342   *pc = func;
343}
344
345static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
346{
347   LLVMBuilderRef builder = mask->bld->gallivm->builder;
348   LLVMValueRef exec_mask;
349
350   if (mask->call_stack_size == 0) {
351      /* returning from main() */
352      *pc = -1;
353      return;
354   }
355   exec_mask = LLVMBuildNot(builder,
356                            mask->exec_mask,
357                            "ret");
358
359   mask->ret_mask = LLVMBuildAnd(builder,
360                                 mask->ret_mask,
361                                 exec_mask, "ret_full");
362
363   lp_exec_mask_update(mask);
364}
365
/**
 * Hook for the start of a subroutine body.  Nothing needs to be done to the
 * execution mask here.
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
   (void) mask;
}
369
370static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
371{
372   assert(mask->call_stack_size);
373   mask->call_stack_size--;
374   *pc = mask->call_stack[mask->call_stack_size].pc;
375   mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
376   lp_exec_mask_update(mask);
377}
378
379
380/**
381 * Return pointer to a temporary register channel (src or dest).
382 * Note that indirect addressing cannot be handled here.
383 * \param index  which temporary register
384 * \param chan  which channel of the temp register.
385 */
386LLVMValueRef
387lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
388             unsigned index,
389             unsigned chan)
390{
391   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
392   assert(chan < 4);
393   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
394      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
395      return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, "");
396   }
397   else {
398      return bld->temps[index][chan];
399   }
400}
401
402/**
403 * Return pointer to a output register channel (src or dest).
404 * Note that indirect addressing cannot be handled here.
405 * \param index  which output register
406 * \param chan  which channel of the output register.
407 */
408LLVMValueRef
409lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
410               unsigned index,
411               unsigned chan)
412{
413   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
414   assert(chan < 4);
415   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
416      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm,
417                                                 index * 4 + chan);
418      return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, "");
419   }
420   else {
421      return bld->outputs[index][chan];
422   }
423}
424
425/**
426 * Gather vector.
427 * XXX the lp_build_gather() function should be capable of doing this
428 * with a little work.
429 */
430static LLVMValueRef
431build_gather(struct lp_build_context *bld,
432             LLVMValueRef base_ptr,
433             LLVMValueRef indexes)
434{
435   LLVMBuilderRef builder = bld->gallivm->builder;
436   LLVMValueRef res = bld->undef;
437   unsigned i;
438
439   /*
440    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
441    */
442   for (i = 0; i < bld->type.length; i++) {
443      LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
444      LLVMValueRef index = LLVMBuildExtractElement(builder,
445                                                   indexes, ii, "");
446      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
447                                             &index, 1, "gather_ptr");
448      LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, "");
449
450      res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
451   }
452
453   return res;
454}
455
456
457/**
458 * Scatter/store vector.
459 */
460static void
461emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
462                  LLVMValueRef base_ptr,
463                  LLVMValueRef indexes,
464                  LLVMValueRef values,
465                  struct lp_exec_mask *mask,
466                  LLVMValueRef pred)
467{
468   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
469   LLVMBuilderRef builder = gallivm->builder;
470   unsigned i;
471
472   /* Mix the predicate and execution mask */
473   if (mask->has_mask) {
474      if (pred) {
475         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
476      }
477      else {
478         pred = mask->exec_mask;
479      }
480   }
481
482   /*
483    * Loop over elements of index_vec, store scalar value.
484    */
485   for (i = 0; i < bld->bld_base.base.type.length; i++) {
486      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
487      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
488      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
489      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
490      LLVMValueRef scalar_pred = pred ?
491         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
492
493      if (0)
494         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
495                         ii, val, index, scalar_ptr);
496
497      if (scalar_pred) {
498         LLVMValueRef real_val, dst_val;
499         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
500         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
501         LLVMBuildStore(builder, real_val, scalar_ptr);
502      }
503      else {
504         LLVMBuildStore(builder, val, scalar_ptr);
505      }
506   }
507}
508
509
510/**
511 * Read the current value of the ADDR register, convert the floats to
512 * ints, add the base index and return the vector of offsets.
513 * The offsets will be used to index into the constant buffer or
514 * temporary register file.
515 */
516static LLVMValueRef
517get_indirect_index(struct lp_build_tgsi_soa_context *bld,
518                   unsigned reg_file, unsigned reg_index,
519                   const struct tgsi_src_register *indirect_reg)
520{
521   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
522   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
523   /* always use X component of address register */
524   unsigned swizzle = indirect_reg->SwizzleX;
525   LLVMValueRef base;
526   LLVMValueRef rel;
527   LLVMValueRef max_index;
528   LLVMValueRef index;
529
530   assert(bld->indirect_files & (1 << reg_file));
531
532   base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
533
534   assert(swizzle < 4);
535   rel = LLVMBuildLoad(builder,
536                        bld->addr[indirect_reg->Index][swizzle],
537                        "load addr reg");
538
539   index = lp_build_add(uint_bld, base, rel);
540
541   max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
542                                      uint_bld->type,
543                                      bld->bld_base.info->file_max[reg_file]);
544
545   assert(!uint_bld->type.sign);
546   index = lp_build_min(uint_bld, index, max_index);
547
548   return index;
549}
550
551static struct lp_build_context *
552stype_to_fetch(struct lp_build_tgsi_context * bld_base,
553	       enum tgsi_opcode_type stype)
554{
555   struct lp_build_context *bld_fetch;
556
557   switch (stype) {
558   case TGSI_TYPE_FLOAT:
559   case TGSI_TYPE_UNTYPED:
560      bld_fetch = &bld_base->base;
561      break;
562   case TGSI_TYPE_UNSIGNED:
563      bld_fetch = &bld_base->uint_bld;
564      break;
565   case TGSI_TYPE_SIGNED:
566      bld_fetch = &bld_base->int_bld;
567      break;
568   case TGSI_TYPE_VOID:
569   case TGSI_TYPE_DOUBLE:
570   default:
571      assert(0);
572      bld_fetch = NULL;
573      break;
574   }
575   return bld_fetch;
576}
577
578static LLVMValueRef
579emit_fetch_constant(
580   struct lp_build_tgsi_context * bld_base,
581   const struct tgsi_full_src_register * reg,
582   enum tgsi_opcode_type stype,
583   unsigned swizzle)
584{
585   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
586   struct gallivm_state *gallivm = bld_base->base.gallivm;
587   LLVMBuilderRef builder = gallivm->builder;
588   struct lp_build_context *uint_bld = &bld_base->uint_bld;
589   LLVMValueRef indirect_index = NULL;
590   struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
591
592   /* XXX: Handle fetching xyzw components as a vector */
593   assert(swizzle != ~0);
594
595   if (reg->Register.Indirect) {
596      indirect_index = get_indirect_index(bld,
597                                          reg->Register.File,
598                                          reg->Register.Index,
599                                          &reg->Indirect);
600   }
601
602   if (reg->Register.Indirect) {
603      LLVMValueRef swizzle_vec =
604         lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
605      LLVMValueRef index_vec;  /* index into the const buffer */
606
607      /* index_vec = indirect_index * 4 + swizzle */
608      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
609      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
610
611      /* Gather values from the constant buffer */
612      return build_gather(bld_fetch, bld->consts_ptr, index_vec);
613   }
614   else {
615      LLVMValueRef index;  /* index into the const buffer */
616      LLVMValueRef scalar, scalar_ptr;
617
618      index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);
619
620      scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
621                                   &index, 1, "");
622
623      if (stype != TGSI_TYPE_FLOAT && stype != TGSI_TYPE_UNTYPED) {
624         LLVMTypeRef ivtype = LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0);
625         LLVMValueRef temp_ptr;
626         temp_ptr = LLVMBuildBitCast(builder, scalar_ptr, ivtype, "");
627         scalar = LLVMBuildLoad(builder, temp_ptr, "");
628      } else
629         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
630
631      return lp_build_broadcast_scalar(bld_fetch, scalar);
632   }
633}
634
635static LLVMValueRef
636emit_fetch_immediate(
637   struct lp_build_tgsi_context * bld_base,
638   const struct tgsi_full_src_register * reg,
639   enum tgsi_opcode_type stype,
640   unsigned swizzle)
641{
642   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
643   LLVMValueRef res = bld->immediates[reg->Register.Index][swizzle];
644   assert(res);
645
646   if (stype == TGSI_TYPE_UNSIGNED) {
647      res = LLVMConstBitCast(res, bld_base->uint_bld.vec_type);
648   } else if (stype == TGSI_TYPE_SIGNED) {
649      res = LLVMConstBitCast(res, bld_base->int_bld.vec_type);
650   }
651   return res;
652}
653
654static LLVMValueRef
655emit_fetch_input(
656   struct lp_build_tgsi_context * bld_base,
657   const struct tgsi_full_src_register * reg,
658   enum tgsi_opcode_type stype,
659   unsigned swizzle)
660{
661   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
662   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
663   LLVMBuilderRef builder = gallivm->builder;
664   struct lp_build_context *uint_bld = &bld_base->uint_bld;
665   LLVMValueRef indirect_index = NULL;
666   LLVMValueRef res;
667
668   if (reg->Register.Indirect) {
669      indirect_index = get_indirect_index(bld,
670                                          reg->Register.File,
671                                          reg->Register.Index,
672                                          &reg->Indirect);
673   }
674
675   if (reg->Register.Indirect) {
676      LLVMValueRef swizzle_vec =
677         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
678      LLVMValueRef length_vec =
679         lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
680      LLVMValueRef index_vec;  /* index into the const buffer */
681      LLVMValueRef inputs_array;
682      LLVMTypeRef float4_ptr_type;
683
684      /* index_vec = (indirect_index * 4 + swizzle) * length */
685      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
686      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
687      index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
688
689      /* cast inputs_array pointer to float* */
690      float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
691      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
692                                         float4_ptr_type, "");
693
694      /* Gather values from the temporary register array */
695      res = build_gather(&bld_base->base, inputs_array, index_vec);
696   } else {
697      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
698         LLVMValueRef lindex = lp_build_const_int32(gallivm,
699                                        reg->Register.Index * 4 + swizzle);
700         LLVMValueRef input_ptr =  LLVMBuildGEP(builder,
701                                                bld->inputs_array, &lindex, 1, "");
702         res = LLVMBuildLoad(builder, input_ptr, "");
703      }
704      else {
705         res = bld->inputs[reg->Register.Index][swizzle];
706      }
707   }
708
709   assert(res);
710
711   if (stype == TGSI_TYPE_UNSIGNED) {
712      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
713   } else if (stype == TGSI_TYPE_SIGNED) {
714      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
715   }
716
717   return res;
718}
719
720static LLVMValueRef
721emit_fetch_temporary(
722   struct lp_build_tgsi_context * bld_base,
723   const struct tgsi_full_src_register * reg,
724   enum tgsi_opcode_type stype,
725   unsigned swizzle)
726{
727   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
728   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
729   LLVMBuilderRef builder = gallivm->builder;
730   struct lp_build_context *uint_bld = &bld_base->uint_bld;
731   LLVMValueRef indirect_index = NULL;
732   LLVMValueRef res;
733
734   if (reg->Register.Indirect) {
735      indirect_index = get_indirect_index(bld,
736                                          reg->Register.File,
737                                          reg->Register.Index,
738                                          &reg->Indirect);
739   }
740
741   if (reg->Register.Indirect) {
742      LLVMValueRef swizzle_vec =
743         lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
744      LLVMValueRef length_vec =
745         lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type,
746                                bld->bld_base.base.type.length);
747      LLVMValueRef index_vec;  /* index into the const buffer */
748      LLVMValueRef temps_array;
749      LLVMTypeRef float4_ptr_type;
750
751      /* index_vec = (indirect_index * 4 + swizzle) * length */
752      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
753      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
754      index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
755
756      /* cast temps_array pointer to float* */
757      float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context), 0);
758      temps_array = LLVMBuildBitCast(builder, bld->temps_array,
759                                     float4_ptr_type, "");
760
761      /* Gather values from the temporary register array */
762      res = build_gather(&bld_base->base, temps_array, index_vec);
763   }
764   else {
765      LLVMValueRef temp_ptr;
766      if (stype != TGSI_TYPE_FLOAT && stype != TGSI_TYPE_UNTYPED) {
767         LLVMTypeRef itype = LLVMPointerType(bld->bld_base.int_bld.vec_type, 0);
768         LLVMValueRef tint_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
769                                                     swizzle);
770         temp_ptr = LLVMBuildBitCast(builder, tint_ptr, itype, "");
771      } else
772         temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
773      res = LLVMBuildLoad(builder, temp_ptr, "");
774      if (!res)
775         return bld->bld_base.base.undef;
776   }
777
778   return res;
779}
780
781static LLVMValueRef
782emit_fetch_system_value(
783   struct lp_build_tgsi_context * bld_base,
784   const struct tgsi_full_src_register * reg,
785   enum tgsi_opcode_type stype,
786   unsigned swizzle)
787{
788   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
789   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
790   const struct tgsi_shader_info *info = bld->bld_base.info;
791   LLVMBuilderRef builder = gallivm->builder;
792   LLVMValueRef res;
793   enum tgsi_opcode_type atype; // Actual type of the value
794
795   assert(!reg->Register.Indirect);
796
797   switch (info->system_value_semantic_name[reg->Register.Index]) {
798   case TGSI_SEMANTIC_INSTANCEID:
799      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
800      atype = TGSI_TYPE_UNSIGNED;
801      break;
802
803   case TGSI_SEMANTIC_VERTEXID:
804      res = bld->system_values.vertex_id;
805      atype = TGSI_TYPE_UNSIGNED;
806      break;
807
808   default:
809      assert(!"unexpected semantic in emit_fetch_system_value");
810      res = bld_base->base.zero;
811      atype = TGSI_TYPE_FLOAT;
812      break;
813   }
814
815   if (atype != stype) {
816      if (stype == TGSI_TYPE_FLOAT) {
817         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
818      } else if (stype == TGSI_TYPE_UNSIGNED) {
819         res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
820      } else if (stype == TGSI_TYPE_SIGNED) {
821         res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
822      }
823   }
824
825   return res;
826}
827
828/**
829 * Register fetch with derivatives.
830 */
831static void
832emit_fetch_deriv(
833   struct lp_build_tgsi_soa_context *bld,
834   LLVMValueRef src,
835   LLVMValueRef *res,
836   LLVMValueRef *ddx,
837   LLVMValueRef *ddy)
838{
839   if(res)
840      *res = src;
841
842   /* TODO: use interpolation coeffs for inputs */
843
844   if(ddx)
845      *ddx = lp_build_ddx(&bld->bld_base.base, src);
846
847   if(ddy)
848      *ddy = lp_build_ddy(&bld->bld_base.base, src);
849}
850
851
852/**
853 * Predicate.
854 */
855static void
856emit_fetch_predicate(
857   struct lp_build_tgsi_soa_context *bld,
858   const struct tgsi_full_instruction *inst,
859   LLVMValueRef *pred)
860{
861   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
862   unsigned index;
863   unsigned char swizzles[4];
864   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
865   LLVMValueRef value;
866   unsigned chan;
867
868   if (!inst->Instruction.Predicate) {
869      TGSI_FOR_EACH_CHANNEL( chan ) {
870         pred[chan] = NULL;
871      }
872      return;
873   }
874
875   swizzles[0] = inst->Predicate.SwizzleX;
876   swizzles[1] = inst->Predicate.SwizzleY;
877   swizzles[2] = inst->Predicate.SwizzleZ;
878   swizzles[3] = inst->Predicate.SwizzleW;
879
880   index = inst->Predicate.Index;
881   assert(index < LP_MAX_TGSI_PREDS);
882
883   TGSI_FOR_EACH_CHANNEL( chan ) {
884      unsigned swizzle = swizzles[chan];
885
886      /*
887       * Only fetch the predicate register channels that are actually listed
888       * in the swizzles
889       */
890      if (!unswizzled[swizzle]) {
891         value = LLVMBuildLoad(builder,
892                               bld->preds[index][swizzle], "");
893
894         /*
895          * Convert the value to an integer mask.
896          *
897          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
898          * is needlessly causing two comparisons due to storing the intermediate
899          * result as float vector instead of an integer mask vector.
900          */
901         value = lp_build_compare(bld->bld_base.base.gallivm,
902                                  bld->bld_base.base.type,
903                                  PIPE_FUNC_NOTEQUAL,
904                                  value,
905                                  bld->bld_base.base.zero);
906         if (inst->Predicate.Negate) {
907            value = LLVMBuildNot(builder, value, "");
908         }
909
910         unswizzled[swizzle] = value;
911      } else {
912         value = unswizzled[swizzle];
913      }
914
915      pred[chan] = value;
916   }
917}
918
919/**
920 * Register store.
921 */
922static void
923emit_store_chan(
924   struct lp_build_tgsi_context *bld_base,
925   const struct tgsi_full_instruction *inst,
926   unsigned index,
927   unsigned chan_index,
928   LLVMValueRef pred,
929   LLVMValueRef value)
930{
931   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
932   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
933   LLVMBuilderRef builder = gallivm->builder;
934   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
935   struct lp_build_context *uint_bld = &bld_base->uint_bld;
936   LLVMValueRef indirect_index = NULL;
937   struct lp_build_context *bld_store;
938   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
939
940   switch (dtype) {
941   default:
942   case TGSI_TYPE_FLOAT:
943   case TGSI_TYPE_UNTYPED:
944      bld_store = &bld_base->base;
945      break;
946   case TGSI_TYPE_UNSIGNED:
947      bld_store = &bld_base->uint_bld;
948      break;
949   case TGSI_TYPE_SIGNED:
950      bld_store = &bld_base->int_bld;
951      break;
952   case TGSI_TYPE_DOUBLE:
953   case TGSI_TYPE_VOID:
954      assert(0);
955      bld_store = NULL;
956      break;
957   }
958
959   switch( inst->Instruction.Saturate ) {
960   case TGSI_SAT_NONE:
961      break;
962
963   case TGSI_SAT_ZERO_ONE:
964      value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
965      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
966      break;
967
968   case TGSI_SAT_MINUS_PLUS_ONE:
969      value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
970      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
971      break;
972
973   default:
974      assert(0);
975   }
976
977   if (reg->Register.Indirect) {
978      indirect_index = get_indirect_index(bld,
979                                          reg->Register.File,
980                                          reg->Register.Index,
981                                          &reg->Indirect);
982   } else {
983      assert(reg->Register.Index <=
984                             bld->bld_base.info->file_max[reg->Register.File]);
985   }
986
987   switch( reg->Register.File ) {
988   case TGSI_FILE_OUTPUT:
989      if (reg->Register.Indirect) {
990         LLVMValueRef chan_vec =
991            lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
992         LLVMValueRef length_vec =
993            lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
994         LLVMValueRef index_vec;  /* indexes into the temp registers */
995         LLVMValueRef outputs_array;
996         LLVMValueRef pixel_offsets;
997         LLVMTypeRef float_ptr_type;
998         int i;
999
1000         /* build pixel offset vector: {0, 1, 2, 3, ...} */
1001         pixel_offsets = uint_bld->undef;
1002         for (i = 0; i < bld->bld_base.base.type.length; i++) {
1003            LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1004            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
1005                                                   ii, ii, "");
1006         }
1007
1008         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1009         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1010         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1011         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1012         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1013
1014         float_ptr_type =
1015            LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1016         outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
1017                                          float_ptr_type, "");
1018
1019         /* Scatter store values into temp registers */
1020         emit_mask_scatter(bld, outputs_array, index_vec, value,
1021                           &bld->exec_mask, pred);
1022      }
1023      else {
1024         LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1025                                               chan_index);
1026         lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, out_ptr);
1027      }
1028      break;
1029
1030   case TGSI_FILE_TEMPORARY:
1031      if (reg->Register.Indirect) {
1032         LLVMValueRef chan_vec =
1033            lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
1034         LLVMValueRef length_vec =
1035            lp_build_const_int_vec(gallivm, uint_bld->type,
1036                                   bld->bld_base.base.type.length);
1037         LLVMValueRef index_vec;  /* indexes into the temp registers */
1038         LLVMValueRef temps_array;
1039         LLVMValueRef pixel_offsets;
1040         LLVMTypeRef float_ptr_type;
1041         int i;
1042
1043         /* build pixel offset vector: {0, 1, 2, 3, ...} */
1044         pixel_offsets = uint_bld->undef;
1045         for (i = 0; i < bld->bld_base.base.type.length; i++) {
1046            LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1047            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
1048                                                   ii, ii, "");
1049         }
1050
1051         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
1052         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1053         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
1054         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
1055         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
1056
1057         float_ptr_type =
1058            LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1059         temps_array = LLVMBuildBitCast(builder, bld->temps_array,
1060                                        float_ptr_type, "");
1061
1062         /* Scatter store values into temp registers */
1063         emit_mask_scatter(bld, temps_array, index_vec, value,
1064                           &bld->exec_mask, pred);
1065      }
1066      else {
1067         LLVMValueRef temp_ptr;
1068
1069         switch (dtype) {
1070         case TGSI_TYPE_UNSIGNED:
1071         case TGSI_TYPE_SIGNED: {
1072            LLVMTypeRef itype = bld_base->int_bld.vec_type;
1073            LLVMTypeRef ivtype = LLVMPointerType(itype, 0);
1074            LLVMValueRef tint_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
1075                                                        chan_index);
1076            LLVMValueRef temp_value_ptr;
1077
1078            temp_ptr = LLVMBuildBitCast(builder, tint_ptr, ivtype, "");
1079            temp_value_ptr = LLVMBuildBitCast(builder, value, itype, "");
1080            value = temp_value_ptr;
1081            break;
1082         }
1083         default:
1084         case TGSI_TYPE_FLOAT:
1085         case TGSI_TYPE_UNTYPED:
1086            temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
1087                                           chan_index);
1088            break;
1089         }
1090
1091         lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, temp_ptr);
1092      }
1093      break;
1094
1095   case TGSI_FILE_ADDRESS:
1096      assert(dtype == TGSI_TYPE_SIGNED);
1097      assert(LLVMTypeOf(value) == bld_base->base.int_vec_type);
1098      lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value,
1099                         bld->addr[reg->Register.Index][chan_index]);
1100      break;
1101
1102   case TGSI_FILE_PREDICATE:
1103      lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value,
1104                         bld->preds[reg->Register.Index][chan_index]);
1105      break;
1106
1107   default:
1108      assert( 0 );
1109   }
1110}
1111
1112static void
1113emit_store(
1114   struct lp_build_tgsi_context * bld_base,
1115   const struct tgsi_full_instruction * inst,
1116   const struct tgsi_opcode_info * info,
1117   LLVMValueRef dst[4])
1118
1119{
1120   unsigned chan_index;
1121   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1122
1123   if(info->num_dst) {
1124      LLVMValueRef pred[TGSI_NUM_CHANNELS];
1125
1126      emit_fetch_predicate( bld, inst, pred );
1127
1128      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1129         emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
1130      }
1131   }
1132}
1133
1134/**
1135 * High-level instruction translators.
1136 */
1137
1138static void
1139emit_tex( struct lp_build_tgsi_soa_context *bld,
1140          const struct tgsi_full_instruction *inst,
1141          enum lp_build_tex_modifier modifier,
1142          LLVMValueRef *texel)
1143{
1144   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1145   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1146   unsigned unit;
1147   LLVMValueRef lod_bias, explicit_lod;
1148   LLVMValueRef oow = NULL;
1149   LLVMValueRef coords[3];
1150   struct lp_derivatives derivs;
1151   unsigned num_coords;
1152   unsigned dims;
1153   unsigned i;
1154
1155   if (!bld->sampler) {
1156      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1157      for (i = 0; i < 4; i++) {
1158         texel[i] = bld->bld_base.base.undef;
1159      }
1160      return;
1161   }
1162
1163   derivs.ddx_ddy[0] = bld->bld_base.base.undef;
1164   derivs.ddx_ddy[1] = bld->bld_base.base.undef;
1165
1166   switch (inst->Texture.Texture) {
1167   case TGSI_TEXTURE_1D:
1168      num_coords = 1;
1169      dims = 1;
1170      break;
1171   case TGSI_TEXTURE_1D_ARRAY:
1172      num_coords = 2;
1173      dims = 1;
1174      break;
1175   case TGSI_TEXTURE_2D:
1176   case TGSI_TEXTURE_RECT:
1177      num_coords = 2;
1178      dims = 2;
1179      break;
1180   case TGSI_TEXTURE_SHADOW1D:
1181   case TGSI_TEXTURE_SHADOW1D_ARRAY:
1182      num_coords = 3;
1183      dims = 1;
1184      break;
1185   case TGSI_TEXTURE_SHADOW2D:
1186   case TGSI_TEXTURE_SHADOWRECT:
1187   case TGSI_TEXTURE_2D_ARRAY:
1188   case TGSI_TEXTURE_CUBE:
1189      num_coords = 3;
1190      dims = 2;
1191      break;
1192   case TGSI_TEXTURE_3D:
1193      num_coords = 3;
1194      dims = 3;
1195      break;
1196   case TGSI_TEXTURE_SHADOW2D_ARRAY:
1197      num_coords = 4;
1198      dims = 2;
1199      break;
1200   default:
1201      assert(0);
1202      return;
1203   }
1204
1205   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
1206      lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1207      explicit_lod = NULL;
1208   }
1209   else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
1210      lod_bias = NULL;
1211      explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1212   }
1213   else {
1214      lod_bias = NULL;
1215      explicit_lod = NULL;
1216   }
1217
1218   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
1219      oow = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
1220      oow = lp_build_rcp(&bld->bld_base.base, oow);
1221   }
1222
1223   for (i = 0; i < num_coords; i++) {
1224      coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
1225      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
1226         coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
1227   }
1228   for (i = num_coords; i < 3; i++) {
1229      coords[i] = bld->bld_base.base.undef;
1230   }
1231
1232   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
1233      LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
1234      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
1235      LLVMValueRef ddxdyonec[3];
1236      unsigned length = bld->bld_base.base.type.length;
1237      unsigned num_quads = length / 4;
1238      unsigned dim;
1239      unsigned quad;
1240
1241      for (dim = 0; dim < dims; ++dim) {
1242         LLVMValueRef srcx = lp_build_emit_fetch( &bld->bld_base, inst, 1, dim );
1243         LLVMValueRef srcy = lp_build_emit_fetch( &bld->bld_base, inst, 2, dim );
1244         for (quad = 0; quad < num_quads; ++quad) {
1245            unsigned s1 = 4*quad;
1246            unsigned s2 = 4*quad + length;
1247            shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
1248            shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s2);
1249            shuffles[4*quad + 2] = i32undef;
1250            shuffles[4*quad + 3] = i32undef;
1251         }
1252         ddxdyonec[dim] = LLVMBuildShuffleVector(builder, srcx, srcy,
1253                                               LLVMConstVector(shuffles, length), "");
1254      }
1255      if (dims == 1) {
1256         derivs.ddx_ddy[0] = ddxdyonec[0];
1257      }
1258      else if (dims >= 2) {
1259         for (quad = 0; quad < num_quads; ++quad) {
1260            unsigned s1 = 4*quad;
1261            unsigned s2 = 4*quad + length;
1262            shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
1263            shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s1 + 1);
1264            shuffles[4*quad + 2] = lp_build_const_int32(gallivm, s2);
1265            shuffles[4*quad + 3] = lp_build_const_int32(gallivm, s2 + 1);
1266         }
1267         derivs.ddx_ddy[0] = LLVMBuildShuffleVector(builder, ddxdyonec[0], ddxdyonec[1],
1268                                                  LLVMConstVector(shuffles, length), "");
1269         if (dims == 3) {
1270            derivs.ddx_ddy[1] = ddxdyonec[2];
1271         }
1272      }
1273      unit = inst->Src[3].Register.Index;
1274   }  else {
1275      if (dims == 1) {
1276         derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[0]);
1277      }
1278      else if (dims >= 2) {
1279         derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(&bld->bld_base.base,
1280                                                            coords[0], coords[1]);
1281         if (dims == 3) {
1282            derivs.ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[2]);
1283         }
1284      }
1285      unit = inst->Src[1].Register.Index;
1286   }
1287
1288   bld->sampler->emit_fetch_texel(bld->sampler,
1289                                  bld->bld_base.base.gallivm,
1290                                  bld->bld_base.base.type,
1291                                  unit, num_coords, coords,
1292                                  &derivs,
1293                                  lod_bias, explicit_lod,
1294                                  texel);
1295}
1296
1297static void
1298emit_txq( struct lp_build_tgsi_soa_context *bld,
1299          const struct tgsi_full_instruction *inst,
1300          LLVMValueRef *sizes_out)
1301{
1302   LLVMValueRef explicit_lod;
1303   unsigned num_coords, has_lod;
1304   unsigned i;
1305
1306   switch (inst->Texture.Texture) {
1307   case TGSI_TEXTURE_1D:
1308   case TGSI_TEXTURE_SHADOW1D:
1309   case TGSI_TEXTURE_SHADOW2D:
1310   case TGSI_TEXTURE_SHADOWCUBE:
1311      num_coords = 1;
1312      has_lod = 1;
1313      break;
1314   case TGSI_TEXTURE_2D:
1315   case TGSI_TEXTURE_CUBE:
1316   case TGSI_TEXTURE_1D_ARRAY:
1317   case TGSI_TEXTURE_SHADOW1D_ARRAY:
1318      num_coords = 2;
1319      has_lod = 1;
1320      break;
1321   case TGSI_TEXTURE_3D:
1322// case TGSI_TEXTURE_CUBE_ARRAY:
1323// case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1324   case TGSI_TEXTURE_2D_ARRAY:
1325   case TGSI_TEXTURE_SHADOW2D_ARRAY:
1326      num_coords = 3;
1327      has_lod = 1;
1328      break;
1329
1330   case TGSI_TEXTURE_BUFFER:
1331      num_coords = 1;
1332      has_lod = 0;
1333      break;
1334
1335   case TGSI_TEXTURE_RECT:
1336   case TGSI_TEXTURE_SHADOWRECT:
1337// case TGSI_TEXTURE_2D_MS:
1338      num_coords = 2;
1339      has_lod = 0;
1340      break;
1341
1342// case TGSI_TEXTURE_2D_MS_ARRAY:
1343//    num_coords = 3;
1344//    has_lod = 0;
1345//    break;
1346
1347   default:
1348      assert(0);
1349      return;
1350   }
1351
1352   if (!bld->sampler) {
1353      _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
1354      for (i = 0; i < num_coords; i++)
1355         sizes_out[i] = bld->bld_base.base.undef;
1356      return;
1357   }
1358
1359   if (has_lod)
1360      explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 2 );
1361   else
1362      explicit_lod = NULL;
1363
1364   bld->sampler->emit_size_query(bld->sampler,
1365                                 bld->bld_base.base.gallivm,
1366                                 bld->bld_base.int_bld.type,
1367                                 inst->Src[1].Register.Index,
1368                                 explicit_lod,
1369                                 sizes_out);
1370}
1371
1372static boolean
1373near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
1374		   int pc)
1375{
1376   int i;
1377
1378   for (i = 0; i < 5; i++) {
1379      unsigned opcode;
1380
1381      if (pc + i >= bld->bld_base.info->num_instructions)
1382	 return TRUE;
1383
1384      opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
1385
1386      if (opcode == TGSI_OPCODE_END)
1387	 return TRUE;
1388
1389      if (opcode == TGSI_OPCODE_TEX ||
1390	  opcode == TGSI_OPCODE_TXP ||
1391	  opcode == TGSI_OPCODE_TXD ||
1392	  opcode == TGSI_OPCODE_TXB ||
1393	  opcode == TGSI_OPCODE_TXL ||
1394	  opcode == TGSI_OPCODE_TXF ||
1395	  opcode == TGSI_OPCODE_TXQ ||
1396	  opcode == TGSI_OPCODE_CAL ||
1397	  opcode == TGSI_OPCODE_CALLNZ ||
1398	  opcode == TGSI_OPCODE_IF ||
1399	  opcode == TGSI_OPCODE_IFC ||
1400	  opcode == TGSI_OPCODE_BGNLOOP ||
1401	  opcode == TGSI_OPCODE_SWITCH)
1402	 return FALSE;
1403   }
1404
1405   return TRUE;
1406}
1407
1408
1409
1410/**
1411 * Kill fragment if any of the src register values are negative.
1412 */
1413static void
1414emit_kil(
1415   struct lp_build_tgsi_soa_context *bld,
1416   const struct tgsi_full_instruction *inst,
1417   int pc)
1418{
1419   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1420   const struct tgsi_full_src_register *reg = &inst->Src[0];
1421   LLVMValueRef terms[TGSI_NUM_CHANNELS];
1422   LLVMValueRef mask;
1423   unsigned chan_index;
1424
1425   memset(&terms, 0, sizeof terms);
1426
1427   TGSI_FOR_EACH_CHANNEL( chan_index ) {
1428      unsigned swizzle;
1429
1430      /* Unswizzle channel */
1431      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1432
1433      /* Check if the component has not been already tested. */
1434      assert(swizzle < TGSI_NUM_CHANNELS);
1435      if( !terms[swizzle] )
1436         /* TODO: change the comparison operator instead of setting the sign */
1437         terms[swizzle] =  lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
1438   }
1439
1440   mask = NULL;
1441   TGSI_FOR_EACH_CHANNEL( chan_index ) {
1442      if(terms[chan_index]) {
1443         LLVMValueRef chan_mask;
1444
1445         /*
1446          * If term < 0 then mask = 0 else mask = ~0.
1447          */
1448         chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
1449
1450         if(mask)
1451            mask = LLVMBuildAnd(builder, mask, chan_mask, "");
1452         else
1453            mask = chan_mask;
1454      }
1455   }
1456
1457   if(mask) {
1458      lp_build_mask_update(bld->mask, mask);
1459
1460      if (!near_end_of_shader(bld, pc))
1461	 lp_build_mask_check(bld->mask);
1462   }
1463}
1464
1465
1466/**
1467 * Predicated fragment kill.
1468 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
1469 * The only predication is the execution mask which will apply if
1470 * we're inside a loop or conditional.
1471 */
1472static void
1473emit_kilp(struct lp_build_tgsi_soa_context *bld,
1474          int pc)
1475{
1476   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1477   LLVMValueRef mask;
1478
1479   /* For those channels which are "alive", disable fragment shader
1480    * execution.
1481    */
1482   if (bld->exec_mask.has_mask) {
1483      mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
1484   }
1485   else {
1486      LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
1487      mask = zero;
1488   }
1489
1490   lp_build_mask_update(bld->mask, mask);
1491
1492   if (!near_end_of_shader(bld, pc))
1493      lp_build_mask_check(bld->mask);
1494}
1495
1496
1497/**
1498 * Emit code which will dump the value of all the temporary registers
1499 * to stdout.
1500 */
1501static void
1502emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
1503{
1504   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1505   LLVMBuilderRef builder = gallivm->builder;
1506   LLVMValueRef temp_ptr;
1507   LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
1508   LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
1509   LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
1510   LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
1511   int index;
1512   int n = bld->bld_base.info->file_max[TGSI_FILE_TEMPORARY];
1513
1514   for (index = 0; index < n; index++) {
1515      LLVMValueRef idx = lp_build_const_int32(gallivm, index);
1516      LLVMValueRef v[4][4], res;
1517      int chan;
1518
1519      lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
1520
1521      for (chan = 0; chan < 4; chan++) {
1522         temp_ptr = lp_get_temp_ptr_soa(bld, index, chan);
1523         res = LLVMBuildLoad(builder, temp_ptr, "");
1524         v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
1525         v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
1526         v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
1527         v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
1528      }
1529
1530      lp_build_printf(gallivm, "  X: %f %f %f %f\n",
1531                      v[0][0], v[0][1], v[0][2], v[0][3]);
1532      lp_build_printf(gallivm, "  Y: %f %f %f %f\n",
1533                      v[1][0], v[1][1], v[1][2], v[1][3]);
1534      lp_build_printf(gallivm, "  Z: %f %f %f %f\n",
1535                      v[2][0], v[2][1], v[2][2], v[2][3]);
1536      lp_build_printf(gallivm, "  W: %f %f %f %f\n",
1537                      v[3][0], v[3][1], v[3][2], v[3][3]);
1538   }
1539}
1540
1541
1542
1543void
1544lp_emit_declaration_soa(
1545   struct lp_build_tgsi_context *bld_base,
1546   const struct tgsi_full_declaration *decl)
1547{
1548   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
1549   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1550   LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
1551   const unsigned first = decl->Range.First;
1552   const unsigned last = decl->Range.Last;
1553   unsigned idx, i;
1554
1555   for (idx = first; idx <= last; ++idx) {
1556      assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
1557      switch (decl->Declaration.File) {
1558      case TGSI_FILE_TEMPORARY:
1559         assert(idx < LP_MAX_TGSI_TEMPS);
1560         if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
1561            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1562               bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
1563         }
1564         break;
1565
1566      case TGSI_FILE_OUTPUT:
1567         if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
1568            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1569               bld->outputs[idx][i] = lp_build_alloca(gallivm,
1570                                                      vec_type, "output");
1571         }
1572         break;
1573
1574      case TGSI_FILE_ADDRESS:
1575	 /* ADDR registers are the only allocated with an integer LLVM IR type,
1576	  * as they are guaranteed to always have integers.
1577	  * XXX: Not sure if this exception is worthwhile (or the whole idea of
1578	  * an ADDR register for that matter).
1579	  */
1580         assert(idx < LP_MAX_TGSI_ADDRS);
1581         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1582            bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
1583         break;
1584
1585      case TGSI_FILE_PREDICATE:
1586         assert(idx < LP_MAX_TGSI_PREDS);
1587         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
1588            bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
1589                                                 "predicate");
1590         break;
1591
1592      default:
1593         /* don't need to declare other vars */
1594         break;
1595      }
1596   }
1597}
1598
1599
1600void lp_emit_immediate_soa(
1601   struct lp_build_tgsi_context *bld_base,
1602   const struct tgsi_full_immediate *imm)
1603{
1604   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
1605   struct gallivm_state * gallivm = bld_base->base.gallivm;
1606
1607   /* simply copy the immediate values into the next immediates[] slot */
1608   unsigned i;
1609   const uint size = imm->Immediate.NrTokens - 1;
1610   assert(size <= 4);
1611   assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES);
1612   switch (imm->Immediate.DataType) {
1613   case TGSI_IMM_FLOAT32:
1614      for( i = 0; i < size; ++i )
1615         bld->immediates[bld->num_immediates][i] =
1616            lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
1617
1618      break;
1619   case TGSI_IMM_UINT32:
1620      for( i = 0; i < size; ++i ) {
1621         LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
1622         bld->immediates[bld->num_immediates][i] =
1623            LLVMConstBitCast(tmp, bld_base->base.vec_type);
1624      }
1625
1626      break;
1627   case TGSI_IMM_INT32:
1628      for( i = 0; i < size; ++i ) {
1629         LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
1630         bld->immediates[bld->num_immediates][i] =
1631            LLVMConstBitCast(tmp, bld_base->base.vec_type);
1632      }
1633
1634      break;
1635   }
1636   for( i = size; i < 4; ++i )
1637      bld->immediates[bld->num_immediates][i] = bld_base->base.undef;
1638
1639   bld->num_immediates++;
1640}
1641
1642static void
1643ddx_emit(
1644   const struct lp_build_tgsi_action * action,
1645   struct lp_build_tgsi_context * bld_base,
1646   struct lp_build_emit_data * emit_data)
1647{
1648   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1649
1650   emit_fetch_deriv(bld, emit_data->args[0], NULL,
1651                    &emit_data->output[emit_data->chan], NULL);
1652}
1653
1654static void
1655ddy_emit(
1656   const struct lp_build_tgsi_action * action,
1657   struct lp_build_tgsi_context * bld_base,
1658   struct lp_build_emit_data * emit_data)
1659{
1660   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1661
1662   emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
1663                    &emit_data->output[emit_data->chan]);
1664}
1665
1666static void
1667kilp_emit(
1668   const struct lp_build_tgsi_action * action,
1669   struct lp_build_tgsi_context * bld_base,
1670   struct lp_build_emit_data * emit_data)
1671{
1672   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1673
1674   emit_kilp(bld, bld_base->pc - 1);
1675}
1676
1677static void
1678kil_emit(
1679   const struct lp_build_tgsi_action * action,
1680   struct lp_build_tgsi_context * bld_base,
1681   struct lp_build_emit_data * emit_data)
1682{
1683   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1684
1685   emit_kil(bld, emit_data->inst, bld_base->pc - 1);
1686}
1687
1688static void
1689tex_emit(
1690   const struct lp_build_tgsi_action * action,
1691   struct lp_build_tgsi_context * bld_base,
1692   struct lp_build_emit_data * emit_data)
1693{
1694   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1695
1696   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, emit_data->output);
1697}
1698
1699static void
1700txb_emit(
1701   const struct lp_build_tgsi_action * action,
1702   struct lp_build_tgsi_context * bld_base,
1703   struct lp_build_emit_data * emit_data)
1704{
1705   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1706
1707   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
1708            emit_data->output);
1709}
1710
1711static void
1712txd_emit(
1713   const struct lp_build_tgsi_action * action,
1714   struct lp_build_tgsi_context * bld_base,
1715   struct lp_build_emit_data * emit_data)
1716{
1717   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1718
1719   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
1720            emit_data->output);
1721}
1722
1723static void
1724txl_emit(
1725   const struct lp_build_tgsi_action * action,
1726   struct lp_build_tgsi_context * bld_base,
1727   struct lp_build_emit_data * emit_data)
1728{
1729   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1730
1731   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
1732            emit_data->output);
1733}
1734
1735static void
1736txp_emit(
1737   const struct lp_build_tgsi_action * action,
1738   struct lp_build_tgsi_context * bld_base,
1739   struct lp_build_emit_data * emit_data)
1740{
1741   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1742
1743   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
1744            emit_data->output);
1745}
1746
1747static void
1748txq_emit(
1749   const struct lp_build_tgsi_action * action,
1750   struct lp_build_tgsi_context * bld_base,
1751   struct lp_build_emit_data * emit_data)
1752{
1753   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1754
1755   emit_txq(bld, emit_data->inst, emit_data->output);
1756}
1757
1758static void
1759cal_emit(
1760   const struct lp_build_tgsi_action * action,
1761   struct lp_build_tgsi_context * bld_base,
1762   struct lp_build_emit_data * emit_data)
1763{
1764   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1765
1766   lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
1767                     &bld_base->pc);
1768}
1769
1770static void
1771ret_emit(
1772   const struct lp_build_tgsi_action * action,
1773   struct lp_build_tgsi_context * bld_base,
1774   struct lp_build_emit_data * emit_data)
1775{
1776   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1777
1778   lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
1779}
1780
1781static void
1782brk_emit(
1783   const struct lp_build_tgsi_action * action,
1784   struct lp_build_tgsi_context * bld_base,
1785   struct lp_build_emit_data * emit_data)
1786{
1787   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1788
1789   lp_exec_break(&bld->exec_mask);
1790}
1791
1792static void
1793if_emit(
1794   const struct lp_build_tgsi_action * action,
1795   struct lp_build_tgsi_context * bld_base,
1796   struct lp_build_emit_data * emit_data)
1797{
1798   LLVMValueRef tmp;
1799   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1800
1801   tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
1802                      emit_data->args[0], bld->bld_base.base.zero);
1803   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
1804}
1805
1806static void
1807bgnloop_emit(
1808   const struct lp_build_tgsi_action * action,
1809   struct lp_build_tgsi_context * bld_base,
1810   struct lp_build_emit_data * emit_data)
1811{
1812   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1813
1814   lp_exec_bgnloop(&bld->exec_mask);
1815}
1816
1817static void
1818bgnsub_emit(
1819   const struct lp_build_tgsi_action * action,
1820   struct lp_build_tgsi_context * bld_base,
1821   struct lp_build_emit_data * emit_data)
1822{
1823   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1824
1825   lp_exec_mask_bgnsub(&bld->exec_mask);
1826}
1827
1828static void
1829else_emit(
1830   const struct lp_build_tgsi_action * action,
1831   struct lp_build_tgsi_context * bld_base,
1832   struct lp_build_emit_data * emit_data)
1833{
1834   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1835
1836   lp_exec_mask_cond_invert(&bld->exec_mask);
1837}
1838
1839static void
1840endif_emit(
1841   const struct lp_build_tgsi_action * action,
1842   struct lp_build_tgsi_context * bld_base,
1843   struct lp_build_emit_data * emit_data)
1844{
1845   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1846
1847   lp_exec_mask_cond_pop(&bld->exec_mask);
1848}
1849
1850static void
1851endloop_emit(
1852   const struct lp_build_tgsi_action * action,
1853   struct lp_build_tgsi_context * bld_base,
1854   struct lp_build_emit_data * emit_data)
1855{
1856   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1857
1858   lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
1859}
1860
1861static void
1862endsub_emit(
1863   const struct lp_build_tgsi_action * action,
1864   struct lp_build_tgsi_context * bld_base,
1865   struct lp_build_emit_data * emit_data)
1866{
1867   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1868
1869   lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
1870}
1871
1872static void
1873cont_emit(
1874   const struct lp_build_tgsi_action * action,
1875   struct lp_build_tgsi_context * bld_base,
1876   struct lp_build_emit_data * emit_data)
1877{
1878   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1879
1880   lp_exec_continue(&bld->exec_mask);
1881}
1882
1883/* XXX: Refactor and move it to lp_bld_tgsi_action.c
1884 *
1885 * XXX: What do the comments about xmm registers mean?  Maybe they are left over
1886 * from old code, but there is no garauntee that LLVM will use those registers
1887 * for this code.
1888 *
1889 * XXX: There should be no calls to lp_build_emit_fetch in this function.  This
1890 * should be handled by the emit_data->fetch_args function. */
1891static void
1892nrm_emit(
1893   const struct lp_build_tgsi_action * action,
1894   struct lp_build_tgsi_context * bld_base,
1895   struct lp_build_emit_data * emit_data)
1896{
1897   LLVMValueRef tmp0, tmp1;
1898   LLVMValueRef tmp4 = NULL;
1899   LLVMValueRef tmp5 = NULL;
1900   LLVMValueRef tmp6 = NULL;
1901   LLVMValueRef tmp7 = NULL;
1902   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1903
1904   uint dims = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
1905
1906  if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) ||
1907      TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y) ||
1908      TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z) ||
1909      (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4)) {
1910
1911      /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
1912
1913      /* xmm4 = src.x */
1914      /* xmm0 = src.x * src.x */
1915      tmp0 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_X);
1916      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
1917         tmp4 = tmp0;
1918      }
1919      tmp0 = lp_build_mul( &bld->bld_base.base, tmp0, tmp0);
1920
1921      /* xmm5 = src.y */
1922      /* xmm0 = xmm0 + src.y * src.y */
1923      tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
1924      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
1925         tmp5 = tmp1;
1926      }
1927      tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
1928      tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
1929
1930      /* xmm6 = src.z */
1931      /* xmm0 = xmm0 + src.z * src.z */
1932      tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Z);
1933      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
1934         tmp6 = tmp1;
1935      }
1936      tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
1937      tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
1938
1939      if (dims == 4) {
1940         /* xmm7 = src.w */
1941         /* xmm0 = xmm0 + src.w * src.w */
1942         tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_W);
1943         if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W)) {
1944            tmp7 = tmp1;
1945         }
1946         tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
1947         tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
1948      }
1949      /* xmm1 = 1 / sqrt(xmm0) */
1950      tmp1 = lp_build_rsqrt( &bld->bld_base.base, tmp0);
1951       /* dst.x = xmm1 * src.x */
1952      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
1953         emit_data->output[TGSI_CHAN_X] = lp_build_mul( &bld->bld_base.base, tmp4, tmp1);
1954      }
1955      /* dst.y = xmm1 * src.y */
1956      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
1957         emit_data->output[TGSI_CHAN_Y] = lp_build_mul( &bld->bld_base.base, tmp5, tmp1);
1958      }
1959
1960      /* dst.z = xmm1 * src.z */
1961      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
1962         emit_data->output[TGSI_CHAN_Z] = lp_build_mul( &bld->bld_base.base, tmp6, tmp1);
1963      }
1964      /* dst.w = xmm1 * src.w */
1965      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) && dims == 4) {
1966         emit_data->output[TGSI_CHAN_W] = lp_build_mul( &bld->bld_base.base, tmp7, tmp1);
1967      }
1968   }
1969
1970   /* dst.w = 1.0 */
1971   if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 3) {
1972       emit_data->output[TGSI_CHAN_W] = bld->bld_base.base.one;
1973   }
1974}
1975
1976static void emit_prologue(struct lp_build_tgsi_context * bld_base)
1977{
1978   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1979   struct gallivm_state * gallivm = bld_base->base.gallivm;
1980
1981   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
1982      LLVMValueRef array_size =
1983         lp_build_const_int32(gallivm,
1984                         bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
1985      bld->temps_array = lp_build_array_alloca(gallivm,
1986                                              bld_base->base.vec_type, array_size,
1987                                              "temp_array");
1988   }
1989
1990   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
1991      LLVMValueRef array_size =
1992         lp_build_const_int32(gallivm,
1993                            bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
1994      bld->outputs_array = lp_build_array_alloca(gallivm,
1995                                                bld_base->base.vec_type, array_size,
1996                                                "output_array");
1997   }
1998
1999   /* If we have indirect addressing in inputs we need to copy them into
2000    * our alloca array to be able to iterate over them */
2001   if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
2002      unsigned index, chan;
2003      LLVMTypeRef vec_type = bld_base->base.vec_type;
2004      LLVMValueRef array_size = lp_build_const_int32(gallivm,
2005            bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
2006      bld->inputs_array = lp_build_array_alloca(gallivm,
2007                                               vec_type, array_size,
2008                                               "input_array");
2009
2010      assert(bld_base->info->num_inputs
2011                        <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
2012
2013      for (index = 0; index < bld_base->info->num_inputs; ++index) {
2014         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
2015            LLVMValueRef lindex =
2016               lp_build_const_int32(gallivm, index * 4 + chan);
2017            LLVMValueRef input_ptr =
2018               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
2019                            &lindex, 1, "");
2020            LLVMValueRef value = bld->inputs[index][chan];
2021            if (value)
2022               LLVMBuildStore(gallivm->builder, value, input_ptr);
2023         }
2024      }
2025   }
2026}
2027
2028static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
2029{
2030   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2031
2032   if (0) {
2033      /* for debugging */
2034      emit_dump_temps(bld);
2035   }
2036
2037   /* If we have indirect addressing in outputs we need to copy our alloca array
2038    * to the outputs slots specified by the called */
2039   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
2040      unsigned index, chan;
2041      assert(bld_base->info->num_outputs <=
2042                        bld_base->info->file_max[TGSI_FILE_OUTPUT] + 1);
2043      for (index = 0; index < bld_base->info->num_outputs; ++index) {
2044         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
2045            bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
2046         }
2047      }
2048   }
2049}
2050
2051void
2052lp_build_tgsi_soa(struct gallivm_state *gallivm,
2053                  const struct tgsi_token *tokens,
2054                  struct lp_type type,
2055                  struct lp_build_mask_context *mask,
2056                  LLVMValueRef consts_ptr,
2057                  const struct lp_bld_tgsi_system_values *system_values,
2058                  const LLVMValueRef *pos,
2059                  const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
2060                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
2061                  struct lp_build_sampler_soa *sampler,
2062                  const struct tgsi_shader_info *info)
2063{
2064   struct lp_build_tgsi_soa_context bld;
2065
2066   struct lp_type res_type;
2067
2068   assert(type.length <= LP_MAX_VECTOR_LENGTH);
2069   memset(&res_type, 0, sizeof res_type);
2070   res_type.width = type.width;
2071   res_type.length = type.length;
2072   res_type.sign = 1;
2073
2074   /* Setup build context */
2075   memset(&bld, 0, sizeof bld);
2076   lp_build_context_init(&bld.bld_base.base, gallivm, type);
2077   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
2078   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
2079   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
2080   bld.mask = mask;
2081   bld.pos = pos;
2082   bld.inputs = inputs;
2083   bld.outputs = outputs;
2084   bld.consts_ptr = consts_ptr;
2085   bld.sampler = sampler;
2086   bld.bld_base.info = info;
2087   bld.indirect_files = info->indirect_files;
2088
2089   bld.bld_base.soa = TRUE;
2090   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
2091   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
2092   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
2093   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
2094   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
2095   bld.bld_base.emit_store = emit_store;
2096
2097   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
2098   bld.bld_base.emit_immediate = lp_emit_immediate_soa;
2099
2100   bld.bld_base.emit_prologue = emit_prologue;
2101   bld.bld_base.emit_epilogue = emit_epilogue;
2102
2103   /* Set opcode actions */
2104   lp_set_default_actions_cpu(&bld.bld_base);
2105
2106   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
2107   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
2108   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
2109   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
2110   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
2111   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
2112   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
2113   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
2114   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
2115   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
2116   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
2117   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
2118   bld.bld_base.op_actions[TGSI_OPCODE_KIL].emit = kil_emit;
2119   bld.bld_base.op_actions[TGSI_OPCODE_KILP].emit = kilp_emit;
2120   bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit;
2121   bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit;
2122   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
2123   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
2124   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
2125   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
2126   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
2127   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
2128   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
2129
2130   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base);
2131
2132   bld.system_values = *system_values;
2133
2134   lp_build_tgsi_llvm(&bld.bld_base, tokens);
2135
2136   if (0) {
2137      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
2138      LLVMValueRef function = LLVMGetBasicBlockParent(block);
2139      debug_printf("11111111111111111111111111111 \n");
2140      tgsi_dump(tokens, 0);
2141      lp_debug_dump_value(function);
2142      debug_printf("2222222222222222222222222222 \n");
2143   }
2144
2145   if (0) {
2146      LLVMModuleRef module = LLVMGetGlobalParent(
2147         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
2148      LLVMDumpModule(module);
2149
2150   }
2151}
2152