lp_bld_tgsi_aos.c revision 8b3c99a5ebbc2f8b586d8ae2bd9aa5c55bbf3f04
1/**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
/**
 * @file
 * TGSI to LLVM IR translation -- AoS.
 *
 * FIXME:
 * - No control flow support: the existing control flow code should be
 *   factored out of the SoA code into a common module and shared.
 * - No derivatives. Derivative logic should be pluggable, just like the
 *   samplers.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */
39
40#include "pipe/p_config.h"
41#include "pipe/p_shader_tokens.h"
42#include "util/u_debug.h"
43#include "util/u_math.h"
44#include "util/u_memory.h"
45#include "tgsi/tgsi_dump.h"
46#include "tgsi/tgsi_info.h"
47#include "tgsi/tgsi_parse.h"
48#include "tgsi/tgsi_util.h"
49#include "tgsi/tgsi_scan.h"
50#include "lp_bld_type.h"
51#include "lp_bld_const.h"
52#include "lp_bld_arit.h"
53#include "lp_bld_logic.h"
54#include "lp_bld_swizzle.h"
55#include "lp_bld_flow.h"
56#include "lp_bld_quad.h"
57#include "lp_bld_tgsi.h"
58#include "lp_bld_limits.h"
59#include "lp_bld_debug.h"
60
61
62#define LP_MAX_INSTRUCTIONS 256
63
64
65struct lp_build_tgsi_aos_context
66{
67   struct lp_build_context base;
68
69   /* Builder for integer masks and indices */
70   struct lp_build_context int_bld;
71
72   /*
73    * AoS swizzle used:
74    * - swizzles[0] = red index
75    * - swizzles[1] = green index
76    * - swizzles[2] = blue index
77    * - swizzles[3] = alpha index
78    */
79   unsigned char swizzles[4];
80   unsigned char inv_swizzles[4];
81
82   LLVMValueRef consts_ptr;
83   const LLVMValueRef *inputs;
84   LLVMValueRef *outputs;
85
86   struct lp_build_sampler_aos *sampler;
87
88   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES];
89   LLVMValueRef temps[LP_MAX_TGSI_TEMPS];
90   LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
91   LLVMValueRef preds[LP_MAX_TGSI_PREDS];
92
93   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
94    * set in the indirect_files field.
95    * The temps[] array above is unused then.
96    */
97   LLVMValueRef temps_array;
98
99   /** bitmask indicating which register files are accessed indirectly */
100   unsigned indirect_files;
101
102   struct tgsi_full_instruction *instructions;
103   uint max_instructions;
104};
105
106
107/**
108 * Wrapper around lp_build_swizzle_aos which translates swizzles to another
109 * ordering.
110 */
111static LLVMValueRef
112swizzle_aos(struct lp_build_tgsi_aos_context *bld,
113            LLVMValueRef a,
114            unsigned swizzle_x,
115            unsigned swizzle_y,
116            unsigned swizzle_z,
117            unsigned swizzle_w)
118{
119   unsigned char swizzles[4];
120
121   assert(swizzle_x < 4);
122   assert(swizzle_y < 4);
123   assert(swizzle_z < 4);
124   assert(swizzle_w < 4);
125
126   swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
127   swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
128   swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
129   swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
130
131   return lp_build_swizzle_aos(&bld->base, a, swizzles);
132}
133
134
135static LLVMValueRef
136swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
137                   LLVMValueRef a,
138                   unsigned chan)
139{
140   chan = bld->swizzles[chan];
141   return lp_build_swizzle_scalar_aos(&bld->base, a, chan);
142}
143
144
145/**
146 * Register fetch.
147 */
148static LLVMValueRef
149emit_fetch(
150   struct lp_build_tgsi_aos_context *bld,
151   const struct tgsi_full_instruction *inst,
152   unsigned src_op)
153{
154   LLVMBuilderRef builder = bld->base.gallivm->builder;
155   struct lp_type type = bld->base.type;
156   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
157   LLVMValueRef res;
158   unsigned chan;
159
160   assert(!reg->Register.Indirect);
161
162   /*
163    * Fetch the from the register file.
164    */
165
166   switch (reg->Register.File) {
167   case TGSI_FILE_CONSTANT:
168      /*
169       * Get the constants components
170       */
171
172      res = bld->base.undef;
173      for (chan = 0; chan < 4; ++chan) {
174         LLVMValueRef index;
175         LLVMValueRef scalar_ptr;
176         LLVMValueRef scalar;
177         LLVMValueRef swizzle;
178
179         index = lp_build_const_int32(bld->base.gallivm, reg->Register.Index * 4 + chan);
180
181         scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
182                                   &index, 1, "");
183
184         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
185
186         lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
187
188         /*
189          * NOTE: constants array is always assumed to be RGBA
190          */
191
192         swizzle = lp_build_const_int32(bld->base.gallivm, bld->swizzles[chan]);
193
194         res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
195      }
196
197      /*
198       * Broadcast the first quaternion to all others.
199       *
200       * XXX: could be factored into a reusable function.
201       */
202
203      if (type.length > 4) {
204         LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
205         unsigned i;
206
207         for (chan = 0; chan < 4; ++chan) {
208            shuffles[chan] = lp_build_const_int32(bld->base.gallivm, chan);
209         }
210
211         for (i = 4; i < type.length; ++i) {
212            shuffles[i] = shuffles[i % 4];
213         }
214
215         res = LLVMBuildShuffleVector(builder,
216                                      res, bld->base.undef,
217                                      LLVMConstVector(shuffles, type.length),
218                                      "");
219      }
220      break;
221
222   case TGSI_FILE_IMMEDIATE:
223      res = bld->immediates[reg->Register.Index];
224      assert(res);
225      break;
226
227   case TGSI_FILE_INPUT:
228      res = bld->inputs[reg->Register.Index];
229      assert(res);
230      break;
231
232   case TGSI_FILE_TEMPORARY:
233      {
234         LLVMValueRef temp_ptr;
235         temp_ptr = bld->temps[reg->Register.Index];
236         res = LLVMBuildLoad(builder, temp_ptr, "");
237         if (!res)
238            return bld->base.undef;
239      }
240      break;
241
242   default:
243      assert(0 && "invalid src register in emit_fetch()");
244      return bld->base.undef;
245   }
246
247   /*
248    * Apply sign modifier.
249    */
250
251   if (reg->Register.Absolute) {
252      res = lp_build_abs(&bld->base, res);
253   }
254
255   if(reg->Register.Negate) {
256      res = lp_build_negate(&bld->base, res);
257   }
258
259   /*
260    * Swizzle the argument
261    */
262
263   res = swizzle_aos(bld, res,
264                     reg->Register.SwizzleX,
265                     reg->Register.SwizzleY,
266                     reg->Register.SwizzleZ,
267                     reg->Register.SwizzleW);
268
269   return res;
270}
271
272
273/**
274 * Register store.
275 */
276static void
277emit_store(
278   struct lp_build_tgsi_aos_context *bld,
279   const struct tgsi_full_instruction *inst,
280   unsigned index,
281   LLVMValueRef value)
282{
283   LLVMBuilderRef builder = bld->base.gallivm->builder;
284   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
285   LLVMValueRef mask = NULL;
286   LLVMValueRef ptr;
287
288   /*
289    * Saturate the value
290    */
291
292   switch (inst->Instruction.Saturate) {
293   case TGSI_SAT_NONE:
294      break;
295
296   case TGSI_SAT_ZERO_ONE:
297      value = lp_build_max(&bld->base, value, bld->base.zero);
298      value = lp_build_min(&bld->base, value, bld->base.one);
299      break;
300
301   case TGSI_SAT_MINUS_PLUS_ONE:
302      value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0));
303      value = lp_build_min(&bld->base, value, bld->base.one);
304      break;
305
306   default:
307      assert(0);
308   }
309
310   /*
311    * Translate the register file
312    */
313
314   assert(!reg->Register.Indirect);
315
316   switch (reg->Register.File) {
317   case TGSI_FILE_OUTPUT:
318      ptr = bld->outputs[reg->Register.Index];
319      break;
320
321   case TGSI_FILE_TEMPORARY:
322      ptr = bld->temps[reg->Register.Index];
323      break;
324
325   case TGSI_FILE_ADDRESS:
326      ptr = bld->addr[reg->Indirect.Index];
327      break;
328
329   case TGSI_FILE_PREDICATE:
330      ptr = bld->preds[reg->Register.Index];
331      break;
332
333   default:
334      assert(0);
335      return;
336   }
337
338   /*
339    * Predicate
340    */
341
342   if (inst->Instruction.Predicate) {
343      LLVMValueRef pred;
344
345      assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);
346
347      pred = LLVMBuildLoad(builder,
348                           bld->preds[inst->Predicate.Index], "");
349
350      /*
351       * Convert the value to an integer mask.
352       */
353      pred = lp_build_compare(bld->base.gallivm,
354                               bld->base.type,
355                               PIPE_FUNC_NOTEQUAL,
356                               pred,
357                               bld->base.zero);
358
359      if (inst->Predicate.Negate) {
360         pred = LLVMBuildNot(builder, pred, "");
361      }
362
363      pred = swizzle_aos(bld, pred,
364                         inst->Predicate.SwizzleX,
365                         inst->Predicate.SwizzleY,
366                         inst->Predicate.SwizzleZ,
367                         inst->Predicate.SwizzleW);
368
369      if (mask) {
370         mask = LLVMBuildAnd(builder, mask, pred, "");
371      } else {
372         mask = pred;
373      }
374   }
375
376   /*
377    * Writemask
378    */
379
380   if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
381      LLVMValueRef writemask;
382
383      writemask = lp_build_const_mask_aos(bld->base.gallivm, bld->base.type,
384                                          reg->Register.WriteMask);
385
386      if (mask) {
387         mask = LLVMBuildAnd(builder, mask, writemask, "");
388      } else {
389         mask = writemask;
390      }
391   }
392
393   if (mask) {
394      LLVMValueRef orig_value;
395
396      orig_value = LLVMBuildLoad(builder, ptr, "");
397      value = lp_build_select(&bld->base,
398                              mask, value, orig_value);
399   }
400
401   LLVMBuildStore(builder, value, ptr);
402}
403
404
405/**
406 * High-level instruction translators.
407 */
408
409static LLVMValueRef
410emit_tex(struct lp_build_tgsi_aos_context *bld,
411         const struct tgsi_full_instruction *inst,
412         enum lp_build_tex_modifier modifier)
413{
414   unsigned target;
415   unsigned unit;
416   LLVMValueRef coords;
417   LLVMValueRef ddx;
418   LLVMValueRef ddy;
419
420   if (!bld->sampler) {
421      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
422      return bld->base.undef;
423   }
424
425   target = inst->Texture.Texture;
426
427   coords = emit_fetch( bld, inst, 0 );
428
429   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
430      ddx = emit_fetch( bld, inst, 1 );
431      ddy = emit_fetch( bld, inst, 2 );
432      unit = inst->Src[3].Register.Index;
433   }  else {
434#if 0
435      ddx = lp_build_ddx( &bld->base, coords );
436      ddy = lp_build_ddy( &bld->base, coords );
437#else
438      /* TODO */
439      ddx = bld->base.one;
440      ddy = bld->base.one;
441#endif
442      unit = inst->Src[1].Register.Index;
443   }
444
445   return bld->sampler->emit_fetch_texel(bld->sampler,
446                                         &bld->base,
447                                         target, unit,
448                                         coords, ddx, ddy,
449                                         modifier);
450}
451
452
453static void
454emit_declaration(
455   struct lp_build_tgsi_aos_context *bld,
456   const struct tgsi_full_declaration *decl)
457{
458   struct gallivm_state *gallivm = bld->base.gallivm;
459   LLVMTypeRef vec_type = lp_build_vec_type(bld->base.gallivm, bld->base.type);
460
461   unsigned first = decl->Range.First;
462   unsigned last = decl->Range.Last;
463   unsigned idx;
464
465   for (idx = first; idx <= last; ++idx) {
466      switch (decl->Declaration.File) {
467      case TGSI_FILE_TEMPORARY:
468         assert(idx < LP_MAX_TGSI_TEMPS);
469         if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
470            LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
471            bld->temps_array = lp_build_array_alloca(bld->base.gallivm,
472                                                     vec_type, array_size, "");
473         } else {
474            bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
475         }
476         break;
477
478      case TGSI_FILE_OUTPUT:
479         bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
480         break;
481
482      case TGSI_FILE_ADDRESS:
483         assert(idx < LP_MAX_TGSI_ADDRS);
484         bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
485         break;
486
487      case TGSI_FILE_PREDICATE:
488         assert(idx < LP_MAX_TGSI_PREDS);
489         bld->preds[idx] = lp_build_alloca(gallivm, vec_type, "");
490         break;
491
492      default:
493         /* don't need to declare other vars */
494         break;
495      }
496   }
497}
498
499
500/**
501 * Emit LLVM for one TGSI instruction.
502 * \param return TRUE for success, FALSE otherwise
503 */
504static boolean
505emit_instruction(
506   struct lp_build_tgsi_aos_context *bld,
507   const struct tgsi_full_instruction *inst,
508   const struct tgsi_opcode_info *info,
509   int *pc)
510{
511   LLVMValueRef src0, src1, src2;
512   LLVMValueRef tmp0, tmp1;
513   LLVMValueRef dst0 = NULL;
514
515   /*
516    * Stores and write masks are handled in a general fashion after the long
517    * instruction opcode switch statement.
518    *
519    * Although not stricitly necessary, we avoid generating instructions for
520    * channels which won't be stored, in cases where's that easy. For some
521    * complex instructions, like texture sampling, it is more convenient to
522    * assume a full writemask and then let LLVM optimization passes eliminate
523    * redundant code.
524    */
525
526   (*pc)++;
527
528   assert(info->num_dst <= 1);
529   if (info->num_dst) {
530      dst0 = bld->base.undef;
531   }
532
533   switch (inst->Instruction.Opcode) {
534   case TGSI_OPCODE_ARL:
535      src0 = emit_fetch(bld, inst, 0);
536      dst0 = lp_build_floor(&bld->base, src0);
537      break;
538
539   case TGSI_OPCODE_MOV:
540      dst0 = emit_fetch(bld, inst, 0);
541      break;
542
543   case TGSI_OPCODE_LIT:
544      return FALSE;
545
546   case TGSI_OPCODE_RCP:
547   /* TGSI_OPCODE_RECIP */
548      src0 = emit_fetch(bld, inst, 0);
549      dst0 = lp_build_rcp(&bld->base, src0);
550      break;
551
552   case TGSI_OPCODE_RSQ:
553   /* TGSI_OPCODE_RECIPSQRT */
554      src0 = emit_fetch(bld, inst, 0);
555      tmp0 = lp_build_abs(&bld->base, src0);
556      dst0 = lp_build_rsqrt(&bld->base, tmp0);
557      break;
558
559   case TGSI_OPCODE_EXP:
560      return FALSE;
561
562   case TGSI_OPCODE_LOG:
563      return FALSE;
564
565   case TGSI_OPCODE_MUL:
566      src0 = emit_fetch(bld, inst, 0);
567      src1 = emit_fetch(bld, inst, 1);
568      dst0 = lp_build_mul(&bld->base, src0, src1);
569      break;
570
571   case TGSI_OPCODE_ADD:
572      src0 = emit_fetch(bld, inst, 0);
573      src1 = emit_fetch(bld, inst, 1);
574      dst0 = lp_build_add(&bld->base, src0, src1);
575      break;
576
577   case TGSI_OPCODE_DP3:
578   /* TGSI_OPCODE_DOT3 */
579      return FALSE;
580
581   case TGSI_OPCODE_DP4:
582   /* TGSI_OPCODE_DOT4 */
583      return FALSE;
584
585   case TGSI_OPCODE_DST:
586      return FALSE;
587
588   case TGSI_OPCODE_MIN:
589      src0 = emit_fetch(bld, inst, 0);
590      src1 = emit_fetch(bld, inst, 1);
591      dst0 = lp_build_max(&bld->base, src0, src1);
592      break;
593
594   case TGSI_OPCODE_MAX:
595      src0 = emit_fetch(bld, inst, 0);
596      src1 = emit_fetch(bld, inst, 1);
597      dst0 = lp_build_max(&bld->base, src0, src1);
598      break;
599
600   case TGSI_OPCODE_SLT:
601   /* TGSI_OPCODE_SETLT */
602      src0 = emit_fetch(bld, inst, 0);
603      src1 = emit_fetch(bld, inst, 1);
604      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, src1);
605      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
606      break;
607
608   case TGSI_OPCODE_SGE:
609   /* TGSI_OPCODE_SETGE */
610      src0 = emit_fetch(bld, inst, 0);
611      src1 = emit_fetch(bld, inst, 1);
612      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, src0, src1);
613      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
614      break;
615
616   case TGSI_OPCODE_MAD:
617   /* TGSI_OPCODE_MADD */
618      src0 = emit_fetch(bld, inst, 0);
619      src1 = emit_fetch(bld, inst, 1);
620      src2 = emit_fetch(bld, inst, 2);
621      tmp0 = lp_build_mul(&bld->base, src0, src1);
622      dst0 = lp_build_add(&bld->base, tmp0, src2);
623      break;
624
625   case TGSI_OPCODE_SUB:
626      src0 = emit_fetch(bld, inst, 0);
627      src1 = emit_fetch(bld, inst, 1);
628      dst0 = lp_build_sub(&bld->base, src0, src1);
629      break;
630
631   case TGSI_OPCODE_LRP:
632      src0 = emit_fetch(bld, inst, 0);
633      src1 = emit_fetch(bld, inst, 1);
634      src2 = emit_fetch(bld, inst, 2);
635      tmp0 = lp_build_sub(&bld->base, src1, src2);
636      tmp0 = lp_build_mul(&bld->base, src0, tmp0);
637      dst0 = lp_build_add(&bld->base, tmp0, src2);
638      break;
639
640   case TGSI_OPCODE_CND:
641      src0 = emit_fetch(bld, inst, 0);
642      src1 = emit_fetch(bld, inst, 1);
643      src2 = emit_fetch(bld, inst, 2);
644      tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type, 0.5);
645      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src2, tmp1);
646      dst0 = lp_build_select(&bld->base, tmp0, src0, src1);
647      break;
648
649   case TGSI_OPCODE_DP2A:
650      return FALSE;
651
652   case TGSI_OPCODE_FRC:
653      src0 = emit_fetch(bld, inst, 0);
654      tmp0 = lp_build_floor(&bld->base, src0);
655      dst0 = lp_build_sub(&bld->base, src0, tmp0);
656      break;
657
658   case TGSI_OPCODE_CLAMP:
659      src0 = emit_fetch(bld, inst, 0);
660      src1 = emit_fetch(bld, inst, 1);
661      src2 = emit_fetch(bld, inst, 2);
662      tmp0 = lp_build_max(&bld->base, src0, src1);
663      dst0 = lp_build_min(&bld->base, tmp0, src2);
664      break;
665
666   case TGSI_OPCODE_FLR:
667      src0 = emit_fetch(bld, inst, 0);
668      dst0 = lp_build_floor(&bld->base, src0);
669      break;
670
671   case TGSI_OPCODE_ROUND:
672      src0 = emit_fetch(bld, inst, 0);
673      dst0 = lp_build_round(&bld->base, src0);
674      break;
675
676   case TGSI_OPCODE_EX2:
677      src0 = emit_fetch(bld, inst, 0);
678      tmp0 = lp_build_swizzle_scalar_aos(&bld->base, src0, TGSI_SWIZZLE_X);
679      dst0 = lp_build_exp2(&bld->base, tmp0);
680      break;
681
682   case TGSI_OPCODE_LG2:
683      src0 = emit_fetch(bld, inst, 0);
684      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
685      dst0 = lp_build_log2(&bld->base, tmp0);
686      break;
687
688   case TGSI_OPCODE_POW:
689      src0 = emit_fetch(bld, inst, 0);
690      src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
691      src1 = emit_fetch(bld, inst, 1);
692      src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
693      dst0 = lp_build_pow(&bld->base, src0, src1);
694      break;
695
696   case TGSI_OPCODE_XPD:
697      return FALSE;
698
699   case TGSI_OPCODE_ABS:
700      src0 = emit_fetch(bld, inst, 0);
701      dst0 = lp_build_abs(&bld->base, src0);
702      break;
703
704   case TGSI_OPCODE_RCC:
705      /* deprecated? */
706      assert(0);
707      return FALSE;
708
709   case TGSI_OPCODE_DPH:
710      return FALSE;
711
712   case TGSI_OPCODE_COS:
713      src0 = emit_fetch(bld, inst, 0);
714      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
715      dst0 = lp_build_cos(&bld->base, tmp0);
716      break;
717
718   case TGSI_OPCODE_DDX:
719      return FALSE;
720
721   case TGSI_OPCODE_DDY:
722      return FALSE;
723
724   case TGSI_OPCODE_KILP:
725      /* predicated kill */
726      return FALSE;
727
728   case TGSI_OPCODE_KIL:
729      /* conditional kill */
730      return FALSE;
731
732   case TGSI_OPCODE_PK2H:
733      return FALSE;
734      break;
735
736   case TGSI_OPCODE_PK2US:
737      return FALSE;
738      break;
739
740   case TGSI_OPCODE_PK4B:
741      return FALSE;
742      break;
743
744   case TGSI_OPCODE_PK4UB:
745      return FALSE;
746
747   case TGSI_OPCODE_RFL:
748      return FALSE;
749
750   case TGSI_OPCODE_SEQ:
751      src0 = emit_fetch(bld, inst, 0);
752      src1 = emit_fetch(bld, inst, 1);
753      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_EQUAL, src0, src1);
754      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
755      break;
756
757   case TGSI_OPCODE_SFL:
758      dst0 = bld->base.zero;
759      break;
760
761   case TGSI_OPCODE_SGT:
762      src0 = emit_fetch(bld, inst, 0);
763      src1 = emit_fetch(bld, inst, 1);
764      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src0, src1);
765      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
766      break;
767
768   case TGSI_OPCODE_SIN:
769      src0 = emit_fetch(bld, inst, 0);
770      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
771      dst0 = lp_build_sin(&bld->base, tmp0);
772      break;
773
774   case TGSI_OPCODE_SLE:
775      src0 = emit_fetch(bld, inst, 0);
776      src1 = emit_fetch(bld, inst, 1);
777      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LEQUAL, src0, src1);
778      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
779      break;
780
781   case TGSI_OPCODE_SNE:
782      src0 = emit_fetch(bld, inst, 0);
783      src1 = emit_fetch(bld, inst, 1);
784      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, src0, src1);
785      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
786      break;
787
788   case TGSI_OPCODE_STR:
789      dst0 = bld->base.one;
790      break;
791
792   case TGSI_OPCODE_TEX:
793      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
794      break;
795
796   case TGSI_OPCODE_TXD:
797      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
798      break;
799
800   case TGSI_OPCODE_UP2H:
801      /* deprecated */
802      assert (0);
803      return FALSE;
804      break;
805
806   case TGSI_OPCODE_UP2US:
807      /* deprecated */
808      assert(0);
809      return FALSE;
810      break;
811
812   case TGSI_OPCODE_UP4B:
813      /* deprecated */
814      assert(0);
815      return FALSE;
816      break;
817
818   case TGSI_OPCODE_UP4UB:
819      /* deprecated */
820      assert(0);
821      return FALSE;
822      break;
823
824   case TGSI_OPCODE_X2D:
825      /* deprecated? */
826      assert(0);
827      return FALSE;
828      break;
829
830   case TGSI_OPCODE_ARA:
831      /* deprecated */
832      assert(0);
833      return FALSE;
834      break;
835
836   case TGSI_OPCODE_ARR:
837      src0 = emit_fetch(bld, inst, 0);
838      dst0 = lp_build_round(&bld->base, src0);
839      break;
840
841   case TGSI_OPCODE_BRA:
842      /* deprecated */
843      assert(0);
844      return FALSE;
845      break;
846
847   case TGSI_OPCODE_CAL:
848      return FALSE;
849
850   case TGSI_OPCODE_RET:
851      return FALSE;
852
853   case TGSI_OPCODE_END:
854      *pc = -1;
855      break;
856
857   case TGSI_OPCODE_SSG:
858   /* TGSI_OPCODE_SGN */
859      tmp0 = emit_fetch(bld, inst, 0);
860      dst0 = lp_build_sgn(&bld->base, tmp0);
861      break;
862
863   case TGSI_OPCODE_CMP:
864      src0 = emit_fetch(bld, inst, 0);
865      src1 = emit_fetch(bld, inst, 1);
866      src2 = emit_fetch(bld, inst, 2);
867      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, bld->base.zero);
868      dst0 = lp_build_select(&bld->base, tmp0, src1, src2);
869      break;
870
871   case TGSI_OPCODE_SCS:
872      return FALSE;
873
874   case TGSI_OPCODE_TXB:
875      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
876      break;
877
878   case TGSI_OPCODE_NRM:
879      /* fall-through */
880   case TGSI_OPCODE_NRM4:
881      return FALSE;
882
883   case TGSI_OPCODE_DIV:
884      /* deprecated */
885      assert(0);
886      return FALSE;
887      break;
888
889   case TGSI_OPCODE_DP2:
890      return FALSE;
891
892   case TGSI_OPCODE_TXL:
893      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
894      break;
895
896   case TGSI_OPCODE_TXP:
897      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
898      break;
899
900   case TGSI_OPCODE_BRK:
901      return FALSE;
902
903   case TGSI_OPCODE_IF:
904      return FALSE;
905
906   case TGSI_OPCODE_BGNLOOP:
907      return FALSE;
908
909   case TGSI_OPCODE_BGNSUB:
910      return FALSE;
911
912   case TGSI_OPCODE_ELSE:
913      return FALSE;
914
915   case TGSI_OPCODE_ENDIF:
916      return FALSE;
917
918   case TGSI_OPCODE_ENDLOOP:
919      return FALSE;
920
921   case TGSI_OPCODE_ENDSUB:
922      return FALSE;
923
924   case TGSI_OPCODE_PUSHA:
925      /* deprecated? */
926      assert(0);
927      return FALSE;
928      break;
929
930   case TGSI_OPCODE_POPA:
931      /* deprecated? */
932      assert(0);
933      return FALSE;
934      break;
935
936   case TGSI_OPCODE_CEIL:
937      src0 = emit_fetch(bld, inst, 0);
938      dst0 = lp_build_ceil(&bld->base, src0);
939      break;
940
941   case TGSI_OPCODE_I2F:
942      /* deprecated? */
943      assert(0);
944      return FALSE;
945      break;
946
947   case TGSI_OPCODE_NOT:
948      /* deprecated? */
949      assert(0);
950      return FALSE;
951      break;
952
953   case TGSI_OPCODE_TRUNC:
954      src0 = emit_fetch(bld, inst, 0);
955      dst0 = lp_build_trunc(&bld->base, src0);
956      break;
957
958   case TGSI_OPCODE_SHL:
959      /* deprecated? */
960      assert(0);
961      return FALSE;
962      break;
963
964   case TGSI_OPCODE_ISHR:
965      /* deprecated? */
966      assert(0);
967      return FALSE;
968      break;
969
970   case TGSI_OPCODE_AND:
971      /* deprecated? */
972      assert(0);
973      return FALSE;
974      break;
975
976   case TGSI_OPCODE_OR:
977      /* deprecated? */
978      assert(0);
979      return FALSE;
980      break;
981
982   case TGSI_OPCODE_MOD:
983      /* deprecated? */
984      assert(0);
985      return FALSE;
986      break;
987
988   case TGSI_OPCODE_XOR:
989      /* deprecated? */
990      assert(0);
991      return FALSE;
992      break;
993
994   case TGSI_OPCODE_SAD:
995      /* deprecated? */
996      assert(0);
997      return FALSE;
998      break;
999
1000   case TGSI_OPCODE_TXF:
1001      /* deprecated? */
1002      assert(0);
1003      return FALSE;
1004      break;
1005
1006   case TGSI_OPCODE_TXQ:
1007      /* deprecated? */
1008      assert(0);
1009      return FALSE;
1010      break;
1011
1012   case TGSI_OPCODE_CONT:
1013      return FALSE;
1014
1015   case TGSI_OPCODE_EMIT:
1016      return FALSE;
1017      break;
1018
1019   case TGSI_OPCODE_ENDPRIM:
1020      return FALSE;
1021      break;
1022
1023   case TGSI_OPCODE_NOP:
1024      break;
1025
1026   default:
1027      return FALSE;
1028   }
1029
1030   if (info->num_dst) {
1031      emit_store(bld, inst, 0, dst0);
1032   }
1033
1034   return TRUE;
1035}
1036
1037
1038void
1039lp_build_tgsi_aos(struct gallivm_state *gallivm,
1040                  const struct tgsi_token *tokens,
1041                  struct lp_type type,
1042                  const unsigned char swizzles[4],
1043                  LLVMValueRef consts_ptr,
1044                  const LLVMValueRef *inputs,
1045                  LLVMValueRef *outputs,
1046                  struct lp_build_sampler_aos *sampler,
1047                  const struct tgsi_shader_info *info)
1048{
1049   struct lp_build_tgsi_aos_context bld;
1050   struct tgsi_parse_context parse;
1051   uint num_immediates = 0;
1052   uint num_instructions = 0;
1053   unsigned chan;
1054   int pc = 0;
1055
1056   /* Setup build context */
1057   memset(&bld, 0, sizeof bld);
1058   lp_build_context_init(&bld.base, gallivm, type);
1059   lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
1060
1061   for (chan = 0; chan < 4; ++chan) {
1062      bld.swizzles[chan] = swizzles[chan];
1063      bld.inv_swizzles[swizzles[chan]] = chan;
1064   }
1065
1066   bld.inputs = inputs;
1067   bld.outputs = outputs;
1068   bld.consts_ptr = consts_ptr;
1069   bld.sampler = sampler;
1070   bld.indirect_files = info->indirect_files;
1071   bld.instructions = (struct tgsi_full_instruction *)
1072                      MALLOC(LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction));
1073   bld.max_instructions = LP_MAX_INSTRUCTIONS;
1074
1075   if (!bld.instructions) {
1076      return;
1077   }
1078
1079   tgsi_parse_init(&parse, tokens);
1080
1081   while (!tgsi_parse_end_of_tokens(&parse)) {
1082      tgsi_parse_token(&parse);
1083
1084      switch(parse.FullToken.Token.Type) {
1085      case TGSI_TOKEN_TYPE_DECLARATION:
1086         /* Inputs already interpolated */
1087         emit_declaration(&bld, &parse.FullToken.FullDeclaration);
1088         break;
1089
1090      case TGSI_TOKEN_TYPE_INSTRUCTION:
1091         {
1092            /* save expanded instruction */
1093            if (num_instructions == bld.max_instructions) {
1094               struct tgsi_full_instruction *instructions;
1095               instructions = REALLOC(bld.instructions,
1096                                      bld.max_instructions
1097                                      * sizeof(struct tgsi_full_instruction),
1098                                      (bld.max_instructions + LP_MAX_INSTRUCTIONS)
1099                                      * sizeof(struct tgsi_full_instruction));
1100               if (!instructions) {
1101                  break;
1102               }
1103               bld.instructions = instructions;
1104               bld.max_instructions += LP_MAX_INSTRUCTIONS;
1105            }
1106
1107            memcpy(bld.instructions + num_instructions,
1108                   &parse.FullToken.FullInstruction,
1109                   sizeof(bld.instructions[0]));
1110
1111            num_instructions++;
1112         }
1113
1114         break;
1115
1116      case TGSI_TOKEN_TYPE_IMMEDIATE:
1117         /* simply copy the immediate values into the next immediates[] slot */
1118         {
1119            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1120            float imm[4];
1121            assert(size <= 4);
1122            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
1123            for (chan = 0; chan < 4; ++chan) {
1124               imm[chan] = 0.0f;
1125            }
1126            for (chan = 0; chan < size; ++chan) {
1127               unsigned swizzle = bld.swizzles[chan];
1128               imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
1129            }
1130            bld.immediates[num_immediates] =
1131                     lp_build_const_aos(gallivm, type,
1132                                        imm[0], imm[1], imm[2], imm[3],
1133                                        NULL);
1134            num_immediates++;
1135         }
1136         break;
1137
1138      case TGSI_TOKEN_TYPE_PROPERTY:
1139         break;
1140
1141      default:
1142         assert(0);
1143      }
1144   }
1145
1146   while (pc != -1) {
1147      struct tgsi_full_instruction *instr = bld.instructions + pc;
1148      const struct tgsi_opcode_info *opcode_info =
1149         tgsi_get_opcode_info(instr->Instruction.Opcode);
1150      if (!emit_instruction(&bld, instr, opcode_info, &pc))
1151         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1152                       opcode_info->mnemonic);
1153   }
1154
1155   if (0) {
1156      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
1157      LLVMValueRef function = LLVMGetBasicBlockParent(block);
1158      debug_printf("11111111111111111111111111111 \n");
1159      tgsi_dump(tokens, 0);
1160      lp_debug_dump_value(function);
1161      debug_printf("2222222222222222222222222222 \n");
1162   }
1163   tgsi_parse_free(&parse);
1164
1165   if (0) {
1166      LLVMModuleRef module = LLVMGetGlobalParent(
1167         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
1168      LLVMDumpModule(module);
1169   }
1170
1171   FREE(bld.instructions);
1172}
1173
1174