lp_bld_tgsi_aos.c revision efc82aef35a2aac5d2ed9774f6d28f2626796416
1/**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * @file
30 * TGSI to LLVM IR translation -- AoS.
31 *
32 * FIXME:
 * - No control flow support: the existing control flow code should be factored
 *   out of the SoA code into a common module and shared.
 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
36 *
37 * @author Jose Fonseca <jfonseca@vmware.com>
38 */
39
40#include "pipe/p_config.h"
41#include "pipe/p_shader_tokens.h"
42#include "util/u_debug.h"
43#include "util/u_math.h"
44#include "util/u_memory.h"
45#include "tgsi/tgsi_dump.h"
46#include "tgsi/tgsi_info.h"
47#include "tgsi/tgsi_parse.h"
48#include "tgsi/tgsi_util.h"
49#include "tgsi/tgsi_scan.h"
50#include "lp_bld_type.h"
51#include "lp_bld_const.h"
52#include "lp_bld_arit.h"
53#include "lp_bld_logic.h"
54#include "lp_bld_swizzle.h"
55#include "lp_bld_flow.h"
56#include "lp_bld_quad.h"
57#include "lp_bld_tgsi.h"
58#include "lp_bld_limits.h"
59#include "lp_bld_debug.h"
60
61
62#define LP_MAX_INSTRUCTIONS 256
63
64
65struct lp_build_tgsi_aos_context
66{
67   struct lp_build_context base;
68
69   /* Builder for integer masks and indices */
70   struct lp_build_context int_bld;
71
72   /*
73    * AoS swizzle used:
74    * - swizzles[0] = red index
75    * - swizzles[1] = green index
76    * - swizzles[2] = blue index
77    * - swizzles[3] = alpha index
78    */
79   unsigned char swizzles[4];
80   unsigned char inv_swizzles[4];
81
82   LLVMValueRef consts_ptr;
83   const LLVMValueRef *inputs;
84   LLVMValueRef *outputs;
85
86   struct lp_build_sampler_aos *sampler;
87
88   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES];
89   LLVMValueRef temps[LP_MAX_TGSI_TEMPS];
90   LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
91   LLVMValueRef preds[LP_MAX_TGSI_PREDS];
92
93   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
94    * set in the indirect_files field.
95    * The temps[] array above is unused then.
96    */
97   LLVMValueRef temps_array;
98
99   /** bitmask indicating which register files are accessed indirectly */
100   unsigned indirect_files;
101
102   struct tgsi_full_instruction *instructions;
103   uint max_instructions;
104};
105
106
107/**
108 * Wrapper around lp_build_swizzle_aos which translates swizzles to another
109 * ordering.
110 */
111static LLVMValueRef
112swizzle_aos(struct lp_build_tgsi_aos_context *bld,
113            LLVMValueRef a,
114            unsigned swizzle_x,
115            unsigned swizzle_y,
116            unsigned swizzle_z,
117            unsigned swizzle_w)
118{
119   unsigned char swizzles[4];
120
121   assert(swizzle_x < 4);
122   assert(swizzle_y < 4);
123   assert(swizzle_z < 4);
124   assert(swizzle_w < 4);
125
126   swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
127   swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
128   swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
129   swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
130
131   return lp_build_swizzle_aos(&bld->base, a, swizzles);
132}
133
134
135static LLVMValueRef
136swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
137                   LLVMValueRef a,
138                   unsigned chan)
139{
140   chan = bld->swizzles[chan];
141   return lp_build_swizzle_scalar_aos(&bld->base, a, chan);
142}
143
144
145/**
146 * Register fetch.
147 */
148static LLVMValueRef
149emit_fetch(
150   struct lp_build_tgsi_aos_context *bld,
151   const struct tgsi_full_instruction *inst,
152   unsigned src_op)
153{
154   struct lp_type type = bld->base.type;
155   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
156   LLVMValueRef res;
157   unsigned chan;
158
159   assert(!reg->Register.Indirect);
160
161   /*
162    * Fetch the from the register file.
163    */
164
165   switch (reg->Register.File) {
166   case TGSI_FILE_CONSTANT:
167      /*
168       * Get the constants components
169       */
170
171      res = bld->base.undef;
172      for (chan = 0; chan < 4; ++chan) {
173         LLVMValueRef index;
174         LLVMValueRef scalar_ptr;
175         LLVMValueRef scalar;
176         LLVMValueRef swizzle;
177
178         index = lp_build_const_int32(bld->base.gallivm, reg->Register.Index * 4 + chan);
179
180         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
181                                   &index, 1, "");
182
183         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
184
185         lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
186
187         /*
188          * NOTE: constants array is always assumed to be RGBA
189          */
190
191         swizzle = lp_build_const_int32(bld->base.gallivm, chan);
192
193         res = LLVMBuildInsertElement(bld->base.builder, res, scalar, swizzle, "");
194      }
195
196      /*
197       * Broadcast the first quaternion to all others.
198       *
199       * XXX: could be factored into a reusable function.
200       */
201
202      if (type.length > 4) {
203         LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
204         unsigned i;
205
206         for (chan = 0; chan < 4; ++chan) {
207            shuffles[chan] = lp_build_const_int32(bld->base.gallivm, chan);
208         }
209
210         for (i = 4; i < type.length; ++i) {
211            shuffles[i] = shuffles[i % 4];
212         }
213
214         res = LLVMBuildShuffleVector(bld->base.builder,
215                                      res, bld->base.undef,
216                                      LLVMConstVector(shuffles, type.length),
217                                      "");
218      }
219      break;
220
221   case TGSI_FILE_IMMEDIATE:
222      res = bld->immediates[reg->Register.Index];
223      assert(res);
224      break;
225
226   case TGSI_FILE_INPUT:
227      res = bld->inputs[reg->Register.Index];
228      assert(res);
229      break;
230
231   case TGSI_FILE_TEMPORARY:
232      {
233         LLVMValueRef temp_ptr;
234         temp_ptr = bld->temps[reg->Register.Index];
235         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
236         if (!res)
237            return bld->base.undef;
238      }
239      break;
240
241   default:
242      assert(0 && "invalid src register in emit_fetch()");
243      return bld->base.undef;
244   }
245
246   /*
247    * Apply sign modifier.
248    */
249
250   if (reg->Register.Absolute) {
251      res = lp_build_abs(&bld->base, res);
252   }
253
254   if(reg->Register.Negate) {
255      res = lp_build_negate(&bld->base, res);
256   }
257
258   /*
259    * Swizzle the argument
260    */
261
262   res = swizzle_aos(bld, res,
263                     reg->Register.SwizzleX,
264                     reg->Register.SwizzleY,
265                     reg->Register.SwizzleZ,
266                     reg->Register.SwizzleW);
267
268   return res;
269}
270
271
272/**
273 * Register store.
274 */
275static void
276emit_store(
277   struct lp_build_tgsi_aos_context *bld,
278   const struct tgsi_full_instruction *inst,
279   unsigned index,
280   LLVMValueRef value)
281{
282   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
283   LLVMValueRef mask = NULL;
284   LLVMValueRef ptr;
285
286   /*
287    * Saturate the value
288    */
289
290   switch (inst->Instruction.Saturate) {
291   case TGSI_SAT_NONE:
292      break;
293
294   case TGSI_SAT_ZERO_ONE:
295      value = lp_build_max(&bld->base, value, bld->base.zero);
296      value = lp_build_min(&bld->base, value, bld->base.one);
297      break;
298
299   case TGSI_SAT_MINUS_PLUS_ONE:
300      value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0));
301      value = lp_build_min(&bld->base, value, bld->base.one);
302      break;
303
304   default:
305      assert(0);
306   }
307
308   /*
309    * Translate the register file
310    */
311
312   assert(!reg->Register.Indirect);
313
314   switch (reg->Register.File) {
315   case TGSI_FILE_OUTPUT:
316      ptr = bld->outputs[reg->Register.Index];
317      break;
318
319   case TGSI_FILE_TEMPORARY:
320      ptr = bld->temps[reg->Register.Index];
321      break;
322
323   case TGSI_FILE_ADDRESS:
324      ptr = bld->addr[reg->Indirect.Index];
325      break;
326
327   case TGSI_FILE_PREDICATE:
328      ptr = bld->preds[reg->Register.Index];
329      break;
330
331   default:
332      assert(0);
333      return;
334   }
335
336   /*
337    * Predicate
338    */
339
340   if (inst->Instruction.Predicate) {
341      LLVMValueRef pred;
342
343      assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);
344
345      pred = LLVMBuildLoad(bld->base.builder,
346                           bld->preds[inst->Predicate.Index], "");
347
348      /*
349       * Convert the value to an integer mask.
350       */
351      pred = lp_build_compare(bld->base.gallivm,
352                               bld->base.type,
353                               PIPE_FUNC_NOTEQUAL,
354                               pred,
355                               bld->base.zero);
356
357      if (inst->Predicate.Negate) {
358         pred = LLVMBuildNot(bld->base.builder, pred, "");
359      }
360
361      pred = swizzle_aos(bld, pred,
362                         inst->Predicate.SwizzleX,
363                         inst->Predicate.SwizzleY,
364                         inst->Predicate.SwizzleZ,
365                         inst->Predicate.SwizzleW);
366
367      if (mask) {
368         mask = LLVMBuildAnd(bld->base.builder, mask, pred, "");
369      } else {
370         mask = pred;
371      }
372   }
373
374   /*
375    * Writemask
376    */
377
378   if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
379      LLVMValueRef writemask;
380
381      writemask = lp_build_const_mask_aos(bld->base.gallivm, bld->base.type,
382                                          reg->Register.WriteMask);
383
384      if (mask) {
385         mask = LLVMBuildAnd(bld->base.builder, mask, writemask, "");
386      } else {
387         mask = writemask;
388      }
389   }
390
391   if (mask) {
392      LLVMValueRef orig_value;
393
394      orig_value = LLVMBuildLoad(bld->base.builder, ptr, "");
395      value = lp_build_select(&bld->base,
396                              mask, value, orig_value);
397   }
398
399   LLVMBuildStore(bld->base.builder, value, ptr);
400}
401
402
403/**
404 * High-level instruction translators.
405 */
406
407static LLVMValueRef
408emit_tex(struct lp_build_tgsi_aos_context *bld,
409         const struct tgsi_full_instruction *inst,
410         enum lp_build_tex_modifier modifier)
411{
412   unsigned target;
413   unsigned unit;
414   LLVMValueRef coords;
415   LLVMValueRef ddx;
416   LLVMValueRef ddy;
417
418   if (!bld->sampler) {
419      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
420      return bld->base.undef;
421   }
422
423   target = inst->Texture.Texture;
424
425   coords = emit_fetch( bld, inst, 0 );
426
427   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
428      ddx = emit_fetch( bld, inst, 1 );
429      ddy = emit_fetch( bld, inst, 2 );
430      unit = inst->Src[3].Register.Index;
431   }  else {
432#if 0
433      ddx = lp_build_ddx( &bld->base, coords );
434      ddy = lp_build_ddy( &bld->base, coords );
435#else
436      /* TODO */
437      ddx = bld->base.one;
438      ddy = bld->base.one;
439#endif
440      unit = inst->Src[1].Register.Index;
441   }
442
443   return bld->sampler->emit_fetch_texel(bld->sampler,
444                                         &bld->base,
445                                         target, unit,
446                                         coords, ddx, ddy,
447                                         modifier);
448}
449
450
451static void
452emit_declaration(
453   struct lp_build_tgsi_aos_context *bld,
454   const struct tgsi_full_declaration *decl)
455{
456   struct gallivm_state *gallivm = bld->base.gallivm;
457   LLVMTypeRef vec_type = lp_build_vec_type(bld->base.gallivm, bld->base.type);
458
459   unsigned first = decl->Range.First;
460   unsigned last = decl->Range.Last;
461   unsigned idx;
462
463   for (idx = first; idx <= last; ++idx) {
464      switch (decl->Declaration.File) {
465      case TGSI_FILE_TEMPORARY:
466         assert(idx < LP_MAX_TGSI_TEMPS);
467         if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
468            LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
469            bld->temps_array = lp_build_array_alloca(bld->base.gallivm,
470                                                     vec_type, array_size, "");
471         } else {
472            bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
473         }
474         break;
475
476      case TGSI_FILE_OUTPUT:
477         bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
478         break;
479
480      case TGSI_FILE_ADDRESS:
481         assert(idx < LP_MAX_TGSI_ADDRS);
482         bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
483         break;
484
485      case TGSI_FILE_PREDICATE:
486         assert(idx < LP_MAX_TGSI_PREDS);
487         bld->preds[idx] = lp_build_alloca(gallivm, vec_type, "");
488         break;
489
490      default:
491         /* don't need to declare other vars */
492         break;
493      }
494   }
495}
496
497
498/**
499 * Emit LLVM for one TGSI instruction.
500 * \param return TRUE for success, FALSE otherwise
501 */
502static boolean
503emit_instruction(
504   struct lp_build_tgsi_aos_context *bld,
505   const struct tgsi_full_instruction *inst,
506   const struct tgsi_opcode_info *info,
507   int *pc)
508{
509   LLVMValueRef src0, src1, src2;
510   LLVMValueRef tmp0, tmp1;
511   LLVMValueRef dst0 = NULL;
512
513   /*
514    * Stores and write masks are handled in a general fashion after the long
515    * instruction opcode switch statement.
516    *
517    * Although not stricitly necessary, we avoid generating instructions for
518    * channels which won't be stored, in cases where's that easy. For some
519    * complex instructions, like texture sampling, it is more convenient to
520    * assume a full writemask and then let LLVM optimization passes eliminate
521    * redundant code.
522    */
523
524   (*pc)++;
525
526   assert(info->num_dst <= 1);
527   if (info->num_dst) {
528      dst0 = bld->base.undef;
529   }
530
531   switch (inst->Instruction.Opcode) {
532   case TGSI_OPCODE_ARL:
533      src0 = emit_fetch(bld, inst, 0);
534      dst0 = lp_build_floor(&bld->base, src0);
535      break;
536
537   case TGSI_OPCODE_MOV:
538      dst0 = emit_fetch(bld, inst, 0);
539      break;
540
541   case TGSI_OPCODE_LIT:
542      return FALSE;
543
544   case TGSI_OPCODE_RCP:
545   /* TGSI_OPCODE_RECIP */
546      src0 = emit_fetch(bld, inst, 0);
547      dst0 = lp_build_rcp(&bld->base, src0);
548      break;
549
550   case TGSI_OPCODE_RSQ:
551   /* TGSI_OPCODE_RECIPSQRT */
552      src0 = emit_fetch(bld, inst, 0);
553      tmp0 = lp_build_abs(&bld->base, src0);
554      dst0 = lp_build_rsqrt(&bld->base, tmp0);
555      break;
556
557   case TGSI_OPCODE_EXP:
558      return FALSE;
559
560   case TGSI_OPCODE_LOG:
561      return FALSE;
562
563   case TGSI_OPCODE_MUL:
564      src0 = emit_fetch(bld, inst, 0);
565      src1 = emit_fetch(bld, inst, 1);
566      dst0 = lp_build_mul(&bld->base, src0, src1);
567      break;
568
569   case TGSI_OPCODE_ADD:
570      src0 = emit_fetch(bld, inst, 0);
571      src1 = emit_fetch(bld, inst, 1);
572      dst0 = lp_build_add(&bld->base, src0, src1);
573      break;
574
575   case TGSI_OPCODE_DP3:
576   /* TGSI_OPCODE_DOT3 */
577      return FALSE;
578
579   case TGSI_OPCODE_DP4:
580   /* TGSI_OPCODE_DOT4 */
581      return FALSE;
582
583   case TGSI_OPCODE_DST:
584      return FALSE;
585
586   case TGSI_OPCODE_MIN:
587      src0 = emit_fetch(bld, inst, 0);
588      src1 = emit_fetch(bld, inst, 1);
589      dst0 = lp_build_max(&bld->base, src0, src1);
590      break;
591
592   case TGSI_OPCODE_MAX:
593      src0 = emit_fetch(bld, inst, 0);
594      src1 = emit_fetch(bld, inst, 1);
595      dst0 = lp_build_max(&bld->base, src0, src1);
596      break;
597
598   case TGSI_OPCODE_SLT:
599   /* TGSI_OPCODE_SETLT */
600      src0 = emit_fetch(bld, inst, 0);
601      src1 = emit_fetch(bld, inst, 1);
602      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, src1);
603      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
604      break;
605
606   case TGSI_OPCODE_SGE:
607   /* TGSI_OPCODE_SETGE */
608      src0 = emit_fetch(bld, inst, 0);
609      src1 = emit_fetch(bld, inst, 1);
610      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, src0, src1);
611      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
612      break;
613
614   case TGSI_OPCODE_MAD:
615   /* TGSI_OPCODE_MADD */
616      src0 = emit_fetch(bld, inst, 0);
617      src1 = emit_fetch(bld, inst, 1);
618      src2 = emit_fetch(bld, inst, 2);
619      tmp0 = lp_build_mul(&bld->base, src0, src1);
620      dst0 = lp_build_add(&bld->base, tmp0, src2);
621      break;
622
623   case TGSI_OPCODE_SUB:
624      src0 = emit_fetch(bld, inst, 0);
625      src1 = emit_fetch(bld, inst, 1);
626      dst0 = lp_build_sub(&bld->base, src0, src1);
627      break;
628
629   case TGSI_OPCODE_LRP:
630      src0 = emit_fetch(bld, inst, 0);
631      src1 = emit_fetch(bld, inst, 1);
632      src2 = emit_fetch(bld, inst, 2);
633      tmp0 = lp_build_sub(&bld->base, src1, src2);
634      tmp0 = lp_build_mul(&bld->base, src0, tmp0);
635      dst0 = lp_build_add(&bld->base, tmp0, src2);
636      break;
637
638   case TGSI_OPCODE_CND:
639      src0 = emit_fetch(bld, inst, 0);
640      src1 = emit_fetch(bld, inst, 1);
641      src2 = emit_fetch(bld, inst, 2);
642      tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type, 0.5);
643      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src2, tmp1);
644      dst0 = lp_build_select(&bld->base, tmp0, src0, src1);
645      break;
646
647   case TGSI_OPCODE_DP2A:
648      return FALSE;
649
650   case TGSI_OPCODE_FRC:
651      src0 = emit_fetch(bld, inst, 0);
652      tmp0 = lp_build_floor(&bld->base, src0);
653      dst0 = lp_build_sub(&bld->base, src0, tmp0);
654      break;
655
656   case TGSI_OPCODE_CLAMP:
657      src0 = emit_fetch(bld, inst, 0);
658      src1 = emit_fetch(bld, inst, 1);
659      src2 = emit_fetch(bld, inst, 2);
660      tmp0 = lp_build_max(&bld->base, src0, src1);
661      dst0 = lp_build_min(&bld->base, tmp0, src2);
662      break;
663
664   case TGSI_OPCODE_FLR:
665      src0 = emit_fetch(bld, inst, 0);
666      dst0 = lp_build_floor(&bld->base, src0);
667      break;
668
669   case TGSI_OPCODE_ROUND:
670      src0 = emit_fetch(bld, inst, 0);
671      dst0 = lp_build_round(&bld->base, src0);
672      break;
673
674   case TGSI_OPCODE_EX2:
675      src0 = emit_fetch(bld, inst, 0);
676      tmp0 = lp_build_swizzle_scalar_aos(&bld->base, src0, TGSI_SWIZZLE_X);
677      dst0 = lp_build_exp2(&bld->base, tmp0);
678      break;
679
680   case TGSI_OPCODE_LG2:
681      src0 = emit_fetch(bld, inst, 0);
682      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
683      dst0 = lp_build_log2(&bld->base, tmp0);
684      break;
685
686   case TGSI_OPCODE_POW:
687      src0 = emit_fetch(bld, inst, 0);
688      src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
689      src1 = emit_fetch(bld, inst, 1);
690      src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
691      dst0 = lp_build_pow(&bld->base, src0, src1);
692      break;
693
694   case TGSI_OPCODE_XPD:
695      return FALSE;
696
697   case TGSI_OPCODE_ABS:
698      src0 = emit_fetch(bld, inst, 0);
699      dst0 = lp_build_abs(&bld->base, src0);
700      break;
701
702   case TGSI_OPCODE_RCC:
703      /* deprecated? */
704      assert(0);
705      return FALSE;
706
707   case TGSI_OPCODE_DPH:
708      return FALSE;
709
710   case TGSI_OPCODE_COS:
711      src0 = emit_fetch(bld, inst, 0);
712      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
713      dst0 = lp_build_cos(&bld->base, tmp0);
714      break;
715
716   case TGSI_OPCODE_DDX:
717      return FALSE;
718
719   case TGSI_OPCODE_DDY:
720      return FALSE;
721
722   case TGSI_OPCODE_KILP:
723      /* predicated kill */
724      return FALSE;
725
726   case TGSI_OPCODE_KIL:
727      /* conditional kill */
728      return FALSE;
729
730   case TGSI_OPCODE_PK2H:
731      return FALSE;
732      break;
733
734   case TGSI_OPCODE_PK2US:
735      return FALSE;
736      break;
737
738   case TGSI_OPCODE_PK4B:
739      return FALSE;
740      break;
741
742   case TGSI_OPCODE_PK4UB:
743      return FALSE;
744
745   case TGSI_OPCODE_RFL:
746      return FALSE;
747
748   case TGSI_OPCODE_SEQ:
749      src0 = emit_fetch(bld, inst, 0);
750      src1 = emit_fetch(bld, inst, 1);
751      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_EQUAL, src0, src1);
752      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
753      break;
754
755   case TGSI_OPCODE_SFL:
756      dst0 = bld->base.zero;
757      break;
758
759   case TGSI_OPCODE_SGT:
760      src0 = emit_fetch(bld, inst, 0);
761      src1 = emit_fetch(bld, inst, 1);
762      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src0, src1);
763      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
764      break;
765
766   case TGSI_OPCODE_SIN:
767      src0 = emit_fetch(bld, inst, 0);
768      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
769      dst0 = lp_build_sin(&bld->base, tmp0);
770      break;
771
772   case TGSI_OPCODE_SLE:
773      src0 = emit_fetch(bld, inst, 0);
774      src1 = emit_fetch(bld, inst, 1);
775      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LEQUAL, src0, src1);
776      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
777      break;
778
779   case TGSI_OPCODE_SNE:
780      src0 = emit_fetch(bld, inst, 0);
781      src1 = emit_fetch(bld, inst, 1);
782      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, src0, src1);
783      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
784      break;
785
786   case TGSI_OPCODE_STR:
787      dst0 = bld->base.one;
788      break;
789
790   case TGSI_OPCODE_TEX:
791      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
792      break;
793
794   case TGSI_OPCODE_TXD:
795      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
796      break;
797
798   case TGSI_OPCODE_UP2H:
799      /* deprecated */
800      assert (0);
801      return FALSE;
802      break;
803
804   case TGSI_OPCODE_UP2US:
805      /* deprecated */
806      assert(0);
807      return FALSE;
808      break;
809
810   case TGSI_OPCODE_UP4B:
811      /* deprecated */
812      assert(0);
813      return FALSE;
814      break;
815
816   case TGSI_OPCODE_UP4UB:
817      /* deprecated */
818      assert(0);
819      return FALSE;
820      break;
821
822   case TGSI_OPCODE_X2D:
823      /* deprecated? */
824      assert(0);
825      return FALSE;
826      break;
827
828   case TGSI_OPCODE_ARA:
829      /* deprecated */
830      assert(0);
831      return FALSE;
832      break;
833
834   case TGSI_OPCODE_ARR:
835      src0 = emit_fetch(bld, inst, 0);
836      dst0 = lp_build_round(&bld->base, src0);
837      break;
838
839   case TGSI_OPCODE_BRA:
840      /* deprecated */
841      assert(0);
842      return FALSE;
843      break;
844
845   case TGSI_OPCODE_CAL:
846      return FALSE;
847
848   case TGSI_OPCODE_RET:
849      return FALSE;
850
851   case TGSI_OPCODE_END:
852      *pc = -1;
853      break;
854
855   case TGSI_OPCODE_SSG:
856   /* TGSI_OPCODE_SGN */
857      tmp0 = emit_fetch(bld, inst, 0);
858      dst0 = lp_build_sgn(&bld->base, tmp0);
859      break;
860
861   case TGSI_OPCODE_CMP:
862      src0 = emit_fetch(bld, inst, 0);
863      src1 = emit_fetch(bld, inst, 1);
864      src2 = emit_fetch(bld, inst, 2);
865      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, bld->base.zero);
866      dst0 = lp_build_select(&bld->base, tmp0, src1, src2);
867      break;
868
869   case TGSI_OPCODE_SCS:
870      return FALSE;
871
872   case TGSI_OPCODE_TXB:
873      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
874      break;
875
876   case TGSI_OPCODE_NRM:
877      /* fall-through */
878   case TGSI_OPCODE_NRM4:
879      return FALSE;
880
881   case TGSI_OPCODE_DIV:
882      /* deprecated */
883      assert(0);
884      return FALSE;
885      break;
886
887   case TGSI_OPCODE_DP2:
888      return FALSE;
889
890   case TGSI_OPCODE_TXL:
891      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
892      break;
893
894   case TGSI_OPCODE_TXP:
895      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
896      break;
897
898   case TGSI_OPCODE_BRK:
899      return FALSE;
900
901   case TGSI_OPCODE_IF:
902      return FALSE;
903
904   case TGSI_OPCODE_BGNLOOP:
905      return FALSE;
906
907   case TGSI_OPCODE_BGNSUB:
908      return FALSE;
909
910   case TGSI_OPCODE_ELSE:
911      return FALSE;
912
913   case TGSI_OPCODE_ENDIF:
914      return FALSE;
915
916   case TGSI_OPCODE_ENDLOOP:
917      return FALSE;
918
919   case TGSI_OPCODE_ENDSUB:
920      return FALSE;
921
922   case TGSI_OPCODE_PUSHA:
923      /* deprecated? */
924      assert(0);
925      return FALSE;
926      break;
927
928   case TGSI_OPCODE_POPA:
929      /* deprecated? */
930      assert(0);
931      return FALSE;
932      break;
933
934   case TGSI_OPCODE_CEIL:
935      src0 = emit_fetch(bld, inst, 0);
936      dst0 = lp_build_ceil(&bld->base, src0);
937      break;
938
939   case TGSI_OPCODE_I2F:
940      /* deprecated? */
941      assert(0);
942      return FALSE;
943      break;
944
945   case TGSI_OPCODE_NOT:
946      /* deprecated? */
947      assert(0);
948      return FALSE;
949      break;
950
951   case TGSI_OPCODE_TRUNC:
952      src0 = emit_fetch(bld, inst, 0);
953      dst0 = lp_build_trunc(&bld->base, src0);
954      break;
955
956   case TGSI_OPCODE_SHL:
957      /* deprecated? */
958      assert(0);
959      return FALSE;
960      break;
961
962   case TGSI_OPCODE_ISHR:
963      /* deprecated? */
964      assert(0);
965      return FALSE;
966      break;
967
968   case TGSI_OPCODE_AND:
969      /* deprecated? */
970      assert(0);
971      return FALSE;
972      break;
973
974   case TGSI_OPCODE_OR:
975      /* deprecated? */
976      assert(0);
977      return FALSE;
978      break;
979
980   case TGSI_OPCODE_MOD:
981      /* deprecated? */
982      assert(0);
983      return FALSE;
984      break;
985
986   case TGSI_OPCODE_XOR:
987      /* deprecated? */
988      assert(0);
989      return FALSE;
990      break;
991
992   case TGSI_OPCODE_SAD:
993      /* deprecated? */
994      assert(0);
995      return FALSE;
996      break;
997
998   case TGSI_OPCODE_TXF:
999      /* deprecated? */
1000      assert(0);
1001      return FALSE;
1002      break;
1003
1004   case TGSI_OPCODE_TXQ:
1005      /* deprecated? */
1006      assert(0);
1007      return FALSE;
1008      break;
1009
1010   case TGSI_OPCODE_CONT:
1011      return FALSE;
1012
1013   case TGSI_OPCODE_EMIT:
1014      return FALSE;
1015      break;
1016
1017   case TGSI_OPCODE_ENDPRIM:
1018      return FALSE;
1019      break;
1020
1021   case TGSI_OPCODE_NOP:
1022      break;
1023
1024   default:
1025      return FALSE;
1026   }
1027
1028   if (info->num_dst) {
1029      emit_store(bld, inst, 0, dst0);
1030   }
1031
1032   return TRUE;
1033}
1034
1035
1036void
1037lp_build_tgsi_aos(struct gallivm_state *gallivm,
1038                  const struct tgsi_token *tokens,
1039                  struct lp_type type,
1040                  const unsigned char swizzles[4],
1041                  LLVMValueRef consts_ptr,
1042                  const LLVMValueRef *inputs,
1043                  LLVMValueRef *outputs,
1044                  struct lp_build_sampler_aos *sampler,
1045                  const struct tgsi_shader_info *info)
1046{
1047   struct lp_build_tgsi_aos_context bld;
1048   struct tgsi_parse_context parse;
1049   uint num_immediates = 0;
1050   uint num_instructions = 0;
1051   unsigned chan;
1052   int pc = 0;
1053
1054   /* Setup build context */
1055   memset(&bld, 0, sizeof bld);
1056   lp_build_context_init(&bld.base, gallivm, type);
1057   lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
1058
1059   for (chan = 0; chan < 4; ++chan) {
1060      bld.swizzles[chan] = swizzles[chan];
1061      bld.inv_swizzles[swizzles[chan]] = chan;
1062   }
1063
1064   bld.inputs = inputs;
1065   bld.outputs = outputs;
1066   bld.consts_ptr = consts_ptr;
1067   bld.sampler = sampler;
1068   bld.indirect_files = info->indirect_files;
1069   bld.instructions = (struct tgsi_full_instruction *)
1070                      MALLOC(LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction));
1071   bld.max_instructions = LP_MAX_INSTRUCTIONS;
1072
1073   if (!bld.instructions) {
1074      return;
1075   }
1076
1077   tgsi_parse_init(&parse, tokens);
1078
1079   while (!tgsi_parse_end_of_tokens(&parse)) {
1080      tgsi_parse_token(&parse);
1081
1082      switch(parse.FullToken.Token.Type) {
1083      case TGSI_TOKEN_TYPE_DECLARATION:
1084         /* Inputs already interpolated */
1085         emit_declaration(&bld, &parse.FullToken.FullDeclaration);
1086         break;
1087
1088      case TGSI_TOKEN_TYPE_INSTRUCTION:
1089         {
1090            /* save expanded instruction */
1091            if (num_instructions == bld.max_instructions) {
1092               struct tgsi_full_instruction *instructions;
1093               instructions = REALLOC(bld.instructions,
1094                                      bld.max_instructions
1095                                      * sizeof(struct tgsi_full_instruction),
1096                                      (bld.max_instructions + LP_MAX_INSTRUCTIONS)
1097                                      * sizeof(struct tgsi_full_instruction));
1098               if (!instructions) {
1099                  break;
1100               }
1101               bld.instructions = instructions;
1102               bld.max_instructions += LP_MAX_INSTRUCTIONS;
1103            }
1104
1105            memcpy(bld.instructions + num_instructions,
1106                   &parse.FullToken.FullInstruction,
1107                   sizeof(bld.instructions[0]));
1108
1109            num_instructions++;
1110         }
1111
1112         break;
1113
1114      case TGSI_TOKEN_TYPE_IMMEDIATE:
1115         /* simply copy the immediate values into the next immediates[] slot */
1116         {
1117            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1118            float imm[4];
1119            assert(size <= 4);
1120            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
1121            for (chan = 0; chan < 4; ++chan) {
1122               imm[chan] = 0.0f;
1123            }
1124            for (chan = 0; chan < size; ++chan) {
1125               unsigned swizzle = bld.swizzles[chan];
1126               imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
1127            }
1128            bld.immediates[num_immediates] =
1129                     lp_build_const_aos(gallivm, type,
1130                                        imm[0], imm[1], imm[2], imm[3],
1131                                        NULL);
1132            num_immediates++;
1133         }
1134         break;
1135
1136      case TGSI_TOKEN_TYPE_PROPERTY:
1137         break;
1138
1139      default:
1140         assert(0);
1141      }
1142   }
1143
1144   while (pc != -1) {
1145      struct tgsi_full_instruction *instr = bld.instructions + pc;
1146      const struct tgsi_opcode_info *opcode_info =
1147         tgsi_get_opcode_info(instr->Instruction.Opcode);
1148      if (!emit_instruction(&bld, instr, opcode_info, &pc))
1149         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1150                       opcode_info->mnemonic);
1151   }
1152
1153   if (0) {
1154      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
1155      LLVMValueRef function = LLVMGetBasicBlockParent(block);
1156      debug_printf("11111111111111111111111111111 \n");
1157      tgsi_dump(tokens, 0);
1158      lp_debug_dump_value(function);
1159      debug_printf("2222222222222222222222222222 \n");
1160   }
1161   tgsi_parse_free(&parse);
1162
1163   if (0) {
1164      LLVMModuleRef module = LLVMGetGlobalParent(
1165         LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
1166      LLVMDumpModule(module);
1167   }
1168
1169   FREE(bld.instructions);
1170}
1171
1172