1/**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * @file
30 * TGSI to LLVM IR translation -- AoS.
31 *
32 * FIXME:
 * - No control flow support: the existing control flow code should be factored
 * out of the SoA code into a common module and shared.
 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
36 *
37 * @author Jose Fonseca <jfonseca@vmware.com>
38 */
39
40#include "pipe/p_config.h"
41#include "pipe/p_shader_tokens.h"
42#include "util/u_debug.h"
43#include "util/u_math.h"
44#include "util/u_memory.h"
45#include "tgsi/tgsi_dump.h"
46#include "tgsi/tgsi_info.h"
47#include "tgsi/tgsi_parse.h"
48#include "tgsi/tgsi_util.h"
49#include "tgsi/tgsi_scan.h"
50#include "lp_bld_type.h"
51#include "lp_bld_const.h"
52#include "lp_bld_arit.h"
53#include "lp_bld_logic.h"
54#include "lp_bld_swizzle.h"
55#include "lp_bld_flow.h"
56#include "lp_bld_quad.h"
57#include "lp_bld_tgsi.h"
58#include "lp_bld_debug.h"
59#include "lp_bld_sample.h"
60
61
62/**
63 * Wrapper around lp_build_swizzle_aos which translates swizzles to another
64 * ordering.
65 */
66static LLVMValueRef
67swizzle_aos(struct lp_build_tgsi_context *bld_base,
68            LLVMValueRef a,
69            unsigned swizzle_x,
70            unsigned swizzle_y,
71            unsigned swizzle_z,
72            unsigned swizzle_w)
73{
74   unsigned char swizzles[4];
75   struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
76
77   assert(swizzle_x < 4);
78   assert(swizzle_y < 4);
79   assert(swizzle_z < 4);
80   assert(swizzle_w < 4);
81
82   swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
83   swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
84   swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
85   swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
86
87   return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
88}
89
90
91static LLVMValueRef
92swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
93                   LLVMValueRef a,
94                   unsigned chan)
95{
96   chan = bld->swizzles[chan];
97   return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan, 4);
98}
99
100
101static LLVMValueRef
102emit_fetch_constant(
103   struct lp_build_tgsi_context * bld_base,
104   const struct tgsi_full_src_register * reg,
105   enum tgsi_opcode_type stype,
106   unsigned swizzle)
107{
108   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
109   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
110   struct lp_type type = bld_base->base.type;
111   LLVMValueRef res;
112   unsigned chan;
113
114   assert(!reg->Register.Indirect);
115
116   /*
117    * Get the constants components
118    */
119
120   res = bld->bld_base.base.undef;
121   for (chan = 0; chan < 4; ++chan) {
122      LLVMValueRef index;
123      LLVMValueRef scalar_ptr;
124      LLVMValueRef scalar;
125      LLVMValueRef swizzle;
126
127      index = lp_build_const_int32(bld->bld_base.base.gallivm,
128                                   reg->Register.Index * 4 + chan);
129
130      scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");
131
132      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
133
134      lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
135
136      /*
137       * NOTE: constants array is always assumed to be RGBA
138       */
139
140      swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
141                                     bld->swizzles[chan]);
142
143      res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
144   }
145
146   /*
147    * Broadcast the first quaternion to all others.
148    *
149    * XXX: could be factored into a reusable function.
150    */
151
152   if (type.length > 4) {
153      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
154      unsigned i;
155
156      for (chan = 0; chan < 4; ++chan) {
157         shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
158      }
159
160      for (i = 4; i < type.length; ++i) {
161         shuffles[i] = shuffles[i % 4];
162      }
163
164      res = LLVMBuildShuffleVector(builder,
165                                   res, bld->bld_base.base.undef,
166                                   LLVMConstVector(shuffles, type.length),
167                                   "");
168   }
169   return res;
170}
171
172static LLVMValueRef
173emit_fetch_immediate(
174   struct lp_build_tgsi_context * bld_base,
175   const struct tgsi_full_src_register * reg,
176   enum tgsi_opcode_type stype,
177   unsigned swizzle)
178{
179   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
180   LLVMValueRef res = bld->immediates[reg->Register.Index];
181   assert(res);
182   return res;
183}
184
185static LLVMValueRef
186emit_fetch_input(
187   struct lp_build_tgsi_context * bld_base,
188   const struct tgsi_full_src_register * reg,
189   enum tgsi_opcode_type stype,
190   unsigned swizzle)
191{
192   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
193   LLVMValueRef res = bld->inputs[reg->Register.Index];
194   assert(!reg->Register.Indirect);
195   assert(res);
196   return res;
197}
198
199static LLVMValueRef
200emit_fetch_temporary(
201   struct lp_build_tgsi_context * bld_base,
202   const struct tgsi_full_src_register * reg,
203   enum tgsi_opcode_type stype,
204   unsigned swizzle)
205{
206   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
207   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
208   LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
209   LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, "");
210   assert(!reg->Register.Indirect);
211   if (!res)
212      return bld->bld_base.base.undef;
213
214   return res;
215}
216
217/**
218 * Register store.
219 */
220void
221lp_emit_store_aos(
222   struct lp_build_tgsi_aos_context *bld,
223   const struct tgsi_full_instruction *inst,
224   unsigned index,
225   LLVMValueRef value)
226{
227   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
228   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
229   LLVMValueRef mask = NULL;
230   LLVMValueRef ptr;
231
232   /*
233    * Saturate the value
234    */
235   if (inst->Instruction.Saturate) {
236      value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
237      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
238   }
239
240   /*
241    * Translate the register file
242    */
243
244   assert(!reg->Register.Indirect);
245
246   switch (reg->Register.File) {
247   case TGSI_FILE_OUTPUT:
248      ptr = bld->outputs[reg->Register.Index];
249      break;
250
251   case TGSI_FILE_TEMPORARY:
252      ptr = bld->temps[reg->Register.Index];
253      break;
254
255   case TGSI_FILE_ADDRESS:
256      ptr = bld->addr[reg->Indirect.Index];
257      break;
258
259   case TGSI_FILE_PREDICATE:
260      ptr = bld->preds[reg->Register.Index];
261      break;
262
263   default:
264      assert(0);
265      return;
266   }
267
268   if (!ptr)
269      return;
270   /*
271    * Predicate
272    */
273
274   if (inst->Instruction.Predicate) {
275      LLVMValueRef pred;
276
277      assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);
278
279      pred = LLVMBuildLoad(builder,
280                           bld->preds[inst->Predicate.Index], "");
281
282      /*
283       * Convert the value to an integer mask.
284       */
285      pred = lp_build_compare(bld->bld_base.base.gallivm,
286                               bld->bld_base.base.type,
287                               PIPE_FUNC_NOTEQUAL,
288                               pred,
289                               bld->bld_base.base.zero);
290
291      if (inst->Predicate.Negate) {
292         pred = LLVMBuildNot(builder, pred, "");
293      }
294
295      pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred,
296                         inst->Predicate.SwizzleX,
297                         inst->Predicate.SwizzleY,
298                         inst->Predicate.SwizzleZ,
299                         inst->Predicate.SwizzleW);
300
301      if (mask) {
302         mask = LLVMBuildAnd(builder, mask, pred, "");
303      } else {
304         mask = pred;
305      }
306   }
307
308   /*
309    * Writemask
310    */
311
312   if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
313      LLVMValueRef writemask;
314
315      writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm,
316                                                   bld->bld_base.base.type,
317                                                   reg->Register.WriteMask,
318                                                   TGSI_NUM_CHANNELS,
319                                                   bld->swizzles);
320
321      if (mask) {
322         mask = LLVMBuildAnd(builder, mask, writemask, "");
323      } else {
324         mask = writemask;
325      }
326   }
327
328   if (mask) {
329      LLVMValueRef orig_value;
330
331      orig_value = LLVMBuildLoad(builder, ptr, "");
332      value = lp_build_select(&bld->bld_base.base,
333                              mask, value, orig_value);
334   }
335
336   LLVMBuildStore(builder, value, ptr);
337}
338
339
340/**
341 * High-level instruction translators.
342 */
343
344static LLVMValueRef
345emit_tex(struct lp_build_tgsi_aos_context *bld,
346         const struct tgsi_full_instruction *inst,
347         enum lp_build_tex_modifier modifier)
348{
349   unsigned target;
350   unsigned unit;
351   LLVMValueRef coords;
352   struct lp_derivatives derivs = { {NULL}, {NULL} };
353
354   if (!bld->sampler) {
355      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
356      return bld->bld_base.base.undef;
357   }
358
359   target = inst->Texture.Texture;
360
361   coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
362
363   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
364      /* probably not going to work */
365      derivs.ddx[0] = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
366      derivs.ddy[0] = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
367      unit = inst->Src[3].Register.Index;
368   }
369   else {
370      unit = inst->Src[1].Register.Index;
371   }
372   return bld->sampler->emit_fetch_texel(bld->sampler,
373                                         &bld->bld_base.base,
374                                         target, unit,
375                                         coords, derivs,
376                                         modifier);
377}
378
379
380static LLVMValueRef
381emit_sample(struct lp_build_tgsi_aos_context *bld,
382            const struct tgsi_full_instruction *inst,
383            enum lp_build_tex_modifier modifier)
384{
385   unsigned target;
386   unsigned unit;
387   LLVMValueRef coords;
388   struct lp_derivatives derivs = { {NULL}, {NULL} };
389
390   if (!bld->sampler) {
391      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
392      return bld->bld_base.base.undef;
393   }
394
395   coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
396
397   /* ignore modifiers, can't handle different sampler / sampler view, etc... */
398   unit = inst->Src[1].Register.Index;
399   assert(inst->Src[2].Register.Index == unit);
400
401   target = bld->sv[unit].Resource;
402
403   return bld->sampler->emit_fetch_texel(bld->sampler,
404                                         &bld->bld_base.base,
405                                         target, unit,
406                                         coords, derivs,
407                                         modifier);
408}
409
410
411void
412lp_emit_declaration_aos(
413   struct lp_build_tgsi_aos_context *bld,
414   const struct tgsi_full_declaration *decl)
415{
416   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
417   LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
418
419   unsigned first = decl->Range.First;
420   unsigned last = decl->Range.Last;
421   unsigned idx;
422
423   for (idx = first; idx <= last; ++idx) {
424      switch (decl->Declaration.File) {
425      case TGSI_FILE_TEMPORARY:
426         assert(idx < LP_MAX_INLINED_TEMPS);
427         if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
428            LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
429            bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm,
430                                                     vec_type, array_size, "");
431         } else {
432            bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
433         }
434         break;
435
436      case TGSI_FILE_OUTPUT:
437         bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
438         break;
439
440      case TGSI_FILE_ADDRESS:
441         assert(idx < LP_MAX_TGSI_ADDRS);
442         bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
443         break;
444
445      case TGSI_FILE_PREDICATE:
446         assert(idx < LP_MAX_TGSI_PREDS);
447         bld->preds[idx] = lp_build_alloca(gallivm, vec_type, "");
448         break;
449
450      case TGSI_FILE_SAMPLER_VIEW:
451         /*
452          * The target stored here MUST match whatever there actually
453          * is in the set sampler views (what about return type?).
454          */
455         assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
456         for (idx = first; idx <= last; ++idx) {
457            bld->sv[idx] = decl->SamplerView;
458         }
459         break;
460
461      default:
462         /* don't need to declare other vars */
463         break;
464      }
465   }
466}
467
468
469/**
470 * Emit LLVM for one TGSI instruction.
471 * \param return TRUE for success, FALSE otherwise
472 */
473boolean
474lp_emit_instruction_aos(
475   struct lp_build_tgsi_aos_context *bld,
476   const struct tgsi_full_instruction *inst,
477   const struct tgsi_opcode_info *info,
478   int *pc)
479{
480   LLVMValueRef src0, src1, src2;
481   LLVMValueRef tmp0;
482   LLVMValueRef dst0 = NULL;
483
484   /*
485    * Stores and write masks are handled in a general fashion after the long
486    * instruction opcode switch statement.
487    *
488    * Although not stricitly necessary, we avoid generating instructions for
489    * channels which won't be stored, in cases where's that easy. For some
490    * complex instructions, like texture sampling, it is more convenient to
491    * assume a full writemask and then let LLVM optimization passes eliminate
492    * redundant code.
493    */
494
495   (*pc)++;
496
497   assert(info->num_dst <= 1);
498   if (info->num_dst) {
499      dst0 = bld->bld_base.base.undef;
500   }
501
502   switch (inst->Instruction.Opcode) {
503   case TGSI_OPCODE_ARL:
504      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
505      dst0 = lp_build_floor(&bld->bld_base.base, src0);
506      break;
507
508   case TGSI_OPCODE_MOV:
509      dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
510      break;
511
512   case TGSI_OPCODE_LIT:
513      return FALSE;
514
515   case TGSI_OPCODE_RCP:
516   /* TGSI_OPCODE_RECIP */
517      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
518      dst0 = lp_build_rcp(&bld->bld_base.base, src0);
519      break;
520
521   case TGSI_OPCODE_RSQ:
522   /* TGSI_OPCODE_RECIPSQRT */
523      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
524      tmp0 = lp_build_abs(&bld->bld_base.base, src0);
525      dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
526      break;
527
528   case TGSI_OPCODE_EXP:
529      return FALSE;
530
531   case TGSI_OPCODE_LOG:
532      return FALSE;
533
534   case TGSI_OPCODE_MUL:
535      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
536      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
537      dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
538      break;
539
540   case TGSI_OPCODE_ADD:
541      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
542      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
543      dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
544      break;
545
546   case TGSI_OPCODE_DP3:
547   /* TGSI_OPCODE_DOT3 */
548      return FALSE;
549
550   case TGSI_OPCODE_DP4:
551   /* TGSI_OPCODE_DOT4 */
552      return FALSE;
553
554   case TGSI_OPCODE_DST:
555      return FALSE;
556
557   case TGSI_OPCODE_MIN:
558      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
559      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
560      dst0 = lp_build_min(&bld->bld_base.base, src0, src1);
561      break;
562
563   case TGSI_OPCODE_MAX:
564      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
565      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
566      dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
567      break;
568
569   case TGSI_OPCODE_SLT:
570   /* TGSI_OPCODE_SETLT */
571      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
572      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
573      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
574      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
575      break;
576
577   case TGSI_OPCODE_SGE:
578   /* TGSI_OPCODE_SETGE */
579      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
580      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
581      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
582      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
583      break;
584
585   case TGSI_OPCODE_MAD:
586   /* TGSI_OPCODE_MADD */
587      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
588      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
589      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
590      tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
591      dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
592      break;
593
594   case TGSI_OPCODE_LRP:
595      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
596      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
597      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
598      tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
599      tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
600      dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
601      break;
602
603   case TGSI_OPCODE_DP2A:
604      return FALSE;
605
606   case TGSI_OPCODE_FRC:
607      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
608      tmp0 = lp_build_floor(&bld->bld_base.base, src0);
609      dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
610      break;
611
612   case TGSI_OPCODE_CLAMP:
613      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
614      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
615      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
616      tmp0 = lp_build_max(&bld->bld_base.base, src0, src1);
617      dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2);
618      break;
619
620   case TGSI_OPCODE_FLR:
621      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
622      dst0 = lp_build_floor(&bld->bld_base.base, src0);
623      break;
624
625   case TGSI_OPCODE_ROUND:
626      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
627      dst0 = lp_build_round(&bld->bld_base.base, src0);
628      break;
629
630   case TGSI_OPCODE_EX2:
631      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
632      tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X, TGSI_NUM_CHANNELS);
633      dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
634      break;
635
636   case TGSI_OPCODE_LG2:
637      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
638      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
639      dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
640      break;
641
642   case TGSI_OPCODE_POW:
643      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
644      src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
645      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
646      src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
647      dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
648      break;
649
650   case TGSI_OPCODE_XPD:
651      return FALSE;
652
653   case TGSI_OPCODE_DPH:
654      return FALSE;
655
656   case TGSI_OPCODE_COS:
657      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
658      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
659      dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
660      break;
661
662   case TGSI_OPCODE_DDX:
663      return FALSE;
664
665   case TGSI_OPCODE_DDY:
666      return FALSE;
667
668   case TGSI_OPCODE_KILL:
669      return FALSE;
670
671   case TGSI_OPCODE_KILL_IF:
672      return FALSE;
673
674   case TGSI_OPCODE_PK2H:
675      return FALSE;
676      break;
677
678   case TGSI_OPCODE_PK2US:
679      return FALSE;
680      break;
681
682   case TGSI_OPCODE_PK4B:
683      return FALSE;
684      break;
685
686   case TGSI_OPCODE_PK4UB:
687      return FALSE;
688
689   case TGSI_OPCODE_SEQ:
690      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
691      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
692      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
693      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
694      break;
695
696   case TGSI_OPCODE_SGT:
697      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
698      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
699      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
700      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
701      break;
702
703   case TGSI_OPCODE_SIN:
704      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
705      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
706      dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
707      break;
708
709   case TGSI_OPCODE_SLE:
710      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
711      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
712      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
713      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
714      break;
715
716   case TGSI_OPCODE_SNE:
717      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
718      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
719      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
720      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
721      break;
722
723   case TGSI_OPCODE_TEX:
724      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
725      break;
726
727   case TGSI_OPCODE_TXD:
728      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
729      break;
730
731   case TGSI_OPCODE_UP2H:
732      /* deprecated */
733      assert (0);
734      return FALSE;
735      break;
736
737   case TGSI_OPCODE_UP2US:
738      /* deprecated */
739      assert(0);
740      return FALSE;
741      break;
742
743   case TGSI_OPCODE_UP4B:
744      /* deprecated */
745      assert(0);
746      return FALSE;
747      break;
748
749   case TGSI_OPCODE_UP4UB:
750      /* deprecated */
751      assert(0);
752      return FALSE;
753      break;
754
755   case TGSI_OPCODE_ARR:
756      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
757      dst0 = lp_build_round(&bld->bld_base.base, src0);
758      break;
759
760   case TGSI_OPCODE_CAL:
761      return FALSE;
762
763   case TGSI_OPCODE_RET:
764      /* safe to ignore at end */
765      break;
766
767   case TGSI_OPCODE_END:
768      *pc = -1;
769      break;
770
771   case TGSI_OPCODE_SSG:
772   /* TGSI_OPCODE_SGN */
773      tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
774      dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
775      break;
776
777   case TGSI_OPCODE_CMP:
778      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
779      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
780      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
781      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
782      dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
783      break;
784
785   case TGSI_OPCODE_SCS:
786      return FALSE;
787
788   case TGSI_OPCODE_TXB:
789      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
790      break;
791
792   case TGSI_OPCODE_DIV:
793      assert(0);
794      return FALSE;
795      break;
796
797   case TGSI_OPCODE_DP2:
798      return FALSE;
799
800   case TGSI_OPCODE_TXL:
801      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
802      break;
803
804   case TGSI_OPCODE_TXP:
805      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
806      break;
807
808   case TGSI_OPCODE_BRK:
809      return FALSE;
810
811   case TGSI_OPCODE_IF:
812   case TGSI_OPCODE_UIF:
813      return FALSE;
814
815   case TGSI_OPCODE_BGNLOOP:
816      return FALSE;
817
818   case TGSI_OPCODE_BGNSUB:
819      return FALSE;
820
821   case TGSI_OPCODE_ELSE:
822      return FALSE;
823
824   case TGSI_OPCODE_ENDIF:
825      return FALSE;
826
827   case TGSI_OPCODE_ENDLOOP:
828      return FALSE;
829
830   case TGSI_OPCODE_ENDSUB:
831      return FALSE;
832
833   case TGSI_OPCODE_PUSHA:
834      /* deprecated? */
835      assert(0);
836      return FALSE;
837      break;
838
839   case TGSI_OPCODE_POPA:
840      /* deprecated? */
841      assert(0);
842      return FALSE;
843      break;
844
845   case TGSI_OPCODE_CEIL:
846      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
847      dst0 = lp_build_ceil(&bld->bld_base.base, src0);
848      break;
849
850   case TGSI_OPCODE_I2F:
851      assert(0);
852      return FALSE;
853      break;
854
855   case TGSI_OPCODE_NOT:
856      assert(0);
857      return FALSE;
858      break;
859
860   case TGSI_OPCODE_TRUNC:
861      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
862      dst0 = lp_build_trunc(&bld->bld_base.base, src0);
863      break;
864
865   case TGSI_OPCODE_SHL:
866      assert(0);
867      return FALSE;
868      break;
869
870   case TGSI_OPCODE_ISHR:
871      assert(0);
872      return FALSE;
873      break;
874
875   case TGSI_OPCODE_AND:
876      assert(0);
877      return FALSE;
878      break;
879
880   case TGSI_OPCODE_OR:
881      assert(0);
882      return FALSE;
883      break;
884
885   case TGSI_OPCODE_MOD:
886      assert(0);
887      return FALSE;
888      break;
889
890   case TGSI_OPCODE_XOR:
891      assert(0);
892      return FALSE;
893      break;
894
895   case TGSI_OPCODE_SAD:
896      assert(0);
897      return FALSE;
898      break;
899
900   case TGSI_OPCODE_TXF:
901      assert(0);
902      return FALSE;
903      break;
904
905   case TGSI_OPCODE_TXQ:
906      assert(0);
907      return FALSE;
908      break;
909
910   case TGSI_OPCODE_CONT:
911      return FALSE;
912
913   case TGSI_OPCODE_EMIT:
914      return FALSE;
915      break;
916
917   case TGSI_OPCODE_ENDPRIM:
918      return FALSE;
919      break;
920
921   case TGSI_OPCODE_NOP:
922      break;
923
924   case TGSI_OPCODE_SAMPLE:
925      dst0 = emit_sample(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
926      break;
927
928   default:
929      return FALSE;
930   }
931
932   if (info->num_dst) {
933      lp_emit_store_aos(bld, inst, 0, dst0);
934   }
935
936   return TRUE;
937}
938
939
940void
941lp_build_tgsi_aos(struct gallivm_state *gallivm,
942                  const struct tgsi_token *tokens,
943                  struct lp_type type,
944                  const unsigned char swizzles[4],
945                  LLVMValueRef consts_ptr,
946                  const LLVMValueRef *inputs,
947                  LLVMValueRef *outputs,
948                  struct lp_build_sampler_aos *sampler,
949                  const struct tgsi_shader_info *info)
950{
951   struct lp_build_tgsi_aos_context bld;
952   struct tgsi_parse_context parse;
953   uint num_immediates = 0;
954   unsigned chan;
955   int pc = 0;
956
957   /* Setup build context */
958   memset(&bld, 0, sizeof bld);
959   lp_build_context_init(&bld.bld_base.base, gallivm, type);
960   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
961   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
962   lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
963
964   for (chan = 0; chan < 4; ++chan) {
965      bld.swizzles[chan] = swizzles[chan];
966      bld.inv_swizzles[swizzles[chan]] = chan;
967   }
968
969   bld.inputs = inputs;
970   bld.outputs = outputs;
971   bld.consts_ptr = consts_ptr;
972   bld.sampler = sampler;
973   bld.indirect_files = info->indirect_files;
974   bld.bld_base.emit_swizzle = swizzle_aos;
975   bld.bld_base.info = info;
976
977   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
978   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
979   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
980   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
981
982   /* Set opcode actions */
983   lp_set_default_actions_cpu(&bld.bld_base);
984
985   if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
986      return;
987   }
988
989   tgsi_parse_init(&parse, tokens);
990
991   while (!tgsi_parse_end_of_tokens(&parse)) {
992      tgsi_parse_token(&parse);
993
994      switch(parse.FullToken.Token.Type) {
995      case TGSI_TOKEN_TYPE_DECLARATION:
996         /* Inputs already interpolated */
997         lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration);
998         break;
999
1000      case TGSI_TOKEN_TYPE_INSTRUCTION:
1001         /* save expanded instruction */
1002         lp_bld_tgsi_add_instruction(&bld.bld_base,
1003                                     &parse.FullToken.FullInstruction);
1004         break;
1005
1006      case TGSI_TOKEN_TYPE_IMMEDIATE:
1007         /* simply copy the immediate values into the next immediates[] slot */
1008         {
1009            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1010            float imm[4];
1011            assert(size <= 4);
1012            assert(num_immediates < LP_MAX_INLINED_IMMEDIATES);
1013            for (chan = 0; chan < 4; ++chan) {
1014               imm[chan] = 0.0f;
1015            }
1016            for (chan = 0; chan < size; ++chan) {
1017               unsigned swizzle = bld.swizzles[chan];
1018               imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
1019            }
1020            bld.immediates[num_immediates] =
1021                     lp_build_const_aos(gallivm, type,
1022                                        imm[0], imm[1], imm[2], imm[3],
1023                                        NULL);
1024            num_immediates++;
1025         }
1026         break;
1027
1028      case TGSI_TOKEN_TYPE_PROPERTY:
1029         break;
1030
1031      default:
1032         assert(0);
1033      }
1034   }
1035
1036   while (pc != -1) {
1037      struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc;
1038      const struct tgsi_opcode_info *opcode_info =
1039         tgsi_get_opcode_info(instr->Instruction.Opcode);
1040      if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
1041         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1042                       opcode_info->mnemonic);
1043   }
1044
1045   if (0) {
1046      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
1047      LLVMValueRef function = LLVMGetBasicBlockParent(block);
1048      debug_printf("11111111111111111111111111111 \n");
1049      tgsi_dump(tokens, 0);
1050      lp_debug_dump_value(function);
1051      debug_printf("2222222222222222222222222222 \n");
1052   }
1053   tgsi_parse_free(&parse);
1054   FREE(bld.bld_base.instructions);
1055
1056   if (0) {
1057      LLVMModuleRef module = LLVMGetGlobalParent(
1058         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
1059      LLVMDumpModule(module);
1060   }
1061
1062}
1063
1064