lp_bld_tgsi_aos.c revision c426e63aa064debc23f9819c3862f357f1726bce
1/**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * @file
30 * TGSI to LLVM IR translation -- AoS.
31 *
32 * FIXME:
33 * - No control flow support: the existing control flow code should be factored
34 * out of the SoA code into a common module and shared.
35 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
36 *
37 * @author Jose Fonseca <jfonseca@vmware.com>
38 */
39
40#include "pipe/p_config.h"
41#include "pipe/p_shader_tokens.h"
42#include "util/u_debug.h"
43#include "util/u_math.h"
44#include "util/u_memory.h"
45#include "tgsi/tgsi_dump.h"
46#include "tgsi/tgsi_info.h"
47#include "tgsi/tgsi_parse.h"
48#include "tgsi/tgsi_util.h"
49#include "tgsi/tgsi_scan.h"
50#include "lp_bld_type.h"
51#include "lp_bld_const.h"
52#include "lp_bld_arit.h"
53#include "lp_bld_logic.h"
54#include "lp_bld_swizzle.h"
55#include "lp_bld_flow.h"
56#include "lp_bld_quad.h"
57#include "lp_bld_tgsi.h"
58#include "lp_bld_debug.h"
59
60
61/**
62 * Wrapper around lp_build_swizzle_aos which translates swizzles to another
63 * ordering.
64 */
65static LLVMValueRef
66swizzle_aos(struct lp_build_tgsi_context *bld_base,
67            LLVMValueRef a,
68            unsigned swizzle_x,
69            unsigned swizzle_y,
70            unsigned swizzle_z,
71            unsigned swizzle_w)
72{
73   unsigned char swizzles[4];
74   struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
75
76   assert(swizzle_x < 4);
77   assert(swizzle_y < 4);
78   assert(swizzle_z < 4);
79   assert(swizzle_w < 4);
80
81   swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
82   swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
83   swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
84   swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
85
86   return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
87}
88
89
90static LLVMValueRef
91swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
92                   LLVMValueRef a,
93                   unsigned chan)
94{
95   chan = bld->swizzles[chan];
96   return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan);
97}
98
99
100static LLVMValueRef
101emit_fetch_constant(
102   struct lp_build_tgsi_context * bld_base,
103   const struct tgsi_full_src_register * reg,
104   enum tgsi_opcode_type stype,
105   unsigned swizzle)
106{
107   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
108   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
109   struct lp_type type = bld_base->base.type;
110   LLVMValueRef res;
111   unsigned chan;
112
113   assert(!reg->Register.Indirect);
114
115   /*
116    * Get the constants components
117    */
118
119   res = bld->bld_base.base.undef;
120   for (chan = 0; chan < 4; ++chan) {
121      LLVMValueRef index;
122      LLVMValueRef scalar_ptr;
123      LLVMValueRef scalar;
124      LLVMValueRef swizzle;
125
126      index = lp_build_const_int32(bld->bld_base.base.gallivm,
127                                   reg->Register.Index * 4 + chan);
128
129      scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");
130
131      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
132
133      lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
134
135      /*
136       * NOTE: constants array is always assumed to be RGBA
137       */
138
139      swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
140                                     bld->swizzles[chan]);
141
142      res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
143   }
144
145   /*
146    * Broadcast the first quaternion to all others.
147    *
148    * XXX: could be factored into a reusable function.
149    */
150
151   if (type.length > 4) {
152      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
153      unsigned i;
154
155      for (chan = 0; chan < 4; ++chan) {
156         shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
157      }
158
159      for (i = 4; i < type.length; ++i) {
160         shuffles[i] = shuffles[i % 4];
161      }
162
163      res = LLVMBuildShuffleVector(builder,
164                                   res, bld->bld_base.base.undef,
165                                   LLVMConstVector(shuffles, type.length),
166                                   "");
167   }
168   return res;
169}
170
171static LLVMValueRef
172emit_fetch_immediate(
173   struct lp_build_tgsi_context * bld_base,
174   const struct tgsi_full_src_register * reg,
175   enum tgsi_opcode_type stype,
176   unsigned swizzle)
177{
178   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
179   LLVMValueRef res = bld->immediates[reg->Register.Index];
180   assert(res);
181   return res;
182}
183
184static LLVMValueRef
185emit_fetch_input(
186   struct lp_build_tgsi_context * bld_base,
187   const struct tgsi_full_src_register * reg,
188   enum tgsi_opcode_type stype,
189   unsigned swizzle)
190{
191   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
192   LLVMValueRef res = bld->inputs[reg->Register.Index];
193   assert(!reg->Register.Indirect);
194   assert(res);
195   return res;
196}
197
198static LLVMValueRef
199emit_fetch_temporary(
200   struct lp_build_tgsi_context * bld_base,
201   const struct tgsi_full_src_register * reg,
202   enum tgsi_opcode_type stype,
203   unsigned swizzle)
204{
205   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
206   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
207   LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
208   LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, "");
209   assert(!reg->Register.Indirect);
210   if (!res)
211      return bld->bld_base.base.undef;
212
213   return res;
214}
215
216/**
217 * Register store.
218 */
219void
220lp_emit_store_aos(
221   struct lp_build_tgsi_aos_context *bld,
222   const struct tgsi_full_instruction *inst,
223   unsigned index,
224   LLVMValueRef value)
225{
226   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
227   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
228   LLVMValueRef mask = NULL;
229   LLVMValueRef ptr;
230
231   /*
232    * Saturate the value
233    */
234
235   switch (inst->Instruction.Saturate) {
236   case TGSI_SAT_NONE:
237      break;
238
239   case TGSI_SAT_ZERO_ONE:
240      value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
241      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
242      break;
243
244   case TGSI_SAT_MINUS_PLUS_ONE:
245      value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
246      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
247      break;
248
249   default:
250      assert(0);
251   }
252
253   /*
254    * Translate the register file
255    */
256
257   assert(!reg->Register.Indirect);
258
259   switch (reg->Register.File) {
260   case TGSI_FILE_OUTPUT:
261      ptr = bld->outputs[reg->Register.Index];
262      break;
263
264   case TGSI_FILE_TEMPORARY:
265      ptr = bld->temps[reg->Register.Index];
266      break;
267
268   case TGSI_FILE_ADDRESS:
269      ptr = bld->addr[reg->Indirect.Index];
270      break;
271
272   case TGSI_FILE_PREDICATE:
273      ptr = bld->preds[reg->Register.Index];
274      break;
275
276   default:
277      assert(0);
278      return;
279   }
280
281   if (!ptr)
282      return;
283   /*
284    * Predicate
285    */
286
287   if (inst->Instruction.Predicate) {
288      LLVMValueRef pred;
289
290      assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);
291
292      pred = LLVMBuildLoad(builder,
293                           bld->preds[inst->Predicate.Index], "");
294
295      /*
296       * Convert the value to an integer mask.
297       */
298      pred = lp_build_compare(bld->bld_base.base.gallivm,
299                               bld->bld_base.base.type,
300                               PIPE_FUNC_NOTEQUAL,
301                               pred,
302                               bld->bld_base.base.zero);
303
304      if (inst->Predicate.Negate) {
305         pred = LLVMBuildNot(builder, pred, "");
306      }
307
308      pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred,
309                         inst->Predicate.SwizzleX,
310                         inst->Predicate.SwizzleY,
311                         inst->Predicate.SwizzleZ,
312                         inst->Predicate.SwizzleW);
313
314      if (mask) {
315         mask = LLVMBuildAnd(builder, mask, pred, "");
316      } else {
317         mask = pred;
318      }
319   }
320
321   /*
322    * Writemask
323    */
324
325   if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
326      LLVMValueRef writemask;
327
328      writemask = lp_build_const_mask_aos(bld->bld_base.base.gallivm, bld->bld_base.base.type,
329                                          reg->Register.WriteMask);
330
331      if (mask) {
332         mask = LLVMBuildAnd(builder, mask, writemask, "");
333      } else {
334         mask = writemask;
335      }
336   }
337
338   if (mask) {
339      LLVMValueRef orig_value;
340
341      orig_value = LLVMBuildLoad(builder, ptr, "");
342      value = lp_build_select(&bld->bld_base.base,
343                              mask, value, orig_value);
344   }
345
346   LLVMBuildStore(builder, value, ptr);
347}
348
349
350/**
351 * High-level instruction translators.
352 */
353
354static LLVMValueRef
355emit_tex(struct lp_build_tgsi_aos_context *bld,
356         const struct tgsi_full_instruction *inst,
357         enum lp_build_tex_modifier modifier)
358{
359   unsigned target;
360   unsigned unit;
361   LLVMValueRef coords;
362   LLVMValueRef ddx;
363   LLVMValueRef ddy;
364
365   if (!bld->sampler) {
366      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
367      return bld->bld_base.base.undef;
368   }
369
370   target = inst->Texture.Texture;
371
372   coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
373
374   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
375      ddx = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
376      ddy = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
377      unit = inst->Src[3].Register.Index;
378   }  else {
379#if 0
380      ddx = lp_build_ddx( &bld->bld_base.base, coords );
381      ddy = lp_build_ddy( &bld->bld_base.base, coords );
382#else
383      /* TODO */
384      ddx = bld->bld_base.base.one;
385      ddy = bld->bld_base.base.one;
386#endif
387      unit = inst->Src[1].Register.Index;
388   }
389
390   return bld->sampler->emit_fetch_texel(bld->sampler,
391                                         &bld->bld_base.base,
392                                         target, unit,
393                                         coords, ddx, ddy,
394                                         modifier);
395}
396
397
398void
399lp_emit_declaration_aos(
400   struct lp_build_tgsi_aos_context *bld,
401   const struct tgsi_full_declaration *decl)
402{
403   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
404   LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
405
406   unsigned first = decl->Range.First;
407   unsigned last = decl->Range.Last;
408   unsigned idx;
409
410   for (idx = first; idx <= last; ++idx) {
411      switch (decl->Declaration.File) {
412      case TGSI_FILE_TEMPORARY:
413         assert(idx < LP_MAX_TGSI_TEMPS);
414         if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
415            LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
416            bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm,
417                                                     vec_type, array_size, "");
418         } else {
419            bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
420         }
421         break;
422
423      case TGSI_FILE_OUTPUT:
424         bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
425         break;
426
427      case TGSI_FILE_ADDRESS:
428         assert(idx < LP_MAX_TGSI_ADDRS);
429         bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
430         break;
431
432      case TGSI_FILE_PREDICATE:
433         assert(idx < LP_MAX_TGSI_PREDS);
434         bld->preds[idx] = lp_build_alloca(gallivm, vec_type, "");
435         break;
436
437      default:
438         /* don't need to declare other vars */
439         break;
440      }
441   }
442}
443
444
445/**
446 * Emit LLVM for one TGSI instruction.
447 * \param return TRUE for success, FALSE otherwise
448 */
449boolean
450lp_emit_instruction_aos(
451   struct lp_build_tgsi_aos_context *bld,
452   const struct tgsi_full_instruction *inst,
453   const struct tgsi_opcode_info *info,
454   int *pc)
455{
456   LLVMValueRef src0, src1, src2;
457   LLVMValueRef tmp0, tmp1;
458   LLVMValueRef dst0 = NULL;
459
460   /*
461    * Stores and write masks are handled in a general fashion after the long
462    * instruction opcode switch statement.
463    *
464    * Although not stricitly necessary, we avoid generating instructions for
465    * channels which won't be stored, in cases where's that easy. For some
466    * complex instructions, like texture sampling, it is more convenient to
467    * assume a full writemask and then let LLVM optimization passes eliminate
468    * redundant code.
469    */
470
471   (*pc)++;
472
473   assert(info->num_dst <= 1);
474   if (info->num_dst) {
475      dst0 = bld->bld_base.base.undef;
476   }
477
478   switch (inst->Instruction.Opcode) {
479   case TGSI_OPCODE_ARL:
480      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
481      dst0 = lp_build_floor(&bld->bld_base.base, src0);
482      break;
483
484   case TGSI_OPCODE_MOV:
485      dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
486      break;
487
488   case TGSI_OPCODE_LIT:
489      return FALSE;
490
491   case TGSI_OPCODE_RCP:
492   /* TGSI_OPCODE_RECIP */
493      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
494      dst0 = lp_build_rcp(&bld->bld_base.base, src0);
495      break;
496
497   case TGSI_OPCODE_RSQ:
498   /* TGSI_OPCODE_RECIPSQRT */
499      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
500      tmp0 = lp_build_emit_llvm_unary(&bld->bld_base, TGSI_OPCODE_ABS, src0);
501      dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
502      break;
503
504   case TGSI_OPCODE_EXP:
505      return FALSE;
506
507   case TGSI_OPCODE_LOG:
508      return FALSE;
509
510   case TGSI_OPCODE_MUL:
511      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
512      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
513      dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
514      break;
515
516   case TGSI_OPCODE_ADD:
517      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
518      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
519      dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
520      break;
521
522   case TGSI_OPCODE_DP3:
523   /* TGSI_OPCODE_DOT3 */
524      return FALSE;
525
526   case TGSI_OPCODE_DP4:
527   /* TGSI_OPCODE_DOT4 */
528      return FALSE;
529
530   case TGSI_OPCODE_DST:
531      return FALSE;
532
533   case TGSI_OPCODE_MIN:
534      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
535      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
536      dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
537      break;
538
539   case TGSI_OPCODE_MAX:
540      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
541      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
542      dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
543      break;
544
545   case TGSI_OPCODE_SLT:
546   /* TGSI_OPCODE_SETLT */
547      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
548      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
549      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
550      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
551      break;
552
553   case TGSI_OPCODE_SGE:
554   /* TGSI_OPCODE_SETGE */
555      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
556      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
557      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
558      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
559      break;
560
561   case TGSI_OPCODE_MAD:
562   /* TGSI_OPCODE_MADD */
563      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
564      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
565      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
566      tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
567      dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
568      break;
569
570   case TGSI_OPCODE_SUB:
571      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
572      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
573      dst0 = lp_build_sub(&bld->bld_base.base, src0, src1);
574      break;
575
576   case TGSI_OPCODE_LRP:
577      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
578      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
579      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
580      tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
581      tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
582      dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
583      break;
584
585   case TGSI_OPCODE_CND:
586      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
587      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
588      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
589      tmp1 = lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, 0.5);
590      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src2, tmp1);
591      dst0 = lp_build_select(&bld->bld_base.base, tmp0, src0, src1);
592      break;
593
594   case TGSI_OPCODE_DP2A:
595      return FALSE;
596
597   case TGSI_OPCODE_FRC:
598      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
599      tmp0 = lp_build_floor(&bld->bld_base.base, src0);
600      dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
601      break;
602
603   case TGSI_OPCODE_CLAMP:
604      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
605      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
606      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
607      tmp0 = lp_build_max(&bld->bld_base.base, src0, src1);
608      dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2);
609      break;
610
611   case TGSI_OPCODE_FLR:
612      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
613      dst0 = lp_build_floor(&bld->bld_base.base, src0);
614      break;
615
616   case TGSI_OPCODE_ROUND:
617      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
618      dst0 = lp_build_round(&bld->bld_base.base, src0);
619      break;
620
621   case TGSI_OPCODE_EX2:
622      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
623      tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X);
624      dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
625      break;
626
627   case TGSI_OPCODE_LG2:
628      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
629      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
630      dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
631      break;
632
633   case TGSI_OPCODE_POW:
634      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
635      src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
636      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
637      src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
638      dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
639      break;
640
641   case TGSI_OPCODE_XPD:
642      return FALSE;
643
644   case TGSI_OPCODE_RCC:
645      /* deprecated? */
646      assert(0);
647      return FALSE;
648
649   case TGSI_OPCODE_DPH:
650      return FALSE;
651
652   case TGSI_OPCODE_COS:
653      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
654      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
655      dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
656      break;
657
658   case TGSI_OPCODE_DDX:
659      return FALSE;
660
661   case TGSI_OPCODE_DDY:
662      return FALSE;
663
664   case TGSI_OPCODE_KILP:
665      /* predicated kill */
666      return FALSE;
667
668   case TGSI_OPCODE_KIL:
669      /* conditional kill */
670      return FALSE;
671
672   case TGSI_OPCODE_PK2H:
673      return FALSE;
674      break;
675
676   case TGSI_OPCODE_PK2US:
677      return FALSE;
678      break;
679
680   case TGSI_OPCODE_PK4B:
681      return FALSE;
682      break;
683
684   case TGSI_OPCODE_PK4UB:
685      return FALSE;
686
687   case TGSI_OPCODE_RFL:
688      return FALSE;
689
690   case TGSI_OPCODE_SEQ:
691      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
692      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
693      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
694      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
695      break;
696
697   case TGSI_OPCODE_SFL:
698      dst0 = bld->bld_base.base.zero;
699      break;
700
701   case TGSI_OPCODE_SGT:
702      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
703      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
704      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
705      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
706      break;
707
708   case TGSI_OPCODE_SIN:
709      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
710      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
711      dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
712      break;
713
714   case TGSI_OPCODE_SLE:
715      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
716      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
717      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
718      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
719      break;
720
721   case TGSI_OPCODE_SNE:
722      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
723      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
724      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
725      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
726      break;
727
728   case TGSI_OPCODE_STR:
729      dst0 = bld->bld_base.base.one;
730      break;
731
732   case TGSI_OPCODE_TEX:
733      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
734      break;
735
736   case TGSI_OPCODE_TXD:
737      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
738      break;
739
740   case TGSI_OPCODE_UP2H:
741      /* deprecated */
742      assert (0);
743      return FALSE;
744      break;
745
746   case TGSI_OPCODE_UP2US:
747      /* deprecated */
748      assert(0);
749      return FALSE;
750      break;
751
752   case TGSI_OPCODE_UP4B:
753      /* deprecated */
754      assert(0);
755      return FALSE;
756      break;
757
758   case TGSI_OPCODE_UP4UB:
759      /* deprecated */
760      assert(0);
761      return FALSE;
762      break;
763
764   case TGSI_OPCODE_X2D:
765      /* deprecated? */
766      assert(0);
767      return FALSE;
768      break;
769
770   case TGSI_OPCODE_ARA:
771      /* deprecated */
772      assert(0);
773      return FALSE;
774      break;
775
776   case TGSI_OPCODE_ARR:
777      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
778      dst0 = lp_build_round(&bld->bld_base.base, src0);
779      break;
780
781   case TGSI_OPCODE_BRA:
782      /* deprecated */
783      assert(0);
784      return FALSE;
785      break;
786
787   case TGSI_OPCODE_CAL:
788      return FALSE;
789
790   case TGSI_OPCODE_RET:
791      return FALSE;
792
793   case TGSI_OPCODE_END:
794      *pc = -1;
795      break;
796
797   case TGSI_OPCODE_SSG:
798   /* TGSI_OPCODE_SGN */
799      tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
800      dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
801      break;
802
803   case TGSI_OPCODE_CMP:
804      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
805      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
806      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
807      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
808      dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
809      break;
810
811   case TGSI_OPCODE_SCS:
812      return FALSE;
813
814   case TGSI_OPCODE_TXB:
815      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
816      break;
817
818   case TGSI_OPCODE_NRM:
819      /* fall-through */
820   case TGSI_OPCODE_NRM4:
821      return FALSE;
822
823   case TGSI_OPCODE_DIV:
824      /* deprecated */
825      assert(0);
826      return FALSE;
827      break;
828
829   case TGSI_OPCODE_DP2:
830      return FALSE;
831
832   case TGSI_OPCODE_TXL:
833      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
834      break;
835
836   case TGSI_OPCODE_TXP:
837      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
838      break;
839
840   case TGSI_OPCODE_BRK:
841      return FALSE;
842
843   case TGSI_OPCODE_IF:
844      return FALSE;
845
846   case TGSI_OPCODE_BGNLOOP:
847      return FALSE;
848
849   case TGSI_OPCODE_BGNSUB:
850      return FALSE;
851
852   case TGSI_OPCODE_ELSE:
853      return FALSE;
854
855   case TGSI_OPCODE_ENDIF:
856      return FALSE;
857
858   case TGSI_OPCODE_ENDLOOP:
859      return FALSE;
860
861   case TGSI_OPCODE_ENDSUB:
862      return FALSE;
863
864   case TGSI_OPCODE_PUSHA:
865      /* deprecated? */
866      assert(0);
867      return FALSE;
868      break;
869
870   case TGSI_OPCODE_POPA:
871      /* deprecated? */
872      assert(0);
873      return FALSE;
874      break;
875
876   case TGSI_OPCODE_CEIL:
877      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
878      dst0 = lp_build_ceil(&bld->bld_base.base, src0);
879      break;
880
881   case TGSI_OPCODE_I2F:
882      /* deprecated? */
883      assert(0);
884      return FALSE;
885      break;
886
887   case TGSI_OPCODE_NOT:
888      /* deprecated? */
889      assert(0);
890      return FALSE;
891      break;
892
893   case TGSI_OPCODE_TRUNC:
894      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
895      dst0 = lp_build_trunc(&bld->bld_base.base, src0);
896      break;
897
898   case TGSI_OPCODE_SHL:
899      /* deprecated? */
900      assert(0);
901      return FALSE;
902      break;
903
904   case TGSI_OPCODE_ISHR:
905      /* deprecated? */
906      assert(0);
907      return FALSE;
908      break;
909
910   case TGSI_OPCODE_AND:
911      /* deprecated? */
912      assert(0);
913      return FALSE;
914      break;
915
916   case TGSI_OPCODE_OR:
917      /* deprecated? */
918      assert(0);
919      return FALSE;
920      break;
921
922   case TGSI_OPCODE_MOD:
923      /* deprecated? */
924      assert(0);
925      return FALSE;
926      break;
927
928   case TGSI_OPCODE_XOR:
929      /* deprecated? */
930      assert(0);
931      return FALSE;
932      break;
933
934   case TGSI_OPCODE_SAD:
935      /* deprecated? */
936      assert(0);
937      return FALSE;
938      break;
939
940   case TGSI_OPCODE_TXF:
941      /* deprecated? */
942      assert(0);
943      return FALSE;
944      break;
945
946   case TGSI_OPCODE_TXQ:
947      /* deprecated? */
948      assert(0);
949      return FALSE;
950      break;
951
952   case TGSI_OPCODE_CONT:
953      return FALSE;
954
955   case TGSI_OPCODE_EMIT:
956      return FALSE;
957      break;
958
959   case TGSI_OPCODE_ENDPRIM:
960      return FALSE;
961      break;
962
963   case TGSI_OPCODE_NOP:
964      break;
965
966   default:
967      return FALSE;
968   }
969
970   if (info->num_dst) {
971      lp_emit_store_aos(bld, inst, 0, dst0);
972   }
973
974   return TRUE;
975}
976
977
978void
979lp_build_tgsi_aos(struct gallivm_state *gallivm,
980                  const struct tgsi_token *tokens,
981                  struct lp_type type,
982                  const unsigned char swizzles[4],
983                  LLVMValueRef consts_ptr,
984                  const LLVMValueRef *inputs,
985                  LLVMValueRef *outputs,
986                  struct lp_build_sampler_aos *sampler,
987                  const struct tgsi_shader_info *info)
988{
989   struct lp_build_tgsi_aos_context bld;
990   struct tgsi_parse_context parse;
991   uint num_immediates = 0;
992   unsigned chan;
993   int pc = 0;
994
995   /* Setup build context */
996   memset(&bld, 0, sizeof bld);
997   lp_build_context_init(&bld.bld_base.base, gallivm, type);
998   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
999   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
1000   lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
1001
1002   for (chan = 0; chan < 4; ++chan) {
1003      bld.swizzles[chan] = swizzles[chan];
1004      bld.inv_swizzles[swizzles[chan]] = chan;
1005   }
1006
1007   bld.inputs = inputs;
1008   bld.outputs = outputs;
1009   bld.consts_ptr = consts_ptr;
1010   bld.sampler = sampler;
1011   bld.indirect_files = info->indirect_files;
1012   bld.bld_base.emit_swizzle = swizzle_aos;
1013   bld.bld_base.info = info;
1014
1015   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
1016   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
1017   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
1018   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
1019
1020   /* Set opcode actions */
1021   lp_set_default_actions_cpu(&bld.bld_base);
1022
1023   if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
1024      return;
1025   }
1026
1027   tgsi_parse_init(&parse, tokens);
1028
1029   while (!tgsi_parse_end_of_tokens(&parse)) {
1030      tgsi_parse_token(&parse);
1031
1032      switch(parse.FullToken.Token.Type) {
1033      case TGSI_TOKEN_TYPE_DECLARATION:
1034         /* Inputs already interpolated */
1035         lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration);
1036         break;
1037
1038      case TGSI_TOKEN_TYPE_INSTRUCTION:
1039         /* save expanded instruction */
1040         lp_bld_tgsi_add_instruction(&bld.bld_base,
1041                                     &parse.FullToken.FullInstruction);
1042         break;
1043
1044      case TGSI_TOKEN_TYPE_IMMEDIATE:
1045         /* simply copy the immediate values into the next immediates[] slot */
1046         {
1047            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1048            float imm[4];
1049            assert(size <= 4);
1050            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
1051            for (chan = 0; chan < 4; ++chan) {
1052               imm[chan] = 0.0f;
1053            }
1054            for (chan = 0; chan < size; ++chan) {
1055               unsigned swizzle = bld.swizzles[chan];
1056               imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
1057            }
1058            bld.immediates[num_immediates] =
1059                     lp_build_const_aos(gallivm, type,
1060                                        imm[0], imm[1], imm[2], imm[3],
1061                                        NULL);
1062            num_immediates++;
1063         }
1064         break;
1065
1066      case TGSI_TOKEN_TYPE_PROPERTY:
1067         break;
1068
1069      default:
1070         assert(0);
1071      }
1072   }
1073
1074   while (pc != -1) {
1075      struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc;
1076      const struct tgsi_opcode_info *opcode_info =
1077         tgsi_get_opcode_info(instr->Instruction.Opcode);
1078      if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
1079         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1080                       opcode_info->mnemonic);
1081   }
1082
1083   if (0) {
1084      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
1085      LLVMValueRef function = LLVMGetBasicBlockParent(block);
1086      debug_printf("11111111111111111111111111111 \n");
1087      tgsi_dump(tokens, 0);
1088      lp_debug_dump_value(function);
1089      debug_printf("2222222222222222222222222222 \n");
1090   }
1091   tgsi_parse_free(&parse);
1092   FREE(bld.bld_base.instructions);
1093
1094   if (0) {
1095      LLVMModuleRef module = LLVMGetGlobalParent(
1096         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
1097      LLVMDumpModule(module);
1098   }
1099
1100}
1101
1102