lp_bld_tgsi_aos.c revision c23fd547c060c4137eab0f878a1028c5903384eb
1/**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * @file
30 * TGSI to LLVM IR translation -- AoS.
31 *
32 * FIXME:
 * - No control flow support: the existing control flow code should be
 *   factored out of the SoA code into a common module and shared.
 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
36 *
37 * @author Jose Fonseca <jfonseca@vmware.com>
38 */
39
40#include "pipe/p_config.h"
41#include "pipe/p_shader_tokens.h"
42#include "util/u_debug.h"
43#include "util/u_math.h"
44#include "util/u_memory.h"
45#include "tgsi/tgsi_dump.h"
46#include "tgsi/tgsi_info.h"
47#include "tgsi/tgsi_parse.h"
48#include "tgsi/tgsi_util.h"
49#include "tgsi/tgsi_scan.h"
50#include "lp_bld_type.h"
51#include "lp_bld_const.h"
52#include "lp_bld_arit.h"
53#include "lp_bld_logic.h"
54#include "lp_bld_swizzle.h"
55#include "lp_bld_flow.h"
56#include "lp_bld_quad.h"
57#include "lp_bld_tgsi.h"
58#include "lp_bld_debug.h"
59
60
61/**
62 * Wrapper around lp_build_swizzle_aos which translates swizzles to another
63 * ordering.
64 */
65static LLVMValueRef
66swizzle_aos(struct lp_build_tgsi_context *bld_base,
67            LLVMValueRef a,
68            unsigned swizzle_x,
69            unsigned swizzle_y,
70            unsigned swizzle_z,
71            unsigned swizzle_w)
72{
73   unsigned char swizzles[4];
74   struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
75
76   assert(swizzle_x < 4);
77   assert(swizzle_y < 4);
78   assert(swizzle_z < 4);
79   assert(swizzle_w < 4);
80
81   swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
82   swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
83   swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
84   swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
85
86   return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
87}
88
89
90static LLVMValueRef
91swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
92                   LLVMValueRef a,
93                   unsigned chan)
94{
95   chan = bld->swizzles[chan];
96   return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan);
97}
98
99
100static LLVMValueRef
101emit_fetch_constant(
102   struct lp_build_tgsi_context * bld_base,
103   const struct tgsi_full_src_register * reg,
104   enum tgsi_opcode_type stype,
105   unsigned swizzle)
106{
107   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
108   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
109   struct lp_type type = bld_base->base.type;
110   LLVMValueRef res;
111   unsigned chan;
112
113   assert(!reg->Register.Indirect);
114
115   /*
116    * Get the constants components
117    */
118
119   res = bld->bld_base.base.undef;
120   for (chan = 0; chan < 4; ++chan) {
121      LLVMValueRef index;
122      LLVMValueRef scalar_ptr;
123      LLVMValueRef scalar;
124      LLVMValueRef swizzle;
125
126      index = lp_build_const_int32(bld->bld_base.base.gallivm,
127                                   reg->Register.Index * 4 + chan);
128
129      scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");
130
131      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
132
133      lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
134
135      /*
136       * NOTE: constants array is always assumed to be RGBA
137       */
138
139      swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
140                                     bld->swizzles[chan]);
141
142      res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
143   }
144
145   /*
146    * Broadcast the first quaternion to all others.
147    *
148    * XXX: could be factored into a reusable function.
149    */
150
151   if (type.length > 4) {
152      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
153      unsigned i;
154
155      for (chan = 0; chan < 4; ++chan) {
156         shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
157      }
158
159      for (i = 4; i < type.length; ++i) {
160         shuffles[i] = shuffles[i % 4];
161      }
162
163      res = LLVMBuildShuffleVector(builder,
164                                   res, bld->bld_base.base.undef,
165                                   LLVMConstVector(shuffles, type.length),
166                                   "");
167   }
168   return res;
169}
170
171static LLVMValueRef
172emit_fetch_immediate(
173   struct lp_build_tgsi_context * bld_base,
174   const struct tgsi_full_src_register * reg,
175   enum tgsi_opcode_type stype,
176   unsigned swizzle)
177{
178   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
179   LLVMValueRef res = bld->immediates[reg->Register.Index];
180   assert(res);
181   return res;
182}
183
184static LLVMValueRef
185emit_fetch_input(
186   struct lp_build_tgsi_context * bld_base,
187   const struct tgsi_full_src_register * reg,
188   enum tgsi_opcode_type stype,
189   unsigned swizzle)
190{
191   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
192   LLVMValueRef res = bld->inputs[reg->Register.Index];
193   assert(!reg->Register.Indirect);
194   assert(res);
195   return res;
196}
197
198static LLVMValueRef
199emit_fetch_temporary(
200   struct lp_build_tgsi_context * bld_base,
201   const struct tgsi_full_src_register * reg,
202   enum tgsi_opcode_type stype,
203   unsigned swizzle)
204{
205   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
206   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
207   LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
208   LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, "");
209   assert(!reg->Register.Indirect);
210   if (!res)
211      return bld->bld_base.base.undef;
212
213   return res;
214}
215
216/**
217 * Register store.
218 */
219void
220lp_emit_store_aos(
221   struct lp_build_tgsi_aos_context *bld,
222   const struct tgsi_full_instruction *inst,
223   unsigned index,
224   LLVMValueRef value)
225{
226   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
227   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
228   LLVMValueRef mask = NULL;
229   LLVMValueRef ptr;
230
231   /*
232    * Saturate the value
233    */
234
235   switch (inst->Instruction.Saturate) {
236   case TGSI_SAT_NONE:
237      break;
238
239   case TGSI_SAT_ZERO_ONE:
240      value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
241      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
242      break;
243
244   case TGSI_SAT_MINUS_PLUS_ONE:
245      value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
246      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
247      break;
248
249   default:
250      assert(0);
251   }
252
253   /*
254    * Translate the register file
255    */
256
257   assert(!reg->Register.Indirect);
258
259   switch (reg->Register.File) {
260   case TGSI_FILE_OUTPUT:
261      ptr = bld->outputs[reg->Register.Index];
262      break;
263
264   case TGSI_FILE_TEMPORARY:
265      ptr = bld->temps[reg->Register.Index];
266      break;
267
268   case TGSI_FILE_ADDRESS:
269      ptr = bld->addr[reg->Indirect.Index];
270      break;
271
272   case TGSI_FILE_PREDICATE:
273      ptr = bld->preds[reg->Register.Index];
274      break;
275
276   default:
277      assert(0);
278      return;
279   }
280
281   if (!ptr)
282      return;
283   /*
284    * Predicate
285    */
286
287   if (inst->Instruction.Predicate) {
288      LLVMValueRef pred;
289
290      assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);
291
292      pred = LLVMBuildLoad(builder,
293                           bld->preds[inst->Predicate.Index], "");
294
295      /*
296       * Convert the value to an integer mask.
297       */
298      pred = lp_build_compare(bld->bld_base.base.gallivm,
299                               bld->bld_base.base.type,
300                               PIPE_FUNC_NOTEQUAL,
301                               pred,
302                               bld->bld_base.base.zero);
303
304      if (inst->Predicate.Negate) {
305         pred = LLVMBuildNot(builder, pred, "");
306      }
307
308      pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred,
309                         inst->Predicate.SwizzleX,
310                         inst->Predicate.SwizzleY,
311                         inst->Predicate.SwizzleZ,
312                         inst->Predicate.SwizzleW);
313
314      if (mask) {
315         mask = LLVMBuildAnd(builder, mask, pred, "");
316      } else {
317         mask = pred;
318      }
319   }
320
321   /*
322    * Writemask
323    */
324
325   if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
326      LLVMValueRef writemask;
327
328      writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm,
329                                                   bld->bld_base.base.type,
330                                                   reg->Register.WriteMask,
331                                                   bld->swizzles);
332
333      if (mask) {
334         mask = LLVMBuildAnd(builder, mask, writemask, "");
335      } else {
336         mask = writemask;
337      }
338   }
339
340   if (mask) {
341      LLVMValueRef orig_value;
342
343      orig_value = LLVMBuildLoad(builder, ptr, "");
344      value = lp_build_select(&bld->bld_base.base,
345                              mask, value, orig_value);
346   }
347
348   LLVMBuildStore(builder, value, ptr);
349}
350
351
352/**
353 * High-level instruction translators.
354 */
355
356static LLVMValueRef
357emit_tex(struct lp_build_tgsi_aos_context *bld,
358         const struct tgsi_full_instruction *inst,
359         enum lp_build_tex_modifier modifier)
360{
361   unsigned target;
362   unsigned unit;
363   LLVMValueRef coords;
364   LLVMValueRef ddx;
365   LLVMValueRef ddy;
366
367   if (!bld->sampler) {
368      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
369      return bld->bld_base.base.undef;
370   }
371
372   target = inst->Texture.Texture;
373
374   coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
375
376   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
377      ddx = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
378      ddy = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
379      unit = inst->Src[3].Register.Index;
380   }  else {
381#if 0
382      ddx = lp_build_ddx( &bld->bld_base.base, coords );
383      ddy = lp_build_ddy( &bld->bld_base.base, coords );
384#else
385      /* TODO */
386      ddx = bld->bld_base.base.one;
387      ddy = bld->bld_base.base.one;
388#endif
389      unit = inst->Src[1].Register.Index;
390   }
391
392   return bld->sampler->emit_fetch_texel(bld->sampler,
393                                         &bld->bld_base.base,
394                                         target, unit,
395                                         coords, ddx, ddy,
396                                         modifier);
397}
398
399
400void
401lp_emit_declaration_aos(
402   struct lp_build_tgsi_aos_context *bld,
403   const struct tgsi_full_declaration *decl)
404{
405   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
406   LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
407
408   unsigned first = decl->Range.First;
409   unsigned last = decl->Range.Last;
410   unsigned idx;
411
412   for (idx = first; idx <= last; ++idx) {
413      switch (decl->Declaration.File) {
414      case TGSI_FILE_TEMPORARY:
415         assert(idx < LP_MAX_TGSI_TEMPS);
416         if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
417            LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
418            bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm,
419                                                     vec_type, array_size, "");
420         } else {
421            bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
422         }
423         break;
424
425      case TGSI_FILE_OUTPUT:
426         bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
427         break;
428
429      case TGSI_FILE_ADDRESS:
430         assert(idx < LP_MAX_TGSI_ADDRS);
431         bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
432         break;
433
434      case TGSI_FILE_PREDICATE:
435         assert(idx < LP_MAX_TGSI_PREDS);
436         bld->preds[idx] = lp_build_alloca(gallivm, vec_type, "");
437         break;
438
439      default:
440         /* don't need to declare other vars */
441         break;
442      }
443   }
444}
445
446
447/**
448 * Emit LLVM for one TGSI instruction.
449 * \param return TRUE for success, FALSE otherwise
450 */
451boolean
452lp_emit_instruction_aos(
453   struct lp_build_tgsi_aos_context *bld,
454   const struct tgsi_full_instruction *inst,
455   const struct tgsi_opcode_info *info,
456   int *pc)
457{
458   LLVMValueRef src0, src1, src2;
459   LLVMValueRef tmp0, tmp1;
460   LLVMValueRef dst0 = NULL;
461
462   /*
463    * Stores and write masks are handled in a general fashion after the long
464    * instruction opcode switch statement.
465    *
466    * Although not stricitly necessary, we avoid generating instructions for
467    * channels which won't be stored, in cases where's that easy. For some
468    * complex instructions, like texture sampling, it is more convenient to
469    * assume a full writemask and then let LLVM optimization passes eliminate
470    * redundant code.
471    */
472
473   (*pc)++;
474
475   assert(info->num_dst <= 1);
476   if (info->num_dst) {
477      dst0 = bld->bld_base.base.undef;
478   }
479
480   switch (inst->Instruction.Opcode) {
481   case TGSI_OPCODE_ARL:
482      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
483      dst0 = lp_build_floor(&bld->bld_base.base, src0);
484      break;
485
486   case TGSI_OPCODE_MOV:
487      dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
488      break;
489
490   case TGSI_OPCODE_LIT:
491      return FALSE;
492
493   case TGSI_OPCODE_RCP:
494   /* TGSI_OPCODE_RECIP */
495      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
496      dst0 = lp_build_rcp(&bld->bld_base.base, src0);
497      break;
498
499   case TGSI_OPCODE_RSQ:
500   /* TGSI_OPCODE_RECIPSQRT */
501      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
502      tmp0 = lp_build_emit_llvm_unary(&bld->bld_base, TGSI_OPCODE_ABS, src0);
503      dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
504      break;
505
506   case TGSI_OPCODE_EXP:
507      return FALSE;
508
509   case TGSI_OPCODE_LOG:
510      return FALSE;
511
512   case TGSI_OPCODE_MUL:
513      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
514      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
515      dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
516      break;
517
518   case TGSI_OPCODE_ADD:
519      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
520      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
521      dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
522      break;
523
524   case TGSI_OPCODE_DP3:
525   /* TGSI_OPCODE_DOT3 */
526      return FALSE;
527
528   case TGSI_OPCODE_DP4:
529   /* TGSI_OPCODE_DOT4 */
530      return FALSE;
531
532   case TGSI_OPCODE_DST:
533      return FALSE;
534
535   case TGSI_OPCODE_MIN:
536      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
537      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
538      dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
539      break;
540
541   case TGSI_OPCODE_MAX:
542      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
543      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
544      dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
545      break;
546
547   case TGSI_OPCODE_SLT:
548   /* TGSI_OPCODE_SETLT */
549      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
550      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
551      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
552      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
553      break;
554
555   case TGSI_OPCODE_SGE:
556   /* TGSI_OPCODE_SETGE */
557      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
558      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
559      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
560      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
561      break;
562
563   case TGSI_OPCODE_MAD:
564   /* TGSI_OPCODE_MADD */
565      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
566      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
567      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
568      tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
569      dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
570      break;
571
572   case TGSI_OPCODE_SUB:
573      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
574      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
575      dst0 = lp_build_sub(&bld->bld_base.base, src0, src1);
576      break;
577
578   case TGSI_OPCODE_LRP:
579      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
580      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
581      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
582      tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
583      tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
584      dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
585      break;
586
587   case TGSI_OPCODE_CND:
588      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
589      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
590      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
591      tmp1 = lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, 0.5);
592      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src2, tmp1);
593      dst0 = lp_build_select(&bld->bld_base.base, tmp0, src0, src1);
594      break;
595
596   case TGSI_OPCODE_DP2A:
597      return FALSE;
598
599   case TGSI_OPCODE_FRC:
600      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
601      tmp0 = lp_build_floor(&bld->bld_base.base, src0);
602      dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
603      break;
604
605   case TGSI_OPCODE_CLAMP:
606      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
607      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
608      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
609      tmp0 = lp_build_max(&bld->bld_base.base, src0, src1);
610      dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2);
611      break;
612
613   case TGSI_OPCODE_FLR:
614      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
615      dst0 = lp_build_floor(&bld->bld_base.base, src0);
616      break;
617
618   case TGSI_OPCODE_ROUND:
619      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
620      dst0 = lp_build_round(&bld->bld_base.base, src0);
621      break;
622
623   case TGSI_OPCODE_EX2:
624      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
625      tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X);
626      dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
627      break;
628
629   case TGSI_OPCODE_LG2:
630      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
631      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
632      dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
633      break;
634
635   case TGSI_OPCODE_POW:
636      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
637      src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
638      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
639      src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
640      dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
641      break;
642
643   case TGSI_OPCODE_XPD:
644      return FALSE;
645
646   case TGSI_OPCODE_RCC:
647      /* deprecated? */
648      assert(0);
649      return FALSE;
650
651   case TGSI_OPCODE_DPH:
652      return FALSE;
653
654   case TGSI_OPCODE_COS:
655      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
656      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
657      dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
658      break;
659
660   case TGSI_OPCODE_DDX:
661      return FALSE;
662
663   case TGSI_OPCODE_DDY:
664      return FALSE;
665
666   case TGSI_OPCODE_KILP:
667      /* predicated kill */
668      return FALSE;
669
670   case TGSI_OPCODE_KIL:
671      /* conditional kill */
672      return FALSE;
673
674   case TGSI_OPCODE_PK2H:
675      return FALSE;
676      break;
677
678   case TGSI_OPCODE_PK2US:
679      return FALSE;
680      break;
681
682   case TGSI_OPCODE_PK4B:
683      return FALSE;
684      break;
685
686   case TGSI_OPCODE_PK4UB:
687      return FALSE;
688
689   case TGSI_OPCODE_RFL:
690      return FALSE;
691
692   case TGSI_OPCODE_SEQ:
693      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
694      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
695      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
696      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
697      break;
698
699   case TGSI_OPCODE_SFL:
700      dst0 = bld->bld_base.base.zero;
701      break;
702
703   case TGSI_OPCODE_SGT:
704      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
705      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
706      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
707      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
708      break;
709
710   case TGSI_OPCODE_SIN:
711      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
712      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
713      dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
714      break;
715
716   case TGSI_OPCODE_SLE:
717      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
718      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
719      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
720      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
721      break;
722
723   case TGSI_OPCODE_SNE:
724      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
725      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
726      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
727      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
728      break;
729
730   case TGSI_OPCODE_STR:
731      dst0 = bld->bld_base.base.one;
732      break;
733
734   case TGSI_OPCODE_TEX:
735      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
736      break;
737
738   case TGSI_OPCODE_TXD:
739      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
740      break;
741
742   case TGSI_OPCODE_UP2H:
743      /* deprecated */
744      assert (0);
745      return FALSE;
746      break;
747
748   case TGSI_OPCODE_UP2US:
749      /* deprecated */
750      assert(0);
751      return FALSE;
752      break;
753
754   case TGSI_OPCODE_UP4B:
755      /* deprecated */
756      assert(0);
757      return FALSE;
758      break;
759
760   case TGSI_OPCODE_UP4UB:
761      /* deprecated */
762      assert(0);
763      return FALSE;
764      break;
765
766   case TGSI_OPCODE_X2D:
767      /* deprecated? */
768      assert(0);
769      return FALSE;
770      break;
771
772   case TGSI_OPCODE_ARA:
773      /* deprecated */
774      assert(0);
775      return FALSE;
776      break;
777
778   case TGSI_OPCODE_ARR:
779      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
780      dst0 = lp_build_round(&bld->bld_base.base, src0);
781      break;
782
783   case TGSI_OPCODE_BRA:
784      /* deprecated */
785      assert(0);
786      return FALSE;
787      break;
788
789   case TGSI_OPCODE_CAL:
790      return FALSE;
791
792   case TGSI_OPCODE_RET:
793      return FALSE;
794
795   case TGSI_OPCODE_END:
796      *pc = -1;
797      break;
798
799   case TGSI_OPCODE_SSG:
800   /* TGSI_OPCODE_SGN */
801      tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
802      dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
803      break;
804
805   case TGSI_OPCODE_CMP:
806      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
807      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
808      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
809      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
810      dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
811      break;
812
813   case TGSI_OPCODE_SCS:
814      return FALSE;
815
816   case TGSI_OPCODE_TXB:
817      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
818      break;
819
820   case TGSI_OPCODE_NRM:
821      /* fall-through */
822   case TGSI_OPCODE_NRM4:
823      return FALSE;
824
825   case TGSI_OPCODE_DIV:
826      /* deprecated */
827      assert(0);
828      return FALSE;
829      break;
830
831   case TGSI_OPCODE_DP2:
832      return FALSE;
833
834   case TGSI_OPCODE_TXL:
835      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
836      break;
837
838   case TGSI_OPCODE_TXP:
839      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
840      break;
841
842   case TGSI_OPCODE_BRK:
843      return FALSE;
844
845   case TGSI_OPCODE_IF:
846      return FALSE;
847
848   case TGSI_OPCODE_BGNLOOP:
849      return FALSE;
850
851   case TGSI_OPCODE_BGNSUB:
852      return FALSE;
853
854   case TGSI_OPCODE_ELSE:
855      return FALSE;
856
857   case TGSI_OPCODE_ENDIF:
858      return FALSE;
859
860   case TGSI_OPCODE_ENDLOOP:
861      return FALSE;
862
863   case TGSI_OPCODE_ENDSUB:
864      return FALSE;
865
866   case TGSI_OPCODE_PUSHA:
867      /* deprecated? */
868      assert(0);
869      return FALSE;
870      break;
871
872   case TGSI_OPCODE_POPA:
873      /* deprecated? */
874      assert(0);
875      return FALSE;
876      break;
877
878   case TGSI_OPCODE_CEIL:
879      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
880      dst0 = lp_build_ceil(&bld->bld_base.base, src0);
881      break;
882
883   case TGSI_OPCODE_I2F:
884      /* deprecated? */
885      assert(0);
886      return FALSE;
887      break;
888
889   case TGSI_OPCODE_NOT:
890      /* deprecated? */
891      assert(0);
892      return FALSE;
893      break;
894
895   case TGSI_OPCODE_TRUNC:
896      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
897      dst0 = lp_build_trunc(&bld->bld_base.base, src0);
898      break;
899
900   case TGSI_OPCODE_SHL:
901      /* deprecated? */
902      assert(0);
903      return FALSE;
904      break;
905
906   case TGSI_OPCODE_ISHR:
907      /* deprecated? */
908      assert(0);
909      return FALSE;
910      break;
911
912   case TGSI_OPCODE_AND:
913      /* deprecated? */
914      assert(0);
915      return FALSE;
916      break;
917
918   case TGSI_OPCODE_OR:
919      /* deprecated? */
920      assert(0);
921      return FALSE;
922      break;
923
924   case TGSI_OPCODE_MOD:
925      /* deprecated? */
926      assert(0);
927      return FALSE;
928      break;
929
930   case TGSI_OPCODE_XOR:
931      /* deprecated? */
932      assert(0);
933      return FALSE;
934      break;
935
936   case TGSI_OPCODE_SAD:
937      /* deprecated? */
938      assert(0);
939      return FALSE;
940      break;
941
942   case TGSI_OPCODE_TXF:
943      /* deprecated? */
944      assert(0);
945      return FALSE;
946      break;
947
948   case TGSI_OPCODE_TXQ:
949      /* deprecated? */
950      assert(0);
951      return FALSE;
952      break;
953
954   case TGSI_OPCODE_CONT:
955      return FALSE;
956
957   case TGSI_OPCODE_EMIT:
958      return FALSE;
959      break;
960
961   case TGSI_OPCODE_ENDPRIM:
962      return FALSE;
963      break;
964
965   case TGSI_OPCODE_NOP:
966      break;
967
968   default:
969      return FALSE;
970   }
971
972   if (info->num_dst) {
973      lp_emit_store_aos(bld, inst, 0, dst0);
974   }
975
976   return TRUE;
977}
978
979
980void
981lp_build_tgsi_aos(struct gallivm_state *gallivm,
982                  const struct tgsi_token *tokens,
983                  struct lp_type type,
984                  const unsigned char swizzles[4],
985                  LLVMValueRef consts_ptr,
986                  const LLVMValueRef *inputs,
987                  LLVMValueRef *outputs,
988                  struct lp_build_sampler_aos *sampler,
989                  const struct tgsi_shader_info *info)
990{
991   struct lp_build_tgsi_aos_context bld;
992   struct tgsi_parse_context parse;
993   uint num_immediates = 0;
994   unsigned chan;
995   int pc = 0;
996
997   /* Setup build context */
998   memset(&bld, 0, sizeof bld);
999   lp_build_context_init(&bld.bld_base.base, gallivm, type);
1000   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
1001   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
1002   lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
1003
1004   for (chan = 0; chan < 4; ++chan) {
1005      bld.swizzles[chan] = swizzles[chan];
1006      bld.inv_swizzles[swizzles[chan]] = chan;
1007   }
1008
1009   bld.inputs = inputs;
1010   bld.outputs = outputs;
1011   bld.consts_ptr = consts_ptr;
1012   bld.sampler = sampler;
1013   bld.indirect_files = info->indirect_files;
1014   bld.bld_base.emit_swizzle = swizzle_aos;
1015   bld.bld_base.info = info;
1016
1017   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
1018   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
1019   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
1020   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
1021
1022   /* Set opcode actions */
1023   lp_set_default_actions_cpu(&bld.bld_base);
1024
1025   if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
1026      return;
1027   }
1028
1029   tgsi_parse_init(&parse, tokens);
1030
1031   while (!tgsi_parse_end_of_tokens(&parse)) {
1032      tgsi_parse_token(&parse);
1033
1034      switch(parse.FullToken.Token.Type) {
1035      case TGSI_TOKEN_TYPE_DECLARATION:
1036         /* Inputs already interpolated */
1037         lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration);
1038         break;
1039
1040      case TGSI_TOKEN_TYPE_INSTRUCTION:
1041         /* save expanded instruction */
1042         lp_bld_tgsi_add_instruction(&bld.bld_base,
1043                                     &parse.FullToken.FullInstruction);
1044         break;
1045
1046      case TGSI_TOKEN_TYPE_IMMEDIATE:
1047         /* simply copy the immediate values into the next immediates[] slot */
1048         {
1049            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1050            float imm[4];
1051            assert(size <= 4);
1052            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
1053            for (chan = 0; chan < 4; ++chan) {
1054               imm[chan] = 0.0f;
1055            }
1056            for (chan = 0; chan < size; ++chan) {
1057               unsigned swizzle = bld.swizzles[chan];
1058               imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
1059            }
1060            bld.immediates[num_immediates] =
1061                     lp_build_const_aos(gallivm, type,
1062                                        imm[0], imm[1], imm[2], imm[3],
1063                                        NULL);
1064            num_immediates++;
1065         }
1066         break;
1067
1068      case TGSI_TOKEN_TYPE_PROPERTY:
1069         break;
1070
1071      default:
1072         assert(0);
1073      }
1074   }
1075
1076   while (pc != -1) {
1077      struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc;
1078      const struct tgsi_opcode_info *opcode_info =
1079         tgsi_get_opcode_info(instr->Instruction.Opcode);
1080      if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
1081         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1082                       opcode_info->mnemonic);
1083   }
1084
1085   if (0) {
1086      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
1087      LLVMValueRef function = LLVMGetBasicBlockParent(block);
1088      debug_printf("11111111111111111111111111111 \n");
1089      tgsi_dump(tokens, 0);
1090      lp_debug_dump_value(function);
1091      debug_printf("2222222222222222222222222222 \n");
1092   }
1093   tgsi_parse_free(&parse);
1094   FREE(bld.bld_base.instructions);
1095
1096   if (0) {
1097      LLVMModuleRef module = LLVMGetGlobalParent(
1098         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
1099      LLVMDumpModule(module);
1100   }
1101
1102}
1103
1104