1/**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * @file
30 * TGSI to LLVM IR translation -- AoS.
31 *
32 * FIXME:
 * - No control flow support: the existing control flow code should be factored
 * out of the SoA code into a common module and shared.
 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
36 *
37 * @author Jose Fonseca <jfonseca@vmware.com>
38 */
39
40#include "pipe/p_config.h"
41#include "pipe/p_shader_tokens.h"
42#include "util/u_debug.h"
43#include "util/u_math.h"
44#include "util/u_memory.h"
45#include "tgsi/tgsi_dump.h"
46#include "tgsi/tgsi_info.h"
47#include "tgsi/tgsi_parse.h"
48#include "tgsi/tgsi_util.h"
49#include "tgsi/tgsi_scan.h"
50#include "lp_bld_type.h"
51#include "lp_bld_const.h"
52#include "lp_bld_arit.h"
53#include "lp_bld_logic.h"
54#include "lp_bld_swizzle.h"
55#include "lp_bld_flow.h"
56#include "lp_bld_quad.h"
57#include "lp_bld_tgsi.h"
58#include "lp_bld_debug.h"
59#include "lp_bld_sample.h"
60
61
62/**
63 * Wrapper around lp_build_swizzle_aos which translates swizzles to another
64 * ordering.
65 */
66static LLVMValueRef
67swizzle_aos(struct lp_build_tgsi_context *bld_base,
68            LLVMValueRef a,
69            unsigned swizzle_x,
70            unsigned swizzle_y,
71            unsigned swizzle_z,
72            unsigned swizzle_w)
73{
74   unsigned char swizzles[4];
75   struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
76
77   assert(swizzle_x < 4);
78   assert(swizzle_y < 4);
79   assert(swizzle_z < 4);
80   assert(swizzle_w < 4);
81
82   swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
83   swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
84   swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
85   swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
86
87   return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
88}
89
90
91static LLVMValueRef
92swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
93                   LLVMValueRef a,
94                   unsigned chan)
95{
96   chan = bld->swizzles[chan];
97   return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan);
98}
99
100
101static LLVMValueRef
102emit_fetch_constant(
103   struct lp_build_tgsi_context * bld_base,
104   const struct tgsi_full_src_register * reg,
105   enum tgsi_opcode_type stype,
106   unsigned swizzle)
107{
108   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
109   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
110   struct lp_type type = bld_base->base.type;
111   LLVMValueRef res;
112   unsigned chan;
113
114   assert(!reg->Register.Indirect);
115
116   /*
117    * Get the constants components
118    */
119
120   res = bld->bld_base.base.undef;
121   for (chan = 0; chan < 4; ++chan) {
122      LLVMValueRef index;
123      LLVMValueRef scalar_ptr;
124      LLVMValueRef scalar;
125      LLVMValueRef swizzle;
126
127      index = lp_build_const_int32(bld->bld_base.base.gallivm,
128                                   reg->Register.Index * 4 + chan);
129
130      scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");
131
132      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
133
134      lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
135
136      /*
137       * NOTE: constants array is always assumed to be RGBA
138       */
139
140      swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
141                                     bld->swizzles[chan]);
142
143      res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
144   }
145
146   /*
147    * Broadcast the first quaternion to all others.
148    *
149    * XXX: could be factored into a reusable function.
150    */
151
152   if (type.length > 4) {
153      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
154      unsigned i;
155
156      for (chan = 0; chan < 4; ++chan) {
157         shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
158      }
159
160      for (i = 4; i < type.length; ++i) {
161         shuffles[i] = shuffles[i % 4];
162      }
163
164      res = LLVMBuildShuffleVector(builder,
165                                   res, bld->bld_base.base.undef,
166                                   LLVMConstVector(shuffles, type.length),
167                                   "");
168   }
169   return res;
170}
171
172static LLVMValueRef
173emit_fetch_immediate(
174   struct lp_build_tgsi_context * bld_base,
175   const struct tgsi_full_src_register * reg,
176   enum tgsi_opcode_type stype,
177   unsigned swizzle)
178{
179   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
180   LLVMValueRef res = bld->immediates[reg->Register.Index];
181   assert(res);
182   return res;
183}
184
185static LLVMValueRef
186emit_fetch_input(
187   struct lp_build_tgsi_context * bld_base,
188   const struct tgsi_full_src_register * reg,
189   enum tgsi_opcode_type stype,
190   unsigned swizzle)
191{
192   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
193   LLVMValueRef res = bld->inputs[reg->Register.Index];
194   assert(!reg->Register.Indirect);
195   assert(res);
196   return res;
197}
198
199static LLVMValueRef
200emit_fetch_temporary(
201   struct lp_build_tgsi_context * bld_base,
202   const struct tgsi_full_src_register * reg,
203   enum tgsi_opcode_type stype,
204   unsigned swizzle)
205{
206   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
207   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
208   LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
209   LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, "");
210   assert(!reg->Register.Indirect);
211   if (!res)
212      return bld->bld_base.base.undef;
213
214   return res;
215}
216
217/**
218 * Register store.
219 */
220void
221lp_emit_store_aos(
222   struct lp_build_tgsi_aos_context *bld,
223   const struct tgsi_full_instruction *inst,
224   unsigned index,
225   LLVMValueRef value)
226{
227   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
228   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
229   LLVMValueRef mask = NULL;
230   LLVMValueRef ptr;
231
232   /*
233    * Saturate the value
234    */
235
236   switch (inst->Instruction.Saturate) {
237   case TGSI_SAT_NONE:
238      break;
239
240   case TGSI_SAT_ZERO_ONE:
241      value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
242      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
243      break;
244
245   case TGSI_SAT_MINUS_PLUS_ONE:
246      value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
247      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
248      break;
249
250   default:
251      assert(0);
252   }
253
254   /*
255    * Translate the register file
256    */
257
258   assert(!reg->Register.Indirect);
259
260   switch (reg->Register.File) {
261   case TGSI_FILE_OUTPUT:
262      ptr = bld->outputs[reg->Register.Index];
263      break;
264
265   case TGSI_FILE_TEMPORARY:
266      ptr = bld->temps[reg->Register.Index];
267      break;
268
269   case TGSI_FILE_ADDRESS:
270      ptr = bld->addr[reg->Indirect.Index];
271      break;
272
273   case TGSI_FILE_PREDICATE:
274      ptr = bld->preds[reg->Register.Index];
275      break;
276
277   default:
278      assert(0);
279      return;
280   }
281
282   if (!ptr)
283      return;
284   /*
285    * Predicate
286    */
287
288   if (inst->Instruction.Predicate) {
289      LLVMValueRef pred;
290
291      assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);
292
293      pred = LLVMBuildLoad(builder,
294                           bld->preds[inst->Predicate.Index], "");
295
296      /*
297       * Convert the value to an integer mask.
298       */
299      pred = lp_build_compare(bld->bld_base.base.gallivm,
300                               bld->bld_base.base.type,
301                               PIPE_FUNC_NOTEQUAL,
302                               pred,
303                               bld->bld_base.base.zero);
304
305      if (inst->Predicate.Negate) {
306         pred = LLVMBuildNot(builder, pred, "");
307      }
308
309      pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred,
310                         inst->Predicate.SwizzleX,
311                         inst->Predicate.SwizzleY,
312                         inst->Predicate.SwizzleZ,
313                         inst->Predicate.SwizzleW);
314
315      if (mask) {
316         mask = LLVMBuildAnd(builder, mask, pred, "");
317      } else {
318         mask = pred;
319      }
320   }
321
322   /*
323    * Writemask
324    */
325
326   if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
327      LLVMValueRef writemask;
328
329      writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm,
330                                                   bld->bld_base.base.type,
331                                                   reg->Register.WriteMask,
332                                                   bld->swizzles);
333
334      if (mask) {
335         mask = LLVMBuildAnd(builder, mask, writemask, "");
336      } else {
337         mask = writemask;
338      }
339   }
340
341   if (mask) {
342      LLVMValueRef orig_value;
343
344      orig_value = LLVMBuildLoad(builder, ptr, "");
345      value = lp_build_select(&bld->bld_base.base,
346                              mask, value, orig_value);
347   }
348
349   LLVMBuildStore(builder, value, ptr);
350}
351
352
353/**
354 * High-level instruction translators.
355 */
356
357static LLVMValueRef
358emit_tex(struct lp_build_tgsi_aos_context *bld,
359         const struct tgsi_full_instruction *inst,
360         enum lp_build_tex_modifier modifier)
361{
362   unsigned target;
363   unsigned unit;
364   LLVMValueRef coords;
365   LLVMValueRef ddx;
366   LLVMValueRef ddy;
367   struct lp_derivatives derivs;
368
369   if (!bld->sampler) {
370      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
371      return bld->bld_base.base.undef;
372   }
373
374   target = inst->Texture.Texture;
375
376   coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
377
378   if (0 && modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
379      ddx = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
380      ddy = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
381      unit = inst->Src[3].Register.Index;
382   }  else {
383#if 0
384      ddx = lp_build_ddx( &bld->bld_base.base, coords );
385      ddy = lp_build_ddy( &bld->bld_base.base, coords );
386#else
387      /* TODO */
388      derivs.ddx_ddy[0] = bld->bld_base.base.one;
389      derivs.ddx_ddy[1] = bld->bld_base.base.one;
390#endif
391      unit = inst->Src[1].Register.Index;
392   }
393
394   return bld->sampler->emit_fetch_texel(bld->sampler,
395                                         &bld->bld_base.base,
396                                         target, unit,
397                                         coords, derivs,
398                                         modifier);
399}
400
401
402void
403lp_emit_declaration_aos(
404   struct lp_build_tgsi_aos_context *bld,
405   const struct tgsi_full_declaration *decl)
406{
407   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
408   LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
409
410   unsigned first = decl->Range.First;
411   unsigned last = decl->Range.Last;
412   unsigned idx;
413
414   for (idx = first; idx <= last; ++idx) {
415      switch (decl->Declaration.File) {
416      case TGSI_FILE_TEMPORARY:
417         assert(idx < LP_MAX_TGSI_TEMPS);
418         if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
419            LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
420            bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm,
421                                                     vec_type, array_size, "");
422         } else {
423            bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
424         }
425         break;
426
427      case TGSI_FILE_OUTPUT:
428         bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
429         break;
430
431      case TGSI_FILE_ADDRESS:
432         assert(idx < LP_MAX_TGSI_ADDRS);
433         bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
434         break;
435
436      case TGSI_FILE_PREDICATE:
437         assert(idx < LP_MAX_TGSI_PREDS);
438         bld->preds[idx] = lp_build_alloca(gallivm, vec_type, "");
439         break;
440
441      default:
442         /* don't need to declare other vars */
443         break;
444      }
445   }
446}
447
448
449/**
450 * Emit LLVM for one TGSI instruction.
451 * \param return TRUE for success, FALSE otherwise
452 */
453boolean
454lp_emit_instruction_aos(
455   struct lp_build_tgsi_aos_context *bld,
456   const struct tgsi_full_instruction *inst,
457   const struct tgsi_opcode_info *info,
458   int *pc)
459{
460   LLVMValueRef src0, src1, src2;
461   LLVMValueRef tmp0, tmp1;
462   LLVMValueRef dst0 = NULL;
463
464   /*
465    * Stores and write masks are handled in a general fashion after the long
466    * instruction opcode switch statement.
467    *
468    * Although not stricitly necessary, we avoid generating instructions for
469    * channels which won't be stored, in cases where's that easy. For some
470    * complex instructions, like texture sampling, it is more convenient to
471    * assume a full writemask and then let LLVM optimization passes eliminate
472    * redundant code.
473    */
474
475   (*pc)++;
476
477   assert(info->num_dst <= 1);
478   if (info->num_dst) {
479      dst0 = bld->bld_base.base.undef;
480   }
481
482   switch (inst->Instruction.Opcode) {
483   case TGSI_OPCODE_ARL:
484      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
485      dst0 = lp_build_floor(&bld->bld_base.base, src0);
486      break;
487
488   case TGSI_OPCODE_MOV:
489      dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
490      break;
491
492   case TGSI_OPCODE_LIT:
493      return FALSE;
494
495   case TGSI_OPCODE_RCP:
496   /* TGSI_OPCODE_RECIP */
497      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
498      dst0 = lp_build_rcp(&bld->bld_base.base, src0);
499      break;
500
501   case TGSI_OPCODE_RSQ:
502   /* TGSI_OPCODE_RECIPSQRT */
503      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
504      tmp0 = lp_build_emit_llvm_unary(&bld->bld_base, TGSI_OPCODE_ABS, src0);
505      dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
506      break;
507
508   case TGSI_OPCODE_EXP:
509      return FALSE;
510
511   case TGSI_OPCODE_LOG:
512      return FALSE;
513
514   case TGSI_OPCODE_MUL:
515      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
516      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
517      dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
518      break;
519
520   case TGSI_OPCODE_ADD:
521      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
522      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
523      dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
524      break;
525
526   case TGSI_OPCODE_DP3:
527   /* TGSI_OPCODE_DOT3 */
528      return FALSE;
529
530   case TGSI_OPCODE_DP4:
531   /* TGSI_OPCODE_DOT4 */
532      return FALSE;
533
534   case TGSI_OPCODE_DST:
535      return FALSE;
536
537   case TGSI_OPCODE_MIN:
538      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
539      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
540      dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
541      break;
542
543   case TGSI_OPCODE_MAX:
544      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
545      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
546      dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
547      break;
548
549   case TGSI_OPCODE_SLT:
550   /* TGSI_OPCODE_SETLT */
551      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
552      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
553      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
554      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
555      break;
556
557   case TGSI_OPCODE_SGE:
558   /* TGSI_OPCODE_SETGE */
559      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
560      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
561      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
562      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
563      break;
564
565   case TGSI_OPCODE_MAD:
566   /* TGSI_OPCODE_MADD */
567      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
568      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
569      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
570      tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
571      dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
572      break;
573
574   case TGSI_OPCODE_SUB:
575      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
576      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
577      dst0 = lp_build_sub(&bld->bld_base.base, src0, src1);
578      break;
579
580   case TGSI_OPCODE_LRP:
581      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
582      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
583      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
584      tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
585      tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
586      dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
587      break;
588
589   case TGSI_OPCODE_CND:
590      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
591      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
592      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
593      tmp1 = lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, 0.5);
594      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src2, tmp1);
595      dst0 = lp_build_select(&bld->bld_base.base, tmp0, src0, src1);
596      break;
597
598   case TGSI_OPCODE_DP2A:
599      return FALSE;
600
601   case TGSI_OPCODE_FRC:
602      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
603      tmp0 = lp_build_floor(&bld->bld_base.base, src0);
604      dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
605      break;
606
607   case TGSI_OPCODE_CLAMP:
608      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
609      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
610      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
611      tmp0 = lp_build_max(&bld->bld_base.base, src0, src1);
612      dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2);
613      break;
614
615   case TGSI_OPCODE_FLR:
616      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
617      dst0 = lp_build_floor(&bld->bld_base.base, src0);
618      break;
619
620   case TGSI_OPCODE_ROUND:
621      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
622      dst0 = lp_build_round(&bld->bld_base.base, src0);
623      break;
624
625   case TGSI_OPCODE_EX2:
626      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
627      tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X);
628      dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
629      break;
630
631   case TGSI_OPCODE_LG2:
632      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
633      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
634      dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
635      break;
636
637   case TGSI_OPCODE_POW:
638      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
639      src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
640      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
641      src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
642      dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
643      break;
644
645   case TGSI_OPCODE_XPD:
646      return FALSE;
647
648   case TGSI_OPCODE_RCC:
649      /* deprecated? */
650      assert(0);
651      return FALSE;
652
653   case TGSI_OPCODE_DPH:
654      return FALSE;
655
656   case TGSI_OPCODE_COS:
657      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
658      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
659      dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
660      break;
661
662   case TGSI_OPCODE_DDX:
663      return FALSE;
664
665   case TGSI_OPCODE_DDY:
666      return FALSE;
667
668   case TGSI_OPCODE_KILP:
669      /* predicated kill */
670      return FALSE;
671
672   case TGSI_OPCODE_KIL:
673      /* conditional kill */
674      return FALSE;
675
676   case TGSI_OPCODE_PK2H:
677      return FALSE;
678      break;
679
680   case TGSI_OPCODE_PK2US:
681      return FALSE;
682      break;
683
684   case TGSI_OPCODE_PK4B:
685      return FALSE;
686      break;
687
688   case TGSI_OPCODE_PK4UB:
689      return FALSE;
690
691   case TGSI_OPCODE_RFL:
692      return FALSE;
693
694   case TGSI_OPCODE_SEQ:
695      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
696      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
697      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
698      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
699      break;
700
701   case TGSI_OPCODE_SFL:
702      dst0 = bld->bld_base.base.zero;
703      break;
704
705   case TGSI_OPCODE_SGT:
706      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
707      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
708      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
709      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
710      break;
711
712   case TGSI_OPCODE_SIN:
713      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
714      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
715      dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
716      break;
717
718   case TGSI_OPCODE_SLE:
719      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
720      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
721      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
722      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
723      break;
724
725   case TGSI_OPCODE_SNE:
726      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
727      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
728      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
729      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
730      break;
731
732   case TGSI_OPCODE_STR:
733      dst0 = bld->bld_base.base.one;
734      break;
735
736   case TGSI_OPCODE_TEX:
737      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
738      break;
739
740   case TGSI_OPCODE_TXD:
741      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
742      break;
743
744   case TGSI_OPCODE_UP2H:
745      /* deprecated */
746      assert (0);
747      return FALSE;
748      break;
749
750   case TGSI_OPCODE_UP2US:
751      /* deprecated */
752      assert(0);
753      return FALSE;
754      break;
755
756   case TGSI_OPCODE_UP4B:
757      /* deprecated */
758      assert(0);
759      return FALSE;
760      break;
761
762   case TGSI_OPCODE_UP4UB:
763      /* deprecated */
764      assert(0);
765      return FALSE;
766      break;
767
768   case TGSI_OPCODE_X2D:
769      /* deprecated? */
770      assert(0);
771      return FALSE;
772      break;
773
774   case TGSI_OPCODE_ARA:
775      /* deprecated */
776      assert(0);
777      return FALSE;
778      break;
779
780   case TGSI_OPCODE_ARR:
781      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
782      dst0 = lp_build_round(&bld->bld_base.base, src0);
783      break;
784
785   case TGSI_OPCODE_BRA:
786      /* deprecated */
787      assert(0);
788      return FALSE;
789      break;
790
791   case TGSI_OPCODE_CAL:
792      return FALSE;
793
794   case TGSI_OPCODE_RET:
795      return FALSE;
796
797   case TGSI_OPCODE_END:
798      *pc = -1;
799      break;
800
801   case TGSI_OPCODE_SSG:
802   /* TGSI_OPCODE_SGN */
803      tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
804      dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
805      break;
806
807   case TGSI_OPCODE_CMP:
808      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
809      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
810      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
811      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
812      dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
813      break;
814
815   case TGSI_OPCODE_SCS:
816      return FALSE;
817
818   case TGSI_OPCODE_TXB:
819      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
820      break;
821
822   case TGSI_OPCODE_NRM:
823      /* fall-through */
824   case TGSI_OPCODE_NRM4:
825      return FALSE;
826
827   case TGSI_OPCODE_DIV:
828      /* deprecated */
829      assert(0);
830      return FALSE;
831      break;
832
833   case TGSI_OPCODE_DP2:
834      return FALSE;
835
836   case TGSI_OPCODE_TXL:
837      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
838      break;
839
840   case TGSI_OPCODE_TXP:
841      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
842      break;
843
844   case TGSI_OPCODE_BRK:
845      return FALSE;
846
847   case TGSI_OPCODE_IF:
848      return FALSE;
849
850   case TGSI_OPCODE_BGNLOOP:
851      return FALSE;
852
853   case TGSI_OPCODE_BGNSUB:
854      return FALSE;
855
856   case TGSI_OPCODE_ELSE:
857      return FALSE;
858
859   case TGSI_OPCODE_ENDIF:
860      return FALSE;
861
862   case TGSI_OPCODE_ENDLOOP:
863      return FALSE;
864
865   case TGSI_OPCODE_ENDSUB:
866      return FALSE;
867
868   case TGSI_OPCODE_PUSHA:
869      /* deprecated? */
870      assert(0);
871      return FALSE;
872      break;
873
874   case TGSI_OPCODE_POPA:
875      /* deprecated? */
876      assert(0);
877      return FALSE;
878      break;
879
880   case TGSI_OPCODE_CEIL:
881      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
882      dst0 = lp_build_ceil(&bld->bld_base.base, src0);
883      break;
884
885   case TGSI_OPCODE_I2F:
886      /* deprecated? */
887      assert(0);
888      return FALSE;
889      break;
890
891   case TGSI_OPCODE_NOT:
892      /* deprecated? */
893      assert(0);
894      return FALSE;
895      break;
896
897   case TGSI_OPCODE_TRUNC:
898      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
899      dst0 = lp_build_trunc(&bld->bld_base.base, src0);
900      break;
901
902   case TGSI_OPCODE_SHL:
903      /* deprecated? */
904      assert(0);
905      return FALSE;
906      break;
907
908   case TGSI_OPCODE_ISHR:
909      /* deprecated? */
910      assert(0);
911      return FALSE;
912      break;
913
914   case TGSI_OPCODE_AND:
915      /* deprecated? */
916      assert(0);
917      return FALSE;
918      break;
919
920   case TGSI_OPCODE_OR:
921      /* deprecated? */
922      assert(0);
923      return FALSE;
924      break;
925
926   case TGSI_OPCODE_MOD:
927      /* deprecated? */
928      assert(0);
929      return FALSE;
930      break;
931
932   case TGSI_OPCODE_XOR:
933      /* deprecated? */
934      assert(0);
935      return FALSE;
936      break;
937
938   case TGSI_OPCODE_SAD:
939      /* deprecated? */
940      assert(0);
941      return FALSE;
942      break;
943
944   case TGSI_OPCODE_TXF:
945      /* deprecated? */
946      assert(0);
947      return FALSE;
948      break;
949
950   case TGSI_OPCODE_TXQ:
951      /* deprecated? */
952      assert(0);
953      return FALSE;
954      break;
955
956   case TGSI_OPCODE_CONT:
957      return FALSE;
958
959   case TGSI_OPCODE_EMIT:
960      return FALSE;
961      break;
962
963   case TGSI_OPCODE_ENDPRIM:
964      return FALSE;
965      break;
966
967   case TGSI_OPCODE_NOP:
968      break;
969
970   default:
971      return FALSE;
972   }
973
974   if (info->num_dst) {
975      lp_emit_store_aos(bld, inst, 0, dst0);
976   }
977
978   return TRUE;
979}
980
981
982void
983lp_build_tgsi_aos(struct gallivm_state *gallivm,
984                  const struct tgsi_token *tokens,
985                  struct lp_type type,
986                  const unsigned char swizzles[4],
987                  LLVMValueRef consts_ptr,
988                  const LLVMValueRef *inputs,
989                  LLVMValueRef *outputs,
990                  struct lp_build_sampler_aos *sampler,
991                  const struct tgsi_shader_info *info)
992{
993   struct lp_build_tgsi_aos_context bld;
994   struct tgsi_parse_context parse;
995   uint num_immediates = 0;
996   unsigned chan;
997   int pc = 0;
998
999   /* Setup build context */
1000   memset(&bld, 0, sizeof bld);
1001   lp_build_context_init(&bld.bld_base.base, gallivm, type);
1002   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
1003   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
1004   lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
1005
1006   for (chan = 0; chan < 4; ++chan) {
1007      bld.swizzles[chan] = swizzles[chan];
1008      bld.inv_swizzles[swizzles[chan]] = chan;
1009   }
1010
1011   bld.inputs = inputs;
1012   bld.outputs = outputs;
1013   bld.consts_ptr = consts_ptr;
1014   bld.sampler = sampler;
1015   bld.indirect_files = info->indirect_files;
1016   bld.bld_base.emit_swizzle = swizzle_aos;
1017   bld.bld_base.info = info;
1018
1019   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
1020   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
1021   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
1022   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
1023
1024   /* Set opcode actions */
1025   lp_set_default_actions_cpu(&bld.bld_base);
1026
1027   if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
1028      return;
1029   }
1030
1031   tgsi_parse_init(&parse, tokens);
1032
1033   while (!tgsi_parse_end_of_tokens(&parse)) {
1034      tgsi_parse_token(&parse);
1035
1036      switch(parse.FullToken.Token.Type) {
1037      case TGSI_TOKEN_TYPE_DECLARATION:
1038         /* Inputs already interpolated */
1039         lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration);
1040         break;
1041
1042      case TGSI_TOKEN_TYPE_INSTRUCTION:
1043         /* save expanded instruction */
1044         lp_bld_tgsi_add_instruction(&bld.bld_base,
1045                                     &parse.FullToken.FullInstruction);
1046         break;
1047
1048      case TGSI_TOKEN_TYPE_IMMEDIATE:
1049         /* simply copy the immediate values into the next immediates[] slot */
1050         {
1051            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1052            float imm[4];
1053            assert(size <= 4);
1054            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
1055            for (chan = 0; chan < 4; ++chan) {
1056               imm[chan] = 0.0f;
1057            }
1058            for (chan = 0; chan < size; ++chan) {
1059               unsigned swizzle = bld.swizzles[chan];
1060               imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
1061            }
1062            bld.immediates[num_immediates] =
1063                     lp_build_const_aos(gallivm, type,
1064                                        imm[0], imm[1], imm[2], imm[3],
1065                                        NULL);
1066            num_immediates++;
1067         }
1068         break;
1069
1070      case TGSI_TOKEN_TYPE_PROPERTY:
1071         break;
1072
1073      default:
1074         assert(0);
1075      }
1076   }
1077
1078   while (pc != -1) {
1079      struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc;
1080      const struct tgsi_opcode_info *opcode_info =
1081         tgsi_get_opcode_info(instr->Instruction.Opcode);
1082      if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
1083         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1084                       opcode_info->mnemonic);
1085   }
1086
1087   if (0) {
1088      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
1089      LLVMValueRef function = LLVMGetBasicBlockParent(block);
1090      debug_printf("11111111111111111111111111111 \n");
1091      tgsi_dump(tokens, 0);
1092      lp_debug_dump_value(function);
1093      debug_printf("2222222222222222222222222222 \n");
1094   }
1095   tgsi_parse_free(&parse);
1096   FREE(bld.bld_base.instructions);
1097
1098   if (0) {
1099      LLVMModuleRef module = LLVMGetGlobalParent(
1100         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
1101      LLVMDumpModule(module);
1102   }
1103
1104}
1105
1106