tgsi_ppc.c revision f8ab4feb75f4a592e23859813c093dcdbd4b8988
1/**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * TGSI to PowerPC code generation.
30 */
31
32#include "pipe/p_config.h"
33
34#if defined(PIPE_ARCH_PPC)
35
36#include "pipe/p_debug.h"
37#include "pipe/p_shader_tokens.h"
38#include "util/u_math.h"
39#include "util/u_memory.h"
40#include "util/u_sse.h"
41#include "tgsi/tgsi_parse.h"
42#include "tgsi/tgsi_util.h"
43#include "tgsi_exec.h"
44#include "tgsi_ppc.h"
45#include "rtasm/rtasm_ppc.h"
46
47
48/**
49 * Since it's pretty much impossible to form PPC vector immediates, load
50 * them from memory here:
51 */
52const float ppc_builtin_constants[] ALIGN16_ATTRIB = {
53   1.0f, -128.0f, 128.0, 0.0
54};
55
56
/** Loop header: step CHAN through 0 .. NUM_CHANNELS-1. */
#define FOR_EACH_CHANNEL( CHAN ) \
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++)

/** Non-zero when bit CHAN is set in the write mask of INST's first dest. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/** 'if' header guarding a statement on IS_DST0_CHANNEL_ENABLED(). */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) \
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/**
 * Loop header: run the following statement once for every channel that
 * is enabled in the write mask of INST's first destination register.
 */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN ) \
   for ((CHAN) = 0; (CHAN) < NUM_CHANNELS; (CHAN)++) \
      if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
69
/* Channel indices of a four-component register. */
#define CHAN_X    0
#define CHAN_Y    1
#define CHAN_Z    2
#define CHAN_W    3

/* Short aliases for temporaries named by the TGSI interpreter header. */
#define TEMP_ONE_I   TGSI_EXEC_TEMP_ONE_I
#define TEMP_ONE_C   TGSI_EXEC_TEMP_ONE_C

#define TEMP_R0      TGSI_EXEC_TEMP_R0
#define TEMP_ADDR    TGSI_EXEC_TEMP_ADDR
80
81
/**
 * Code generation state shared by all of the emit_*() helpers while one
 * shader is being translated.
 */
struct gen_context
{
   /** Function object that receives the generated instructions. */
   struct ppc_function *f;

   /* General-purpose registers holding base pointers. */
   int inputs_reg;    /**< GP register pointing to input params */
   int outputs_reg;   /**< GP register pointing to output params */
   int temps_reg;     /**< GP register pointing to temporary "registers" */
   int immed_reg;     /**< GP register pointing to immediates buffer */
   int const_reg;     /**< GP register pointing to constants buffer */
   int builtins_reg;  /**< GP register pointing to built-in constants */

   /* Lazily created vector constants; negative until first requested. */
   int one_vec;       /**< vector register with {1.0, 1.0, 1.0, 1.0} */
   int bit31_vec;     /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */
};
98
99
100/**
101 * Load the given vector register with {value, value, value, value}.
102 * The value must be in the ppu_builtin_constants[] array.
103 * We wouldn't need this if there was a simple way to load PPC vector
104 * registers with immediate values!
105 */
106static void
107load_constant_vec(struct gen_context *gen, int dst_vec, float value)
108{
109   uint pos;
110   for (pos = 0; pos < Elements(ppc_builtin_constants); pos++) {
111      if (ppc_builtin_constants[pos] == value) {
112         int offset_reg = ppc_allocate_register(gen->f);
113         int offset = pos * 4;
114
115         ppc_li(gen->f, offset_reg, offset);
116         /* Load 4-byte word into vector register.
117          * The vector slot depends on the effective address we load from.
118          * We know that our builtins start at a 16-byte boundary so we
119          * know that 'swizzle' tells us which vector slot will have the
120          * loaded word.  The other vector slots will be undefined.
121          */
122         ppc_lvewx(gen->f, dst_vec, gen->builtins_reg, offset_reg);
123         /* splat word[pos % 4] across the vector reg */
124         ppc_vspltw(gen->f, dst_vec, dst_vec, pos % 4);
125         ppc_release_register(gen->f, offset_reg);
126         return;
127      }
128   }
129   assert(0 && "Need to add new constant to ppc_builtin_constants array");
130}
131
132
133/**
134 * Return index of vector register containing {1.0, 1.0, 1.0, 1.0}.
135 */
136static int
137gen_one_vec(struct gen_context *gen)
138{
139   if (gen->one_vec < 0) {
140      gen->one_vec = ppc_allocate_vec_register(gen->f);
141      load_constant_vec(gen, gen->one_vec, 1.0f);
142   }
143   return gen->one_vec;
144}
145
146/**
147 * Return index of vector register containing {1<<31, 1<<31, 1<<31, 1<<31}.
148 */
149static int
150gen_get_bit31_vec(struct gen_context *gen)
151{
152   if (gen->bit31_vec < 0) {
153      gen->bit31_vec = ppc_allocate_vec_register(gen->f);
154      ppc_vspltisw(gen->f, gen->bit31_vec, -1);
155      ppc_vslw(gen->f, gen->bit31_vec, gen->bit31_vec, gen->bit31_vec);
156   }
157   return gen->bit31_vec;
158}
159
160
161/**
162 * Register fetch, put result in 'dst_vec'.
163 */
164static void
165emit_fetch(struct gen_context *gen,
166           unsigned dst_vec,
167           const struct tgsi_full_src_register *reg,
168           const unsigned chan_index)
169{
170   uint swizzle = tgsi_util_get_full_src_register_extswizzle(reg, chan_index);
171
172   switch (swizzle) {
173   case TGSI_EXTSWIZZLE_X:
174   case TGSI_EXTSWIZZLE_Y:
175   case TGSI_EXTSWIZZLE_Z:
176   case TGSI_EXTSWIZZLE_W:
177      switch (reg->SrcRegister.File) {
178      case TGSI_FILE_INPUT:
179         {
180            int offset_reg = ppc_allocate_register(gen->f);
181            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
182            ppc_li(gen->f, offset_reg, offset);
183            ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg);
184            ppc_release_register(gen->f, offset_reg);
185         }
186         break;
187      case TGSI_FILE_TEMPORARY:
188         {
189            int offset_reg = ppc_allocate_register(gen->f);
190            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
191            ppc_li(gen->f, offset_reg, offset);
192            ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg);
193            ppc_release_register(gen->f, offset_reg);
194         }
195         break;
196      case TGSI_FILE_IMMEDIATE:
197         {
198            int offset_reg = ppc_allocate_register(gen->f);
199            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
200            ppc_li(gen->f, offset_reg, offset);
201            ppc_lvx(gen->f, dst_vec, gen->immed_reg, offset_reg);
202            ppc_release_register(gen->f, offset_reg);
203         }
204         break;
205      case TGSI_FILE_CONSTANT:
206         {
207            int offset_reg = ppc_allocate_register(gen->f);
208            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4;
209            ppc_li(gen->f, offset_reg, offset);
210            /* Load 4-byte word into vector register.
211             * The vector slot depends on the effective address we load from.
212             * We know that our constants start at a 16-byte boundary so we
213             * know that 'swizzle' tells us which vector slot will have the
214             * loaded word.  The other vector slots will be undefined.
215             */
216            ppc_lvewx(gen->f, dst_vec, gen->const_reg, offset_reg);
217            /* splat word[swizzle] across the vector reg */
218            ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle);
219            ppc_release_register(gen->f, offset_reg);
220         }
221         break;
222      default:
223         assert( 0 );
224      }
225      break;
226   case TGSI_EXTSWIZZLE_ZERO:
227      ppc_vzero(gen->f, dst_vec);
228      break;
229   case TGSI_EXTSWIZZLE_ONE:
230      {
231         int one_vec = gen_one_vec(gen);
232         ppc_vmove(gen->f, dst_vec, one_vec);
233      }
234      break;
235   default:
236      assert( 0 );
237   }
238
239   {
240      uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index);
241      if (sign_op != TGSI_UTIL_SIGN_KEEP) {
242         int bit31_vec = gen_get_bit31_vec(gen);
243
244         switch (sign_op) {
245         case TGSI_UTIL_SIGN_CLEAR:
246            /* vec = vec & ~bit31 */
247            ppc_vandc(gen->f, dst_vec, dst_vec, bit31_vec);
248            break;
249         case TGSI_UTIL_SIGN_SET:
250            /* vec = vec | bit31 */
251            ppc_vor(gen->f, dst_vec, dst_vec, bit31_vec);
252            break;
253         case TGSI_UTIL_SIGN_TOGGLE:
254            /* vec = vec ^ bit31 */
255            ppc_vxor(gen->f, dst_vec, dst_vec, bit31_vec);
256            break;
257         default:
258            assert(0);
259         }
260      }
261   }
262}
263
264#define FETCH( GEN, INST, DST_VEC, SRC_REG, CHAN ) \
265   emit_fetch( GEN, DST_VEC, &(INST).FullSrcRegisters[SRC_REG], CHAN )
266
267
268
269/**
270 * Register store.  Store 'src_vec' at location indicated by 'reg'.
271 */
272static void
273emit_store(struct gen_context *gen,
274           unsigned src_vec,
275           const struct tgsi_full_dst_register *reg,
276           const struct tgsi_full_instruction *inst,
277           unsigned chan_index)
278{
279   switch (reg->DstRegister.File) {
280   case TGSI_FILE_OUTPUT:
281      {
282         int offset_reg = ppc_allocate_register(gen->f);
283         int offset = (reg->DstRegister.Index * 4 + chan_index) * 16;
284         ppc_li(gen->f, offset_reg, offset);
285         ppc_stvx(gen->f, src_vec, gen->outputs_reg, offset_reg);
286         ppc_release_register(gen->f, offset_reg);
287      }
288      break;
289   case TGSI_FILE_TEMPORARY:
290      {
291         int offset_reg = ppc_allocate_register(gen->f);
292         int offset = (reg->DstRegister.Index * 4 + chan_index) * 16;
293         ppc_li(gen->f, offset_reg, offset);
294         ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg);
295         ppc_release_register(gen->f, offset_reg);
296      }
297      break;
298#if 0
299   case TGSI_FILE_ADDRESS:
300      emit_addrs(
301         func,
302         xmm,
303         reg->DstRegister.Index,
304         chan_index );
305      break;
306#endif
307   default:
308      assert( 0 );
309   }
310
311#if 0
312   switch( inst->Instruction.Saturate ) {
313   case TGSI_SAT_NONE:
314      break;
315
316   case TGSI_SAT_ZERO_ONE:
317      /* assert( 0 ); */
318      break;
319
320   case TGSI_SAT_MINUS_PLUS_ONE:
321      assert( 0 );
322      break;
323   }
324#endif
325}
326
327
328#define STORE( GEN, INST, XMM, INDEX, CHAN )\
329   emit_store( GEN, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN )
330
331
332
333static void
334emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
335{
336   int v0 = ppc_allocate_vec_register(gen->f);
337   int v1 = ppc_allocate_vec_register(gen->f);
338   uint chan_index;
339
340   FETCH(gen, *inst, v0, 0, CHAN_X);
341
342   switch (inst->Instruction.Opcode) {
343   case TGSI_OPCODE_RSQ:
344      /* v1 = 1.0 / sqrt(v0) */
345      ppc_vrsqrtefp(gen->f, v1, v0);
346      break;
347   case TGSI_OPCODE_RCP:
348      /* v1 = 1.0 / v0 */
349      ppc_vrefp(gen->f, v1, v0);
350      break;
351   default:
352      assert(0);
353   }
354
355   FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
356      STORE(gen, *inst, v1, 0, chan_index);
357   }
358   ppc_release_vec_register(gen->f, v0);
359   ppc_release_vec_register(gen->f, v1);
360}
361
362
363static void
364emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
365{
366   int v0 = ppc_allocate_vec_register(gen->f);
367   uint chan_index;
368   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
369      FETCH(gen, *inst, 0, 0, chan_index);   /* v0 = srcreg[0] */
370      switch (inst->Instruction.Opcode) {
371      case TGSI_OPCODE_ABS:
372         /* turn off the most significant bit of each vector float word */
373         {
374            int v1 = ppc_allocate_vec_register(gen->f);
375            ppc_vspltisw(gen->f, v1, -1);  /* v1 = {-1, -1, -1, -1} */
376            ppc_vslw(gen->f, v1, v1, v1);  /* v1 = {1<<31, 1<<31, 1<<31, 1<<31} */
377            ppc_vandc(gen->f, v0, v0, v1); /* v0 = v0 & ~v1 */
378            ppc_release_vec_register(gen->f, v1);
379         }
380         break;
381      case TGSI_OPCODE_FLOOR:
382         ppc_vrfim(gen->f, v0, v0);         /* v0 = floor(v0) */
383         break;
384      case TGSI_OPCODE_FRAC:
385         {
386            int v1 = ppc_allocate_vec_register(gen->f);
387            ppc_vrfim(gen->f, v1, v0);         /* v1 = floor(v0) */
388            ppc_vsubfp(gen->f, v0, v0, v1);    /* v0 = v0 - v1 */
389            ppc_release_vec_register(gen->f, v1);
390         }
391         break;
392      case TGSI_OPCODE_EXPBASE2:
393         ppc_vexptefp(gen->f, v0, v0);      /* v0 = 2^v0 */
394         break;
395      case TGSI_OPCODE_LOGBASE2:
396         /* XXX this may be broken! */
397         ppc_vlogefp(gen->f, v0, v0);      /* v0 = log2(v0) */
398         break;
399      case TGSI_OPCODE_MOV:
400         /* nothing */
401         break;
402      default:
403         assert(0);
404      }
405      STORE(gen, *inst, v0, 0, chan_index);   /* store v0 */
406   }
407   ppc_release_vec_register(gen->f, v0);
408}
409
410
411static void
412emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst)
413{
414   int v0 = ppc_allocate_vec_register(gen->f);
415   int v1 = ppc_allocate_vec_register(gen->f);
416   int v2 = ppc_allocate_vec_register(gen->f);
417   uint chan_index;
418   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
419      FETCH(gen, *inst, v0, 0, chan_index);   /* v0 = srcreg[0] */
420      FETCH(gen, *inst, v1, 1, chan_index);   /* v1 = srcreg[1] */
421      switch (inst->Instruction.Opcode) {
422      case TGSI_OPCODE_ADD:
423         ppc_vaddfp(gen->f, v2, v0, v1);
424         break;
425      case TGSI_OPCODE_SUB:
426         ppc_vsubfp(gen->f, v2, v0, v1);
427         break;
428      case TGSI_OPCODE_MUL:
429         ppc_vxor(gen->f, v2, v2, v2);        /* v2 = {0, 0, 0, 0} */
430         ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v0 */
431         break;
432      case TGSI_OPCODE_MIN:
433         ppc_vminfp(gen->f, v2, v0, v1);
434         break;
435      case TGSI_OPCODE_MAX:
436         ppc_vmaxfp(gen->f, v2, v0, v1);
437         break;
438      default:
439         assert(0);
440      }
441      STORE(gen, *inst, v2, 0, chan_index);   /* store v2 */
442   }
443   ppc_release_vec_register(gen->f, v0);
444   ppc_release_vec_register(gen->f, v1);
445   ppc_release_vec_register(gen->f, v2);
446}
447
448
449/**
450 * Vector comparisons, resulting in 1.0 or 0.0 values.
451 */
452static void
453emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst)
454{
455   int v0 = ppc_allocate_vec_register(gen->f);
456   int v1 = ppc_allocate_vec_register(gen->f);
457   int v2 = ppc_allocate_vec_register(gen->f);
458   uint chan_index;
459   boolean complement = FALSE;
460   int one_vec = gen_one_vec(gen);
461
462   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
463      FETCH(gen, *inst, v0, 0, chan_index);   /* v0 = srcreg[0] */
464      FETCH(gen, *inst, v1, 1, chan_index);   /* v1 = srcreg[1] */
465
466      switch (inst->Instruction.Opcode) {
467      case TGSI_OPCODE_SNE:
468         complement = TRUE;
469         /* fall-through */
470      case TGSI_OPCODE_SEQ:
471         ppc_vcmpeqfpx(gen->f, v2, v0, v1); /* v2 = v0 == v1 ? ~0 : 0 */
472         break;
473
474      case TGSI_OPCODE_SGE:
475         complement = TRUE;
476         /* fall-through */
477      case TGSI_OPCODE_SLT:
478         ppc_vcmpgtfpx(gen->f, v2, v1, v0); /* v2 = v1 > v0 ? ~0 : 0 */
479         break;
480
481      case TGSI_OPCODE_SLE:
482         complement = TRUE;
483         /* fall-through */
484      case TGSI_OPCODE_SGT:
485         ppc_vcmpgtfpx(gen->f, v2, v0, v1); /* v2 = v0 > v1 ? ~0 : 0 */
486         break;
487      default:
488         assert(0);
489      }
490
491      /* v2 is now {0,0,0,0} or {~0,~0,~0,~0} */
492
493      if (complement)
494         ppc_vandc(gen->f, v2, one_vec, v2);    /* v2 = one_vec & ~v2 */
495      else
496         ppc_vand(gen->f, v2, one_vec, v2);     /* v2 = one_vec & v2 */
497
498      STORE(gen, *inst, v2, 0, chan_index);   /* store v2 */
499   }
500
501   ppc_release_vec_register(gen->f, v0);
502   ppc_release_vec_register(gen->f, v1);
503   ppc_release_vec_register(gen->f, v2);
504}
505
506
507static void
508emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst)
509{
510   int v0 = ppc_allocate_vec_register(gen->f);
511   int v1 = ppc_allocate_vec_register(gen->f);
512   int v2 = ppc_allocate_vec_register(gen->f);
513   uint chan_index;
514
515   ppc_vxor(gen->f, v2, v2, v2);           /* v2 = {0, 0, 0, 0} */
516
517   FETCH(gen, *inst, v0, 0, CHAN_X);       /* v0 = src0.XXXX */
518   FETCH(gen, *inst, v1, 1, CHAN_X);       /* v1 = src1.XXXX */
519   ppc_vmaddfp(gen->f, v2, v0, v1, v2);    /* v2 = v0 * v1 + v2 */
520
521   FETCH(gen, *inst, v0, 0, CHAN_Y);       /* v0 = src0.YYYY */
522   FETCH(gen, *inst, v1, 1, CHAN_Y);       /* v1 = src1.YYYY */
523   ppc_vmaddfp(gen->f, v2, v0, v1, v2);    /* v2 = v0 * v1 + v2 */
524
525   FETCH(gen, *inst, v0, 0, CHAN_Z);       /* v0 = src0.ZZZZ */
526   FETCH(gen, *inst, v1, 1, CHAN_Z);       /* v1 = src1.ZZZZ */
527   ppc_vmaddfp(gen->f, v2, v0, v1, v2);    /* v2 = v0 * v1 + v2 */
528
529   if (inst->Instruction.Opcode == TGSI_OPCODE_DP4) {
530      FETCH(gen, *inst, v0, 0, CHAN_W);    /* v0 = src0.WWWW */
531      FETCH(gen, *inst, v1, 1, CHAN_W);    /* v1 = src1.WWWW */
532      ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */
533   }
534   else if (inst->Instruction.Opcode == TGSI_OPCODE_DPH) {
535      FETCH(gen, *inst, v1, 1, CHAN_W);    /* v1 = src1.WWWW */
536      ppc_vaddfp(gen->f, v2, v2, v1);      /* v2 = v2 + v1 */
537   }
538
539   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
540      STORE(gen, *inst, v2, 0, chan_index);  /* store v2 */
541   }
542   ppc_release_vec_register(gen->f, v0);
543   ppc_release_vec_register(gen->f, v1);
544   ppc_release_vec_register(gen->f, v2);
545}
546
547
548static void
549emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst)
550{
551   int v0 = ppc_allocate_vec_register(gen->f);
552   int v1 = ppc_allocate_vec_register(gen->f);
553   int v2 = ppc_allocate_vec_register(gen->f);
554   int v3 = ppc_allocate_vec_register(gen->f);
555   uint chan_index;
556   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
557      FETCH(gen, *inst, v0, 0, chan_index);   /* v0 = srcreg[0] */
558      FETCH(gen, *inst, v1, 1, chan_index);   /* v1 = srcreg[1] */
559      FETCH(gen, *inst, v2, 2, chan_index);   /* v2 = srcreg[2] */
560      switch (inst->Instruction.Opcode) {
561      case TGSI_OPCODE_MAD:
562         ppc_vmaddfp(gen->f, v3, v0, v1, v2);   /* v3 = v0 * v1 + v2 */
563         break;
564      case TGSI_OPCODE_LRP:
565         ppc_vsubfp(gen->f, v3, v1, v2);        /* v3 = v1 - v2 */
566         ppc_vmaddfp(gen->f, v3, v0, v3, v2);   /* v3 = v0 * v3 + v2 */
567         break;
568      default:
569         assert(0);
570      }
571      STORE(gen, *inst, v3, 0, chan_index);   /* store v3 */
572   }
573   ppc_release_vec_register(gen->f, v0);
574   ppc_release_vec_register(gen->f, v1);
575   ppc_release_vec_register(gen->f, v2);
576   ppc_release_vec_register(gen->f, v3);
577}
578
579
580
/**
 * Emit an approximation of vr = pow(va, vb), using the identity
 * pow(a, b) ~= exp2(log2(a) * b).
 *
 * \param f   function receiving the instructions
 * \param vr  vector register for the result
 * \param va  vector register with the base
 * \param vb  vector register with the exponent
 */
static void
ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb)
{
   int tmp = ppc_allocate_vec_register(f);
   int zero = ppc_allocate_vec_register(f);

   ppc_vzero(f, zero);                   /* addend for the multiply-add */

   ppc_vlogefp(f, tmp, va);              /* tmp = log2(va) */
   ppc_vmaddfp(f, tmp, tmp, vb, zero);   /* tmp = tmp * vb + 0 */
   ppc_vexptefp(f, vr, tmp);             /* vr = 2^tmp */

   ppc_release_vec_register(f, tmp);
   ppc_release_vec_register(f, zero);
}
598
599
600static void
601emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst)
602{
603   int one_vec = gen_one_vec(gen);
604
605   /* Compute X */
606   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
607      STORE(gen, *inst, one_vec, 0, CHAN_X);
608   }
609
610   /* Compute Y, Z */
611   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) ||
612       IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
613      int x_vec = ppc_allocate_vec_register(gen->f);
614      int zero_vec = ppc_allocate_vec_register(gen->f);
615
616      FETCH(gen, *inst, x_vec, 0, CHAN_X);        /* x_vec = src[0].x */
617
618      ppc_vzero(gen->f, zero_vec);                /* zero = {0,0,0,0} */
619      ppc_vmaxfp(gen->f, x_vec, x_vec, zero_vec); /* x_vec = max(x_vec, 0) */
620
621      if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
622         STORE(gen, *inst, x_vec, 0, CHAN_Y);        /* store Y */
623      }
624
625      if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
626         int y_vec = ppc_allocate_vec_register(gen->f);
627         int z_vec = ppc_allocate_vec_register(gen->f);
628         int w_vec = ppc_allocate_vec_register(gen->f);
629         int pow_vec = ppc_allocate_vec_register(gen->f);
630         int pos_vec = ppc_allocate_vec_register(gen->f);
631         int p128_vec = ppc_allocate_vec_register(gen->f);
632         int n128_vec = ppc_allocate_vec_register(gen->f);
633
634         FETCH(gen, *inst, y_vec, 0, CHAN_Y);        /* y_vec = src[0].y */
635         ppc_vmaxfp(gen->f, y_vec, y_vec, zero_vec); /* y_vec = max(y_vec, 0) */
636
637         FETCH(gen, *inst, w_vec, 0, CHAN_W);        /* w_vec = src[0].w */
638
639         /* XXX clamp Y to [-128, 128] */
640         load_constant_vec(gen, p128_vec, 128.0f);
641         load_constant_vec(gen, n128_vec, -128.0f);
642
643         /* if temp.x > 0
644          *    pow(tmp.y, tmp.w)
645          * else
646          *   0.0
647          */
648
649         ppc_vec_pow(gen->f, pow_vec, y_vec, w_vec);      /* pow = pow(y, w) */
650         ppc_vcmpgtfpx(gen->f, pos_vec, x_vec, zero_vec); /* pos = x > 0 */
651         ppc_vand(gen->f, z_vec, pow_vec, pos_vec);       /* z = pow & pos */
652
653         STORE(gen, *inst, z_vec, 0, CHAN_Z);             /* store Z */
654
655         ppc_release_vec_register(gen->f, y_vec);
656         ppc_release_vec_register(gen->f, z_vec);
657         ppc_release_vec_register(gen->f, w_vec);
658         ppc_release_vec_register(gen->f, pow_vec);
659         ppc_release_vec_register(gen->f, pos_vec);
660         ppc_release_vec_register(gen->f, p128_vec);
661         ppc_release_vec_register(gen->f, n128_vec);
662      }
663
664      ppc_release_vec_register(gen->f, x_vec);
665      ppc_release_vec_register(gen->f, zero_vec);
666   }
667
668   /* Compute W */
669   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) {
670      STORE(gen, *inst, one_vec, 0, CHAN_W);
671   }
672}
673
674
675static int
676emit_instruction(struct gen_context *gen,
677                 struct tgsi_full_instruction *inst)
678{
679   switch (inst->Instruction.Opcode) {
680   case TGSI_OPCODE_MOV:
681   case TGSI_OPCODE_ABS:
682   case TGSI_OPCODE_FLOOR:
683   case TGSI_OPCODE_FRAC:
684   case TGSI_OPCODE_EXPBASE2:
685   case TGSI_OPCODE_LOGBASE2:
686      emit_unaryop(gen, inst);
687      break;
688   case TGSI_OPCODE_RSQ:
689   case TGSI_OPCODE_RCP:
690      emit_scalar_unaryop(gen, inst);
691      break;
692   case TGSI_OPCODE_ADD:
693   case TGSI_OPCODE_SUB:
694   case TGSI_OPCODE_MUL:
695   case TGSI_OPCODE_MIN:
696   case TGSI_OPCODE_MAX:
697      emit_binop(gen, inst);
698      break;
699   case TGSI_OPCODE_SEQ:
700   case TGSI_OPCODE_SNE:
701   case TGSI_OPCODE_SLT:
702   case TGSI_OPCODE_SGT:
703   case TGSI_OPCODE_SLE:
704   case TGSI_OPCODE_SGE:
705      emit_inequality(gen, inst);
706      break;
707   case TGSI_OPCODE_MAD:
708   case TGSI_OPCODE_LRP:
709      emit_triop(gen, inst);
710      break;
711   case TGSI_OPCODE_DP3:
712   case TGSI_OPCODE_DP4:
713   case TGSI_OPCODE_DPH:
714      emit_dotprod(gen, inst);
715      break;
716   case TGSI_OPCODE_LIT:
717      emit_lit(gen, inst);
718      break;
719   case TGSI_OPCODE_END:
720      /* normal end */
721      return 1;
722   default:
723      return 0;
724   }
725
726
727   return 1;
728}
729
730static void
731emit_declaration(
732   struct ppc_function *func,
733   struct tgsi_full_declaration *decl )
734{
735   if( decl->Declaration.File == TGSI_FILE_INPUT ) {
736#if 0
737      unsigned first, last, mask;
738      unsigned i, j;
739
740      first = decl->DeclarationRange.First;
741      last = decl->DeclarationRange.Last;
742      mask = decl->Declaration.UsageMask;
743
744      for( i = first; i <= last; i++ ) {
745         for( j = 0; j < NUM_CHANNELS; j++ ) {
746            if( mask & (1 << j) ) {
747               switch( decl->Declaration.Interpolate ) {
748               case TGSI_INTERPOLATE_CONSTANT:
749                  emit_coef_a0( func, 0, i, j );
750                  emit_inputs( func, 0, i, j );
751                  break;
752
753               case TGSI_INTERPOLATE_LINEAR:
754                  emit_tempf( func, 0, 0, TGSI_SWIZZLE_X );
755                  emit_coef_dadx( func, 1, i, j );
756                  emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y );
757                  emit_coef_dady( func, 3, i, j );
758                  emit_mul( func, 0, 1 );    /* x * dadx */
759                  emit_coef_a0( func, 4, i, j );
760                  emit_mul( func, 2, 3 );    /* y * dady */
761                  emit_add( func, 0, 4 );    /* x * dadx + a0 */
762                  emit_add( func, 0, 2 );    /* x * dadx + y * dady + a0 */
763                  emit_inputs( func, 0, i, j );
764                  break;
765
766               case TGSI_INTERPOLATE_PERSPECTIVE:
767                  emit_tempf( func, 0, 0, TGSI_SWIZZLE_X );
768                  emit_coef_dadx( func, 1, i, j );
769                  emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y );
770                  emit_coef_dady( func, 3, i, j );
771                  emit_mul( func, 0, 1 );    /* x * dadx */
772                  emit_tempf( func, 4, 0, TGSI_SWIZZLE_W );
773                  emit_coef_a0( func, 5, i, j );
774                  emit_rcp( func, 4, 4 );    /* 1.0 / w */
775                  emit_mul( func, 2, 3 );    /* y * dady */
776                  emit_add( func, 0, 5 );    /* x * dadx + a0 */
777                  emit_add( func, 0, 2 );    /* x * dadx + y * dady + a0 */
778                  emit_mul( func, 0, 4 );    /* (x * dadx + y * dady + a0) / w */
779                  emit_inputs( func, 0, i, j );
780                  break;
781
782               default:
783                  assert( 0 );
784		  break;
785               }
786            }
787         }
788      }
789#endif
790   }
791}
792
793
794
/**
 * Emit the function prologue.  Nothing is generated at present.
 */
static void
emit_prologue(struct ppc_function *func)
{
   (void) func;
   /* XXX set up stack frame */
}
800
801
/**
 * Emit the function epilogue: just the return instruction for now.
 */
static void
emit_epilogue(struct ppc_function *func)
{
   /* XXX restore prev stack frame (would have to precede the return) */
   ppc_return(func);
}
808
809
810
811/**
812 * Translate a TGSI vertex/fragment shader to PPC code.
813 *
814 * \param tokens  the TGSI input shader
815 * \param func  the output PPC code/function
816 * \param immediates  buffer to place immediates, later passed to PPC func
817 * \return TRUE for success, FALSE if translation failed
818 */
819boolean
820tgsi_emit_ppc(const struct tgsi_token *tokens,
821              struct ppc_function *func,
822              float (*immediates)[4],
823              boolean do_swizzles )
824{
825   static int use_ppc_asm = -1;
826   struct tgsi_parse_context parse;
827   /*boolean instruction_phase = FALSE;*/
828   unsigned ok = 1;
829   uint num_immediates = 0;
830   struct gen_context gen;
831
832   if (use_ppc_asm < 0) {
833      /* If GALLIUM_NOPPC is set, don't use PPC codegen */
834      use_ppc_asm = !debug_get_bool_option("GALLIUM_NOPPC", FALSE);
835   }
836   if (!use_ppc_asm)
837      return FALSE;
838
839   util_init_math();
840
841   gen.f = func;
842   gen.inputs_reg = ppc_reserve_register(func, 3);   /* first function param */
843   gen.outputs_reg = ppc_reserve_register(func, 4);  /* second function param */
844   gen.temps_reg = ppc_reserve_register(func, 5);    /* ... */
845   gen.immed_reg = ppc_reserve_register(func, 6);
846   gen.const_reg = ppc_reserve_register(func, 7);
847   gen.builtins_reg = ppc_reserve_register(func, 8);
848   gen.one_vec = -1;
849   gen.bit31_vec = -1;
850
851   emit_prologue(func);
852
853   tgsi_parse_init( &parse, tokens );
854
855   while (!tgsi_parse_end_of_tokens(&parse) && ok) {
856      tgsi_parse_token(&parse);
857
858      switch (parse.FullToken.Token.Type) {
859      case TGSI_TOKEN_TYPE_DECLARATION:
860         if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
861            emit_declaration(func, &parse.FullToken.FullDeclaration );
862         }
863         break;
864
865      case TGSI_TOKEN_TYPE_INSTRUCTION:
866         ok = emit_instruction(&gen, &parse.FullToken.FullInstruction);
867
868	 if (!ok) {
869	    debug_printf("failed to translate tgsi opcode %d to PPC (%s)\n",
870			 parse.FullToken.FullInstruction.Instruction.Opcode,
871                         parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ?
872                         "vertex shader" : "fragment shader");
873	 }
874         break;
875
876      case TGSI_TOKEN_TYPE_IMMEDIATE:
877         /* splat each immediate component into a float[4] vector for SoA */
878         {
879            const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
880            float *imm = (float *) immediates;
881            uint i;
882            assert(size <= 4);
883            assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
884            for (i = 0; i < size; i++) {
885               const float value =
886                  parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
887               imm[num_immediates * 4 + 0] =
888               imm[num_immediates * 4 + 1] =
889               imm[num_immediates * 4 + 2] =
890               imm[num_immediates * 4 + 3] = value;
891               num_immediates++;
892            }
893         }
894         break;
895
896      default:
897	 ok = 0;
898         assert( 0 );
899      }
900   }
901
902   emit_epilogue(func);
903
904   tgsi_parse_free( &parse );
905
906   return ok;
907}
908
909#endif /* PIPE_ARCH_PPC */
910