tgsi_ppc.c revision ae81aeb12868db219cbdc02437c481714cfed3f5
1/**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * TGSI to PowerPC code generation.
30 */
31
32#include "pipe/p_config.h"
33
34#if defined(PIPE_ARCH_PPC)
35
36#include "pipe/p_debug.h"
37#include "pipe/p_shader_tokens.h"
38#include "util/u_math.h"
39#include "util/u_sse.h"
40#include "tgsi/tgsi_parse.h"
41#include "tgsi/tgsi_util.h"
42#include "tgsi_exec.h"
43#include "tgsi_ppc.h"
44#include "rtasm/rtasm_ppc.h"
45
46
47
/**
 * Loop header: iterate CHAN over 0 .. NUM_CHANNELS-1.
 * CHAN must be an integer variable declared by the caller; the statement
 * that follows the macro invocation becomes the loop body.
 * NUM_CHANNELS is not defined in this file (presumably it comes from
 * tgsi_exec.h -- confirm).
 */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/**
 * Non-zero if bit CHAN is set in the write mask of the instruction's
 * first destination register.  INST is an instruction object, not a pointer.
 */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/**
 * 'if' header guarding the following statement on the channel being enabled.
 * Expands to a bare 'if', so an 'else' written after the guarded statement
 * would bind to this hidden 'if'.
 */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/**
 * Loop over only those channels that are enabled in the first destination
 * register's write mask.  Expands to an unbraced for + if pair; always
 * follow it with a braced block.
 */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Channel indices used for write-mask bits and for FETCH()/STORE(). */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* Short aliases for tgsi_exec temporaries (not referenced in the code below). */
#define TEMP_ONE_I   TGSI_EXEC_TEMP_ONE_I
#define TEMP_ONE_C   TGSI_EXEC_TEMP_ONE_C

#define TEMP_R0   TGSI_EXEC_TEMP_R0
#define TEMP_ADDR TGSI_EXEC_TEMP_ADDR
71
72
/**
 * Context/state used during code gen.
 *
 * One instance is set up on the stack by tgsi_emit_ppc() and passed to
 * every emit_*() helper.
 */
struct gen_context
{
   struct ppc_function *f;  /**< function that code is being emitted into */
   int inputs_reg;    /**< GP register pointing to input params */
   int outputs_reg;   /**< GP register pointing to output params */
   int temps_reg;     /**< GP register pointing to temporary "registers" */
   int immed_reg;     /**< GP register pointing to immediates buffer */
   int const_reg;     /**< GP register pointing to constants buffer */

   /* The two vector registers below are allocated lazily: they hold -1
    * until gen_one_vec() / gen_get_bit31_vec() is first called.
    */
   int one_vec;       /**< vector register with {1.0, 1.0, 1.0, 1.0} */
   int bit31_vec;     /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */
};
88
89
90/**
91 * Return index of vector register containing {1.0, 1.0, 1.0, 1.0}.
92 */
93static int
94gen_one_vec(struct gen_context *gen)
95{
96   if (gen->one_vec < 0) {
97      gen->one_vec = ppc_allocate_vec_register(gen->f);
98      ppc_vload_float(gen->f, gen->one_vec, 1.0f);
99   }
100   return gen->one_vec;
101}
102
103/**
104 * Return index of vector register containing {1<<31, 1<<31, 1<<31, 1<<31}.
105 */
106static int
107gen_get_bit31_vec(struct gen_context *gen)
108{
109   if (gen->bit31_vec < 0) {
110      gen->bit31_vec = ppc_allocate_vec_register(gen->f);
111      ppc_vspltisw(gen->f, gen->bit31_vec, -1);
112      ppc_vslw(gen->f, gen->bit31_vec, gen->bit31_vec, gen->bit31_vec);
113   }
114   return gen->bit31_vec;
115}
116
117
118
119/**
120 * Register fetch, put result in 'dst_vec'.
121 */
122static void
123emit_fetch(struct gen_context *gen,
124           unsigned dst_vec,
125           const struct tgsi_full_src_register *reg,
126           const unsigned chan_index)
127{
128   uint swizzle = tgsi_util_get_full_src_register_extswizzle(reg, chan_index);
129
130   switch (swizzle) {
131   case TGSI_EXTSWIZZLE_X:
132   case TGSI_EXTSWIZZLE_Y:
133   case TGSI_EXTSWIZZLE_Z:
134   case TGSI_EXTSWIZZLE_W:
135      switch (reg->SrcRegister.File) {
136      case TGSI_FILE_INPUT:
137         {
138            int offset_reg = ppc_allocate_register(gen->f);
139            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
140            ppc_li(gen->f, offset_reg, offset);
141            ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg);
142            ppc_release_register(gen->f, offset_reg);
143         }
144         break;
145      case TGSI_FILE_TEMPORARY:
146         {
147            int offset_reg = ppc_allocate_register(gen->f);
148            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
149            ppc_li(gen->f, offset_reg, offset);
150            ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg);
151            ppc_release_register(gen->f, offset_reg);
152         }
153         break;
154      case TGSI_FILE_IMMEDIATE:
155         {
156            int offset_reg = ppc_allocate_register(gen->f);
157            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
158            ppc_li(gen->f, offset_reg, offset);
159            ppc_lvx(gen->f, dst_vec, gen->immed_reg, offset_reg);
160            ppc_release_register(gen->f, offset_reg);
161         }
162         break;
163      case TGSI_FILE_CONSTANT:
164         {
165            int offset_reg = ppc_allocate_register(gen->f);
166            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4;
167            ppc_li(gen->f, offset_reg, offset);
168            /* Load 4-byte word into vector register.
169             * The vector slot depends on the effective address we load from.
170             * We know that our constants start at a 16-byte boundary so we
171             * know that 'swizzle' tells us which vector slot will have the
172             * loaded word.  The other vector slots will be undefined.
173             */
174            ppc_lvewx(gen->f, dst_vec, gen->const_reg, offset_reg);
175            /* splat word[swizzle] across the vector reg */
176            ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle);
177            ppc_release_register(gen->f, offset_reg);
178         }
179         break;
180      default:
181         assert( 0 );
182      }
183      break;
184   case TGSI_EXTSWIZZLE_ZERO:
185      ppc_vload_float(gen->f, dst_vec, 0.0f);
186      break;
187   case TGSI_EXTSWIZZLE_ONE:
188      {
189         int one_vec = gen_one_vec(gen);
190         ppc_vecmove(gen->f, dst_vec, one_vec);
191      }
192      break;
193   default:
194      assert( 0 );
195   }
196
197   {
198      uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index);
199      if (sign_op != TGSI_UTIL_SIGN_KEEP) {
200         int bit31_vec = gen_get_bit31_vec(gen);
201
202         switch (sign_op) {
203         case TGSI_UTIL_SIGN_CLEAR:
204            /* vec = vec & ~bit31 */
205            ppc_vandc(gen->f, dst_vec, dst_vec, bit31_vec);
206            break;
207         case TGSI_UTIL_SIGN_SET:
208            /* vec = vec | bit31 */
209            ppc_vor(gen->f, dst_vec, dst_vec, bit31_vec);
210            break;
211         case TGSI_UTIL_SIGN_TOGGLE:
212            /* vec = vec ^ bit31 */
213            ppc_vxor(gen->f, dst_vec, dst_vec, bit31_vec);
214            break;
215         default:
216            assert(0);
217         }
218      }
219   }
220}
221
/**
 * Convenience wrapper for emit_fetch(): load channel CHAN of source operand
 * number SRC_REG of instruction INST (an object, not a pointer) into vector
 * register DST_VEC.
 */
#define FETCH( GEN, INST, DST_VEC, SRC_REG, CHAN ) \
   emit_fetch( GEN, DST_VEC, &(INST).FullSrcRegisters[SRC_REG], CHAN )
224
225
226
227/**
228 * Register store.  Store 'src_vec' at location indicated by 'reg'.
229 */
230static void
231emit_store(struct gen_context *gen,
232           unsigned src_vec,
233           const struct tgsi_full_dst_register *reg,
234           const struct tgsi_full_instruction *inst,
235           unsigned chan_index)
236{
237   switch (reg->DstRegister.File) {
238   case TGSI_FILE_OUTPUT:
239      {
240         int offset_reg = ppc_allocate_register(gen->f);
241         int offset = (reg->DstRegister.Index * 4 + chan_index) * 16;
242         ppc_li(gen->f, offset_reg, offset);
243         ppc_stvx(gen->f, src_vec, gen->outputs_reg, offset_reg);
244         ppc_release_register(gen->f, offset_reg);
245      }
246      break;
247   case TGSI_FILE_TEMPORARY:
248      {
249         int offset_reg = ppc_allocate_register(gen->f);
250         int offset = (reg->DstRegister.Index * 4 + chan_index) * 16;
251         ppc_li(gen->f, offset_reg, offset);
252         ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg);
253         ppc_release_register(gen->f, offset_reg);
254      }
255      break;
256#if 0
257   case TGSI_FILE_ADDRESS:
258      emit_addrs(
259         func,
260         xmm,
261         reg->DstRegister.Index,
262         chan_index );
263      break;
264#endif
265   default:
266      assert( 0 );
267   }
268
269#if 0
270   switch( inst->Instruction.Saturate ) {
271   case TGSI_SAT_NONE:
272      break;
273
274   case TGSI_SAT_ZERO_ONE:
275      /* assert( 0 ); */
276      break;
277
278   case TGSI_SAT_MINUS_PLUS_ONE:
279      assert( 0 );
280      break;
281   }
282#endif
283}
284
285
/**
 * Convenience wrapper for emit_store(): write vector register SRC_VEC to
 * channel CHAN of destination operand number DST_REG of instruction INST
 * (an object, not a pointer).
 */
#define STORE( GEN, INST, SRC_VEC, DST_REG, CHAN )\
   emit_store( GEN, SRC_VEC, &(INST).FullDstRegisters[DST_REG], &(INST), CHAN )
288
289
290
291static void
292emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
293{
294   int v0 = ppc_allocate_vec_register(gen->f);
295   int v1 = ppc_allocate_vec_register(gen->f);
296   uint chan_index;
297
298   FETCH(gen, *inst, v0, 0, CHAN_X);
299
300   switch (inst->Instruction.Opcode) {
301   case TGSI_OPCODE_RSQ:
302      /* v1 = 1.0 / sqrt(v0) */
303      ppc_vrsqrtefp(gen->f, v1, v0);
304      break;
305   case TGSI_OPCODE_RCP:
306      /* v1 = 1.0 / v0 */
307      ppc_vrefp(gen->f, v1, v0);
308      break;
309   default:
310      assert(0);
311   }
312
313   FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
314      STORE(gen, *inst, v1, 0, chan_index);
315   }
316   ppc_release_vec_register(gen->f, v0);
317   ppc_release_vec_register(gen->f, v1);
318}
319
320
321static void
322emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
323{
324   int v0 = ppc_allocate_vec_register(gen->f);
325   uint chan_index;
326   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
327      FETCH(gen, *inst, 0, 0, chan_index);   /* v0 = srcreg[0] */
328      switch (inst->Instruction.Opcode) {
329      case TGSI_OPCODE_ABS:
330         /* turn off the most significant bit of each vector float word */
331         {
332            int v1 = ppc_allocate_vec_register(gen->f);
333            ppc_vspltisw(gen->f, v1, -1);  /* v1 = {-1, -1, -1, -1} */
334            ppc_vslw(gen->f, v1, v1, v1);  /* v1 = {1<<31, 1<<31, 1<<31, 1<<31} */
335            ppc_vandc(gen->f, v0, v0, v1); /* v0 = v0 & ~v1 */
336            ppc_release_vec_register(gen->f, v1);
337         }
338         break;
339      case TGSI_OPCODE_FLOOR:
340         ppc_vrfim(gen->f, v0, v0);         /* v0 = floor(v0) */
341         break;
342      case TGSI_OPCODE_FRAC:
343         {
344            int v1 = ppc_allocate_vec_register(gen->f);
345            ppc_vrfim(gen->f, v1, v0);         /* v1 = floor(v0) */
346            ppc_vsubfp(gen->f, v0, v0, v1);    /* v0 = v0 - v1 */
347            ppc_release_vec_register(gen->f, v1);
348         }
349         break;
350      case TGSI_OPCODE_EXPBASE2:
351         ppc_vexptefp(gen->f, v0, v0);      /* v0 = 2^v0 */
352         break;
353      case TGSI_OPCODE_LOGBASE2:
354         /* XXX this may be broken! */
355         ppc_vlogefp(gen->f, v0, v0);      /* v0 = log2(v0) */
356         break;
357      case TGSI_OPCODE_MOV:
358         /* nothing */
359         break;
360      default:
361         assert(0);
362      }
363      STORE(gen, *inst, v0, 0, chan_index);   /* store v0 */
364   }
365   ppc_release_vec_register(gen->f, v0);
366}
367
368
369static void
370emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst)
371{
372   int v0 = ppc_allocate_vec_register(gen->f);
373   int v1 = ppc_allocate_vec_register(gen->f);
374   int v2 = ppc_allocate_vec_register(gen->f);
375   uint chan_index;
376   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
377      FETCH(gen, *inst, v0, 0, chan_index);   /* v0 = srcreg[0] */
378      FETCH(gen, *inst, v1, 1, chan_index);   /* v1 = srcreg[1] */
379      switch (inst->Instruction.Opcode) {
380      case TGSI_OPCODE_ADD:
381         ppc_vaddfp(gen->f, v2, v0, v1);
382         break;
383      case TGSI_OPCODE_SUB:
384         ppc_vsubfp(gen->f, v2, v0, v1);
385         break;
386      case TGSI_OPCODE_MUL:
387         ppc_vxor(gen->f, v2, v2, v2);        /* v2 = {0, 0, 0, 0} */
388         ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v0 */
389         break;
390      case TGSI_OPCODE_MIN:
391         ppc_vminfp(gen->f, v2, v0, v1);
392         break;
393      case TGSI_OPCODE_MAX:
394         ppc_vmaxfp(gen->f, v2, v0, v1);
395         break;
396      default:
397         assert(0);
398      }
399      STORE(gen, *inst, v2, 0, chan_index);   /* store v2 */
400   }
401   ppc_release_vec_register(gen->f, v0);
402   ppc_release_vec_register(gen->f, v1);
403   ppc_release_vec_register(gen->f, v2);
404}
405
406
407/**
408 * Vector comparisons, resulting in 1.0 or 0.0 values.
409 */
410static void
411emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst)
412{
413   int v0 = ppc_allocate_vec_register(gen->f);
414   int v1 = ppc_allocate_vec_register(gen->f);
415   int v2 = ppc_allocate_vec_register(gen->f);
416   uint chan_index;
417   boolean complement = FALSE;
418   int one_vec = gen_one_vec(gen);
419
420   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
421      FETCH(gen, *inst, v0, 0, chan_index);   /* v0 = srcreg[0] */
422      FETCH(gen, *inst, v1, 1, chan_index);   /* v1 = srcreg[1] */
423
424      switch (inst->Instruction.Opcode) {
425      case TGSI_OPCODE_SNE:
426         complement = TRUE;
427         /* fall-through */
428      case TGSI_OPCODE_SEQ:
429         ppc_vcmpeqfpx(gen->f, v2, v0, v1); /* v2 = v0 == v1 ? ~0 : 0 */
430         break;
431
432      case TGSI_OPCODE_SGE:
433         complement = TRUE;
434         /* fall-through */
435      case TGSI_OPCODE_SLT:
436         ppc_vcmpgtfpx(gen->f, v2, v1, v0); /* v2 = v1 > v0 ? ~0 : 0 */
437         break;
438
439      case TGSI_OPCODE_SLE:
440         complement = TRUE;
441         /* fall-through */
442      case TGSI_OPCODE_SGT:
443         ppc_vcmpgtfpx(gen->f, v2, v0, v1); /* v2 = v0 > v1 ? ~0 : 0 */
444         break;
445      default:
446         assert(0);
447      }
448
449      /* v2 is now {0,0,0,0} or {~0,~0,~0,~0} */
450
451      if (complement)
452         ppc_vandc(gen->f, v2, one_vec, v2);    /* v2 = one_vec & ~v2 */
453      else
454         ppc_vand(gen->f, v2, one_vec, v2);     /* v2 = one_vec & v2 */
455
456      STORE(gen, *inst, v2, 0, chan_index);   /* store v2 */
457   }
458
459   ppc_release_vec_register(gen->f, v0);
460   ppc_release_vec_register(gen->f, v1);
461   ppc_release_vec_register(gen->f, v2);
462}
463
464
465static void
466emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst)
467{
468   int v0 = ppc_allocate_vec_register(gen->f);
469   int v1 = ppc_allocate_vec_register(gen->f);
470   int v2 = ppc_allocate_vec_register(gen->f);
471   uint chan_index;
472
473   ppc_vxor(gen->f, v2, v2, v2);           /* v2 = {0, 0, 0, 0} */
474
475   FETCH(gen, *inst, v0, 0, CHAN_X);       /* v0 = src0.XXXX */
476   FETCH(gen, *inst, v1, 1, CHAN_X);       /* v1 = src1.XXXX */
477   ppc_vmaddfp(gen->f, v2, v0, v1, v2);    /* v2 = v0 * v1 + v2 */
478
479   FETCH(gen, *inst, v0, 0, CHAN_Y);       /* v0 = src0.YYYY */
480   FETCH(gen, *inst, v1, 1, CHAN_Y);       /* v1 = src1.YYYY */
481   ppc_vmaddfp(gen->f, v2, v0, v1, v2);    /* v2 = v0 * v1 + v2 */
482
483   FETCH(gen, *inst, v0, 0, CHAN_Z);       /* v0 = src0.ZZZZ */
484   FETCH(gen, *inst, v1, 1, CHAN_Z);       /* v1 = src1.ZZZZ */
485   ppc_vmaddfp(gen->f, v2, v0, v1, v2);    /* v2 = v0 * v1 + v2 */
486
487   if (inst->Instruction.Opcode == TGSI_OPCODE_DP4) {
488      FETCH(gen, *inst, v0, 0, CHAN_W);    /* v0 = src0.WWWW */
489      FETCH(gen, *inst, v1, 1, CHAN_W);    /* v1 = src1.WWWW */
490      ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */
491   }
492   else if (inst->Instruction.Opcode == TGSI_OPCODE_DPH) {
493      FETCH(gen, *inst, v1, 1, CHAN_W);    /* v1 = src1.WWWW */
494      ppc_vaddfp(gen->f, v2, v2, v1);      /* v2 = v2 + v1 */
495   }
496
497   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
498      STORE(gen, *inst, v2, 0, chan_index);  /* store v2 */
499   }
500   ppc_release_vec_register(gen->f, v0);
501   ppc_release_vec_register(gen->f, v1);
502   ppc_release_vec_register(gen->f, v2);
503}
504
505
506static void
507emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst)
508{
509   int v0 = ppc_allocate_vec_register(gen->f);
510   int v1 = ppc_allocate_vec_register(gen->f);
511   int v2 = ppc_allocate_vec_register(gen->f);
512   int v3 = ppc_allocate_vec_register(gen->f);
513   uint chan_index;
514   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
515      FETCH(gen, *inst, v0, 0, chan_index);   /* v0 = srcreg[0] */
516      FETCH(gen, *inst, v1, 1, chan_index);   /* v1 = srcreg[1] */
517      FETCH(gen, *inst, v2, 2, chan_index);   /* v2 = srcreg[2] */
518      switch (inst->Instruction.Opcode) {
519      case TGSI_OPCODE_MAD:
520         ppc_vmaddfp(gen->f, v3, v0, v1, v2);   /* v3 = v0 * v1 + v2 */
521         break;
522      case TGSI_OPCODE_LRP:
523         ppc_vsubfp(gen->f, v3, v1, v2);        /* v3 = v1 - v2 */
524         ppc_vmaddfp(gen->f, v3, v0, v3, v2);   /* v3 = v0 * v3 + v2 */
525         break;
526      default:
527         assert(0);
528      }
529      STORE(gen, *inst, v3, 0, chan_index);   /* store v3 */
530   }
531   ppc_release_vec_register(gen->f, v0);
532   ppc_release_vec_register(gen->f, v1);
533   ppc_release_vec_register(gen->f, v2);
534   ppc_release_vec_register(gen->f, v3);
535}
536
537
538
/**
 * Emit an approximation of vr = pow(va, vb), computed per element as
 * exp2(log2(va) * vb) with the vector log2/exp2 estimate instructions.
 *
 * \param f   function to emit code into
 * \param vr  vector register receiving the result
 * \param va  vector register holding the base
 * \param vb  vector register holding the exponent
 */
static void
ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb)
{
   int prod_vec = ppc_allocate_vec_register(f);
   int addend_vec = ppc_allocate_vec_register(f);

   /* the multiply below is a multiply-add, so it needs a zero addend */
   ppc_vload_float(f, addend_vec, 0.0f);

   /* prod = log2(va) */
   ppc_vlogefp(f, prod_vec, va);
   /* prod = prod * vb + 0 */
   ppc_vmaddfp(f, prod_vec, prod_vec, vb, addend_vec);
   /* vr = 2^prod */
   ppc_vexptefp(f, vr, prod_vec);

   ppc_release_vec_register(f, prod_vec);
   ppc_release_vec_register(f, addend_vec);
}
556
557
558static void
559emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst)
560{
561   int one_vec = gen_one_vec(gen);
562
563   /* Compute X */
564   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
565      STORE(gen, *inst, one_vec, 0, CHAN_X);
566   }
567
568   /* Compute Y, Z */
569   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) ||
570       IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
571      int x_vec = ppc_allocate_vec_register(gen->f);
572      int zero_vec = ppc_allocate_vec_register(gen->f);
573
574      FETCH(gen, *inst, x_vec, 0, CHAN_X);        /* x_vec = src[0].x */
575
576      ppc_vload_float(gen->f, zero_vec, 0.0f);    /* zero = {0,0,0,0} */
577      ppc_vmaxfp(gen->f, x_vec, x_vec, zero_vec); /* x_vec = max(x_vec, 0) */
578
579      if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
580         STORE(gen, *inst, x_vec, 0, CHAN_Y);        /* store Y */
581      }
582
583      if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
584         int y_vec = ppc_allocate_vec_register(gen->f);
585         int z_vec = ppc_allocate_vec_register(gen->f);
586         int w_vec = ppc_allocate_vec_register(gen->f);
587         int pow_vec = ppc_allocate_vec_register(gen->f);
588         int pos_vec = ppc_allocate_vec_register(gen->f);
589         int c128_vec = ppc_allocate_vec_register(gen->f);
590
591         FETCH(gen, *inst, y_vec, 0, CHAN_Y);        /* y_vec = src[0].y */
592         ppc_vmaxfp(gen->f, y_vec, y_vec, zero_vec); /* y_vec = max(y_vec, 0) */
593
594         FETCH(gen, *inst, w_vec, 0, CHAN_W);        /* w_vec = src[0].w */
595
596         /* XXX clamp Y to [-128, 128] */
597         ppc_vload_float(gen->f, c128_vec, 128.0f);
598
599         /* if temp.x > 0
600          *    pow(tmp.y, tmp.w)
601          * else
602          *   0.0
603          */
604
605         ppc_vec_pow(gen->f, pow_vec, y_vec, w_vec);      /* pow = pow(y, w) */
606         ppc_vcmpgtfpx(gen->f, pos_vec, x_vec, zero_vec); /* pos = x > 0 */
607         ppc_vand(gen->f, z_vec, pow_vec, pos_vec);       /* z = pow & pos */
608
609         STORE(gen, *inst, z_vec, 0, CHAN_Z);             /* store Z */
610
611         ppc_release_vec_register(gen->f, y_vec);
612         ppc_release_vec_register(gen->f, z_vec);
613         ppc_release_vec_register(gen->f, w_vec);
614         ppc_release_vec_register(gen->f, pow_vec);
615         ppc_release_vec_register(gen->f, pos_vec);
616      }
617
618      ppc_release_vec_register(gen->f, x_vec);
619      ppc_release_vec_register(gen->f, zero_vec);
620   }
621
622   /* Compute W */
623   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) {
624      STORE(gen, *inst, one_vec, 0, CHAN_W);
625   }
626}
627
628
629static int
630emit_instruction(struct gen_context *gen,
631                 struct tgsi_full_instruction *inst)
632{
633   switch (inst->Instruction.Opcode) {
634   case TGSI_OPCODE_MOV:
635   case TGSI_OPCODE_ABS:
636   case TGSI_OPCODE_FLOOR:
637   case TGSI_OPCODE_FRAC:
638   case TGSI_OPCODE_EXPBASE2:
639   case TGSI_OPCODE_LOGBASE2:
640      emit_unaryop(gen, inst);
641      break;
642   case TGSI_OPCODE_RSQ:
643   case TGSI_OPCODE_RCP:
644      emit_scalar_unaryop(gen, inst);
645      break;
646   case TGSI_OPCODE_ADD:
647   case TGSI_OPCODE_SUB:
648   case TGSI_OPCODE_MUL:
649   case TGSI_OPCODE_MIN:
650   case TGSI_OPCODE_MAX:
651      emit_binop(gen, inst);
652      break;
653   case TGSI_OPCODE_SEQ:
654   case TGSI_OPCODE_SNE:
655   case TGSI_OPCODE_SLT:
656   case TGSI_OPCODE_SGT:
657   case TGSI_OPCODE_SLE:
658   case TGSI_OPCODE_SGE:
659      emit_inequality(gen, inst);
660      break;
661   case TGSI_OPCODE_MAD:
662   case TGSI_OPCODE_LRP:
663      emit_triop(gen, inst);
664      break;
665   case TGSI_OPCODE_DP3:
666   case TGSI_OPCODE_DP4:
667   case TGSI_OPCODE_DPH:
668      emit_dotprod(gen, inst);
669      break;
670   case TGSI_OPCODE_LIT:
671      emit_lit(gen, inst);
672      break;
673   case TGSI_OPCODE_END:
674      /* normal end */
675      return 1;
676   default:
677      return 0;
678   }
679
680
681   return 1;
682}
683
684static void
685emit_declaration(
686   struct ppc_function *func,
687   struct tgsi_full_declaration *decl )
688{
689   if( decl->Declaration.File == TGSI_FILE_INPUT ) {
690#if 0
691      unsigned first, last, mask;
692      unsigned i, j;
693
694      first = decl->DeclarationRange.First;
695      last = decl->DeclarationRange.Last;
696      mask = decl->Declaration.UsageMask;
697
698      for( i = first; i <= last; i++ ) {
699         for( j = 0; j < NUM_CHANNELS; j++ ) {
700            if( mask & (1 << j) ) {
701               switch( decl->Declaration.Interpolate ) {
702               case TGSI_INTERPOLATE_CONSTANT:
703                  emit_coef_a0( func, 0, i, j );
704                  emit_inputs( func, 0, i, j );
705                  break;
706
707               case TGSI_INTERPOLATE_LINEAR:
708                  emit_tempf( func, 0, 0, TGSI_SWIZZLE_X );
709                  emit_coef_dadx( func, 1, i, j );
710                  emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y );
711                  emit_coef_dady( func, 3, i, j );
712                  emit_mul( func, 0, 1 );    /* x * dadx */
713                  emit_coef_a0( func, 4, i, j );
714                  emit_mul( func, 2, 3 );    /* y * dady */
715                  emit_add( func, 0, 4 );    /* x * dadx + a0 */
716                  emit_add( func, 0, 2 );    /* x * dadx + y * dady + a0 */
717                  emit_inputs( func, 0, i, j );
718                  break;
719
720               case TGSI_INTERPOLATE_PERSPECTIVE:
721                  emit_tempf( func, 0, 0, TGSI_SWIZZLE_X );
722                  emit_coef_dadx( func, 1, i, j );
723                  emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y );
724                  emit_coef_dady( func, 3, i, j );
725                  emit_mul( func, 0, 1 );    /* x * dadx */
726                  emit_tempf( func, 4, 0, TGSI_SWIZZLE_W );
727                  emit_coef_a0( func, 5, i, j );
728                  emit_rcp( func, 4, 4 );    /* 1.0 / w */
729                  emit_mul( func, 2, 3 );    /* y * dady */
730                  emit_add( func, 0, 5 );    /* x * dadx + a0 */
731                  emit_add( func, 0, 2 );    /* x * dadx + y * dady + a0 */
732                  emit_mul( func, 0, 4 );    /* (x * dadx + y * dady + a0) / w */
733                  emit_inputs( func, 0, i, j );
734                  break;
735
736               default:
737                  assert( 0 );
738		  break;
739               }
740            }
741         }
742      }
743#endif
744   }
745}
746
747
748
/**
 * Emit the function prologue.
 * Nothing is emitted at present; a stack frame is not set up yet.
 */
static void
emit_prologue(struct ppc_function *func)
{
   /* XXX set up stack frame */
   (void) func;
}
754
755
/**
 * Emit the function epilogue: just the return instruction.
 * No stack frame is set up by the prologue, so there is none to restore.
 */
static void
emit_epilogue(struct ppc_function *func)
{
   /* XXX restoring the previous stack frame would go before the return */
   ppc_return(func);
}
762
763
764
765/**
766 * Translate a TGSI vertex/fragment shader to PPC code.
767 *
768 * \param tokens  the TGSI input shader
769 * \param func  the output PPC code/function
770 * \param immediates  buffer to place immediates, later passed to PPC func
771 * \return TRUE for success, FALSE if translation failed
772 */
773boolean
774tgsi_emit_ppc(const struct tgsi_token *tokens,
775              struct ppc_function *func,
776              float (*immediates)[4],
777              boolean do_swizzles )
778{
779   static int use_ppc_asm = -1;
780   struct tgsi_parse_context parse;
781   /*boolean instruction_phase = FALSE;*/
782   unsigned ok = 1;
783   uint num_immediates = 0;
784   struct gen_context gen;
785
786   if (use_ppc_asm < 0) {
787      /* If GALLIUM_NOPPC is set, don't use PPC codegen */
788      use_ppc_asm = !debug_get_bool_option("GALLIUM_NOPPC", FALSE);
789   }
790   if (!use_ppc_asm)
791      return FALSE;
792
793   util_init_math();
794
795   gen.f = func;
796   gen.inputs_reg = ppc_reserve_register(func, 3);   /* first function param */
797   gen.outputs_reg = ppc_reserve_register(func, 4);  /* second function param */
798   gen.temps_reg = ppc_reserve_register(func, 5);    /* ... */
799   gen.immed_reg = ppc_reserve_register(func, 6);
800   gen.const_reg = ppc_reserve_register(func, 7);
801   gen.one_vec = -1;
802   gen.bit31_vec = -1;
803
804   emit_prologue(func);
805
806   tgsi_parse_init( &parse, tokens );
807
808   while (!tgsi_parse_end_of_tokens(&parse) && ok) {
809      tgsi_parse_token(&parse);
810
811      switch (parse.FullToken.Token.Type) {
812      case TGSI_TOKEN_TYPE_DECLARATION:
813         if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
814            emit_declaration(func, &parse.FullToken.FullDeclaration );
815         }
816         break;
817
818      case TGSI_TOKEN_TYPE_INSTRUCTION:
819         ok = emit_instruction(&gen, &parse.FullToken.FullInstruction);
820
821	 if (!ok) {
822	    debug_printf("failed to translate tgsi opcode %d to PPC (%s)\n",
823			 parse.FullToken.FullInstruction.Instruction.Opcode,
824                         parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ?
825                         "vertex shader" : "fragment shader");
826	 }
827         break;
828
829      case TGSI_TOKEN_TYPE_IMMEDIATE:
830         /* splat each immediate component into a float[4] vector for SoA */
831         {
832            const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
833            float *imm = (float *) immediates;
834            uint i;
835            assert(size <= 4);
836            assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
837            for (i = 0; i < size; i++) {
838               const float value =
839                  parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
840               imm[num_immediates * 4 + 0] =
841               imm[num_immediates * 4 + 1] =
842               imm[num_immediates * 4 + 2] =
843               imm[num_immediates * 4 + 3] = value;
844               num_immediates++;
845            }
846         }
847         break;
848
849      default:
850	 ok = 0;
851         assert( 0 );
852      }
853   }
854
855   emit_epilogue(func);
856
857   tgsi_parse_free( &parse );
858
859   return ok;
860}
861
862#endif /* PIPE_ARCH_PPC */
863