1/**************************************************************************
2 *
3 * Copyright (C) 2008 Tungsten Graphics, Inc.   All Rights Reserved.
4 * Copyright (C) 2009 VMware, Inc.  All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
20 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 **************************************************************************/
24
25/**
26 * PPC code generation.
27 * For reference, see http://www.power.org/resources/reading/PowerISA_V2.05.pdf
28 * ABI info: http://www.cs.utsa.edu/~whaley/teach/cs6463FHPO/LEC/lec12_ho.pdf
29 *
30 * Other PPC refs:
31 * http://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/852569B20050FF778525699600719DF2
32 * http://www.ibm.com/developerworks/eserver/library/es-archguide-v2.html
33 * http://www.freescale.com/files/product/doc/MPCFPE32B.pdf
34 *
35 * \author Brian Paul
36 */
37
38
39#include <stdio.h>
40#include "util/u_memory.h"
41#include "util/u_debug.h"
42#include "rtasm_execmem.h"
43#include "rtasm_ppc.h"
44
45
46void
47ppc_init_func(struct ppc_function *p)
48{
49   uint i;
50
51   memset(p, 0, sizeof(*p));
52
53   p->num_inst = 0;
54   p->max_inst = 100; /* first guess at buffer size */
55   p->store = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE);
56   p->reg_used = 0x0;
57   p->fp_used = 0x0;
58   p->vec_used = 0x0;
59
60   p->print = FALSE;
61   p->indent = 0;
62
63   /* only allow using gp registers 3..12 for now */
64   for (i = 0; i < 3; i++)
65      ppc_reserve_register(p, i);
66   for (i = 12; i < PPC_NUM_REGS; i++)
67      ppc_reserve_register(p, i);
68}
69
70
71void
72ppc_release_func(struct ppc_function *p)
73{
74   assert(p->num_inst <= p->max_inst);
75   if (p->store != NULL) {
76      rtasm_exec_free(p->store);
77   }
78   p->store = NULL;
79}
80
81
82uint
83ppc_num_instructions(const struct ppc_function *p)
84{
85   return p->num_inst;
86}
87
88
89void (*ppc_get_func(struct ppc_function *p))(void)
90{
91#if 0
92   DUMP_END();
93   if (DISASSEM && p->store)
94      debug_printf("disassemble %p %p\n", p->store, p->csr);
95
96   if (p->store == p->error_overflow)
97      return (void (*)(void)) NULL;
98   else
99#endif
100      return (void (*)(void)) pointer_to_func(p->store);
101}
102
103
104void
105ppc_dump_func(const struct ppc_function *p)
106{
107   uint i;
108   for (i = 0; i < p->num_inst; i++) {
109      debug_printf("%3u: 0x%08x\n", i, p->store[i]);
110   }
111}
112
113
114void
115ppc_print_code(struct ppc_function *p, boolean enable)
116{
117   p->print = enable;
118}
119
120
121void
122ppc_indent(struct ppc_function *p, int spaces)
123{
124   p->indent += spaces;
125}
126
127
128static void
129indent(const struct ppc_function *p)
130{
131   int i;
132   for (i = 0; i < p->indent; i++) {
133      putchar(' ');
134   }
135}
136
137
138void
139ppc_comment(struct ppc_function *p, int rel_indent, const char *s)
140{
141   if (p->print) {
142      p->indent += rel_indent;
143      indent(p);
144      p->indent -= rel_indent;
145      printf("# %s\n", s);
146   }
147}
148
149
150/**
151 * Mark a register as being unavailable.
152 */
153int
154ppc_reserve_register(struct ppc_function *p, int reg)
155{
156   assert(reg < PPC_NUM_REGS);
157   p->reg_used |= (1 << reg);
158   return reg;
159}
160
161
162/**
163 * Allocate a general purpose register.
164 * \return register index or -1 if none left.
165 */
166int
167ppc_allocate_register(struct ppc_function *p)
168{
169   unsigned i;
170   for (i = 0; i < PPC_NUM_REGS; i++) {
171      const uint32_t mask = 1 << i;
172      if ((p->reg_used & mask) == 0) {
173         p->reg_used |= mask;
174         return i;
175      }
176   }
177   printf("OUT OF PPC registers!\n");
178   return -1;
179}
180
181
182/**
183 * Mark the given general purpose register as "unallocated".
184 */
185void
186ppc_release_register(struct ppc_function *p, int reg)
187{
188   assert(reg < PPC_NUM_REGS);
189   assert(p->reg_used & (1 << reg));
190   p->reg_used &= ~(1 << reg);
191}
192
193
194/**
195 * Allocate a floating point register.
196 * \return register index or -1 if none left.
197 */
198int
199ppc_allocate_fp_register(struct ppc_function *p)
200{
201   unsigned i;
202   for (i = 0; i < PPC_NUM_FP_REGS; i++) {
203      const uint32_t mask = 1 << i;
204      if ((p->fp_used & mask) == 0) {
205         p->fp_used |= mask;
206         return i;
207      }
208   }
209   printf("OUT OF PPC FP registers!\n");
210   return -1;
211}
212
213
214/**
215 * Mark the given floating point register as "unallocated".
216 */
217void
218ppc_release_fp_register(struct ppc_function *p, int reg)
219{
220   assert(reg < PPC_NUM_FP_REGS);
221   assert(p->fp_used & (1 << reg));
222   p->fp_used &= ~(1 << reg);
223}
224
225
226/**
227 * Allocate a vector register.
228 * \return register index or -1 if none left.
229 */
230int
231ppc_allocate_vec_register(struct ppc_function *p)
232{
233   unsigned i;
234   for (i = 0; i < PPC_NUM_VEC_REGS; i++) {
235      const uint32_t mask = 1 << i;
236      if ((p->vec_used & mask) == 0) {
237         p->vec_used |= mask;
238         return i;
239      }
240   }
241   printf("OUT OF PPC VEC registers!\n");
242   return -1;
243}
244
245
246/**
247 * Mark the given vector register as "unallocated".
248 */
249void
250ppc_release_vec_register(struct ppc_function *p, int reg)
251{
252   assert(reg < PPC_NUM_VEC_REGS);
253   assert(p->vec_used & (1 << reg));
254   p->vec_used &= ~(1 << reg);
255}
256
257
258/**
259 * Append instruction to instruction buffer.  Grow buffer if out of room.
260 */
261static void
262emit_instruction(struct ppc_function *p, uint32_t inst_bits)
263{
264   if (!p->store)
265      return;  /* out of memory, drop the instruction */
266
267   if (p->num_inst == p->max_inst) {
268      /* allocate larger buffer */
269      uint32_t *newbuf;
270      p->max_inst *= 2;  /* 2x larger */
271      newbuf = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE);
272      if (newbuf) {
273         memcpy(newbuf, p->store, p->num_inst * PPC_INST_SIZE);
274      }
275      rtasm_exec_free(p->store);
276      p->store = newbuf;
277      if (!p->store) {
278         /* out of memory */
279         p->num_inst = 0;
280         return;
281      }
282   }
283
284   p->store[p->num_inst++] = inst_bits;
285}
286
287
288union vx_inst {
289   uint32_t bits;
290   struct {
291      unsigned op:6;
292      unsigned vD:5;
293      unsigned vA:5;
294      unsigned vB:5;
295      unsigned op2:11;
296   } inst;
297};
298
299static INLINE void
300emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB,
301        const char *format, boolean transpose)
302{
303   union vx_inst inst;
304   inst.inst.op = 4;
305   inst.inst.vD = vD;
306   inst.inst.vA = vA;
307   inst.inst.vB = vB;
308   inst.inst.op2 = op2;
309   emit_instruction(p, inst.bits);
310   if (p->print) {
311      indent(p);
312      if (transpose)
313         printf(format, vD, vB, vA);
314      else
315         printf(format, vD, vA, vB);
316   }
317}
318
319
320union vxr_inst {
321   uint32_t bits;
322   struct {
323      unsigned op:6;
324      unsigned vD:5;
325      unsigned vA:5;
326      unsigned vB:5;
327      unsigned rC:1;
328      unsigned op2:10;
329   } inst;
330};
331
332static INLINE void
333emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB,
334         const char *format)
335{
336   union vxr_inst inst;
337   inst.inst.op = 4;
338   inst.inst.vD = vD;
339   inst.inst.vA = vA;
340   inst.inst.vB = vB;
341   inst.inst.rC = 0;
342   inst.inst.op2 = op2;
343   emit_instruction(p, inst.bits);
344   if (p->print) {
345      indent(p);
346      printf(format, vD, vA, vB);
347   }
348}
349
350
351union va_inst {
352   uint32_t bits;
353   struct {
354      unsigned op:6;
355      unsigned vD:5;
356      unsigned vA:5;
357      unsigned vB:5;
358      unsigned vC:5;
359      unsigned op2:6;
360   } inst;
361};
362
363static INLINE void
364emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC,
365        const char *format)
366{
367   union va_inst inst;
368   inst.inst.op = 4;
369   inst.inst.vD = vD;
370   inst.inst.vA = vA;
371   inst.inst.vB = vB;
372   inst.inst.vC = vC;
373   inst.inst.op2 = op2;
374   emit_instruction(p, inst.bits);
375   if (p->print) {
376      indent(p);
377      printf(format, vD, vA, vB, vC);
378   }
379}
380
381
382union i_inst {
383   uint32_t bits;
384   struct {
385      unsigned op:6;
386      unsigned li:24;
387      unsigned aa:1;
388      unsigned lk:1;
389   } inst;
390};
391
392static INLINE void
393emit_i(struct ppc_function *p, uint op, uint li, uint aa, uint lk)
394{
395   union i_inst inst;
396   inst.inst.op = op;
397   inst.inst.li = li;
398   inst.inst.aa = aa;
399   inst.inst.lk = lk;
400   emit_instruction(p, inst.bits);
401}
402
403
404union xl_inst {
405   uint32_t bits;
406   struct {
407      unsigned op:6;
408      unsigned bo:5;
409      unsigned bi:5;
410      unsigned unused:3;
411      unsigned bh:2;
412      unsigned op2:10;
413      unsigned lk:1;
414   } inst;
415};
416
417static INLINE void
418emit_xl(struct ppc_function *p, uint op, uint bo, uint bi, uint bh,
419        uint op2, uint lk)
420{
421   union xl_inst inst;
422   inst.inst.op = op;
423   inst.inst.bo = bo;
424   inst.inst.bi = bi;
425   inst.inst.unused = 0x0;
426   inst.inst.bh = bh;
427   inst.inst.op2 = op2;
428   inst.inst.lk = lk;
429   emit_instruction(p, inst.bits);
430}
431
432static INLINE void
433dump_xl(const char *name, uint inst)
434{
435   union xl_inst i;
436
437   i.bits = inst;
438   debug_printf("%s = 0x%08x\n", name, inst);
439   debug_printf(" op: %d 0x%x\n", i.inst.op, i.inst.op);
440   debug_printf(" bo: %d 0x%x\n", i.inst.bo, i.inst.bo);
441   debug_printf(" bi: %d 0x%x\n", i.inst.bi, i.inst.bi);
442   debug_printf(" unused: %d 0x%x\n", i.inst.unused, i.inst.unused);
443   debug_printf(" bh: %d 0x%x\n", i.inst.bh, i.inst.bh);
444   debug_printf(" op2: %d 0x%x\n", i.inst.op2, i.inst.op2);
445   debug_printf(" lk: %d 0x%x\n", i.inst.lk, i.inst.lk);
446}
447
448
449union x_inst {
450   uint32_t bits;
451   struct {
452      unsigned op:6;
453      unsigned vrs:5;
454      unsigned ra:5;
455      unsigned rb:5;
456      unsigned op2:10;
457      unsigned unused:1;
458   } inst;
459};
460
461static INLINE void
462emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2,
463       const char *format)
464{
465   union x_inst inst;
466   inst.inst.op = op;
467   inst.inst.vrs = vrs;
468   inst.inst.ra = ra;
469   inst.inst.rb = rb;
470   inst.inst.op2 = op2;
471   inst.inst.unused = 0x0;
472   emit_instruction(p, inst.bits);
473   if (p->print) {
474      indent(p);
475      printf(format, vrs, ra, rb);
476   }
477}
478
479
480union d_inst {
481   uint32_t bits;
482   struct {
483      unsigned op:6;
484      unsigned rt:5;
485      unsigned ra:5;
486      unsigned si:16;
487   } inst;
488};
489
490static INLINE void
491emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si,
492       const char *format, boolean transpose)
493{
494   union d_inst inst;
495   assert(si >= -32768);
496   assert(si <= 32767);
497   inst.inst.op = op;
498   inst.inst.rt = rt;
499   inst.inst.ra = ra;
500   inst.inst.si = (unsigned) (si & 0xffff);
501   emit_instruction(p, inst.bits);
502   if (p->print) {
503      indent(p);
504      if (transpose)
505         printf(format, rt, si, ra);
506      else
507         printf(format, rt, ra, si);
508   }
509}
510
511
512union a_inst {
513   uint32_t bits;
514   struct {
515      unsigned op:6;
516      unsigned frt:5;
517      unsigned fra:5;
518      unsigned frb:5;
519      unsigned unused:5;
520      unsigned op2:5;
521      unsigned rc:1;
522   } inst;
523};
524
525static INLINE void
526emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2,
527       uint rc, const char *format)
528{
529   union a_inst inst;
530   inst.inst.op = op;
531   inst.inst.frt = frt;
532   inst.inst.fra = fra;
533   inst.inst.frb = frb;
534   inst.inst.unused = 0x0;
535   inst.inst.op2 = op2;
536   inst.inst.rc = rc;
537   emit_instruction(p, inst.bits);
538   if (p->print) {
539      indent(p);
540      printf(format, frt, fra, frb);
541   }
542}
543
544
545union xo_inst {
546   uint32_t bits;
547   struct {
548      unsigned op:6;
549      unsigned rt:5;
550      unsigned ra:5;
551      unsigned rb:5;
552      unsigned oe:1;
553      unsigned op2:9;
554      unsigned rc:1;
555   } inst;
556};
557
558static INLINE void
559emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe,
560        uint op2, uint rc, const char *format)
561{
562   union xo_inst inst;
563   inst.inst.op = op;
564   inst.inst.rt = rt;
565   inst.inst.ra = ra;
566   inst.inst.rb = rb;
567   inst.inst.oe = oe;
568   inst.inst.op2 = op2;
569   inst.inst.rc = rc;
570   emit_instruction(p, inst.bits);
571   if (p->print) {
572      indent(p);
573      printf(format, rt, ra, rb);
574   }
575}
576
577
578
579
580
581/**
582 ** float vector arithmetic
583 **/
584
585/** vector float add */
586void
587ppc_vaddfp(struct ppc_function *p, uint vD, uint vA, uint vB)
588{
589   emit_vx(p, 10, vD, vA, vB, "vaddfp\t%u, v%u, v%u\n", FALSE);
590}
591
592/** vector float substract */
593void
594ppc_vsubfp(struct ppc_function *p, uint vD, uint vA, uint vB)
595{
596   emit_vx(p, 74, vD, vA, vB, "vsubfp\tv%u, v%u, v%u\n", FALSE);
597}
598
599/** vector float min */
600void
601ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB)
602{
603   emit_vx(p, 1098, vD, vA, vB, "vminfp\tv%u, v%u, v%u\n", FALSE);
604}
605
606/** vector float max */
607void
608ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB)
609{
610   emit_vx(p, 1034, vD, vA, vB, "vmaxfp\tv%u, v%u, v%u\n", FALSE);
611}
612
613/** vector float mult add: vD = vA * vB + vC */
614void
615ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
616{
617   /* note arg order */
618   emit_va(p, 46, vD, vA, vC, vB, "vmaddfp\tv%u, v%u, v%u, v%u\n");
619}
620
621/** vector float negative mult subtract: vD = vA - vB * vC */
622void
623ppc_vnmsubfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
624{
625   /* note arg order */
626   emit_va(p, 47, vD, vB, vA, vC, "vnmsubfp\tv%u, v%u, v%u, v%u\n");
627}
628
629/** vector float compare greater than */
630void
631ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB)
632{
633   emit_vxr(p, 710, vD, vA, vB, "vcmpgtfpx\tv%u, v%u, v%u");
634}
635
636/** vector float compare greater than or equal to */
637void
638ppc_vcmpgefpx(struct ppc_function *p, uint vD, uint vA, uint vB)
639{
640   emit_vxr(p, 454, vD, vA, vB, "vcmpgefpx\tv%u, v%u, v%u");
641}
642
643/** vector float compare equal */
644void
645ppc_vcmpeqfpx(struct ppc_function *p, uint vD, uint vA, uint vB)
646{
647   emit_vxr(p, 198, vD, vA, vB, "vcmpeqfpx\tv%u, v%u, v%u");
648}
649
650/** vector float 2^x */
651void
652ppc_vexptefp(struct ppc_function *p, uint vD, uint vB)
653{
654   emit_vx(p, 394, vD, 0, vB, "vexptefp\tv%u, 0%u, v%u\n", FALSE);
655}
656
657/** vector float log2(x) */
658void
659ppc_vlogefp(struct ppc_function *p, uint vD, uint vB)
660{
661   emit_vx(p, 458, vD, 0, vB, "vlogefp\tv%u, 0%u, v%u\n", FALSE);
662}
663
664/** vector float reciprocol */
665void
666ppc_vrefp(struct ppc_function *p, uint vD, uint vB)
667{
668   emit_vx(p, 266, vD, 0, vB, "vrefp\tv%u, 0%u, v%u\n", FALSE);
669}
670
671/** vector float reciprocol sqrt estimate */
672void
673ppc_vrsqrtefp(struct ppc_function *p, uint vD, uint vB)
674{
675   emit_vx(p, 330, vD, 0, vB, "vrsqrtefp\tv%u, 0%u, v%u\n", FALSE);
676}
677
678/** vector float round to negative infinity */
679void
680ppc_vrfim(struct ppc_function *p, uint vD, uint vB)
681{
682   emit_vx(p, 714, vD, 0, vB, "vrfim\tv%u, 0%u, v%u\n", FALSE);
683}
684
685/** vector float round to positive infinity */
686void
687ppc_vrfip(struct ppc_function *p, uint vD, uint vB)
688{
689   emit_vx(p, 650, vD, 0, vB, "vrfip\tv%u, 0%u, v%u\n", FALSE);
690}
691
692/** vector float round to nearest int */
693void
694ppc_vrfin(struct ppc_function *p, uint vD, uint vB)
695{
696   emit_vx(p, 522, vD, 0, vB, "vrfin\tv%u, 0%u, v%u\n", FALSE);
697}
698
699/** vector float round to int toward zero */
700void
701ppc_vrfiz(struct ppc_function *p, uint vD, uint vB)
702{
703   emit_vx(p, 586, vD, 0, vB, "vrfiz\tv%u, 0%u, v%u\n", FALSE);
704}
705
706/** vector store: store vR at mem[rA+rB] */
707void
708ppc_stvx(struct ppc_function *p, uint vR, uint rA, uint rB)
709{
710   emit_x(p, 31, vR, rA, rB, 231, "stvx\tv%u, r%u, r%u\n");
711}
712
713/** vector load: vR = mem[rA+rB] */
714void
715ppc_lvx(struct ppc_function *p, uint vR, uint rA, uint rB)
716{
717   emit_x(p, 31, vR, rA, rB, 103, "lvx\tv%u, r%u, r%u\n");
718}
719
720/** load vector element word: vR = mem_word[ra+rb] */
721void
722ppc_lvewx(struct ppc_function *p, uint vR, uint rA, uint rB)
723{
724   emit_x(p, 31, vR, rA, rB, 71, "lvewx\tv%u, r%u, r%u\n");
725}
726
727
728
729
730/**
731 ** vector bitwise operations
732 **/
733
734/** vector and */
735void
736ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB)
737{
738   emit_vx(p, 1028, vD, vA, vB, "vand\tv%u, v%u, v%u\n", FALSE);
739}
740
741/** vector and complement */
742void
743ppc_vandc(struct ppc_function *p, uint vD, uint vA, uint vB)
744{
745   emit_vx(p, 1092, vD, vA, vB, "vandc\tv%u, v%u, v%u\n", FALSE);
746}
747
748/** vector or */
749void
750ppc_vor(struct ppc_function *p, uint vD, uint vA, uint vB)
751{
752   emit_vx(p, 1156, vD, vA, vB, "vor\tv%u, v%u, v%u\n", FALSE);
753}
754
755/** vector nor */
756void
757ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB)
758{
759   emit_vx(p, 1284, vD, vA, vB, "vnor\tv%u, v%u, v%u\n", FALSE);
760}
761
762/** vector xor */
763void
764ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB)
765{
766   emit_vx(p, 1220, vD, vA, vB, "vxor\tv%u, v%u, v%u\n", FALSE);
767}
768
769/** Pseudo-instruction: vector move */
770void
771ppc_vmove(struct ppc_function *p, uint vD, uint vA)
772{
773   boolean print = p->print;
774   p->print = FALSE;
775   ppc_vor(p, vD, vA, vA);
776   if (print) {
777      indent(p);
778      printf("vor\tv%u, v%u, v%u \t# v%u = v%u\n", vD, vA, vA, vD, vA);
779   }
780   p->print = print;
781}
782
783/** Set vector register to {0,0,0,0} */
784void
785ppc_vzero(struct ppc_function *p, uint vr)
786{
787   boolean print = p->print;
788   p->print = FALSE;
789   ppc_vxor(p, vr, vr, vr);
790   if (print) {
791      indent(p);
792      printf("vxor\tv%u, v%u, v%u \t# v%u = {0,0,0,0}\n", vr, vr, vr, vr);
793   }
794   p->print = print;
795}
796
797
798
799
800/**
801 ** Vector shuffle / select / splat / etc
802 **/
803
804/** vector permute */
805void
806ppc_vperm(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
807{
808   emit_va(p, 43, vD, vA, vB, vC, "vperm\tr%u, r%u, r%u, r%u");
809}
810
811/** vector select */
812void
813ppc_vsel(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
814{
815   emit_va(p, 42, vD, vA, vB, vC, "vsel\tr%u, r%u, r%u, r%u");
816}
817
818/** vector splat byte */
819void
820ppc_vspltb(struct ppc_function *p, uint vD, uint vB, uint imm)
821{
822   emit_vx(p, 42, vD, imm, vB, "vspltb\tv%u, v%u, %u\n", TRUE);
823}
824
825/** vector splat half word */
826void
827ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm)
828{
829   emit_vx(p, 588, vD, imm, vB, "vsplthw\tv%u, v%u, %u\n", TRUE);
830}
831
832/** vector splat word */
833void
834ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm)
835{
836   emit_vx(p, 652, vD, imm, vB, "vspltw\tv%u, v%u, %u\n", TRUE);
837}
838
839/** vector splat signed immediate word */
840void
841ppc_vspltisw(struct ppc_function *p, uint vD, int imm)
842{
843   assert(imm >= -16);
844   assert(imm < 15);
845   emit_vx(p, 908, vD, imm, 0, "vspltisw\tv%u, %d, %u\n", FALSE);
846}
847
848/** vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */
849void
850ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB)
851{
852   emit_vx(p, 388, vD, vA, vB, "vslw\tv%u, v%u, v%u\n", FALSE);
853}
854
855
856
857
858/**
859 ** integer arithmetic
860 **/
861
862/** rt = ra + imm */
863void
864ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm)
865{
866   emit_d(p, 14, rt, ra, imm, "addi\tr%u, r%u, %d\n", FALSE);
867}
868
869/** rt = ra + (imm << 16) */
870void
871ppc_addis(struct ppc_function *p, uint rt, uint ra, int imm)
872{
873   emit_d(p, 15, rt, ra, imm, "addis\tr%u, r%u, %d\n", FALSE);
874}
875
876/** rt = ra + rb */
877void
878ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb)
879{
880   emit_xo(p, 31, rt, ra, rb, 0, 266, 0, "add\tr%u, r%u, r%u\n");
881}
882
883/** rt = ra AND ra */
884void
885ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb)
886{
887   emit_x(p, 31, ra, rt, rb, 28, "and\tr%u, r%u, r%u\n");  /* note argument order */
888}
889
890/** rt = ra AND imm */
891void
892ppc_andi(struct ppc_function *p, uint rt, uint ra, int imm)
893{
894   /* note argument order */
895   emit_d(p, 28, ra, rt, imm, "andi\tr%u, r%u, %d\n", FALSE);
896}
897
898/** rt = ra OR ra */
899void
900ppc_or(struct ppc_function *p, uint rt, uint ra, uint rb)
901{
902   emit_x(p, 31, ra, rt, rb, 444, "or\tr%u, r%u, r%u\n");  /* note argument order */
903}
904
905/** rt = ra OR imm */
906void
907ppc_ori(struct ppc_function *p, uint rt, uint ra, int imm)
908{
909   /* note argument order */
910   emit_d(p, 24, ra, rt, imm, "ori\tr%u, r%u, %d\n", FALSE);
911}
912
913/** rt = ra XOR ra */
914void
915ppc_xor(struct ppc_function *p, uint rt, uint ra, uint rb)
916{
917   emit_x(p, 31, ra, rt, rb, 316, "xor\tr%u, r%u, r%u\n");  /* note argument order */
918}
919
920/** rt = ra XOR imm */
921void
922ppc_xori(struct ppc_function *p, uint rt, uint ra, int imm)
923{
924   /* note argument order */
925   emit_d(p, 26, ra, rt, imm, "xori\tr%u, r%u, %d\n", FALSE);
926}
927
928/** pseudo instruction: move: rt = ra */
929void
930ppc_mr(struct ppc_function *p, uint rt, uint ra)
931{
932   ppc_or(p, rt, ra, ra);
933}
934
935/** pseudo instruction: load immediate: rt = imm */
936void
937ppc_li(struct ppc_function *p, uint rt, int imm)
938{
939   boolean print = p->print;
940   p->print = FALSE;
941   ppc_addi(p, rt, 0, imm);
942   if (print) {
943      indent(p);
944      printf("addi\tr%u, r0, %d \t# r%u = %d\n", rt, imm, rt, imm);
945   }
946   p->print = print;
947}
948
949/** rt = imm << 16 */
950void
951ppc_lis(struct ppc_function *p, uint rt, int imm)
952{
953   ppc_addis(p, rt, 0, imm);
954}
955
956/** rt = imm */
957void
958ppc_load_int(struct ppc_function *p, uint rt, int imm)
959{
960   ppc_lis(p, rt, (imm >> 16));          /* rt = imm >> 16 */
961   ppc_ori(p, rt, rt, (imm & 0xffff));   /* rt = rt | (imm & 0xffff) */
962}
963
964
965
966
967/**
968 ** integer load/store
969 **/
970
971/** store rs at memory[(ra)+d],
972 * then update ra = (ra)+d
973 */
974void
975ppc_stwu(struct ppc_function *p, uint rs, uint ra, int d)
976{
977   emit_d(p, 37, rs, ra, d, "stwu\tr%u, %d(r%u)\n", TRUE);
978}
979
980/** store rs at memory[(ra)+d] */
981void
982ppc_stw(struct ppc_function *p, uint rs, uint ra, int d)
983{
984   emit_d(p, 36, rs, ra, d, "stw\tr%u, %d(r%u)\n", TRUE);
985}
986
987/** Load rt = mem[(ra)+d];  then zero set high 32 bits to zero. */
988void
989ppc_lwz(struct ppc_function *p, uint rt, uint ra, int d)
990{
991   emit_d(p, 32, rt, ra, d, "lwz\tr%u, %d(r%u)\n", TRUE);
992}
993
994
995
996/**
997 ** Float (non-vector) arithmetic
998 **/
999
1000/** add: frt = fra + frb */
1001void
1002ppc_fadd(struct ppc_function *p, uint frt, uint fra, uint frb)
1003{
1004   emit_a(p, 63, frt, fra, frb, 21, 0, "fadd\tf%u, f%u, f%u\n");
1005}
1006
1007/** sub: frt = fra - frb */
1008void
1009ppc_fsub(struct ppc_function *p, uint frt, uint fra, uint frb)
1010{
1011   emit_a(p, 63, frt, fra, frb, 20, 0, "fsub\tf%u, f%u, f%u\n");
1012}
1013
1014/** convert to int: rt = (int) ra */
1015void
1016ppc_fctiwz(struct ppc_function *p, uint rt, uint fra)
1017{
1018   emit_x(p, 63, rt, 0, fra, 15, "fctiwz\tr%u, r%u, r%u\n");
1019}
1020
1021/** store frs at mem[(ra)+offset] */
1022void
1023ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset)
1024{
1025   emit_d(p, 52, frs, ra, offset, "stfs\tr%u, %d(r%u)\n", TRUE);
1026}
1027
1028/** store frs at mem[(ra)+(rb)] */
1029void
1030ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb)
1031{
1032   emit_x(p, 31, frs, ra, rb, 983, "stfiwx\tr%u, r%u, r%u\n");
1033}
1034
1035/** load frt = mem[(ra)+offset] */
1036void
1037ppc_lfs(struct ppc_function *p, uint frt, uint ra, int offset)
1038{
1039   emit_d(p, 48, frt, ra, offset, "stfs\tr%u, %d(r%u)\n", TRUE);
1040}
1041
1042
1043
1044
1045
1046/**
1047 ** branch instructions
1048 **/
1049
1050/** BLR: Branch to link register (p. 35) */
1051void
1052ppc_blr(struct ppc_function *p)
1053{
1054   emit_i(p, 18, 0, 0, 1);
1055   if (p->print) {
1056      indent(p);
1057      printf("blr\n");
1058   }
1059}
1060
1061/** Branch Conditional to Link Register (p. 36) */
1062void
1063ppc_bclr(struct ppc_function *p, uint condOp, uint branchHint, uint condReg)
1064{
1065   emit_xl(p, 19, condOp, condReg, branchHint, 16, 0);
1066   if (p->print) {
1067      indent(p);
1068      printf("bclr\t%u %u %u\n", condOp, branchHint, condReg);
1069   }
1070}
1071
1072/** Pseudo instruction: return from subroutine */
1073void
1074ppc_return(struct ppc_function *p)
1075{
1076   ppc_bclr(p, BRANCH_COND_ALWAYS, BRANCH_HINT_SUB_RETURN, 0);
1077}
1078