1/*  Copyright (C) 2013 IBM
2
3 Authors: Carl Love  <carll@us.ibm.com>
4          Maynard Johnson <maynardj@us.ibm.com>
5
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307, USA.
20
21 The GNU General Public License is contained in the file COPYING.
22
23 This program is based heavily on the test_isa_2_06_part*.c source files.
24 */
25
26#include <stdio.h>
27
28#ifdef HAS_ISA_2_07
29
30#include <stdint.h>
31#include <stdlib.h>
32#include <string.h>
33#include <malloc.h>
34#include <altivec.h>
35#include <math.h>
36
/* HWord_t: unsigned integer type used for the general-purpose register
 * variables declared in this file.  It is 64 bits wide when the compiler
 * defines __powerpc64__ and 32 bits wide otherwise.
 */
#ifdef __powerpc64__
typedef uint64_t HWord_t;
#else
typedef uint32_t HWord_t;
#endif /* __powerpc64__ */
42
43register HWord_t r14 __asm__ ("r14");
44register HWord_t r15 __asm__ ("r15");
45register HWord_t r16 __asm__ ("r16");
46register HWord_t r17 __asm__ ("r17");
47register double f14 __asm__ ("fr14");
48register double f15 __asm__ ("fr15");
49register double f16 __asm__ ("fr16");
50register double f17 __asm__ ("fr17");
51
/* File-scope scratch word; volatile so every access is actually performed.
 * NOTE(review): presumably a condition-register snapshot -- confirm at its
 * users, none of which appear in this part of the file.
 */
static volatile unsigned int cond_reg;
53
/* Truth values used with the Bool type in this file. */
#define True  1
#define False 0
56
/* Clobber list naming all eight condition-register fields. */
#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"

/* Move _arg into the condition register (mtcr).
 * NOTE: the expansion already ends in ';' -- kept as is so existing
 * call sites keep compiling; do not use it as the body of an unbraced
 * if/else.
 */
#define SET_CR(_arg) \
      __asm__ __volatile__ ("mtcr  %0" : : "b"(_arg) : ALLCR );

/* Move _arg into XER (mtxer).  Same trailing-';' caveat as SET_CR. */
#define SET_XER(_arg) \
      __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );

/* Read the condition register into _lval (mfcr). */
#define GET_CR(_lval) \
      __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )

/* Read XER into _lval (mfxer). */
#define GET_XER(_lval) \
      __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )

/* Read CR and then XER into the two given lvalues. */
#define GET_CR_XER(_lval_cr,_lval_xer) \
   do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)

/* Zero the condition register. */
#define SET_CR_ZERO \
      SET_CR(0)

/* Zero XER. */
#define SET_XER_ZERO \
      SET_XER(0)

/* Zero both CR and XER. */
#define SET_CR_XER_ZERO \
   do { SET_CR_ZERO; SET_XER_ZERO; } while (0)

/* Write a 0.0 double to the FPSCR with field mask 0xFF (mtfsf). */
#define SET_FPSCR_ZERO \
   do { double _d = 0.0; \
        __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
   } while (0)
87
/* One-byte boolean; holds True or False. */
typedef unsigned char Bool;
89
90
91/* These functions below that construct a table of floating point
92 * values were lifted from none/tests/ppc32/jm-insns.c.
93 */
94
/* AB_DPRINTF(fmt, ...): printf-style trace to stderr, compiled in only
 * when DEBUG_ARGS_BUILD is defined; otherwise it expands to an empty
 * statement and its arguments are not evaluated.
 */
#if defined (DEBUG_ARGS_BUILD)
#define AB_DPRINTF(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
#else
#define AB_DPRINTF(...) do { } while (0)
#endif

/* Assemble a 64-bit double-precision bit pattern and store it at farg.
 *
 *   farg  destination; must point to at least 8 writable bytes
 *   s     sign, placed in bit 63
 *   _exp  biased exponent, placed starting at bit 52
 *   mant  fraction bits, OR-ed into the low bits unchanged
 */
static inline void register_farg (void *farg,
                                  int s, uint16_t _exp, uint64_t mant)
{
   uint64_t bits;

   bits = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;
   /* Copy the bytes rather than storing through a uint64_t pointer:
    * callers pass the address of a double, and writing it through an
    * incompatible pointer type violates the strict-aliasing rule.
    */
   memcpy(farg, &bits, sizeof bits);
   AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
              s, _exp, (unsigned long long)mant,
              (unsigned long long)bits, *(double *)farg);
}
111
/* Assemble a 32-bit single-precision bit pattern and store it at farg.
 *
 *   farg  destination; must point to at least 4 writable bytes
 *   s     sign, placed in bit 31
 *   _exp  biased exponent, placed starting at bit 23
 *   mant  fraction bits, OR-ed into the low bits unchanged
 */
static inline void register_sp_farg (void *farg,
                                     int s, uint16_t _exp, uint32_t mant)
{
   uint32_t bits = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;

   /* memcpy instead of a store through (uint32_t *): farg addresses a
    * float in the callers, so the cast-and-store broke strict aliasing.
    */
   memcpy(farg, &bits, sizeof bits);
}
119
120
/* One test case: a pair of indices into the special FP argument table
 * (spec_fargs) selecting the two source operands.
 */
typedef struct fp_test_args {
   int fra_idx;   /* index of the first source operand */
   int frb_idx;   /* index of the second source operand */
} fp_test_args_t;
125
/* Special FP argument tables, filled by build_special_fargs_table(). */
static int nb_special_fargs;     /* number of entries in each table */
static double * spec_fargs;      /* double-precision values */
static float * spec_sp_fargs;    /* single-precision counterparts */
129
130static void build_special_fargs_table(void)
131{
132   /*
133    * Double precision:
134    * Sign goes from zero to one               (1 bit)
135    * Exponent goes from 0 to ((1 << 12) - 1)  (11 bits)
136    * Mantissa goes from 1 to ((1 << 52) - 1)  (52 bits)
137    * + special values:
138    * +0.0      : 0 0x000 0x0000000000000 => 0x0000000000000000
139    * -0.0      : 1 0x000 0x0000000000000 => 0x8000000000000000
140    * +infinity : 0 0x7FF 0x0000000000000 => 0x7FF0000000000000
141    * -infinity : 1 0x7FF 0x0000000000000 => 0xFFF0000000000000
142    * +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF => 0x7FF7FFFFFFFFFFFF
143    * -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF => 0xFFF7FFFFFFFFFFFF
144    * +QNaN     : 0 0x7FF 0x8000000000000 => 0x7FF8000000000000
145    * -QNaN     : 1 0x7FF 0x8000000000000 => 0xFFF8000000000000
146    * (8 values)
147    *
148    * Single precision
149    * Sign:     1 bit
150    * Exponent: 8 bits
151    * Mantissa: 23 bits
152    * +0.0      : 0 0x00 0x000000 => 0x00000000
153    * -0.0      : 1 0x00 0x000000 => 0x80000000
154    * +infinity : 0 0xFF 0x000000 => 0x7F800000
155    * -infinity : 1 0xFF 0x000000 => 0xFF800000
156    * +SNaN     : 0 0xFF 0x3FFFFF => 0x7FBFFFFF
157    * -SNaN     : 1 0xFF 0x3FFFFF => 0xFFBFFFFF
158    * +QNaN     : 0 0xFF 0x400000 => 0x7FC00000
159    * -QNaN     : 1 0xFF 0x400000 => 0xFFC00000
160   */
161
162   uint64_t mant;
163   uint32_t mant_sp;
164   uint16_t _exp;
165   int s;
166   int j, i = 0;
167
168   if (spec_fargs)
169      return;
170
171   spec_fargs = malloc( 20 * sizeof(double) );
172   spec_sp_fargs = malloc( 20 * sizeof(float) );
173
174   // #0
175   s = 0;
176   _exp = 0x3fd;
177   mant = 0x8000000000000ULL;
178   register_farg(&spec_fargs[i++], s, _exp, mant);
179
180   // #1
181   s = 0;
182   _exp = 0x404;
183   mant = 0xf000000000000ULL;
184   register_farg(&spec_fargs[i++], s, _exp, mant);
185
186   // #2
187   s = 0;
188   _exp = 0x001;
189   mant = 0x8000000b77501ULL;
190   register_farg(&spec_fargs[i++], s, _exp, mant);
191
192   // #3
193   s = 0;
194   _exp = 0x7fe;
195   mant = 0x800000000051bULL;
196   register_farg(&spec_fargs[i++], s, _exp, mant);
197
198   // #4
199   s = 0;
200   _exp = 0x012;
201   mant = 0x3214569900000ULL;
202   register_farg(&spec_fargs[i++], s, _exp, mant);
203
204   /* Special values */
205   /* +0.0      : 0 0x000 0x0000000000000 */
206   // #5
207   s = 0;
208   _exp = 0x000;
209   mant = 0x0000000000000ULL;
210   register_farg(&spec_fargs[i++], s, _exp, mant);
211
212   /* -0.0      : 1 0x000 0x0000000000000 */
213   // #6
214   s = 1;
215   _exp = 0x000;
216   mant = 0x0000000000000ULL;
217   register_farg(&spec_fargs[i++], s, _exp, mant);
218
219   /* +infinity : 0 0x7FF 0x0000000000000  */
220   // #7
221   s = 0;
222   _exp = 0x7FF;
223   mant = 0x0000000000000ULL;
224   register_farg(&spec_fargs[i++], s, _exp, mant);
225
226   /* -infinity : 1 0x7FF 0x0000000000000 */
227   // #8
228   s = 1;
229   _exp = 0x7FF;
230   mant = 0x0000000000000ULL;
231   register_farg(&spec_fargs[i++], s, _exp, mant);
232
233   /*
234    * This comment applies to values #9 and #10 below:
235    * When src is a SNaN, it's converted to a QNaN first before rounding to single-precision,
236    * so we can't just copy the double-precision value to the corresponding slot in the
237    * single-precision array (i.e., in the loop at the end of this function).  Instead, we
238    * have to manually set the bits using register_sp_farg().
239    */
240
241   /* +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF */
242   // #9
243   s = 0;
244   _exp = 0x7FF;
245   mant = 0x7FFFFFFFFFFFFULL;
246   register_farg(&spec_fargs[i++], s, _exp, mant);
247   _exp = 0xff;
248   mant_sp = 0x3FFFFF;
249   register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
250
251   /* -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF */
252   // #10
253   s = 1;
254   _exp = 0x7FF;
255   mant = 0x7FFFFFFFFFFFFULL;
256   register_farg(&spec_fargs[i++], s, _exp, mant);
257   _exp = 0xff;
258   mant_sp = 0x3FFFFF;
259   register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
260
261   /* +QNaN     : 0 0x7FF 0x8000000000000 */
262   // #11
263   s = 0;
264   _exp = 0x7FF;
265   mant = 0x8000000000000ULL;
266   register_farg(&spec_fargs[i++], s, _exp, mant);
267
268   /* -QNaN     : 1 0x7FF 0x8000000000000 */
269   // #12
270   s = 1;
271   _exp = 0x7FF;
272   mant = 0x8000000000000ULL;
273   register_farg(&spec_fargs[i++], s, _exp, mant);
274
275   /* denormalized value */
276   // #13
277   s = 1;
278   _exp = 0x000;
279   mant = 0x8340000078000ULL;
280   register_farg(&spec_fargs[i++], s, _exp, mant);
281
282   /* Negative finite number */
283   // #14
284   s = 1;
285   _exp = 0x40d;
286   mant = 0x0650f5a07b353ULL;
287   register_farg(&spec_fargs[i++], s, _exp, mant);
288
289   /* A few positive finite numbers ... */
290   // #15
291   s = 0;
292   _exp = 0x412;
293   mant = 0x32585a9900000ULL;
294   register_farg(&spec_fargs[i++], s, _exp, mant);
295
296   // #16
297   s = 0;
298   _exp = 0x413;
299   mant = 0x82511a2000000ULL;
300   register_farg(&spec_fargs[i++], s, _exp, mant);
301
302   // #17
303   s = 0;
304   _exp = 0x403;
305   mant = 0x12ef5a9300000ULL;
306   register_farg(&spec_fargs[i++], s, _exp, mant);
307
308   // #18
309   s = 0;
310   _exp = 0x405;
311   mant = 0x14bf5d2300000ULL;
312   register_farg(&spec_fargs[i++], s, _exp, mant);
313
314   // #19
315   s = 0;
316   _exp = 0x409;
317   mant = 0x76bf982440000ULL;
318   register_farg(&spec_fargs[i++], s, _exp, mant);
319
320
321   nb_special_fargs = i;
322   for (j = 0; j < i; j++) {
323      if (!(j == 9 || j == 10))
324         spec_sp_fargs[j] = spec_fargs[j];
325   }
326}
327
/* 16-byte-aligned, zero-initialized 8-word buffer; used as the base
 * address of the store tests in ldst_tests.
 */
static unsigned int vstg[] __attribute__ ((aligned (16))) = { 0, 0, 0, 0,
                                                              0, 0, 0, 0 };
330
331
/* 16-byte-aligned integer input words; used as the base address of the
 * integer load tests in ldst_tests.
 */
static unsigned int viargs[] __attribute__ ((aligned (16))) = {
   0x80000001,
   0x89abcdef,
   0x00112233,
   0x74556677,
   0x00001abb,
   0x00000001,
   0x31929394,
   0xa1a2a3a4,
};
/* Number of 32-bit words in viargs, and of whole 4-word vectors. */
#define NUM_VIARGS_INTS (sizeof viargs/sizeof viargs[0])
#define NUM_VIARGS_VECS  (NUM_VIARGS_INTS/4)
343
/* Signature shared by every test entry point: no arguments, no result;
 * inputs and outputs travel through file-scope variables.
 */
typedef void (*test_func_t)(void);

/* A named test category and the function that runs it. */
struct test_table
{
   test_func_t test_category;   /* driver function for the category */
   char * name;                 /* category name */
};
351
352
/* Operand/result precision combinations used by the test tables. */
typedef enum {
   SINGLE_TEST,
   SINGLE_TEST_SINGLE_RES,
   DOUBLE_TEST,
   DOUBLE_TEST_SINGLE_RES
} precision_type_t;
/* True for SINGLE_TEST and DOUBLE_TEST, i.e. the kinds without the
 * _SINGLE_RES suffix.  The argument is parenthesized so that compound
 * expressions are compared as a whole.
 */
#define IS_DP_RESULT(x) (((x) == SINGLE_TEST) || ((x) == DOUBLE_TEST))
360
/* Kind of floating-point test; selects how a test group is driven and
 * how its results are handled.
 */
typedef enum {
   VX_FP_SMAS,   // multiply add single precision result
   VX_FP_SMSS,   // multiply sub single precision result
   VX_FP_SNMAS,  // negative multiply add single precision result
   VX_FP_SNMSS,  // negative multiply sub single precision result
   VX_FP_OTHER,
   VX_CONV_WORD,
   VX_ESTIMATE,
   VX_CONV_TO_SINGLE,
   VX_CONV_TO_DOUBLE,
   VX_SCALAR_CONV_TO_WORD,
   VX_SCALAR_SP_TO_VECTOR_SP,
   VX_DEFAULT
} vx_fp_test_type;
375
/* Kind of load/store test; values start at 1, so 0 marks no type. */
typedef enum {
   VSX_LOAD = 1,
   VSX_LOAD_SPLAT,
   VSX_STORE,
} vsx_ldst_type;
381
/* Logical operation a logic test exercises; values start at 1. */
typedef enum {
   VSX_AND = 1,
   VSX_NAND,
   VSX_ANDC,
   VSX_OR,
   VSX_ORC,
   VSX_NOR,
   VSX_XOR,
   VSX_EQV,
} vsx_log_op;
392
393struct vx_fp_test1
394{
395   test_func_t test_func;
396   const char *name;
397   fp_test_args_t * targs;
398   int num_tests;
399    vx_fp_test_type test_type;
400 };
401
402struct ldst_test
403{
404   test_func_t test_func;
405   const char *name;
406   precision_type_t precision;
407   void * base_addr;
408   uint32_t offset;
409   vsx_ldst_type type;
410};
411
412struct vx_fp_test2
413{
414   test_func_t test_func;
415   const char *name;
416   fp_test_args_t * targs;
417   int num_tests;
418   precision_type_t precision;
419   vx_fp_test_type test_type;
420   const char * op;
421};
422
423struct xs_conv_test
424{
425   test_func_t test_func;
426   const char *name;
427   int num_tests;
428};
429
430struct simple_test
431{
432   test_func_t test_func;
433   const char *name;
434};
435
436struct vsx_logic_test
437{
438   test_func_t test_func;
439   const char *name;
440   vsx_log_op op;
441};
442
/* Short names for the descriptor structs.  Note that xs_conv_test_t is
 * an alias of struct simple_test (function + name only).
 */
typedef struct vsx_logic_test logic_test_t;
typedef struct ldst_test ldst_test_t;
typedef struct simple_test xs_conv_test_t;
typedef struct vx_fp_test1 vx_fp_test_basic_t;
typedef struct vx_fp_test2 vx_fp_test2_t;
typedef struct test_table test_table_t;
449
450
451static vector unsigned int vec_out, vec_inA, vec_inB;
452
453static void test_xscvdpspn(void)
454{
455   __asm__ __volatile__ ("xscvdpspn   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
456}
457
458static void test_xscvspdpn(void)
459{
460   __asm__ __volatile__ ("xscvspdpn  %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
461}
462
/* Form selector for the multiply-add style tests: non-zero runs the
 * xs<op>asp instruction, zero runs the xs<op>msp instruction.
 */
static int do_asp;
464static void test_xsmadds(void)
465{
466   if (do_asp)
467      __asm__ __volatile__ ("xsmaddasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
468   else
469      __asm__ __volatile__ ("xsmaddmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
470}
471
472static void test_xsmsubs(void)
473{
474   if (do_asp)
475      __asm__ __volatile__ ("xsmsubasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
476   else
477      __asm__ __volatile__ ("xsmsubmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
478}
479
480static void test_xscvsxdsp (void)
481{
482   __asm__ __volatile__ ("xscvsxdsp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
483}
484
485static void test_xscvuxdsp (void)
486{
487   __asm__ __volatile__ ("xscvuxdsp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
488}
489
490static void test_xsnmadds(void)
491{
492   if (do_asp)
493      __asm__ __volatile__ ("xsnmaddasp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
494   else
495      __asm__ __volatile__ ("xsnmaddmsp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
496}
497
498static void test_xsnmsubs(void)
499{
500   if (do_asp)
501      __asm__ __volatile__ ("xsnmsubasp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
502   else
503      __asm__ __volatile__ ("xsnmsubmsp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
504}
505
506static void test_stxsspx(void)
507{
508   __asm__ __volatile__ ("stxsspx          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
509}
510
511static void test_stxsiwx(void)
512{
513   __asm__ __volatile__ ("stxsiwx          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
514}
515
516static void test_lxsiwax(void)
517{
518   __asm__ __volatile__ ("lxsiwax          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
519}
520
521static void test_lxsiwzx(void)
522{
523   __asm__ __volatile__ ("lxsiwzx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
524}
525
526static void test_lxsspx(void)
527{
528   __asm__ __volatile__ ("lxsspx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
529}
530
531static void test_xssqrtsp(void)
532{
533   __asm__ __volatile__ ("xssqrtsp         %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
534}
535
536static void test_xsrsqrtesp(void)
537{
538   __asm__ __volatile__ ("xsrsqrtesp         %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
539}
540
/* Three argument instructions */
542static void test_xxleqv(void)
543{
544   __asm__ __volatile__ ("xxleqv          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
545}
546
547static void test_xxlorc(void)
548{
549   __asm__ __volatile__ ("xxlorc          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
550}
551
552static void test_xxlnand(void)
553{
554   __asm__ __volatile__ ("xxlnand         %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
555}
556
557static void test_xsaddsp(void)
558{
559  __asm__ __volatile__ ("xsaddsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
560}
561
562static void test_xssubsp(void)
563{
564  __asm__ __volatile__ ("xssubsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
565}
566
567static void test_xsdivsp(void)
568{
569  __asm__ __volatile__ ("xsdivsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
570}
571
572static void test_xsmulsp(void)
573{
574   __asm__ __volatile__ ("xsmulsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
575}
576
577static void test_xsresp(void)
578{
579   __asm__ __volatile__ ("xsresp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
580}
581static void test_xsrsp(void)
582{
583   __asm__ __volatile__ ("xsrsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
584}
585
586fp_test_args_t vx_math_tests[] = {
587                                  {8, 8},
588                                  {8, 14},
589                                  {8, 6},
590                                  {8, 5},
591                                  {8, 4},
592                                  {8, 7},
593                                  {8, 9},
594                                  {8, 11},
595                                  {14, 8},
596                                  {14, 14},
597                                  {14, 6},
598                                  {14, 5},
599                                  {14, 4},
600                                  {14, 7},
601                                  {14, 9},
602                                  {14, 11},
603                                  {6, 8},
604                                  {6, 14},
605                                  {6, 6},
606                                  {6, 5},
607                                  {6, 4},
608                                  {6, 7},
609                                  {6, 9},
610                                  {6, 11},
611                                  {5, 8},
612                                  {5, 14},
613                                  {5, 6},
614                                  {5, 5},
615                                  {5, 4},
616                                  {5, 7},
617                                  {5, 9},
618                                  {5, 11},
619                                  {4, 8},
620                                  {4, 14},
621                                  {4, 6},
622                                  {4, 5},
623                                  {4, 1},
624                                  {4, 7},
625                                  {4, 9},
626                                  {4, 11},
627                                  {7, 8},
628                                  {7, 14},
629                                  {7, 6},
630                                  {7, 5},
631                                  {7, 4},
632                                  {7, 7},
633                                  {7, 9},
634                                  {7, 11},
635                                  {10, 8},
636                                  {10, 14},
637                                  {10, 6},
638                                  {10, 5},
639                                  {10, 4},
640                                  {10, 7},
641                                  {10, 9},
642                                  {10, 11},
643                                  {12, 8},
644                                  {12, 14},
645                                  {12, 6},
646                                  {12, 5},
647                                  {12, 4},
648                                  {12, 7},
649                                  {12, 9},
650                                  {12, 11},
651                                  {8, 8},
652                                  {8, 14},
653                                  {8, 6},
654                                  {8, 5},
655                                  {8, 4},
656                                  {8, 7},
657                                  {8, 9},
658                                  {8, 11},
659                                  {14, 8},
660                                  {14, 14},
661                                  {14, 6},
662                                  {14, 5},
663                                  {14, 4},
664                                  {14, 7},
665                                  {14, 9},
666                                  {14, 11},
667                                  {6, 8},
668                                  {6, 14},
669                                  {6, 6},
670                                  {6, 5},
671                                  {6, 4},
672                                  {6, 7},
673                                  {6, 9},
674                                  {6, 11},
675                                  {5, 8},
676                                  {5, 14},
677                                  {5, 6},
678                                  {5, 5},
679                                  {5, 4},
680                                  {5, 7},
681                                  {5, 9},
682                                  {5, 11},
683                                  {4, 8},
684                                  {4, 14},
685                                  {4, 6},
686                                  {4, 5},
687                                  {4, 1},
688                                  {4, 7},
689                                  {4, 9},
690                                  {4, 11},
691                                  {7, 8},
692                                  {7, 14},
693                                  {7, 6},
694                                  {7, 5},
695                                  {7, 4},
696                                  {7, 7},
697                                  {7, 9},
698                                  {7, 11},
699                                  {10, 8},
700                                  {10, 14},
701                                  {10, 6},
702                                  {10, 5},
703                                  {10, 4},
704                                  {10, 7},
705                                  {10, 9},
706                                  {10, 11},
707                                  {12, 8},
708                                  {12, 14},
709                                  {12, 6},
710                                  {12, 5},
711                                  {12, 4},
712                                  {12, 7},
713                                  {12, 9},
714                                  {12, 11}
715};
716
717// These are all double precision inputs with double word outputs (mostly converted to single precision)
718static vx_fp_test_basic_t vx_fp_tests[] = {
719                                     { &test_xsmadds, "xsmadd", vx_math_tests, 64, VX_FP_SMAS},
720                                     { &test_xsmsubs, "xsmsub", vx_math_tests, 64, VX_FP_SMSS},
721                                     { &test_xsmulsp, "xsmulsp", vx_math_tests, 64, VX_FP_OTHER},
722                                     { &test_xsdivsp, "xsdivsp", vx_math_tests, 64, VX_FP_OTHER},
723                                     { &test_xsnmadds, "xsnmadd", vx_math_tests, 64, VX_FP_SNMAS},
724                                     { &test_xsnmsubs, "xsnmsub", vx_math_tests, 64, VX_FP_SNMSS},
725                                     { NULL, NULL, NULL, 0, 0 }
726};
727
728static vx_fp_test2_t
729vsx_one_fp_arg_tests[] = {
730                          { &test_xscvdpspn, "xscvdpspn", NULL, 20, SINGLE_TEST_SINGLE_RES, VX_SCALAR_SP_TO_VECTOR_SP, "conv"},
731                          { &test_xscvspdpn, "xscvspdpn", NULL, 20, SINGLE_TEST, VX_DEFAULT, "conv"},
732                          { &test_xsresp,    "xsresp", NULL, 20, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
733                          { &test_xsrsp,     "xsrsp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "round"},
734                          { &test_xsrsqrtesp, "xsrsqrtesp", NULL, 20, DOUBLE_TEST, VX_ESTIMATE, "1/sqrt"},
735                          { &test_xssqrtsp, "xssqrtsp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "sqrt"},
736                          { NULL, NULL, NULL, 0, 0, 0, NULL}
737};
738
739// These are all double precision inputs with double word outputs (mostly converted to single precision)
740static vx_fp_test_basic_t
741vx_simple_scalar_fp_tests[] = {
742                          { &test_xssubsp, "xssubsp", vx_math_tests, 64, VX_DEFAULT},
743                          { &test_xsaddsp, "xsaddsp", vx_math_tests, 64, VX_DEFAULT},
744                          { NULL, NULL, NULL, 0 , 0}
745};
746
747static ldst_test_t
748ldst_tests[] = {
749                    { &test_stxsspx, "stxsspx", DOUBLE_TEST_SINGLE_RES, vstg, 0, VSX_STORE },
750                    { &test_stxsiwx, "stxsiwx", SINGLE_TEST_SINGLE_RES, vstg, 4, VSX_STORE },
751                    { &test_lxsiwax, "lxsiwax", SINGLE_TEST, viargs, 0, VSX_LOAD },
752                    { &test_lxsiwzx, "lxsiwzx", SINGLE_TEST, viargs, 1, VSX_LOAD },
753                    { &test_lxsspx,  "lxsspx",  SINGLE_TEST, NULL, 0, VSX_LOAD },
754                    { NULL, NULL, 0, NULL, 0, 0 } };
755
756static xs_conv_test_t
757xs_conv_tests[] = {
758                   { &test_xscvsxdsp, "xscvsxdsp"},
759                   { &test_xscvuxdsp, "xscvuxdsp"},
760                   { NULL, NULL}
761};
762
763static logic_test_t
764logic_tests[] = {
765                 { &test_xxleqv,  "xxleqv", VSX_EQV },
766                 { &test_xxlorc,  "xxlorc", VSX_ORC },
767                 { &test_xxlnand, "xxlnand", VSX_NAND },
768                 { NULL, NULL}
769};
770
771Bool check_reciprocal_estimate(Bool is_rsqrte, int idx, int output_vec_idx)
772{
773   /* NOTE:
774    * This function has been verified only with the xsresp and xsrsqrtes instructions.
775    *
776    * Technically, the number of bits of precision for xsresp and xsrsqrtesp is
777    * 14 bits (14 = log2 16384).  However, the VEX emulation of these instructions
778    * does an actual reciprocal calculation versus estimation, so the answer we get back from
779    * valgrind can easily differ from the estimate in the lower bits (within the 14 bits of
780    * precision) and the estimate may still be within expected tolerances.  On top of that,
781    * we can't count on these estimates always being the same across implementations.
782    * For example, with the fre[s] instruction (which should be correct to within one part
783    * in 256 -- i.e., 8 bits of precision) . . . When approximating the value 1.0111_1111_1111,
784    * one implementation could return 1.0111_1111_0000 and another implementation could return
785    * 1.1000_0000_0000.  Both estimates meet the 1/256 accuracy requirement, but share only a
786    * single bit in common.
787    *
788    * The upshot is we can't validate the VEX output for these instructions by comparing against
789    * stored bit patterns.  We must check that the result is within expected tolerances.
790    */
791
792   /* A mask to be used for validation as a last resort.
793    * Only use 12 bits of precision for reasons discussed above.
794    */
795#define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFF8000
796
797
798   Bool result = False;
799   double src_dp, res_dp;
800   float calc_diff = 0;
801   float real_diff = 0;
802   double recip_divisor;
803   float div_result;
804   float calc_diff_tmp;
805
806   src_dp = res_dp = 0;
807   Bool src_is_negative = False;
808   Bool res_is_negative = False;
809   unsigned long long * dst_dp = NULL;
810   unsigned long long * src_dp_ull;
811   dst_dp = (unsigned long long *) &vec_out;
812   src_dp = spec_fargs[idx];
813   src_dp_ull = (unsigned long long *) &src_dp;
814   src_is_negative = (*src_dp_ull & 0x8000000000000000ULL) ? True : False;
815   res_is_negative = (dst_dp[output_vec_idx] & 0x8000000000000000ULL) ? True : False;
816   memcpy(&res_dp, &dst_dp[output_vec_idx], 8);
817
818
819   // Below are common rules
820   if (isnan(src_dp))
821      return isnan(res_dp);
822   if (fpclassify(src_dp) == FP_ZERO)
823      return isinf(res_dp);
824   if (!src_is_negative && isinf(src_dp))
825      return !res_is_negative && (fpclassify(res_dp) == FP_ZERO);
826   if (is_rsqrte) {
827      if (src_is_negative)
828         return isnan(res_dp);
829   } else {
830      if (src_is_negative && isinf(src_dp))
831         return res_is_negative && (fpclassify(res_dp) == FP_ZERO);
832   }
833
834   if (is_rsqrte)
835      recip_divisor = sqrt(src_dp);
836   else
837      recip_divisor = src_dp;
838
839   /* The instructions handled by this function take a double precision
840    * input, perform a reciprocal estimate in double-precision, round
841    * the result to single precision and store into the destination
842    * register in double precision format.  So, to check the result
843    * for accuracy, we use float (single precision) values.
844    */
845   div_result = 1.0/recip_divisor;
846   calc_diff_tmp = recip_divisor * 16384.0;
847   if (isnormal(calc_diff_tmp)) {
848      calc_diff = fabs(1.0/calc_diff_tmp);
849      real_diff = fabs((float)res_dp - div_result);
850      result = ( ( res_dp == div_result )
851               || ( real_diff <= calc_diff ) );
852#if FRES_DEBUG
853      unsigned int * dv = (unsigned int *)&div_result;
854      unsigned int * rd = (unsigned int *)&real_diff;
855      unsigned int * cd = (unsigned int *)&calc_diff;
856      printf("\n\t {computed div_result: %08x; real_diff:  %08x; calc_diff:  %08x}\n",
857             *dv, *rd, *cd);
858#endif
859
860   } else {
861      /* Unable to compute theoretical difference, so we fall back to masking out
862       * un-precise bits.
863       */
864      unsigned int * div_result_sp = (unsigned int *)&div_result;
865      float res_sp = (float)res_dp;
866      unsigned int * dst_sp = (unsigned int *)&res_sp;
867#if FRES_DEBUG
868      unsigned int * calc_diff_tmp_sp = (unsigned int *)&calc_diff_tmp;
869      printf("Unable to compute theoretical difference, so we fall back to masking\n");
870      printf("\tcalc_diff_tmp: %08x; div_result: %08x; vector result (sp): %08x\n",
871             *calc_diff_tmp_sp, *div_result_sp, *dst_sp);
872#endif
873      result = (*dst_sp & VSX_RECIP_ESTIMATE_MASK_SP) == (*div_result_sp & VSX_RECIP_ESTIMATE_MASK_SP);
874   }
875   return result;
876}
877
878static void test_vx_fp_ops(void)
879{
880
881   test_func_t func;
882   int k;
883   char * test_name = (char *)malloc(20);
884   k = 0;
885
886   build_special_fargs_table();
887   while ((func = vx_fp_tests[k].test_func)) {
888      int i, repeat = 0;
889      unsigned long long * frap, * frbp, * dst;
890      vx_fp_test_basic_t test_group = vx_fp_tests[k];
891      vx_fp_test_type test_type = test_group.test_type;
892
893      switch (test_type) {
894         case VX_FP_SMAS:
895         case VX_FP_SMSS:
896         case VX_FP_SNMAS:
897         case VX_FP_SNMSS:
898            if (test_type == VX_FP_SMAS)
899               strcpy(test_name, "xsmadd");
900            else if (test_type == VX_FP_SMSS)
901               strcpy(test_name, "xsmsub");
902            else if (test_type == VX_FP_SNMAS)
903               strcpy(test_name, "xsnmadd");
904            else
905               strcpy(test_name, "xsnmsub");
906
907            if (!repeat) {
908               repeat = 1;
909               strcat(test_name, "asp");
910               do_asp = 1;
911            }
912            break;
913         case VX_FP_OTHER:
914            strcpy(test_name, test_group.name);
915            break;
916         default:
917            printf("ERROR:  Invalid VX FP test type %d\n", test_type);
918            exit(1);
919      }
920
921again:
922      for (i = 0; i < test_group.num_tests; i++) {
923         unsigned int * inA, * inB, * pv;
924
925         fp_test_args_t aTest = test_group.targs[i];
926         inA = (unsigned int *)&spec_fargs[aTest.fra_idx];
927         inB = (unsigned int *)&spec_fargs[aTest.frb_idx];
928         frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
929         frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
930         int idx;
931         unsigned long long vsr_XT;
932         pv = (unsigned int *)&vec_out;
933
934         // Only need to copy one doubleword into each vector's element 0
935         memcpy(&vec_inA, inA, 8);
936         memcpy(&vec_inB, inB, 8);
937
938         // clear vec_out
939         for (idx = 0; idx < 4; idx++, pv++)
940            *pv = 0;
941
942         if (test_type != VX_FP_OTHER) {
943            /* Then we need a third src argument, which is stored in element 0 of
944             * VSX[XT] -- i.e., vec_out.  For the xs<ZZZ>mdp cases, VSX[XT] holds
945             * src3 and VSX[XB] holds src2; for the xs<ZZZ>adp cases, VSX[XT] holds
946             * src2 and VSX[XB] holds src3.  The fp_test_args_t that holds the test
947             * data (input args, result) contain only two inputs, so I arbitrarily
948             * use spec_fargs elements 4 and 14 (alternating) for the third source
949             * argument.  We can use the same input data for a given pair of
950             * adp/mdp-type instructions by swapping the src2 and src3 arguments; thus
951             * the expected result should be the same.
952             */
953            int extra_arg_idx;
954            if (i % 2)
955               extra_arg_idx = 4;
956            else
957               extra_arg_idx = 14;
958
959            if (repeat) {
960               /* We're on the first time through of one of the VX_FP_SMx
961                * test types, meaning we're testing a xs<ZZZ>adp case, thus
962                * we have to swap inputs as described above:
963                *    src2 <= VSX[XT]
964                *    src3 <= VSX[XB]
965                */
966               memcpy(&vec_out, inB, 8);  // src2
967               memcpy(&vec_inB, &spec_fargs[extra_arg_idx], 8);  //src3
968               frbp = (unsigned long long *)&spec_fargs[extra_arg_idx];
969            } else {
970               // Don't need to init src2, as it's done before the switch()
971               memcpy(&vec_out, &spec_fargs[extra_arg_idx], 8);  //src3
972            }
973            memcpy(&vsr_XT, &vec_out, 8);
974         }
975
976         (*func)();
977         dst = (unsigned long long *) &vec_out;
978
979         if (test_type == VX_FP_OTHER)
980            printf("#%d: %s %016llx %016llx = %016llx\n", i, test_name,
981                   *frap, *frbp, *dst);
982         else
983            printf( "#%d: %s %016llx %016llx %016llx = %016llx\n", i,
984                    test_name, vsr_XT, *frap, *frbp, *dst );
985
986      }
987      /*
988           {
989               // Debug code.  Keep this block commented out except when debugging.
990               double result, expected;
991               memcpy(&result, dst, 8);
992               memcpy(&expected, &aTest.dp_bin_result, 8);
993               printf( "\tFRA + FRB: %e + %e: Expected = %e; Actual = %e\n",
994                       spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx],
995                       expected, result );
996            }
997       */
998      printf( "\n" );
999
1000      if (repeat) {
1001         repeat = 0;
1002         strcat(test_name, "UNKNOWN");
1003         switch (test_type) {
1004            case VX_FP_SMAS:
1005            case VX_FP_SMSS:
1006            case VX_FP_SNMAS:
1007            case VX_FP_SNMSS:
1008               if (test_type == VX_FP_SMAS)
1009                  strcpy(test_name, "xsmadd");
1010               else if (test_type == VX_FP_SMSS)
1011                  strcpy(test_name, "xsmsub");
1012               else if (test_type == VX_FP_SNMAS)
1013                  strcpy(test_name, "xsnmadd");
1014               else
1015                  strcpy(test_name, "xsnmsub");
1016
1017               do_asp = 0;
1018               strcat(test_name, "msp");
1019               break;
1020            default:
1021               break;
1022         }
1023         goto again;
1024      }
1025      k++;
1026   }
1027   printf( "\n" );
1028   free(test_name);
1029}
1030
1031
1032static void test_vsx_one_fp_arg(void)
1033{
1034   test_func_t func;
1035   int k;
1036   k = 0;
1037   build_special_fargs_table();
1038
1039   while ((func = vsx_one_fp_arg_tests[k].test_func)) {
1040      int idx, i;
1041      unsigned long long *dst_dp;
1042      unsigned int * dst_sp;
1043      vx_fp_test2_t test_group = vsx_one_fp_arg_tests[k];
1044      /* size of source operands */
1045      Bool dp  = ((test_group.precision == DOUBLE_TEST) ||
1046		  (test_group.precision == DOUBLE_TEST_SINGLE_RES)) ? True : False;
1047      /* size of result */
1048      Bool dp_res = IS_DP_RESULT(test_group.precision);
1049      Bool is_sqrt = (strstr(test_group.name, "sqrt")) ? True : False;
1050      Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
1051      Bool sparse_sp = False;
1052      int stride = dp ? 2 : 4;
1053      int loops = is_scalar ? 1 : stride;
1054      stride = is_scalar ? 1: stride;
1055
1056      /* For conversions of single to double, the 128-bit input register is sparsely populated:
1057       *    |___ SP___|_Unused_|___SP___|__Unused__|   // for vector op
1058       *                     or
1059       *    |___ SP___|_Unused_|_Unused_|__Unused__|   // for scalar op
1060       *
1061       * For the vector op case, we need to adjust stride from '4' to '2', since
1062       * we'll only be loading two values per loop into the input register.
1063       */
1064      if (!dp && !is_scalar && test_group.test_type == VX_CONV_TO_DOUBLE) {
1065         sparse_sp = True;
1066         stride = 2;
1067      }
1068
1069      for (i = 0; i < test_group.num_tests; i+=stride) {
1070         unsigned int * pv;
1071         void * inB;
1072
1073         pv = (unsigned int *)&vec_out;
1074         // clear vec_out
1075         for (idx = 0; idx < 4; idx++, pv++)
1076            *pv = 0;
1077
1078         if (dp) {
1079            int j;
1080            unsigned long long * frB_dp;
1081            for (j = 0; j < loops; j++) {
1082               inB = (void *)&spec_fargs[i + j];
1083               // copy double precision FP into vector element i
1084               memcpy(((void *)&vec_inB) + (j * 8), inB, 8);
1085            }
1086            // execute test insn
1087            (*func)();
1088            if (dp_res)
1089               dst_dp = (unsigned long long *) &vec_out;
1090            else
1091               dst_sp = (unsigned int *) &vec_out;
1092
1093            printf("#%d: %s ", i/stride, test_group.name);
1094            for (j = 0; j < loops; j++) {
1095               if (j)
1096                  printf("; ");
1097               frB_dp = (unsigned long long *)&spec_fargs[i + j];
1098               printf("%s(%016llx)", test_group.op, *frB_dp);
1099               if (test_group.test_type == VX_ESTIMATE)
1100               {
1101                  Bool res;
1102                  res = check_reciprocal_estimate(is_sqrt, i + j, j);
1103                  printf(" ==> %s)", res ? "PASS" : "FAIL");
1104               } else if (dp_res) {
1105                  printf(" = %016llx", dst_dp[j]);
1106               } else {
1107                  printf(" = %08x", dst_sp[j]);
1108               }
1109            }
1110            printf("\n");
1111         } else {  // single precision test type
1112            int j;
1113            // Clear input vector
1114            pv = (unsigned int *)&vec_inB;
1115            for (idx = 0; idx < 4; idx++, pv++)
1116               *pv = 0;
1117
1118            if (test_group.test_type == VX_SCALAR_SP_TO_VECTOR_SP) {
1119               /* Take a single-precision value stored in double word element 0
1120                * of src in double-precision format and convert to single-
1121                * precision and store in word element 0 of dst.
1122                */
1123               double input = spec_sp_fargs[i];
1124               memcpy(((void *)&vec_inB), (void *)&input, 8);
1125            } else {
1126               int skip_slot;
1127               if (sparse_sp) {
1128                  skip_slot = 1;
1129                  loops = 2;
1130               } else {
1131                  skip_slot = 0;
1132               }
1133               for (j = 0; j < loops; j++) {
1134                  inB = (void *)&spec_sp_fargs[i + j];
1135                  // copy single precision FP into vector element i
1136
1137                  if (skip_slot && j > 0)
1138                     memcpy(((void *)&vec_inB) + ((j + j) * 4), inB, 4);
1139                  else
1140                     memcpy(((void *)&vec_inB) + (j * 4), inB, 4);
1141               }
1142            }
1143            // execute test insn
1144            (*func)();
1145            if (dp_res)
1146               dst_dp = (unsigned long long *) &vec_out;
1147            else
1148               dst_sp = (unsigned int *) &vec_out;
1149            // print result
1150            printf("#%d: %s ", i/stride, test_group.name);
1151            for (j = 0; j < loops; j++) {
1152               if (j)
1153                  printf("; ");
1154               printf("%s(%08x)", test_group.op, *((unsigned int *)&spec_sp_fargs[i + j]));
1155               if (dp_res)
1156                     printf(" = %016llx", dst_dp[j]);
1157               else
1158                  printf(" = %08x", dst_sp[j]);
1159            }
1160            printf("\n");
1161         }
1162      }
1163      k++;
1164      printf( "\n" );
1165   }
1166}
1167
1168/* This function currently only supports two double precision input arguments. */
1169static void test_vsx_two_fp_arg(void)
1170{
1171   test_func_t func;
1172   int k = 0;
1173
1174   build_special_fargs_table();
1175   while ((func = vx_simple_scalar_fp_tests[k].test_func)) {
1176      unsigned long long * frap, * frbp, * dst;
1177      unsigned int * pv;
1178      int idx;
1179      vx_fp_test_basic_t test_group = vx_simple_scalar_fp_tests[k];
1180      pv = (unsigned int *)&vec_out;
1181      // clear vec_out
1182      for (idx = 0; idx < 4; idx++, pv++)
1183         *pv = 0;
1184
1185      void * inA, * inB;
1186      int i;
1187      for (i = 0; i < test_group.num_tests; i++) {
1188         fp_test_args_t aTest = test_group.targs[i];
1189         inA = (void *)&spec_fargs[aTest.fra_idx];
1190         inB = (void *)&spec_fargs[aTest.frb_idx];
1191         frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
1192         frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
1193         // Only need to copy one doubleword into each vector's element 0
1194         memcpy(&vec_inA, inA, 8);
1195         memcpy(&vec_inB, inB, 8);
1196         (*func)();
1197         dst = (unsigned long long *) &vec_out;
1198         printf("#%d: %s %016llx,%016llx => %016llx\n", i, test_group.name,
1199                *frap, *frbp, *dst);
1200      }
1201      printf( "\n" );
1202      k++;
1203   }
1204}
1205
1206/* This function handles the following cases:
1207 *   1) Single precision value stored in double-precision
1208 *      floating-point format in doubleword element 0 of src VSX register
1209 *   2) Integer word value stored in word element 1 of src VSX register
1210 */
1211static void _do_store_test (ldst_test_t storeTest)
1212{
1213   test_func_t func;
1214   unsigned int *dst32;
1215   unsigned int i, idx;
1216   unsigned int * pv = (unsigned int *) storeTest.base_addr;
1217
1218   func = storeTest.test_func;
1219   r14 = (HWord_t) storeTest.base_addr;
1220   r15 = (HWord_t) storeTest.offset;
1221
1222   if (storeTest.precision == DOUBLE_TEST_SINGLE_RES) {
1223      /* source is single precision stored in double precision format */
1224      /* test some of the pre-defined single precision values */
1225      for (i = 0; i < nb_special_fargs; i+=3) {
1226         // clear out storage destination
1227         for (idx = 0; idx < 4; idx++)
1228            *(pv + idx) = 0;
1229
1230         printf( "%s:", storeTest.name );
1231         unsigned long long * dp;
1232         double input = spec_sp_fargs[i];
1233         dp = (unsigned long long *)&input;
1234         memcpy(&vec_inA, dp, sizeof(unsigned long long));
1235         printf(" %016llx ==> ", *dp);
1236
1237         // execute test insn
1238         (*func)();
1239         dst32 = (unsigned int*)(storeTest.base_addr + storeTest.offset);
1240         printf( "%08x\n", *dst32);
1241      }
1242   } else {
1243      // source is an integer word
1244      for (i = 0; i < NUM_VIARGS_INTS; i++) {
1245         // clear out storage destination
1246         for (idx = 0; idx < 4; idx++)
1247            *(pv + idx) = 0;
1248         printf( "%s:", storeTest.name );
1249         unsigned int * pi = (unsigned int *)&vec_inA;
1250         memcpy(pi + 1, &viargs[i], sizeof(unsigned int));
1251         printf(" %08x ==> ", *(pi + 1));
1252
1253         // execute test insn
1254         (*func)();
1255         dst32 = (unsigned int*)(storeTest.base_addr + storeTest.offset);
1256         printf( "%08x\n", *dst32);
1257      }
1258   }
1259   printf("\n");
1260}
1261
1262static void _do_load_test(ldst_test_t storeTest)
1263{
1264   test_func_t func;
1265   unsigned int i;
1266   unsigned long long * dst_dp;
1267
1268   func = storeTest.test_func;
1269   r15 = (HWord_t) storeTest.offset;
1270
1271   if (storeTest.base_addr == NULL) {
1272      /* Test lxsspx: source is single precision value, so let's */
1273      /* test some of the pre-defined single precision values. */
1274      for (i = 0; i + storeTest.offset < nb_special_fargs; i+=3) {
1275         unsigned int * sp = (unsigned int *)&spec_sp_fargs[i + storeTest.offset];
1276         printf( "%s:", storeTest.name );
1277         printf(" %08x ==> ", *sp);
1278         r14 = (HWord_t)&spec_sp_fargs[i];
1279
1280         // execute test insn
1281         (*func)();
1282         dst_dp = (unsigned long long *) &vec_out;
1283         printf("%016llx\n", *dst_dp);
1284      }
1285   } else {
1286      // source is an integer word
1287      for (i = 0; i < NUM_VIARGS_INTS; i++) {
1288         printf( "%s:", storeTest.name );
1289         r14 = (HWord_t)&viargs[i + storeTest.offset];
1290         printf(" %08x ==> ", viargs[i + storeTest.offset]);
1291
1292         // execute test insn
1293         (*func)();
1294         dst_dp = (unsigned long long *) &vec_out;
1295         printf("%016llx\n", *dst_dp);
1296      }
1297   }
1298   printf("\n");
1299}
1300
1301static void test_ldst(void)
1302{
1303   int k = 0;
1304
1305   while (ldst_tests[k].test_func) {
1306      if (ldst_tests[k].type == VSX_STORE)
1307         _do_store_test(ldst_tests[k]);
1308      else {
1309         _do_load_test(ldst_tests[k]);
1310      }
1311      k++;
1312      printf("\n");
1313   }
1314}
1315
1316static void test_xs_conv_ops(void)
1317{
1318
1319   test_func_t func;
1320   int k = 0;
1321
1322   build_special_fargs_table();
1323   while ((func = xs_conv_tests[k].test_func)) {
1324      int i;
1325      unsigned long long * dst;
1326      xs_conv_test_t test_group = xs_conv_tests[k];
1327      for (i = 0; i < NUM_VIARGS_INTS; i++) {
1328         unsigned int * inB, * pv;
1329         int idx;
1330         inB = (unsigned int *)&viargs[i];
1331         memcpy(&vec_inB, inB, 4);
1332         pv = (unsigned int *)&vec_out;
1333         // clear vec_out
1334         for (idx = 0; idx < 4; idx++, pv++)
1335            *pv = 0;
1336         (*func)();
1337         dst = (unsigned long long *) &vec_out;
1338         printf("#%d: %s %08x => %016llx\n", i, test_group.name, viargs[i], *dst);
1339      }
1340      k++;
1341      printf("\n");
1342   }
1343   printf( "\n" );
1344}
1345
1346
1347static void test_vsx_logic(void)
1348{
1349   logic_test_t aTest;
1350   test_func_t func;
1351   int k;
1352   k = 0;
1353
1354   while ((func = logic_tests[k].test_func)) {
1355
1356      unsigned int * pv;
1357      unsigned int * inA, * inB, * dst;
1358      int idx, i;
1359      aTest = logic_tests[k];
1360      for (i = 0; i <= NUM_VIARGS_VECS; i+=4) {
1361         pv = (unsigned int *)&vec_out;
1362         inA = &viargs[i];
1363         inB = &viargs[i];
1364         memcpy(&vec_inA, inA, sizeof(vector unsigned int));
1365         memcpy(&vec_inB, inB, sizeof(vector unsigned int));
1366         // clear vec_out
1367         for (idx = 0; idx < 4; idx++, pv++)
1368            *pv = 0;
1369
1370         // execute test insn
1371         (*func)();
1372         dst = (unsigned int*) &vec_out;
1373
1374         printf( "#%d: %10s ", k, aTest.name);
1375         printf( " (%08x %08x %08x %08x, ", inA[0], inA[1], inA[2], inA[3]);
1376         printf( " %08x %08x %08x %08x)", inB[0], inB[1], inB[2], inB[3]);
1377         printf(" ==> %08x %08x %08x %08x\n", dst[0], dst[1], dst[2], dst[3]);
1378      }
1379      k++;
1380   }
1381   printf( "\n" );
1382}
1383
1384
1385//----------------------------------------------------------
1386
1387static test_table_t all_tests[] = {
1388                                     { &test_vx_fp_ops,
1389                                       "Test VSX floating point instructions"},
1390                                     { &test_vsx_one_fp_arg,
1391                                       "Test VSX vector and scalar single argument instructions"} ,
1392                                     { &test_vsx_logic,
1393                                       "Test VSX logic instructions" },
1394                                     { &test_xs_conv_ops,
1395                                       "Test VSX scalar integer conversion instructions" },
1396                                     { &test_ldst,
1397                                       "Test VSX load/store dp to sp instructions" },
1398                                     { &test_vsx_two_fp_arg,
1399                                       "Test VSX vector and scalar two argument instructions"} ,
1400                                     { NULL, NULL }
1401};
1402
1403#endif
1404
/* Entry point.  When built with HAS_ISA_2_07, print the name of each
 * category in all_tests and run it, stopping at the NULL terminator;
 * otherwise just report that ISA 2.07 support is missing.  Always
 * returns 0.
 */
int main(int argc, char *argv[])
{
#ifdef HAS_ISA_2_07
   int i;

   for (i = 0; all_tests[i].test_category; i++) {
      test_func_t func = all_tests[i].test_category;

      printf( "%s\n", all_tests[i].name );
      (*func)();
   }
#else
   printf("NO ISA 2.07 SUPPORT\n");
#endif
   return 0;
}
1424