1/*  Copyright (C) 2013 IBM
2
3 Authors: Carl Love  <carll@us.ibm.com>
4          Maynard Johnson <maynardj@us.ibm.com>
5
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307, USA.
20
21 The GNU General Public License is contained in the file COPYING.
22
23 This program is based heavily on the test_isa_2_06_part*.c source files.
24 */
25
26#include <stdio.h>
27
28#ifdef HAS_ISA_2_07
29
30#include <stdint.h>
31#include <stdlib.h>
32#include <string.h>
33#include <malloc.h>
34#include <altivec.h>
35#include <math.h>
36
/* HWord_t: unsigned integer type that is 64 bits wide when __powerpc64__
 * is defined and 32 bits wide otherwise.
 */
#ifdef __powerpc64__
typedef uint64_t HWord_t;
#else
typedef uint32_t HWord_t;
#endif /* __powerpc64__ */
42
/* isLE is 1 when VGP_ppc64le_linux is defined at build time, 0 otherwise. */
#ifndef VGP_ppc64le_linux
#define isLE 0
#else
#define isLE 1
#endif
48
49register HWord_t r14 __asm__ ("r14");
50register HWord_t r15 __asm__ ("r15");
51register HWord_t r16 __asm__ ("r16");
52register HWord_t r17 __asm__ ("r17");
53register double f14 __asm__ ("fr14");
54register double f15 __asm__ ("fr15");
55register double f16 __asm__ ("fr16");
56register double f17 __asm__ ("fr17");
57
58static volatile unsigned int cond_reg;
59
/* Boolean type and its two constants. */
typedef unsigned char Bool;
#define True  1
#define False 0

/* Clobber list naming every condition register field. */
#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"

/* Write _arg to the condition register (mtcr) or to XER (mtxer).
 * NOTE: both expansions already end in ';', so they cannot be used as the
 * unbraced body of an if/else.
 */
#define SET_CR(_arg) \
   __asm__ __volatile__ ("mtcr  %0" : : "b"(_arg) : ALLCR );

#define SET_XER(_arg) \
   __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );

/* Read the condition register (mfcr) or XER (mfxer) into _lval. */
#define GET_CR(_lval) \
   __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )

#define GET_XER(_lval) \
   __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )

/* Read CR and then XER into the two given lvalues. */
#define GET_CR_XER(_lval_cr,_lval_xer) \
   do { \
      GET_CR(_lval_cr); \
      GET_XER(_lval_xer); \
   } while (0)

/* Convenience forms that write zero to CR, XER, or both. */
#define SET_CR_ZERO \
   SET_CR(0)

#define SET_XER_ZERO \
   SET_XER(0)

#define SET_CR_XER_ZERO \
   do { \
      SET_CR_ZERO; \
      SET_XER_ZERO; \
   } while (0)

/* Write 0.0 to all FPSCR fields selected by the mask 0xFF (mtfsf). */
#define SET_FPSCR_ZERO \
   do { \
      double _d = 0.0; \
      __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
   } while (0)
95
96
97/* These functions below that construct a table of floating point
98 * values were lifted from none/tests/ppc32/jm-insns.c.
99 */
100
/* Debug trace helper: prints to stderr only when DEBUG_ARGS_BUILD is defined;
 * otherwise it expands to an empty statement and evaluates none of its
 * arguments.
 */
#if defined (DEBUG_ARGS_BUILD)
#define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
#else
#define AB_DPRINTF(fmt, args...) do { } while (0)
#endif

/* Assemble a 64-bit pattern from a sign bit, a biased exponent and a
 * mantissa, and store it into the 8-byte object at farg.
 *
 *   farg  destination (the callers in this file pass a pointer to a double)
 *   s     sign, placed in bit 63
 *   _exp  biased exponent, placed starting at bit 52
 *   mant  mantissa, OR-ed into the low bits unchanged
 *
 * The bits are copied with memcpy rather than stored through a casted
 * pointer so the write does not violate the strict-aliasing rules.
 */
static inline void register_farg (void *farg,
                                  int s, uint16_t _exp, uint64_t mant)
{
   uint64_t bits = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;

   memcpy(farg, &bits, sizeof bits);

#if defined (DEBUG_ARGS_BUILD)
   {
      double as_double;

      memcpy(&as_double, &bits, sizeof as_double);
      /* uint64_t need not be unsigned long long, so cast for %llx. */
      AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
                 s, _exp, (unsigned long long)mant,
                 (unsigned long long)bits, as_double);
   }
#endif
}
117
/* Assemble a 32-bit pattern from a sign bit, a biased exponent and a
 * mantissa, and store it into the 4-byte object at farg.
 *
 *   farg  destination (the callers in this file pass a pointer to a float)
 *   s     sign, placed in bit 31
 *   _exp  biased exponent, placed starting at bit 23
 *   mant  mantissa, OR-ed into the low bits unchanged
 *
 * The bits are copied with memcpy rather than stored through a casted
 * pointer so the write does not violate the strict-aliasing rules.
 */
static inline void register_sp_farg (void *farg,
                                     int s, uint16_t _exp, uint32_t mant)
{
   uint32_t bits = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;

   memcpy(farg, &bits, sizeof bits);
}
125
126
/* One floating point test case: a pair of operand indices.  In this file the
 * indices are used to subscript spec_fargs.
 */
typedef struct fp_test_args {
   int fra_idx;   /* index of the first source operand  */
   int frb_idx;   /* index of the second source operand */
} fp_test_args_t;
131
/* Operand tables filled in by build_special_fargs_table(). */
static int nb_special_fargs;   /* number of entries in each table     */
static double *spec_fargs;     /* double-precision operand values     */
static float *spec_sp_fargs;   /* single-precision operand values     */
135
136static void build_special_fargs_table(void)
137{
138   /*
139    * Double precision:
140    * Sign goes from zero to one               (1 bit)
141    * Exponent goes from 0 to ((1 << 12) - 1)  (11 bits)
142    * Mantissa goes from 1 to ((1 << 52) - 1)  (52 bits)
143    * + special values:
144    * +0.0      : 0 0x000 0x0000000000000 => 0x0000000000000000
145    * -0.0      : 1 0x000 0x0000000000000 => 0x8000000000000000
146    * +infinity : 0 0x7FF 0x0000000000000 => 0x7FF0000000000000
147    * -infinity : 1 0x7FF 0x0000000000000 => 0xFFF0000000000000
148    * +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF => 0x7FF7FFFFFFFFFFFF
149    * -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF => 0xFFF7FFFFFFFFFFFF
150    * +QNaN     : 0 0x7FF 0x8000000000000 => 0x7FF8000000000000
151    * -QNaN     : 1 0x7FF 0x8000000000000 => 0xFFF8000000000000
152    * (8 values)
153    *
154    * Single precision
155    * Sign:     1 bit
156    * Exponent: 8 bits
157    * Mantissa: 23 bits
158    * +0.0      : 0 0x00 0x000000 => 0x00000000
159    * -0.0      : 1 0x00 0x000000 => 0x80000000
160    * +infinity : 0 0xFF 0x000000 => 0x7F800000
161    * -infinity : 1 0xFF 0x000000 => 0xFF800000
162    * +SNaN     : 0 0xFF 0x3FFFFF => 0x7FBFFFFF
163    * -SNaN     : 1 0xFF 0x3FFFFF => 0xFFBFFFFF
164    * +QNaN     : 0 0xFF 0x400000 => 0x7FC00000
165    * -QNaN     : 1 0xFF 0x400000 => 0xFFC00000
166   */
167
168   uint64_t mant;
169   uint32_t mant_sp;
170   uint16_t _exp;
171   int s;
172   int j, i = 0;
173
174   if (spec_fargs)
175      return;
176
177   spec_fargs = malloc( 20 * sizeof(double) );
178   spec_sp_fargs = malloc( 20 * sizeof(float) );
179
180   // #0
181   s = 0;
182   _exp = 0x3fd;
183   mant = 0x8000000000000ULL;
184   register_farg(&spec_fargs[i++], s, _exp, mant);
185
186   // #1
187   s = 0;
188   _exp = 0x404;
189   mant = 0xf000000000000ULL;
190   register_farg(&spec_fargs[i++], s, _exp, mant);
191
192   // #2
193   s = 0;
194   _exp = 0x001;
195   mant = 0x8000000b77501ULL;
196   register_farg(&spec_fargs[i++], s, _exp, mant);
197
198   // #3
199   s = 0;
200   _exp = 0x7fe;
201   mant = 0x800000000051bULL;
202   register_farg(&spec_fargs[i++], s, _exp, mant);
203
204   // #4
205   s = 0;
206   _exp = 0x012;
207   mant = 0x3214569900000ULL;
208   register_farg(&spec_fargs[i++], s, _exp, mant);
209
210   /* Special values */
211   /* +0.0      : 0 0x000 0x0000000000000 */
212   // #5
213   s = 0;
214   _exp = 0x000;
215   mant = 0x0000000000000ULL;
216   register_farg(&spec_fargs[i++], s, _exp, mant);
217
218   /* -0.0      : 1 0x000 0x0000000000000 */
219   // #6
220   s = 1;
221   _exp = 0x000;
222   mant = 0x0000000000000ULL;
223   register_farg(&spec_fargs[i++], s, _exp, mant);
224
225   /* +infinity : 0 0x7FF 0x0000000000000  */
226   // #7
227   s = 0;
228   _exp = 0x7FF;
229   mant = 0x0000000000000ULL;
230   register_farg(&spec_fargs[i++], s, _exp, mant);
231
232   /* -infinity : 1 0x7FF 0x0000000000000 */
233   // #8
234   s = 1;
235   _exp = 0x7FF;
236   mant = 0x0000000000000ULL;
237   register_farg(&spec_fargs[i++], s, _exp, mant);
238
239   /*
240    * This comment applies to values #9 and #10 below:
241    * When src is a SNaN, it's converted to a QNaN first before rounding to single-precision,
242    * so we can't just copy the double-precision value to the corresponding slot in the
243    * single-precision array (i.e., in the loop at the end of this function).  Instead, we
244    * have to manually set the bits using register_sp_farg().
245    */
246
247   /* +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF */
248   // #9
249   s = 0;
250   _exp = 0x7FF;
251   mant = 0x7FFFFFFFFFFFFULL;
252   register_farg(&spec_fargs[i++], s, _exp, mant);
253   _exp = 0xff;
254   mant_sp = 0x3FFFFF;
255   register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
256
257   /* -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF */
258   // #10
259   s = 1;
260   _exp = 0x7FF;
261   mant = 0x7FFFFFFFFFFFFULL;
262   register_farg(&spec_fargs[i++], s, _exp, mant);
263   _exp = 0xff;
264   mant_sp = 0x3FFFFF;
265   register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
266
267   /* +QNaN     : 0 0x7FF 0x8000000000000 */
268   // #11
269   s = 0;
270   _exp = 0x7FF;
271   mant = 0x8000000000000ULL;
272   register_farg(&spec_fargs[i++], s, _exp, mant);
273
274   /* -QNaN     : 1 0x7FF 0x8000000000000 */
275   // #12
276   s = 1;
277   _exp = 0x7FF;
278   mant = 0x8000000000000ULL;
279   register_farg(&spec_fargs[i++], s, _exp, mant);
280
281   /* denormalized value */
282   // #13
283   s = 1;
284   _exp = 0x000;
285   mant = 0x8340000078000ULL;
286   register_farg(&spec_fargs[i++], s, _exp, mant);
287
288   /* Negative finite number */
289   // #14
290   s = 1;
291   _exp = 0x40d;
292   mant = 0x0650f5a07b353ULL;
293   register_farg(&spec_fargs[i++], s, _exp, mant);
294
295   /* A few positive finite numbers ... */
296   // #15
297   s = 0;
298   _exp = 0x412;
299   mant = 0x32585a9900000ULL;
300   register_farg(&spec_fargs[i++], s, _exp, mant);
301
302   // #16
303   s = 0;
304   _exp = 0x413;
305   mant = 0x82511a2000000ULL;
306   register_farg(&spec_fargs[i++], s, _exp, mant);
307
308   // #17
309   s = 0;
310   _exp = 0x403;
311   mant = 0x12ef5a9300000ULL;
312   register_farg(&spec_fargs[i++], s, _exp, mant);
313
314   // #18
315   s = 0;
316   _exp = 0x405;
317   mant = 0x14bf5d2300000ULL;
318   register_farg(&spec_fargs[i++], s, _exp, mant);
319
320   // #19
321   s = 0;
322   _exp = 0x409;
323   mant = 0x76bf982440000ULL;
324   register_farg(&spec_fargs[i++], s, _exp, mant);
325
326
327   nb_special_fargs = i;
328   for (j = 0; j < i; j++) {
329      if (!(j == 9 || j == 10))
330         spec_sp_fargs[j] = spec_fargs[j];
331   }
332}
333
/* 16-byte aligned, zero-initialised scratch area of eight words; used as the
 * base address of the store tests in ldst_tests.
 */
static unsigned int vstg[8] __attribute__ ((aligned (16))) = { 0 };
336
337
/* 16-byte aligned word operands, four words per vector. */
static unsigned int viargs[] __attribute__ ((aligned (16))) = {
   0x80000001, 0x89abcdef, 0x00112233, 0x74556677,
   0x00001abb, 0x00000001, 0x31929394, 0xa1a2a3a4,
};
/* Number of words in viargs, and number of whole 4-word vectors. */
#define NUM_VIARGS_INTS (sizeof(viargs) / sizeof(viargs[0]))
#define NUM_VIARGS_VECS  (NUM_VIARGS_INTS/4)
349
350
/* 16-byte aligned doubleword operands, two doublewords per vector. */
static unsigned long long vdargs[] __attribute__ ((aligned (16))) = {
   0x0102030405060708ULL, 0x090A0B0C0E0D0E0FULL,
   0xF1F2F3F4F5F6F7F8ULL, 0xF9FAFBFCFEFDFEFFULL
};
/* Number of doublewords in vdargs, and number of whole 2-doubleword vectors. */
#define NUM_VDARGS_INTS (sizeof(vdargs) / sizeof(vdargs[0]))
#define NUM_VDARGS_VECS  (NUM_VDARGS_INTS/2)
359
/* Signature shared by every test routine: no arguments, no result. */
typedef void (*test_func_t)(void);

/* Pairs a test routine with its printable name. */
struct test_table
{
   test_func_t  test_category;   /* routine to run  */
   char        *name;            /* label for it    */
};
367
368
/* Operand/result precision of a test, as named by the enumerators: the
 * *_SINGLE_RES variants are the ones IS_DP_RESULT() reports as not having a
 * double-precision result.
 */
typedef enum {
   SINGLE_TEST,
   SINGLE_TEST_SINGLE_RES,
   DOUBLE_TEST,
   DOUBLE_TEST_SINGLE_RES
} precision_type_t;

/* Non-zero when x is SINGLE_TEST or DOUBLE_TEST.  The argument is
 * parenthesized so that expression arguments (e.g. a ?: expression) are
 * compared as a whole; note that x is evaluated twice.
 */
#define IS_DP_RESULT(x) (((x) == SINGLE_TEST) || ((x) == DOUBLE_TEST))
376
/* Category tag carried by the floating point test tables. */
typedef enum {
   VX_FP_SMAS                = 0,   // multiply add single precision result
   VX_FP_SMSS                = 1,   // multiply sub single precision result
   VX_FP_SNMAS               = 2,   // negative multiply add single precision result
   VX_FP_SNMSS               = 3,   // negative multiply sub single precision result
   VX_FP_OTHER               = 4,
   VX_CONV_WORD              = 5,
   VX_ESTIMATE               = 6,
   VX_CONV_TO_SINGLE         = 7,
   VX_CONV_TO_DOUBLE         = 8,
   VX_SCALAR_CONV_TO_WORD    = 9,
   VX_SCALAR_SP_TO_VECTOR_SP = 10,
   VX_DEFAULT                = 11
} vx_fp_test_type;
391
/* Kind of memory access performed by a load/store test (numbered from 1). */
typedef enum {
   VSX_LOAD       = 1,
   VSX_LOAD_SPLAT = 2,
   VSX_STORE      = 3,
} vsx_ldst_type;
397
/* Logical operation selected by a logic test (numbered from 1). */
typedef enum {
   VSX_AND  = 1,
   VSX_NAND = 2,
   VSX_ANDC = 3,
   VSX_OR   = 4,
   VSX_ORC  = 5,
   VSX_NOR  = 6,
   VSX_XOR  = 7,
   VSX_EQV  = 8,
} vsx_log_op;
408
409struct vx_fp_test1
410{
411   test_func_t test_func;
412   const char *name;
413   fp_test_args_t * targs;
414   int num_tests;
415    vx_fp_test_type test_type;
416 };
417
418struct ldst_test
419{
420   test_func_t test_func;
421   const char *name;
422   precision_type_t precision;
423   void * base_addr;
424   uint32_t offset;
425   vsx_ldst_type type;
426};
427
428struct vx_fp_test2
429{
430   test_func_t test_func;
431   const char *name;
432   fp_test_args_t * targs;
433   int num_tests;
434   precision_type_t precision;
435   vx_fp_test_type test_type;
436   const char * op;
437};
438
439struct xs_conv_test
440{
441   test_func_t test_func;
442   const char *name;
443   int num_tests;
444};
445
446struct simple_test
447{
448   test_func_t test_func;
449   const char *name;
450};
451
452struct vsx_logic_test
453{
454   test_func_t test_func;
455   const char *name;
456   vsx_log_op op;
457};
458
/* Short type names for the test descriptor structs.  Note that
 * xs_conv_test_t names struct simple_test.
 */
typedef struct vsx_logic_test  logic_test_t;
typedef struct ldst_test       ldst_test_t;
typedef struct simple_test     xs_conv_test_t;
typedef struct vx_fp_test1     vx_fp_test_basic_t;
typedef struct vx_fp_test2     vx_fp_test2_t;
typedef struct test_table      test_table_t;
465
466
467static vector unsigned int vec_out, vec_inA, vec_inB;
468
469static void test_xscvdpspn(void)
470{
471   __asm__ __volatile__ ("xscvdpspn   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
472}
473
474static void test_xscvspdpn(void)
475{
476   __asm__ __volatile__ ("xscvspdpn  %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
477}
478
479static int do_asp;
480static void test_xsmadds(void)
481{
482   if (do_asp)
483      __asm__ __volatile__ ("xsmaddasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
484   else
485      __asm__ __volatile__ ("xsmaddmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
486}
487
488static void test_xsmsubs(void)
489{
490   if (do_asp)
491      __asm__ __volatile__ ("xsmsubasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
492   else
493      __asm__ __volatile__ ("xsmsubmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
494}
495
496static void test_xscvsxdsp (void)
497{
498   __asm__ __volatile__ ("xscvsxdsp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
499}
500
501static void test_xscvuxdsp (void)
502{
503   __asm__ __volatile__ ("xscvuxdsp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
504}
505
506static void test_xsnmadds(void)
507{
508   if (do_asp)
509      __asm__ __volatile__ ("xsnmaddasp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
510   else
511      __asm__ __volatile__ ("xsnmaddmsp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
512}
513
514static void test_xsnmsubs(void)
515{
516   if (do_asp)
517      __asm__ __volatile__ ("xsnmsubasp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
518   else
519      __asm__ __volatile__ ("xsnmsubmsp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
520}
521
522static void test_stxsspx(void)
523{
524   __asm__ __volatile__ ("stxsspx          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
525}
526
527static void test_stxsiwx(void)
528{
529   __asm__ __volatile__ ("stxsiwx          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
530}
531
532static void test_lxsiwax(void)
533{
534   __asm__ __volatile__ ("lxsiwax          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
535}
536
537static void test_lxsiwzx(void)
538{
539   __asm__ __volatile__ ("lxsiwzx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
540}
541
542static void test_lxsspx(void)
543{
544   __asm__ __volatile__ ("lxsspx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
545}
546
547static void test_xssqrtsp(void)
548{
549   __asm__ __volatile__ ("xssqrtsp         %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
550}
551
552static void test_xsrsqrtesp(void)
553{
554   __asm__ __volatile__ ("xsrsqrtesp         %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
555}
556
/* Three argument instructions */
558static void test_xxleqv(void)
559{
560   __asm__ __volatile__ ("xxleqv          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
561}
562
563static void test_xxlorc(void)
564{
565   __asm__ __volatile__ ("xxlorc          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
566}
567
568static void test_xxlnand(void)
569{
570   __asm__ __volatile__ ("xxlnand         %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
571}
572
573static void test_xsaddsp(void)
574{
575  __asm__ __volatile__ ("xsaddsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
576}
577
578static void test_xssubsp(void)
579{
580  __asm__ __volatile__ ("xssubsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
581}
582
583static void test_xsdivsp(void)
584{
585  __asm__ __volatile__ ("xsdivsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
586}
587
588static void test_xsmulsp(void)
589{
590   __asm__ __volatile__ ("xsmulsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
591}
592
593static void test_xsresp(void)
594{
595   __asm__ __volatile__ ("xsresp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
596}
597static void test_xsrsp(void)
598{
599   __asm__ __volatile__ ("xsrsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
600}
601
602fp_test_args_t vx_math_tests[] = {
603                                  {8, 8},
604                                  {8, 14},
605                                  {8, 6},
606                                  {8, 5},
607                                  {8, 4},
608                                  {8, 7},
609                                  {8, 9},
610                                  {8, 11},
611                                  {14, 8},
612                                  {14, 14},
613                                  {14, 6},
614                                  {14, 5},
615                                  {14, 4},
616                                  {14, 7},
617                                  {14, 9},
618                                  {14, 11},
619                                  {6, 8},
620                                  {6, 14},
621                                  {6, 6},
622                                  {6, 5},
623                                  {6, 4},
624                                  {6, 7},
625                                  {6, 9},
626                                  {6, 11},
627                                  {5, 8},
628                                  {5, 14},
629                                  {5, 6},
630                                  {5, 5},
631                                  {5, 4},
632                                  {5, 7},
633                                  {5, 9},
634                                  {5, 11},
635                                  {4, 8},
636                                  {4, 14},
637                                  {4, 6},
638                                  {4, 5},
639                                  {4, 1},
640                                  {4, 7},
641                                  {4, 9},
642                                  {4, 11},
643                                  {7, 8},
644                                  {7, 14},
645                                  {7, 6},
646                                  {7, 5},
647                                  {7, 4},
648                                  {7, 7},
649                                  {7, 9},
650                                  {7, 11},
651                                  {10, 8},
652                                  {10, 14},
653                                  {10, 6},
654                                  {10, 5},
655                                  {10, 4},
656                                  {10, 7},
657                                  {10, 9},
658                                  {10, 11},
659                                  {12, 8},
660                                  {12, 14},
661                                  {12, 6},
662                                  {12, 5},
663                                  {12, 4},
664                                  {12, 7},
665                                  {12, 9},
666                                  {12, 11},
667                                  {8, 8},
668                                  {8, 14},
669                                  {8, 6},
670                                  {8, 5},
671                                  {8, 4},
672                                  {8, 7},
673                                  {8, 9},
674                                  {8, 11},
675                                  {14, 8},
676                                  {14, 14},
677                                  {14, 6},
678                                  {14, 5},
679                                  {14, 4},
680                                  {14, 7},
681                                  {14, 9},
682                                  {14, 11},
683                                  {6, 8},
684                                  {6, 14},
685                                  {6, 6},
686                                  {6, 5},
687                                  {6, 4},
688                                  {6, 7},
689                                  {6, 9},
690                                  {6, 11},
691                                  {5, 8},
692                                  {5, 14},
693                                  {5, 6},
694                                  {5, 5},
695                                  {5, 4},
696                                  {5, 7},
697                                  {5, 9},
698                                  {5, 11},
699                                  {4, 8},
700                                  {4, 14},
701                                  {4, 6},
702                                  {4, 5},
703                                  {4, 1},
704                                  {4, 7},
705                                  {4, 9},
706                                  {4, 11},
707                                  {7, 8},
708                                  {7, 14},
709                                  {7, 6},
710                                  {7, 5},
711                                  {7, 4},
712                                  {7, 7},
713                                  {7, 9},
714                                  {7, 11},
715                                  {10, 8},
716                                  {10, 14},
717                                  {10, 6},
718                                  {10, 5},
719                                  {10, 4},
720                                  {10, 7},
721                                  {10, 9},
722                                  {10, 11},
723                                  {12, 8},
724                                  {12, 14},
725                                  {12, 6},
726                                  {12, 5},
727                                  {12, 4},
728                                  {12, 7},
729                                  {12, 9},
730                                  {12, 11}
731};
732
733// These are all double precision inputs with double word outputs (mostly converted to single precision)
734static vx_fp_test_basic_t vx_fp_tests[] = {
735                                     { &test_xsmadds, "xsmadd", vx_math_tests, 64, VX_FP_SMAS},
736                                     { &test_xsmsubs, "xsmsub", vx_math_tests, 64, VX_FP_SMSS},
737                                     { &test_xsmulsp, "xsmulsp", vx_math_tests, 64, VX_FP_OTHER},
738                                     { &test_xsdivsp, "xsdivsp", vx_math_tests, 64, VX_FP_OTHER},
739                                     { &test_xsnmadds, "xsnmadd", vx_math_tests, 64, VX_FP_SNMAS},
740                                     { &test_xsnmsubs, "xsnmsub", vx_math_tests, 64, VX_FP_SNMSS},
741                                     { NULL, NULL, NULL, 0, 0 }
742};
743
744static vx_fp_test2_t
745vsx_one_fp_arg_tests[] = {
746                          { &test_xscvdpspn, "xscvdpspn", NULL, 20, DOUBLE_TEST_SINGLE_RES, VX_SCALAR_SP_TO_VECTOR_SP, "conv"},
747                          { &test_xscvspdpn, "xscvspdpn", NULL, 20, SINGLE_TEST, VX_DEFAULT, "conv"},
748                          { &test_xsresp,    "xsresp", NULL, 20, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
749                          { &test_xsrsp,     "xsrsp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "round"},
750                          { &test_xsrsqrtesp, "xsrsqrtesp", NULL, 20, DOUBLE_TEST, VX_ESTIMATE, "1/sqrt"},
751                          { &test_xssqrtsp, "xssqrtsp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "sqrt"},
752                          { NULL, NULL, NULL, 0, 0, 0, NULL}
753};
754
755// These are all double precision inputs with double word outputs (mostly converted to single precision)
756static vx_fp_test_basic_t
757vx_simple_scalar_fp_tests[] = {
758                          { &test_xssubsp, "xssubsp", vx_math_tests, 64, VX_DEFAULT},
759                          { &test_xsaddsp, "xsaddsp", vx_math_tests, 64, VX_DEFAULT},
760                          { NULL, NULL, NULL, 0 , 0}
761};
762
763static ldst_test_t
764ldst_tests[] = {
765                    { &test_stxsspx, "stxsspx", DOUBLE_TEST_SINGLE_RES, vstg, 0, VSX_STORE },
766                    { &test_stxsiwx, "stxsiwx", SINGLE_TEST_SINGLE_RES, vstg, 4, VSX_STORE },
767                    { &test_lxsiwax, "lxsiwax", SINGLE_TEST, viargs, 0, VSX_LOAD },
768                    { &test_lxsiwzx, "lxsiwzx", SINGLE_TEST, viargs, 4, VSX_LOAD },
769                    { &test_lxsspx,  "lxsspx",  SINGLE_TEST, NULL, 0, VSX_LOAD },
770                    { NULL, NULL, 0, NULL, 0, 0 } };
771
772static xs_conv_test_t
773xs_conv_tests[] = {
774                   { &test_xscvsxdsp, "xscvsxdsp"},
775                   { &test_xscvuxdsp, "xscvuxdsp"},
776                   { NULL, NULL}
777};
778
779static logic_test_t
780logic_tests[] = {
781                 { &test_xxleqv,  "xxleqv", VSX_EQV },
782                 { &test_xxlorc,  "xxlorc", VSX_ORC },
783                 { &test_xxlnand, "xxlnand", VSX_NAND },
784                 { NULL, NULL}
785};
786
787Bool check_reciprocal_estimate(Bool is_rsqrte, int idx, int output_vec_idx)
788{
789   /* NOTE:
790    * This function has been verified only with the xsresp and xsrsqrtes instructions.
791    *
792    * Technically, the number of bits of precision for xsresp and xsrsqrtesp is
793    * 14 bits (14 = log2 16384).  However, the VEX emulation of these instructions
794    * does an actual reciprocal calculation versus estimation, so the answer we get back from
795    * valgrind can easily differ from the estimate in the lower bits (within the 14 bits of
796    * precision) and the estimate may still be within expected tolerances.  On top of that,
797    * we can't count on these estimates always being the same across implementations.
798    * For example, with the fre[s] instruction (which should be correct to within one part
799    * in 256 -- i.e., 8 bits of precision) . . . When approximating the value 1.0111_1111_1111,
800    * one implementation could return 1.0111_1111_0000 and another implementation could return
801    * 1.1000_0000_0000.  Both estimates meet the 1/256 accuracy requirement, but share only a
802    * single bit in common.
803    *
804    * The upshot is we can't validate the VEX output for these instructions by comparing against
805    * stored bit patterns.  We must check that the result is within expected tolerances.
806    */
807
808   /* A mask to be used for validation as a last resort.
809    * Only use 12 bits of precision for reasons discussed above.
810    */
811#define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFF8000
812
813
814   Bool result = False;
815   double src_dp, res_dp;
816   float calc_diff = 0;
817   float real_diff = 0;
818   double recip_divisor;
819   float div_result;
820   float calc_diff_tmp;
821
822   src_dp = res_dp = 0;
823   Bool src_is_negative = False;
824   Bool res_is_negative = False;
825   unsigned long long * dst_dp = NULL;
826   unsigned long long * src_dp_ull;
827   dst_dp = (unsigned long long *) &vec_out;
828   src_dp = spec_fargs[idx];
829   src_dp_ull = (unsigned long long *) &src_dp;
830   src_is_negative = (*src_dp_ull & 0x8000000000000000ULL) ? True : False;
831   res_is_negative = (dst_dp[output_vec_idx] & 0x8000000000000000ULL) ? True : False;
832   memcpy(&res_dp, &dst_dp[output_vec_idx], 8);
833
834
835   // Below are common rules
836   if (isnan(src_dp))
837      return isnan(res_dp);
838   if (fpclassify(src_dp) == FP_ZERO)
839      return isinf(res_dp);
840   if (!src_is_negative && isinf(src_dp))
841      return !res_is_negative && (fpclassify(res_dp) == FP_ZERO);
842   if (is_rsqrte) {
843      if (src_is_negative)
844         return isnan(res_dp);
845   } else {
846      if (src_is_negative && isinf(src_dp))
847         return res_is_negative && (fpclassify(res_dp) == FP_ZERO);
848   }
849
850   if (is_rsqrte)
851      recip_divisor = sqrt(src_dp);
852   else
853      recip_divisor = src_dp;
854
855   /* The instructions handled by this function take a double precision
856    * input, perform a reciprocal estimate in double-precision, round
857    * the result to single precision and store into the destination
858    * register in double precision format.  So, to check the result
859    * for accuracy, we use float (single precision) values.
860    */
861   div_result = 1.0/recip_divisor;
862   calc_diff_tmp = recip_divisor * 16384.0;
863   if (isnormal(calc_diff_tmp)) {
864      calc_diff = fabs(1.0/calc_diff_tmp);
865      real_diff = fabs((float)res_dp - div_result);
866      result = ( ( res_dp == div_result )
867               || ( real_diff <= calc_diff ) );
868#if FRES_DEBUG
869      unsigned int * dv = (unsigned int *)&div_result;
870      unsigned int * rd = (unsigned int *)&real_diff;
871      unsigned int * cd = (unsigned int *)&calc_diff;
872      printf("\n\t {computed div_result: %08x; real_diff:  %08x; calc_diff:  %08x}\n",
873             *dv, *rd, *cd);
874#endif
875
876   } else {
877      /* Unable to compute theoretical difference, so we fall back to masking out
878       * un-precise bits.
879       */
880      unsigned int * div_result_sp = (unsigned int *)&div_result;
881      float res_sp = (float)res_dp;
882      unsigned int * dst_sp = (unsigned int *)&res_sp;
883#if FRES_DEBUG
884      unsigned int * calc_diff_tmp_sp = (unsigned int *)&calc_diff_tmp;
885      printf("Unable to compute theoretical difference, so we fall back to masking\n");
886      printf("\tcalc_diff_tmp: %08x; div_result: %08x; vector result (sp): %08x\n",
887             *calc_diff_tmp_sp, *div_result_sp, *dst_sp);
888#endif
889      result = (*dst_sp & VSX_RECIP_ESTIMATE_MASK_SP) == (*div_result_sp & VSX_RECIP_ESTIMATE_MASK_SP);
890   }
891   return result;
892}
893
894static void test_vx_fp_ops(void)
895{
896
897   test_func_t func;
898   int k;
899   char * test_name = (char *)malloc(20);
900   void  * vecA_void_ptr, * vecB_void_ptr, * vecOut_void_ptr;
901
902   if (isLE) {
903      vecA_void_ptr = (void *)&vec_inA + 8;
904      vecB_void_ptr = (void *)&vec_inB + 8;
905      vecOut_void_ptr = (void *)&vec_out + 8;
906   } else {
907      vecA_void_ptr = (void *)&vec_inA;
908      vecB_void_ptr = (void *)&vec_inB;
909      vecOut_void_ptr = (void *)&vec_out;
910   }
911
912   k = 0;
913   build_special_fargs_table();
914   while ((func = vx_fp_tests[k].test_func)) {
915      int i, repeat = 0;
916      unsigned long long * frap, * frbp, * dst;
917      vx_fp_test_basic_t test_group = vx_fp_tests[k];
918      vx_fp_test_type test_type = test_group.test_type;
919
920      switch (test_type) {
921         case VX_FP_SMAS:
922         case VX_FP_SMSS:
923         case VX_FP_SNMAS:
924         case VX_FP_SNMSS:
925            if (test_type == VX_FP_SMAS)
926               strcpy(test_name, "xsmadd");
927            else if (test_type == VX_FP_SMSS)
928               strcpy(test_name, "xsmsub");
929            else if (test_type == VX_FP_SNMAS)
930               strcpy(test_name, "xsnmadd");
931            else
932               strcpy(test_name, "xsnmsub");
933
934            if (!repeat) {
935               repeat = 1;
936               strcat(test_name, "asp");
937               do_asp = 1;
938            }
939            break;
940         case VX_FP_OTHER:
941            strcpy(test_name, test_group.name);
942            break;
943         default:
944            printf("ERROR:  Invalid VX FP test type %d\n", test_type);
945            exit(1);
946      }
947
948again:
949      for (i = 0; i < test_group.num_tests; i++) {
950         unsigned int * inA, * inB, * pv;
951
952         fp_test_args_t aTest = test_group.targs[i];
953         inA = (unsigned int *)&spec_fargs[aTest.fra_idx];
954         inB = (unsigned int *)&spec_fargs[aTest.frb_idx];
955         frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
956         frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
957         int idx;
958         unsigned long long vsr_XT;
959         pv = (unsigned int *)&vec_out;
960
961         // Only need to copy one doubleword into each vector's element 0
962         memcpy(vecA_void_ptr, inA, 8);
963         memcpy(vecB_void_ptr, inB, 8);
964
965         // clear vec_out
966         for (idx = 0; idx < 4; idx++, pv++)
967            *pv = 0;
968
969         if (test_type != VX_FP_OTHER) {
970            /* Then we need a third src argument, which is stored in element 0 of
971             * VSX[XT] -- i.e., vec_out.  For the xs<ZZZ>mdp cases, VSX[XT] holds
972             * src3 and VSX[XB] holds src2; for the xs<ZZZ>adp cases, VSX[XT] holds
973             * src2 and VSX[XB] holds src3.  The fp_test_args_t that holds the test
974             * data (input args, result) contain only two inputs, so I arbitrarily
975             * use spec_fargs elements 4 and 14 (alternating) for the third source
976             * argument.  We can use the same input data for a given pair of
977             * adp/mdp-type instructions by swapping the src2 and src3 arguments; thus
978             * the expected result should be the same.
979             */
980            int extra_arg_idx;
981            if (i % 2)
982               extra_arg_idx = 4;
983            else
984               extra_arg_idx = 14;
985
986            if (repeat) {
987               /* We're on the first time through of one of the VX_FP_SMx
988                * test types, meaning we're testing a xs<ZZZ>adp case, thus
989                * we have to swap inputs as described above:
990                *    src2 <= VSX[XT]
991                *    src3 <= VSX[XB]
992                */
993               memcpy(vecOut_void_ptr, inB, 8);  // src2
994               memcpy(vecB_void_ptr, &spec_fargs[extra_arg_idx], 8);  //src3
995               frbp = (unsigned long long *)&spec_fargs[extra_arg_idx];
996            } else {
997               // Don't need to init src2, as it's done before the switch()
998               memcpy(vecOut_void_ptr, &spec_fargs[extra_arg_idx], 8);  //src3
999            }
1000            memcpy(&vsr_XT, vecOut_void_ptr, 8);
1001         }
1002
1003         (*func)();
1004         dst = (unsigned long long *) &vec_out;
1005         if (isLE)
1006            dst++;
1007
1008         if (test_type == VX_FP_OTHER)
1009            printf("#%d: %s %016llx %016llx = %016llx\n", i, test_name,
1010                   *frap, *frbp, *dst);
1011         else
1012            printf( "#%d: %s %016llx %016llx %016llx = %016llx\n", i,
1013                    test_name, vsr_XT, *frap, *frbp, *dst );
1014
1015      }
1016      /*
1017           {
1018               // Debug code.  Keep this block commented out except when debugging.
1019               double result, expected;
1020               memcpy(&result, dst, 8);
1021               memcpy(&expected, &aTest.dp_bin_result, 8);
1022               printf( "\tFRA + FRB: %e + %e: Expected = %e; Actual = %e\n",
1023                       spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx],
1024                       expected, result );
1025            }
1026       */
1027      printf( "\n" );
1028
1029      if (repeat) {
1030         repeat = 0;
1031         strcat(test_name, "UNKNOWN");
1032         switch (test_type) {
1033            case VX_FP_SMAS:
1034            case VX_FP_SMSS:
1035            case VX_FP_SNMAS:
1036            case VX_FP_SNMSS:
1037               if (test_type == VX_FP_SMAS)
1038                  strcpy(test_name, "xsmadd");
1039               else if (test_type == VX_FP_SMSS)
1040                  strcpy(test_name, "xsmsub");
1041               else if (test_type == VX_FP_SNMAS)
1042                  strcpy(test_name, "xsnmadd");
1043               else
1044                  strcpy(test_name, "xsnmsub");
1045
1046               do_asp = 0;
1047               strcat(test_name, "msp");
1048               break;
1049            default:
1050               break;
1051         }
1052         goto again;
1053      }
1054      k++;
1055   }
1056   printf( "\n" );
1057   free(test_name);
1058}
1059
1060
1061static void test_vsx_one_fp_arg(void)
1062{
1063   test_func_t func;
1064   int k;
1065   void  * vecB_void_ptr;
1066
1067   k = 0;
1068   build_special_fargs_table();
1069
1070   while ((func = vsx_one_fp_arg_tests[k].test_func)) {
1071      int idx, i;
1072      unsigned long long *dst_dp;
1073      unsigned int * dst_sp;
1074      vx_fp_test2_t test_group = vsx_one_fp_arg_tests[k];
1075      /* size of source operands */
1076      Bool dp  = ((test_group.precision == DOUBLE_TEST) ||
1077		  (test_group.precision == DOUBLE_TEST_SINGLE_RES)) ? True : False;
1078      /* size of result */
1079      Bool dp_res = IS_DP_RESULT(test_group.precision);
1080      Bool is_sqrt = (strstr(test_group.name, "sqrt")) ? True : False;
1081
1082      vecB_void_ptr = (void *)&vec_inB;
1083      if (isLE) {
1084         vecB_void_ptr += dp? 8 : 12;
1085      }
1086
1087      for (i = 0; i < test_group.num_tests; i++) {
1088         unsigned int * pv;
1089         void * inB;
1090
1091         pv = (unsigned int *)&vec_out;
1092         // clear vec_out
1093         for (idx = 0; idx < 4; idx++, pv++)
1094            *pv = 0;
1095
1096         if (dp) {
1097            int vec_out_idx;
1098            unsigned long long * frB_dp;
1099            if (isLE)
1100               vec_out_idx = dp_res ? 1 : 3;
1101            else
1102               vec_out_idx = 0;
1103
1104            if (test_group.test_type == VX_SCALAR_SP_TO_VECTOR_SP) {
1105               /* Take a single-precision value stored in double word element 0
1106                * of src in double-precision format and convert to single-
1107                * precision and store in word element 0 of dst.
1108                */
1109               double input = spec_sp_fargs[i];
1110               memcpy(vecB_void_ptr, (void *)&input, 8);
1111            } else {
1112               inB = (void *)&spec_fargs[i];
1113               // copy double precision FP into input vector element 0
1114               memcpy(vecB_void_ptr, inB, 8);
1115            }
1116
1117            // execute test insn
1118            (*func)();
1119            if (dp_res)
1120               dst_dp = (unsigned long long *) &vec_out;
1121            else
1122               dst_sp = (unsigned int *) &vec_out;
1123
1124            printf("#%d: %s ", i, test_group.name);
1125            frB_dp = (unsigned long long *)&spec_fargs[i];
1126            printf("%s(%016llx)", test_group.op, *frB_dp);
1127            if (test_group.test_type == VX_ESTIMATE)
1128            {
1129               Bool res;
1130               res = check_reciprocal_estimate(is_sqrt, i, vec_out_idx);
1131               printf(" ==> %s)", res ? "PASS" : "FAIL");
1132            } else if (dp_res) {
1133               printf(" = %016llx", dst_dp[vec_out_idx]);
1134            } else {
1135               printf(" = %08x", dst_sp[vec_out_idx]);
1136            }
1137
1138            printf("\n");
1139         } else {  // single precision test type
1140            int vec_out_idx;
1141            if (isLE)
1142               vec_out_idx = dp_res ? 1 : 3;
1143            else
1144               vec_out_idx = 0;
1145            // Clear input vector
1146            pv = (unsigned int *)&vec_inB;
1147            for (idx = 0; idx < 4; idx++, pv++)
1148               *pv = 0;
1149            inB = (void *)&spec_sp_fargs[i];
1150            // copy single precision FP into input vector element i
1151            memcpy(vecB_void_ptr, inB, 4);
1152            // execute test insn
1153            (*func)();
1154            if (dp_res)
1155               dst_dp = (unsigned long long *) &vec_out;
1156            else
1157               dst_sp = (unsigned int *) &vec_out;
1158            // print result
1159            printf("#%d: %s ", i, test_group.name);
1160               printf("%s(%08x)", test_group.op, *((unsigned int *)&spec_sp_fargs[i]));
1161               if (dp_res)
1162                     printf(" = %016llx", dst_dp[vec_out_idx]);
1163               else
1164                  printf(" = %08x", dst_sp[vec_out_idx]);
1165
1166            printf("\n");
1167         }
1168      }
1169      k++;
1170      printf( "\n" );
1171   }
1172}
1173
1174/* This function currently only supports two double precision input arguments. */
1175static void test_vsx_two_fp_arg(void)
1176{
1177   test_func_t func;
1178   int k = 0;
1179   void  * vecA_void_ptr, * vecB_void_ptr;
1180
1181   if (isLE) {
1182      vecA_void_ptr = (void *)&vec_inA + 8;
1183      vecB_void_ptr = (void *)&vec_inB + 8;
1184   } else {
1185      vecA_void_ptr = (void *)&vec_inA;
1186      vecB_void_ptr = (void *)&vec_inB;
1187   }
1188
1189   build_special_fargs_table();
1190   while ((func = vx_simple_scalar_fp_tests[k].test_func)) {
1191      unsigned long long * frap, * frbp, * dst;
1192      unsigned int * pv;
1193      int idx;
1194      vx_fp_test_basic_t test_group = vx_simple_scalar_fp_tests[k];
1195      pv = (unsigned int *)&vec_out;
1196      // clear vec_out
1197      for (idx = 0; idx < 4; idx++, pv++)
1198         *pv = 0;
1199
1200      void * inA, * inB;
1201      int i;
1202      for (i = 0; i < test_group.num_tests; i++) {
1203         fp_test_args_t aTest = test_group.targs[i];
1204         inA = (void *)&spec_fargs[aTest.fra_idx];
1205         inB = (void *)&spec_fargs[aTest.frb_idx];
1206         frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
1207         frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
1208         // Only need to copy one doubleword into each vector's element 0
1209         memcpy(vecA_void_ptr, inA, 8);
1210         memcpy(vecB_void_ptr, inB, 8);
1211         (*func)();
1212         dst = (unsigned long long *) &vec_out;
1213         if (isLE)
1214            dst++;
1215         printf("#%d: %s %016llx,%016llx => %016llx\n", i, test_group.name,
1216                *frap, *frbp, *dst);
1217      }
1218      printf( "\n" );
1219      k++;
1220   }
1221}
1222
1223/* This function handles the following cases:
1224 *   1) Single precision value stored in double-precision
1225 *      floating-point format in doubleword element 0 of src VSX register
1226 *   2) Integer word value stored in word element 1 of src VSX register
1227 */
1228static void _do_store_test (ldst_test_t storeTest)
1229{
1230   test_func_t func;
1231   unsigned int *dst32;
1232   unsigned int i, idx;
1233   unsigned int * pv = (unsigned int *) storeTest.base_addr;
1234   void  * vecA_void_ptr;
1235
1236   if (isLE) {
1237      if (storeTest.precision == SINGLE_TEST_SINGLE_RES)
1238         vecA_void_ptr = (void *)&vec_inA + 8;
1239   } else {
1240      if (storeTest.precision == SINGLE_TEST_SINGLE_RES)
1241         vecA_void_ptr = (void *)&vec_inA + 4;
1242      else
1243         vecA_void_ptr = (void *)&vec_inA;
1244   }
1245
1246   func = storeTest.test_func;
1247   r14 = (HWord_t) storeTest.base_addr;
1248   r15 = (HWord_t) storeTest.offset;
1249
1250   /* test some of the pre-defined single precision values */
1251   for (i = 0; i < nb_special_fargs; i+=3) {
1252      // clear out storage destination
1253      for (idx = 0; idx < 4; idx++)
1254         *(pv + idx) = 0;
1255
1256      printf( "%s:", storeTest.name );
1257      if (storeTest.precision == SINGLE_TEST_SINGLE_RES)
1258      {
1259         unsigned int * arg_ptr = (unsigned int *)&spec_sp_fargs[i];
1260         memcpy(vecA_void_ptr, arg_ptr, sizeof(unsigned int));
1261         printf(" %08x ==> ", *arg_ptr);
1262      } else {
1263         unsigned long long * dp;
1264         double input = spec_sp_fargs[i];
1265         dp = (unsigned long long *)&input;
1266         memcpy(vecA_void_ptr, dp, sizeof(unsigned long long));
1267         printf(" %016llx ==> ", *dp);
1268      }
1269
1270      // execute test insn
1271      (*func)();
1272      dst32 = (unsigned int*)(storeTest.base_addr);
1273      dst32 += (storeTest.offset/sizeof(int));
1274      printf( "%08x\n", *dst32);
1275   }
1276
1277   printf("\n");
1278}
1279
1280static void _do_load_test(ldst_test_t loadTest)
1281{
1282   test_func_t func;
1283   unsigned int i;
1284   unsigned long long * dst_dp;
1285
1286   func = loadTest.test_func;
1287   r15 = (HWord_t) loadTest.offset;
1288
1289   if (loadTest.base_addr == NULL) {
1290      /* Test lxsspx: source is single precision value, so let's */
1291      /* test some of the pre-defined single precision values. */
1292      int num_loops = (loadTest.offset == 0) ?  nb_special_fargs : (nb_special_fargs - (loadTest.offset/sizeof(int)));
1293      for (i = 0; i < num_loops; i+=3) {
1294         unsigned int * sp = (unsigned int *)&spec_sp_fargs[i + (loadTest.offset/sizeof(int))];
1295         printf( "%s:", loadTest.name );
1296         printf(" %08x ==> ", *sp);
1297         r14 = (HWord_t)&spec_sp_fargs[i];
1298
1299         // execute test insn
1300         (*func)();
1301         dst_dp = (unsigned long long *) &vec_out;
1302         if (isLE)
1303            dst_dp++;
1304         printf("%016llx\n", *dst_dp);
1305      }
1306   } else {
1307      // source is an integer word
1308      int num_loops = (loadTest.offset == 0) ?  NUM_VIARGS_INTS : (NUM_VIARGS_INTS - (loadTest.offset/sizeof(int)));
1309      for (i = 0; i < num_loops; i++) {
1310         printf( "%s:", loadTest.name );
1311         r14 = (HWord_t)&viargs[i];
1312         printf(" %08x ==> ", viargs[i + (loadTest.offset/sizeof(int))]);
1313
1314         // execute test insn
1315         (*func)();
1316         dst_dp = (unsigned long long *) &vec_out;
1317         if (isLE)
1318            dst_dp++;
1319         printf("%016llx\n", *dst_dp);
1320      }
1321   }
1322   printf("\n");
1323}
1324
1325static void test_ldst(void)
1326{
1327   int k = 0;
1328
1329   while (ldst_tests[k].test_func) {
1330      if (ldst_tests[k].type == VSX_STORE)
1331         _do_store_test(ldst_tests[k]);
1332      else {
1333         _do_load_test(ldst_tests[k]);
1334      }
1335      k++;
1336      printf("\n");
1337   }
1338}
1339
1340static void test_xs_conv_ops(void)
1341{
1342
1343   test_func_t func;
1344   int k = 0;
1345   void  * vecB_void_ptr;
1346
1347   if (isLE)
1348      vecB_void_ptr = (void *)&vec_inB + 8;
1349   else
1350      vecB_void_ptr = (void *)&vec_inB;
1351
1352   build_special_fargs_table();
1353   while ((func = xs_conv_tests[k].test_func)) {
1354      int i;
1355      unsigned long long * dst;
1356      xs_conv_test_t test_group = xs_conv_tests[k];
1357      for (i = 0; i < NUM_VDARGS_INTS; i++) {
1358         unsigned long long  * inB, * pv;
1359         int idx;
1360         inB = (unsigned long long *)&vdargs[i];
1361         memcpy(vecB_void_ptr, inB, 8);
1362         pv = (unsigned long long *)&vec_out;
1363         // clear vec_out
1364         for (idx = 0; idx < 2; idx++, pv++)
1365            *pv = 0ULL;
1366         (*func)();
1367         dst = (unsigned long long *) &vec_out;
1368         if (isLE)
1369            dst++;
1370         printf("#%d: %s %016llx => %016llx\n", i, test_group.name, vdargs[i], *dst);
1371      }
1372      k++;
1373      printf("\n");
1374   }
1375   printf( "\n" );
1376}
1377
1378
1379static void test_vsx_logic(void)
1380{
1381   logic_test_t aTest;
1382   test_func_t func;
1383   int k;
1384   k = 0;
1385
1386   while ((func = logic_tests[k].test_func)) {
1387
1388      unsigned int * pv;
1389      unsigned int * inA, * inB, * dst;
1390      int idx, i;
1391      aTest = logic_tests[k];
1392      for (i = 0; i <= NUM_VIARGS_VECS; i+=4) {
1393         pv = (unsigned int *)&vec_out;
1394         inA = &viargs[i];
1395         inB = &viargs[i];
1396         memcpy(&vec_inA, inA, sizeof(vector unsigned int));
1397         memcpy(&vec_inB, inB, sizeof(vector unsigned int));
1398         // clear vec_out
1399         for (idx = 0; idx < 4; idx++, pv++)
1400            *pv = 0;
1401
1402         // execute test insn
1403         (*func)();
1404         dst = (unsigned int*) &vec_out;
1405
1406         printf( "#%d: %10s ", k, aTest.name);
1407         printf( " (%08x %08x %08x %08x, ", inA[0], inA[1], inA[2], inA[3]);
1408         printf( " %08x %08x %08x %08x)", inB[0], inB[1], inB[2], inB[3]);
1409         printf(" ==> %08x %08x %08x %08x\n", dst[0], dst[1], dst[2], dst[3]);
1410      }
1411      k++;
1412   }
1413   printf( "\n" );
1414}
1415
1416
1417//----------------------------------------------------------
1418
1419static test_table_t all_tests[] = {
1420                                     { &test_vx_fp_ops,
1421                                       "Test VSX floating point instructions"},
1422                                     { &test_vsx_one_fp_arg,
1423                                       "Test VSX vector and scalar single argument instructions"} ,
1424                                     { &test_vsx_logic,
1425                                       "Test VSX logic instructions" },
1426                                     { &test_xs_conv_ops,
1427                                       "Test VSX scalar integer conversion instructions" },
1428                                     { &test_ldst,
1429                                       "Test VSX load/store dp to sp instructions" },
1430                                     { &test_vsx_two_fp_arg,
1431                                       "Test VSX vector and scalar two argument instructions"} ,
1432                                     { NULL, NULL }
1433};
1434
1435#endif
1436
/* Entry point.  When built with HAS_ISA_2_07, print each category's title and
 * run it, in all_tests order, stopping at the entry whose test_category is
 * null.  Otherwise print a single line saying the ISA level is unsupported.
 * Always returns 0; argc and argv are unused.
 */
int main(int argc, char *argv[])
{
#ifdef HAS_ISA_2_07
   int i;

   for (i = 0; all_tests[i].test_category != NULL; i++) {
      test_table_t aTest = all_tests[i];

      printf( "%s\n", aTest.name );
      (*aTest.test_category)();
   }
#else
   printf("NO ISA 2.07 SUPPORT\n");
#endif
   return 0;
}
1456