1/*  Copyright (C) 2011 IBM
2
3 Author: Maynard Johnson <maynardj@us.ibm.com>
4
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307, USA.
19
20 The GNU General Public License is contained in the file COPYING.
21 */
22
23#ifdef HAS_VSX
24
25#include <stdio.h>
26#include <stdint.h>
27#include <stdlib.h>
28#include <string.h>
29#include <malloc.h>
30#include <altivec.h>
31#include <math.h>
32#include <unistd.h>    // getopt
33
/* Host machine word: 32 bits wide unless __powerpc64__ is defined, in
 * which case it is 64 bits wide. */
#ifndef __powerpc64__
typedef uint32_t HWord_t;
#else
typedef uint64_t HWord_t;
#endif /* __powerpc64__ */

/* Minimal boolean type and its two values. */
typedef unsigned char Bool;
#define True 1
#define False 0

/* isLE is 1 when VGP_ppc64le_linux is defined and 0 otherwise.
 * NOTE(review): presumably the build system defines VGP_ppc64le_linux for
 * little-endian ppc64 targets -- confirm. */
#ifdef VGP_ppc64le_linux
#define isLE 1
#else
#define isLE 0
#endif
49
/* GCC global register variables: bind the C names r14..r17 to the
 * general-purpose registers of the same name and f14..f17 to the
 * floating-point registers fr14..fr17.
 * NOTE(review): no user of these names is visible in this part of the
 * file; presumably other tests pass operands through them -- confirm. */
register HWord_t r14 __asm__ ("r14");
register HWord_t r15 __asm__ ("r15");
register HWord_t r16 __asm__ ("r16");
register HWord_t r17 __asm__ ("r17");
register double f14 __asm__ ("fr14");
register double f15 __asm__ ("fr15");
register double f16 __asm__ ("fr16");
register double f17 __asm__ ("fr17");

/* NOTE(review): presumably hold the CR and XER values captured after a
 * divide-style test; the code that uses them is not visible here. */
static volatile unsigned int div_flags, div_xer;
60
/* Clobber list naming all eight condition-register fields. */
#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"

/* Load the condition register from _arg (mtcr); every CR field is declared
 * clobbered.  Note the expansion already ends with a ';'. */
#define SET_CR(_arg) \
      __asm__ __volatile__ ("mtcr  %0" : : "b"(_arg) : ALLCR );

/* Load the XER from _arg (mtxer).  Note the expansion already ends with
 * a ';'. */
#define SET_XER(_arg) \
      __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );

/* Read the condition register into _lval (mfcr).  No trailing ';'. */
#define GET_CR(_lval) \
      __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )

/* Read the XER into _lval (mfxer).  No trailing ';'. */
#define GET_XER(_lval) \
      __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )

/* Read CR into _lval_cr, then XER into _lval_xer. */
#define GET_CR_XER(_lval_cr,_lval_xer) \
   do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)

/* Clear the condition register. */
#define SET_CR_ZERO \
      SET_CR(0)

/* Clear the XER. */
#define SET_XER_ZERO \
      SET_XER(0)

/* Clear both CR and XER. */
#define SET_CR_XER_ZERO \
   do { SET_CR_ZERO; SET_XER_ZERO; } while (0)

/* Write the bits of a 0.0 double into the FPSCR fields selected by the
 * mtfsf field mask 0xFF. */
#define SET_FPSCR_ZERO \
   do { double _d = 0.0; \
        __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
   } while (0)
91
92
/* Signature shared by every test routine: no arguments, no return value. */
typedef void (*test_func_t)(void);
/* Forward typedef for struct test_table. */
typedef struct test_table test_table_t;

/* Defines for the instruction groups; use as a bit field to identify them */
#define SCALAR_DIV_INST    0x0001
#define OTHER_INST  0x0002
99
100/* These functions below that construct a table of floating point
101 * values were lifted from none/tests/ppc32/jm-insns.c.
102 */
103
/* Debug tracing for the argument-table builders: prints to stderr when
 * DEBUG_ARGS_BUILD is defined, otherwise expands to an empty statement
 * (the arguments are then not evaluated). */
#if defined (DEBUG_ARGS_BUILD)
#define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
#else
#define AB_DPRINTF(fmt, args...) do { } while (0)
#endif

/*
 * Assemble a 64-bit IEEE-754 double-precision bit pattern and store it in
 * the 8 bytes at farg.
 *
 *   farg  destination; must have room for 8 bytes
 *   s     sign, placed in bit 63
 *   _exp  biased exponent, placed starting at bit 52
 *   mant  fraction, OR-ed into the low bits (not masked)
 *
 * The pattern is copied with memcpy rather than stored through a casted
 * pointer, so a destination of type double is never written through an
 * incompatible lvalue.  Going through a local double keeps the effective
 * type of dynamically allocated destinations as double.
 */
static inline void register_farg (void *farg,
                                  int s, uint16_t _exp, uint64_t mant)
{
   uint64_t bits;
   double value;

   bits = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;
   memcpy(&value, &bits, sizeof value);
   memcpy(farg, &value, sizeof value);
   /* Casts keep the %llx conversions correct where uint64_t is
    * unsigned long. */
   AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
              s, _exp, (unsigned long long)mant,
              (unsigned long long)bits, value);
}
120
/*
 * Assemble a 32-bit IEEE-754 single-precision bit pattern and store it in
 * the 4 bytes at farg.
 *
 *   farg  destination; must have room for 4 bytes
 *   s     sign, placed in bit 31
 *   _exp  biased exponent, placed starting at bit 23
 *   mant  fraction, OR-ed into the low bits (not masked)
 *
 * The pattern is copied with memcpy rather than stored through a casted
 * pointer, so a destination of type float is never written through an
 * incompatible lvalue.
 */
static inline void register_sp_farg (void *farg,
                                     int s, uint16_t _exp, uint32_t mant)
{
   uint32_t bits;
   float value;

   bits = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;
   memcpy(&value, &bits, sizeof value);
   memcpy(farg, &value, sizeof value);
}
128
/* One two-operand test case.
 * NOTE(review): both fields are presumably indices into the special-value
 * tables filled in by build_special_fargs_table() -- confirm at the users. */
struct fp_test_args {
   int fra_idx;   /* selects the FRA operand */
   int frb_idx;   /* selects the FRB operand */
};
typedef struct fp_test_args fp_test_args_t;
133
134
135fp_test_args_t fp_cmp_tests[] = {
136                                   {8, 8},
137                                   {8, 14},
138                                   {8, 6},
139                                   {8, 5},
140                                   {8, 4},
141                                   {8, 7},
142                                   {8, 9},
143                                   {8, 11},
144                                   {14, 8},
145                                   {14, 14},
146                                   {14, 6},
147                                   {14, 5},
148                                   {14, 4},
149                                   {14, 7},
150                                   {14, 9},
151                                   {14, 11},
152                                   {6, 8},
153                                   {6, 14},
154                                   {6, 6},
155                                   {6, 5},
156                                   {6, 4},
157                                   {6, 7},
158                                   {6, 9},
159                                   {6, 11},
160                                   {5, 8},
161                                   {5, 14},
162                                   {5, 6},
163                                   {5, 5},
164                                   {5, 4},
165                                   {5, 7},
166                                   {5, 9},
167                                   {5, 11},
168                                   {4, 8},
169                                   {4, 14},
170                                   {4, 6},
171                                   {4, 5},
172                                   {4, 1},
173                                   {4, 7},
174                                   {4, 9},
175                                   {4, 11},
176                                   {7, 8},
177                                   {7, 14},
178                                   {7, 6},
179                                   {7, 5},
180                                   {7, 4},
181                                   {7, 7},
182                                   {7, 9},
183                                   {7, 11},
184                                   {10, 8},
185                                   {10, 14},
186                                   {10, 6},
187                                   {10, 5},
188                                   {10, 4},
189                                   {10, 7},
190                                   {10, 9},
191                                   {10, 10},
192                                   {12, 8},
193                                   {12, 14},
194                                   {12, 6},
195                                   {12, 5},
196                                   {1, 1},
197                                   {2, 2},
198                                   {3, 3},
199                                   {4, 4},
200};
201
202
203fp_test_args_t two_arg_fp_tests[] = {
204                                     {8, 8},
205                                     {8, 14},
206                                     {15, 16},
207                                     {8, 5},
208                                     {8, 4},
209                                     {8, 7},
210                                     {8, 9},
211                                     {8, 11},
212                                     {14, 8},
213                                     {14, 14},
214                                     {14, 6},
215                                     {14, 5},
216                                     {14, 4},
217                                     {14, 7},
218                                     {14, 9},
219                                     {14, 11},
220                                     {6, 8},
221                                     {6, 14},
222                                     {6, 6},
223                                     {6, 5},
224                                     {6, 4},
225                                     {6, 7},
226                                     {6, 9},
227                                     {6, 11},
228                                     {5, 8},
229                                     {5, 14},
230                                     {5, 6},
231                                     {5, 5},
232                                     {5, 4},
233                                     {5, 7},
234                                     {5, 9},
235                                     {5, 11},
236                                     {4, 8},
237                                     {4, 14},
238                                     {4, 6},
239                                     {4, 5},
240                                     {4, 1},
241                                     {4, 7},
242                                     {4, 9},
243                                     {4, 11},
244                                     {7, 8},
245                                     {7, 14},
246                                     {7, 6},
247                                     {7, 5},
248                                     {7, 4},
249                                     {7, 7},
250                                     {7, 9},
251                                     {7, 11},
252                                     {10, 8},
253                                     {10, 14},
254                                     {12, 6},
255                                     {12, 5},
256                                     {10, 4},
257                                     {10, 7},
258                                     {10, 9},
259                                     {10, 11},
260                                     {12, 8 },
261                                     {12, 14},
262                                     {12, 6},
263                                     {15, 16},
264                                     {15, 16},
265                                     {9, 11},
266                                     {11, 11},
267                                     {11, 12}
268};
269
270
/* Number of entries stored by build_special_fargs_table(). */
static int nb_special_fargs;
/* Double-precision special values; NULL until the table has been built. */
static double * spec_fargs;
/* Single-precision counterparts of spec_fargs, allocated at the same time. */
static float * spec_sp_fargs;
274
275static void build_special_fargs_table(void)
276{
277/*
278  Entry  Sign Exp   fraction                  Special value
279   0      0   3fd   0x8000000000000ULL         Positive finite number
280   1      0   404   0xf000000000000ULL         ...
281   2      0   001   0x8000000b77501ULL         ...
282   3      0   7fe   0x800000000051bULL         ...
283   4      0   012   0x3214569900000ULL         ...
284   5      0   000   0x0000000000000ULL         +0.0 (+zero)
285   6      1   000   0x0000000000000ULL         -0.0 (-zero)
286   7      0   7ff   0x0000000000000ULL         +infinity
287   8      1   7ff   0x0000000000000ULL         -infinity
288   9      0   7ff   0x7FFFFFFFFFFFFULL         +SNaN
289   10     1   7ff   0x7FFFFFFFFFFFFULL         -SNaN
290   11     0   7ff   0x8000000000000ULL         +QNaN
291   12     1   7ff   0x8000000000000ULL         -QNaN
292   13     1   000   0x8340000078000ULL         Denormalized val (zero exp and non-zero fraction)
293   14     1   40d   0x0650f5a07b353ULL         Negative finite number
294   15     0   412   0x32585a9900000ULL         A couple more positive finite numbers
295   16     0   413   0x82511a2000000ULL         ...
296*/
297
298   uint64_t mant;
299   uint32_t mant_sp;
300   uint16_t _exp;
301   int s;
302   int j, i = 0;
303
304   if (spec_fargs)
305      return;
306
307   spec_fargs = malloc( 17 * sizeof(double) );
308   spec_sp_fargs = malloc( 17 * sizeof(float) );
309
310   // #0
311   s = 0;
312   _exp = 0x3fd;
313   mant = 0x8000000000000ULL;
314   register_farg(&spec_fargs[i++], s, _exp, mant);
315
316   // #1
317   s = 0;
318   _exp = 0x404;
319   mant = 0xf000000000000ULL;
320   register_farg(&spec_fargs[i++], s, _exp, mant);
321
322   /* None of the ftdiv tests succeed.
323    * FRA = value #0; FRB = value #1
324    * ea_ = -2; e_b = 5
325    * fl_flag || fg_flag || fe_flag = 100
326    */
327
328   /*************************************************
329    *     fe_flag tests
330    *
331    *************************************************/
332
333   /* fe_flag <- 1 if FRA is a NaN
334    * FRA = value #9; FRB = value #1
335    * e_a = 1024; e_b = 5
336    * fl_flag || fg_flag || fe_flag = 101
337    */
338
339   /* fe_flag <- 1 if FRB is a NaN
340    * FRA = value #1; FRB = value #12
341    * e_a = 5; e_b = 1024
342    * fl_flag || fg_flag || fe_flag = 101
343    */
344
345   /* fe_flag <- 1 if e_b <= -1022
346    * FRA = value #0; FRB = value #2
347    * e_a = -2; e_b = -1022
348    * fl_flag || fg_flag || fe_flag = 101
349    *
350    */
351   // #2
352   s = 0;
353   _exp = 0x001;
354   mant = 0x8000000b77501ULL;
355   register_farg(&spec_fargs[i++], s, _exp, mant);
356
357   /* fe_flag <- 1 if e_b >= 1021
358    * FRA = value #1; FRB = value #3
359    * e_a = 5; e_b = 1023
360    * fl_flag || fg_flag || fe_flag = 101
361    */
362   // #3
363   s = 0;
364   _exp = 0x7fe;
365   mant = 0x800000000051bULL;
366   register_farg(&spec_fargs[i++], s, _exp, mant);
367
368   /* fe_flag <- 1 if FRA != 0 && e_a - e_b >= 1023
369    * Let FRA = value #3 and FRB be value #0.
370    * e_a = 1023; e_b = -2
371    * fl_flag || fg_flag || fe_flag = 101
372    */
373
374   /* fe_flag <- 1 if FRA != 0 && e_a - e_b <= -1023
375    * Let FRA = value #0 above and FRB be value #3 above
376    * e_a = -2; e_b = 1023
377    * fl_flag || fg_flag || fe_flag = 101
378    */
379
380   /* fe_flag <- 1 if FRA != 0 && e_a <= -970
381    * Let FRA = value #4 and FRB be value #0
382    * e_a = -1005; e_b = -2
383    * fl_flag || fg_flag || fe_flag = 101
384   */
385   // #4
386   s = 0;
387   _exp = 0x012;
388   mant = 0x3214569900000ULL;
389   register_farg(&spec_fargs[i++], s, _exp, mant);
390
391   /*************************************************
392    *     fg_flag tests
393    *
394    *************************************************/
395   /* fg_flag <- 1 if FRA is an Infinity
396    * NOTE: FRA = Inf also sets fe_flag
397    * Do two tests, using values #7 and #8 (+/- Inf) for FRA.
398    * Test 1:
399    *   Let FRA be value #7 and FRB be value #1
400    *   e_a = 1024; e_b = 5
401    *   fl_flag || fg_flag || fe_flag = 111
402    *
403    * Test 2:
404    *   Let FRA be value #8 and FRB be value #1
405    *   e_a = 1024; e_b = 5
406    *   fl_flag || fg_flag || fe_flag = 111
407    *
408    */
409
410   /* fg_flag <- 1 if FRB is an Infinity
411    * NOTE: FRB = Inf also sets fe_flag
412    * Let FRA be value #1 and FRB be value #7
413    * e_a = 5; e_b = 1024
414    * fl_flag || fg_flag || fe_flag = 111
415    */
416
417   /* fg_flag <- 1 if FRB is denormalized
418    * NOTE: e_b < -1022 ==> fe_flag <- 1
419    * Let FRA be value #0 and FRB be value #13
420    * e_a = -2; e_b = -1023
421    * fl_flag || fg_flag || fe_flag = 111
422    */
423
424   /* fg_flag <- 1 if FRB is +zero
425    * NOTE: FRA = Inf also sets fe_flag
426    * Let FRA = val #5; FRB = val #5
427    * ea_ = -1023; e_b = -1023
428    * fl_flag || fg_flag || fe_flag = 111
429    */
430
431   /* fg_flag <- 1 if FRB is -zero
432    * NOTE: FRA = Inf also sets fe_flag
433    * Let FRA = val #5; FRB = val #6
434    * ea_ = -1023; e_b = -1023
435    * fl_flag || fg_flag || fe_flag = 111
436    */
437
438   /* Special values */
439   /* +0.0      : 0 0x000 0x0000000000000 */
440   // #5
441   s = 0;
442   _exp = 0x000;
443   mant = 0x0000000000000ULL;
444   register_farg(&spec_fargs[i++], s, _exp, mant);
445
446   /* -0.0      : 1 0x000 0x0000000000000 */
447   // #6
448   s = 1;
449   _exp = 0x000;
450   mant = 0x0000000000000ULL;
451   register_farg(&spec_fargs[i++], s, _exp, mant);
452
453   /* +infinity : 0 0x7FF 0x0000000000000  */
454   // #7
455   s = 0;
456   _exp = 0x7FF;
457   mant = 0x0000000000000ULL;
458   register_farg(&spec_fargs[i++], s, _exp, mant);
459
460   /* -infinity : 1 0x7FF 0x0000000000000 */
461   // #8
462   s = 1;
463   _exp = 0x7FF;
464   mant = 0x0000000000000ULL;
465   register_farg(&spec_fargs[i++], s, _exp, mant);
466
467   /*
468    * This comment applies to values #9 and #10 below:
469    * When src is a SNaN, it's converted to a QNaN first before rounding to single-precision,
470    * so we can't just copy the double-precision value to the corresponding slot in the
471    * single-precision array (i.e., in the loop at the end of this function).  Instead, we
472    * have to manually set the bits using register_sp_farg().
473    */
474
475   /* +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF */
476   // #9
477   s = 0;
478   _exp = 0x7FF;
479   mant = 0x7FFFFFFFFFFFFULL;
480   register_farg(&spec_fargs[i++], s, _exp, mant);
481   _exp = 0xff;
482   mant_sp = 0x3FFFFF;
483   register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
484
485   /* -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF */
486   // #10
487   s = 1;
488   _exp = 0x7FF;
489   mant = 0x7FFFFFFFFFFFFULL;
490   register_farg(&spec_fargs[i++], s, _exp, mant);
491   _exp = 0xff;
492   mant_sp = 0x3FFFFF;
493   register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
494
495   /* +QNaN     : 0 0x7FF 0x8000000000000 */
496   // #11
497   s = 0;
498   _exp = 0x7FF;
499   mant = 0x8000000000000ULL;
500   register_farg(&spec_fargs[i++], s, _exp, mant);
501
502   /* -QNaN     : 1 0x7FF 0x8000000000000 */
503   // #12
504   s = 1;
505   _exp = 0x7FF;
506   mant = 0x8000000000000ULL;
507   register_farg(&spec_fargs[i++], s, _exp, mant);
508
509   /* denormalized value */
510   // #13
511   s = 1;
512   _exp = 0x000;
513   mant = 0x8340000078000ULL;
514   register_farg(&spec_fargs[i++], s, _exp, mant);
515
516   /* Negative finite number */
517   // #14
518   s = 1;
519   _exp = 0x40d;
520   mant = 0x0650f5a07b353ULL;
521   register_farg(&spec_fargs[i++], s, _exp, mant);
522
523   /* A couple positive finite numbers ... */
524   // #15
525   s = 0;
526   _exp = 0x412;
527   mant = 0x32585a9900000ULL;
528   register_farg(&spec_fargs[i++], s, _exp, mant);
529
530   // #16
531   s = 0;
532   _exp = 0x413;
533   mant = 0x82511a2000000ULL;
534   register_farg(&spec_fargs[i++], s, _exp, mant);
535
536   nb_special_fargs = i;
537   for (j = 0; j < i; j++) {
538      if (!(j == 9 || j == 10))
539         spec_sp_fargs[j] = spec_fargs[j];
540   }
541}
542
543
544struct test_table
545{
546   test_func_t test_category;
547   char * name;
548   unsigned int test_group;
549};
550
/* Operand precision used by a test entry. */
typedef enum {
   SINGLE_TEST = 0,   /* single-precision operands */
   DOUBLE_TEST = 1    /* double-precision operands */
} precision_type_t;
555
/* Category tag carried by each vx_fp_test_t entry.  Numeric values are
 * spelled out; they match what implicit numbering would assign. */
typedef enum {
   VX_SCALAR_FP_NMSUB        = 0,
   // ALL VECTOR-TYPE OPS SHOULD BE ADDED AFTER THIS LINE
   VX_VECTOR_FP_MULT_AND_OP2 = 10,
   // and before this line
   VX_BASIC_CMP              = 30,
   VX_CONV_WORD              = 31,
   VX_DEFAULT                = 32
} vx_fp_test_type;
565
566typedef struct vx_fp_test
567{
568   test_func_t test_func;
569   const char * name;
570   fp_test_args_t * targs;
571   int num_tests;
572   precision_type_t precision;
573   vx_fp_test_type type;
574   const char * op;
575} vx_fp_test_t;
576
/* Operand and result vectors shared by the inline-asm test routines:
 * vec_inA / vec_inB / vec_inC are inputs, vec_out receives the result. */
static vector unsigned int vec_out, vec_inA, vec_inB, vec_inC;

/* When non-zero, the vector compare tests issue the record ("dot") form of
 * their instruction. */
static Bool do_dot;
580static void test_xvcmpeqdp(void)
581{
582   if (do_dot)
583      __asm__ __volatile__ ("xvcmpeqdp.          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
584   else
585      __asm__ __volatile__ ("xvcmpeqdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
586}
587
588static void test_xvcmpgedp(void)
589{
590   if (do_dot)
591      __asm__ __volatile__ ("xvcmpgedp.          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
592   else
593      __asm__ __volatile__ ("xvcmpgedp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
594}
595
596static void test_xvcmpgtdp(void)
597{
598   if (do_dot)
599      __asm__ __volatile__ ("xvcmpgtdp.          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
600   else
601      __asm__ __volatile__ ("xvcmpgtdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
602}
603
604static void test_xvcmpeqsp(void)
605{
606   if (do_dot)
607      __asm__ __volatile__ ("xvcmpeqsp.          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
608   else
609      __asm__ __volatile__ ("xvcmpeqsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
610}
611
612static void test_xvcmpgesp(void)
613{
614   if (do_dot)
615      __asm__ __volatile__ ("xvcmpgesp.          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
616   else
617      __asm__ __volatile__ ("xvcmpgesp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
618}
619
620static void test_xvcmpgtsp(void)
621{
622   if (do_dot)
623      __asm__ __volatile__ ("xvcmpgtsp.          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
624   else
625      __asm__ __volatile__ ("xvcmpgtsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
626}
627
/* Non-zero selects the "a" form of a multiply-add style instruction
 * (e.g. xvmaddadp); zero selects the "m" form (e.g. xvmaddmdp). */
static Bool do_aXp;
/* Non-zero selects the double-precision ("dp") variant, zero the
 * single-precision ("sp") variant. */
static Bool do_dp;
630static void test_xsnmsub(void)
631{
632   if (do_aXp)
633      __asm__ __volatile__ ("xsnmsubadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
634   else
635      __asm__ __volatile__ ("xsnmsubmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
636}
637
638static void test_xvmadd(void)
639{
640   if (do_aXp)
641      if (do_dp)
642         __asm__ __volatile__ ("xvmaddadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
643      else
644         __asm__ __volatile__ ("xvmaddasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
645   else
646      if (do_dp)
647         __asm__ __volatile__ ("xvmaddmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
648      else
649         __asm__ __volatile__ ("xvmaddmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
650}
651
652static void test_xvnmadd(void)
653{
654   if (do_aXp)
655      if (do_dp)
656         __asm__ __volatile__ ("xvnmaddadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
657      else
658         __asm__ __volatile__ ("xvnmaddasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
659   else
660      if (do_dp)
661         __asm__ __volatile__ ("xvnmaddmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
662      else
663         __asm__ __volatile__ ("xvnmaddmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
664}
665
666static void test_xvnmsub(void)
667{
668   if (do_aXp)
669      if (do_dp)
670         __asm__ __volatile__ ("xvnmsubadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
671      else
672         __asm__ __volatile__ ("xvnmsubasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
673   else
674      if (do_dp)
675         __asm__ __volatile__ ("xvnmsubmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
676      else
677         __asm__ __volatile__ ("xvnmsubmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
678}
679
680static void test_xvmsub(void)
681{
682   if (do_aXp)
683      if (do_dp)
684         __asm__ __volatile__ ("xvmsubadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
685      else
686         __asm__ __volatile__ ("xvmsubasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
687   else
688      if (do_dp)
689         __asm__ __volatile__ ("xvmsubmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
690      else
691         __asm__ __volatile__ ("xvmsubmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
692}
693
694static void test_xssqrtdp(void)
695{
696   __asm__ __volatile__ ("xssqrtdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
697}
698
699static void test_xsrdpim(void)
700{
701   __asm__ __volatile__ ("xsrdpim   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
702}
703
704static void test_xsrdpip(void)
705{
706   __asm__ __volatile__ ("xsrdpip   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
707}
708
709static void test_xstdivdp(void)
710{
711   __asm__ __volatile__ ("xstdivdp   6, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
712}
713
714static void test_xsmaxdp(void)
715{
716   __asm__ __volatile__ ("xsmaxdp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
717}
718
719static void test_xsmindp(void)
720{
721   __asm__ __volatile__ ("xsmindp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
722}
723
724static void test_xvadddp(void)
725{
726   __asm__ __volatile__ ("xvadddp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
727}
728
729static void test_xvaddsp(void)
730{
731   __asm__ __volatile__ ("xvaddsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
732}
733
734static void test_xvdivdp(void)
735{
736   __asm__ __volatile__ ("xvdivdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
737}
738
739static void test_xvdivsp(void)
740{
741   __asm__ __volatile__ ("xvdivsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
742}
743
744static void test_xvmuldp(void)
745{
746   __asm__ __volatile__ ("xvmuldp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
747}
748
749static void test_xvmulsp(void)
750{
751   __asm__ __volatile__ ("xvmulsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
752}
753
754static void test_xvsubdp(void)
755{
756   __asm__ __volatile__ ("xvsubdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
757}
758
759static void test_xvmaxdp(void)
760{
761   __asm__ __volatile__ ("xvmaxdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
762}
763
764static void test_xvmindp(void)
765{
766   __asm__ __volatile__ ("xvmindp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
767}
768
769static void test_xvmaxsp(void)
770{
771   __asm__ __volatile__ ("xvmaxsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
772}
773
774static void test_xvminsp(void)
775{
776   __asm__ __volatile__ ("xvminsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
777}
778
779static void test_xvsubsp(void)
780{
781   __asm__ __volatile__ ("xvsubsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
782}
783
784static void test_xvresp(void)
785{
786   __asm__ __volatile__ ("xvresp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
787}
788
789static void test_xxsel(void)
790{
791   unsigned long long * dst;
792   unsigned long long xa[] =  { 0xa12bc37de56f9708ULL, 0x3894c1fddeadbeefULL};
793   unsigned long long xb[] =  { 0xfedc432124681235ULL, 0xf1e2d3c4e0057708ULL};
794   unsigned long long xc[] =  { 0xffffffff01020304ULL, 0x128934bd00000000ULL};
795
796   memcpy(&vec_inA, xa, 16);
797   memcpy(&vec_inB, xb, 16);
798   memcpy(&vec_inC, xc, 16);
799
800
801   __asm__ __volatile__ ("xxsel   %x0, %x1, %x2, %x3" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB),"wa" (vec_inC));
802   dst = (unsigned long long *) &vec_out;
803   printf("xxsel %016llx,%016llx,%016llx => %016llx\n", xa[0], xb[0], xc[0], *dst);
804   dst++;
805   printf("xxsel %016llx,%016llx,%016llx => %016llx\n", xa[1], xb[1], xc[1], *dst);
806   printf("\n");
807}
808
809static void test_xxspltw(void)
810{
811   int uim;
812   unsigned long long * dst = NULL;
813   unsigned int xb[] =  { 0xfedc4321, 0x24681235, 0xf1e2d3c4, 0xe0057708};
814   int i;
815   void * vecB_ptr = &vec_inB;
816   if (isLE) {
817      for (i = 3; i >=0; i--) {
818         memcpy(vecB_ptr, &xb[i], 4);
819         vecB_ptr+=4;
820      }
821   } else {
822      for (i = 0; i < 4; i++) {
823         memcpy(vecB_ptr, &xb[i], 4);
824         vecB_ptr+=4;
825      }
826   }
827
828   for (uim = 0; uim < 4; uim++) {
829      switch (uim) {
830         case 0:
831            __asm__ __volatile__ ("xxspltw   %x0, %x1, 0" : "=wa" (vec_out): "wa" (vec_inB));
832            break;
833         case 1:
834            __asm__ __volatile__ ("xxspltw   %x0, %x1, 1" : "=wa" (vec_out): "wa" (vec_inB));
835            break;
836         case 2:
837            __asm__ __volatile__ ("xxspltw   %x0, %x1, 2" : "=wa" (vec_out): "wa" (vec_inB));
838            break;
839         case 3:
840            __asm__ __volatile__ ("xxspltw   %x0, %x1, 3" : "=wa" (vec_out): "wa" (vec_inB));
841            break;
842      }
843      dst = (unsigned long long *) &vec_out;
844      printf("xxspltw 0x%08x%08x%08x%08x %d=> 0x%016llx",  xb[0], xb[1],
845             xb[2], xb[3], uim, *dst);
846      dst++;
847      printf("%016llx\n", *dst);
848   }
849   printf("\n");
850}
851
852static void test_xscvdpsxws(void)
853{
854   __asm__ __volatile__ ("xscvdpsxws  %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
855}
856
857static void test_xscvdpuxds(void)
858{
859   __asm__ __volatile__ ("xscvdpuxds  %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
860}
861
862static void test_xvcpsgndp(void)
863{
864   __asm__ __volatile__  ("xvcpsgndp  %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
865}
866
867static void test_xvcpsgnsp(void)
868{
869   __asm__ __volatile__  ("xvcpsgnsp  %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
870}
871
872static void test_xvcvdpsxws(void)
873{
874   __asm__ __volatile__ ("xvcvdpsxws  %x0, %x1 " : "=wa" (vec_out): "wa" (vec_inB));
875}
876
877static void test_xvcvspsxws(void)
878{
879   __asm__ __volatile__ ("xvcvspsxws  %x0, %x1 " : "=wa" (vec_out): "wa" (vec_inB));
880}
881
882static vx_fp_test_t
883vx_vector_one_fp_arg_tests[] = {
884                                { &test_xvresp, "xvresp", NULL, 16, SINGLE_TEST, VX_BASIC_CMP, "1/x"},
885                                { &test_xvcvdpsxws, "xvcvdpsxws", NULL, 16, DOUBLE_TEST, VX_CONV_WORD, "conv"},
886                                { &test_xvcvspsxws, "xvcvspsxws", NULL, 16, SINGLE_TEST, VX_CONV_WORD, "conv"},
887                                { NULL, NULL, NULL, 0 , 0, 0, NULL}
888};
889
890static vx_fp_test_t
891vx_vector_fp_tests[] = {
892                        { &test_xvcmpeqdp, "xvcmpeqdp", fp_cmp_tests, 64, DOUBLE_TEST, VX_BASIC_CMP, "eq"},
893                        { &test_xvcmpgedp, "xvcmpgedp", fp_cmp_tests, 64, DOUBLE_TEST, VX_BASIC_CMP, "ge"},
894                        { &test_xvcmpgtdp, "xvcmpgtdp", fp_cmp_tests, 64, DOUBLE_TEST, VX_BASIC_CMP, "gt"},
895                        { &test_xvcmpeqsp, "xvcmpeqsp", fp_cmp_tests, 64, SINGLE_TEST, VX_BASIC_CMP, "eq"},
896                        { &test_xvcmpgesp, "xvcmpgesp", fp_cmp_tests, 64, SINGLE_TEST, VX_BASIC_CMP, "ge"},
897                        { &test_xvcmpgtsp, "xvcmpgtsp", fp_cmp_tests, 64, SINGLE_TEST, VX_BASIC_CMP, "gt"},
898                        { &test_xvadddp, "xvadddp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "+" },
899                        { &test_xvaddsp, "xvaddsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "+" },
900                        { &test_xvdivdp, "xvdivdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "/" },
901                        { &test_xvdivsp, "xvdivsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "/" },
902                        { &test_xvmuldp, "xvmuldp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "*" },
903                        { &test_xvmulsp, "xvmulsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "*" },
904                        { &test_xvsubdp, "xvsubdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "-" },
905                        { &test_xvsubsp, "xvsubsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "-" },
906                        { &test_xvmaxdp, "xvmaxdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "@max@" },
907                        { &test_xvmindp, "xvmindp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "@min@" },
908                        { &test_xvmaxsp, "xvmaxsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "@max@" },
909                        { &test_xvminsp, "xvminsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "@min@" },
910                        { &test_xvcpsgndp, "xvcpsgndp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "+-cp"},
911                        { &test_xvcpsgnsp, "xvcpsgnsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "+-cp"},
912                        { NULL, NULL, NULL, 0 , 0, 0, NULL}
913};
914
915
916static vx_fp_test_t
917vx_aORm_fp_tests[] = {
918                       { &test_xsnmsub, "xsnmsub", two_arg_fp_tests, 64, DOUBLE_TEST, VX_SCALAR_FP_NMSUB, "!*-"},
919                       { &test_xvmadd, "xvmadd", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*+"},
920                       { &test_xvmadd, "xvmadd", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*+"},
921                       { &test_xvnmadd, "xvnmadd", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*+"},
922                       { &test_xvnmadd, "xvnmadd", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*+"},
923                       { &test_xvmsub, "xvmsub", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*-"},
924                       { &test_xvmsub, "xvmsub", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*-"},
925                       { &test_xvnmsub, "xvnmsub", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*-"},
926                       { &test_xvnmsub, "xvnmsub", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*-"},
927                       { NULL, NULL, NULL, 0, 0, 0,  NULL }
928};
929
930static vx_fp_test_t
931vx_simple_scalar_fp_tests[] = {
932                               { &test_xssqrtdp, "xssqrtdp", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL},
933                               { &test_xsrdpim, "xsrdpim", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL},
934                               { &test_xsrdpip, "xsrdpip", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL},
935                               { &test_xstdivdp, "xstdivdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, NULL},
936                               { &test_xsmaxdp, "xsmaxdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, NULL},
937                               { &test_xsmindp, "xsmindp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, NULL},
938                               { &test_xscvdpsxws, "xscvdpsxws", NULL, 17, DOUBLE_TEST, VX_CONV_WORD, NULL},
939                               { &test_xscvdpuxds, "xscvdpuxds", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL},
940                               { NULL, NULL, NULL, 0, 0, 0, NULL }
941};
942
943
944#ifdef __powerpc64__
945static void test_bpermd(void)
946{
947   /* NOTE: Bit number is '0 . . . 63'
948    *
949    * Permuted bits are generated bit 0 -7 as follows:
950    *    index = (r14)8*i:8*i+7
951    *    perm[i] = (r15)index
952    *
953    * So, for i = 0, index is (r14)8*0:8*0+7, or (r14)0:7, which is the MSB
954    * byte of r14, 0x1b(27/base 10).  This identifies bit 27 of r15, which is '1'.
955    * For i = 1, index is 0x2c, identifying bit 44 of r15, which is '1'.
956    * So the result of the first two iterations of i are:
957    *   perm = 0b01xxxxxx
958    *
959    */
960   r15 = 0xa12bc37de56f9708ULL;
961   r14 = 0x1b2c31f030000001ULL;
962   __asm__ __volatile__ ("bpermd %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
963   printf("bpermd: 0x%016llx : 0x%016llx => 0x%llx\n", (unsigned long long)r14,
964          (unsigned long long)r15, (unsigned long long)r17);
965   printf("\n");
966}
967#endif
968
969static Bool do_OE;
970typedef enum {
971   DIV_BASE = 1,
972   DIV_OE = 2,
973   DIV_DOT = 4,
974} div_type_t;
975/* Possible divde type combinations are:
976 *   - base
977 *   - base+dot
978 *   - base+OE
979 *   - base+OE+dot
980 */
981#ifdef __powerpc64__
982static void test_divde(void)
983{
984   int divde_type = DIV_BASE;
985   if (do_OE)
986      divde_type |= DIV_OE;
987   if (do_dot)
988      divde_type |= DIV_DOT;
989
990   switch (divde_type) {
991      case 1:
992        SET_CR_XER_ZERO;
993         __asm__ __volatile__ ("divde %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
994         GET_CR_XER(div_flags, div_xer);
995         break;
996      case 3:
997        SET_CR_XER_ZERO;
998         __asm__ __volatile__ ("divdeo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
999         GET_CR_XER(div_flags, div_xer);
1000         break;
1001      case 5:
1002        SET_CR_XER_ZERO;
1003         __asm__ __volatile__ ("divde. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
1004         GET_CR_XER(div_flags, div_xer);
1005         break;
1006      case 7:
1007        SET_CR_XER_ZERO;
1008         __asm__ __volatile__ ("divdeo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
1009         GET_CR_XER(div_flags, div_xer);
1010         break;
1011      default:
1012         fprintf(stderr, "Invalid divde type. Exiting\n");
1013         exit(1);
1014   }
1015}
1016#endif
1017
1018static void test_divweu(void)
1019{
1020   int divweu_type = DIV_BASE;
1021   if (do_OE)
1022      divweu_type |= DIV_OE;
1023   if (do_dot)
1024      divweu_type |= DIV_DOT;
1025
1026   switch (divweu_type) {
1027      case 1:
1028        SET_CR_XER_ZERO;
1029         __asm__ __volatile__ ("divweu %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
1030         GET_CR_XER(div_flags, div_xer);
1031         break;
1032      case 3:
1033        SET_CR_XER_ZERO;
1034         __asm__ __volatile__ ("divweuo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
1035         GET_CR_XER(div_flags, div_xer);
1036         break;
1037      case 5:
1038        SET_CR_XER_ZERO;
1039         __asm__ __volatile__ ("divweu. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
1040         GET_CR_XER(div_flags, div_xer);
1041         break;
1042      case 7:
1043        SET_CR_XER_ZERO;
1044         __asm__ __volatile__ ("divweuo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
1045         GET_CR_XER(div_flags, div_xer);
1046         break;
1047      default:
1048         fprintf(stderr, "Invalid divweu type. Exiting\n");
1049         exit(1);
1050   }
1051}
1052
1053static void test_fctiduz(void)
1054{
1055   if (do_dot)
1056      __asm__ __volatile__ ("fctiduz. %0, %1" : "=d" (f17) : "d" (f14));
1057   else
1058      __asm__ __volatile__ ("fctiduz %0, %1" : "=d" (f17) : "d" (f14));
1059}
1060
1061static void test_fctidu(void)
1062{
1063   if (do_dot)
1064      __asm__ __volatile__ ("fctidu. %0, %1" : "=d" (f17) : "d" (f14));
1065   else
1066      __asm__ __volatile__ ("fctidu %0, %1" : "=d" (f17) : "d" (f14));
1067}
1068
1069static void test_fctiwuz(void)
1070{
1071   if (do_dot)
1072      __asm__ __volatile__ ("fctiwuz. %0, %1" : "=d" (f17) : "d" (f14));
1073   else
1074      __asm__ __volatile__ ("fctiwuz %0, %1" : "=d" (f17) : "d" (f14));
1075}
1076
1077static void test_fctiwu(void)
1078{
1079   if (do_dot)
1080      __asm__ __volatile__ ("fctiwu. %0, %1" : "=d" (f17) : "d" (f14));
1081   else
1082      __asm__ __volatile__ ("fctiwu %0, %1" : "=d" (f17) : "d" (f14));
1083}
1084
1085typedef struct simple_test {
1086   test_func_t test_func;
1087   char * name;
1088   precision_type_t precision;
1089} simple_test_t;
1090
1091static simple_test_t fct_tests[] = {
1092                                    { &test_fctiduz, "fctiduz", DOUBLE_TEST },
1093                                    { &test_fctidu, "fctidu", DOUBLE_TEST },
1094                                    { &test_fctiwuz, "fctiwuz", SINGLE_TEST },
1095                                    { &test_fctiwu, "fctiwu", SINGLE_TEST },
1096                                   { NULL, NULL }
1097};
1098
1099static void setup_sp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
1100{
1101   int a_idx, b_idx, i;
1102   void * inA, * inB;
1103   void * vec_src = swap_inputs ? &vec_out : &vec_inB;
1104
1105   for (i = 0; i < 4; i++) {
1106      a_idx = targs->fra_idx;
1107      b_idx = targs->frb_idx;
1108      inA = (void *)&spec_sp_fargs[a_idx];
1109      inB = (void *)&spec_sp_fargs[b_idx];
1110      // copy single precision FP  into vector element i
1111      memcpy(((void *)&vec_inA) + (i * 4), inA, 4);
1112      memcpy(vec_src + (i * 4), inB, 4);
1113      targs++;
1114   }
1115}
1116
1117static void setup_dp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
1118{
1119   int a_idx, b_idx, i;
1120   void * inA, * inB;
1121   void * vec_src = swap_inputs ? (void *)&vec_out : (void *)&vec_inB;
1122
1123   for (i = 0; i < 2; i++) {
1124      a_idx = targs->fra_idx;
1125      b_idx = targs->frb_idx;
1126      inA = (void *)&spec_fargs[a_idx];
1127      inB = (void *)&spec_fargs[b_idx];
1128      // copy double precision FP  into vector element i
1129      memcpy(((void *)&vec_inA) + (i * 8), inA, 8);
1130      memcpy(vec_src + (i * 8), inB, 8);
1131      targs++;
1132   }
1133}
1134
1135#define VX_NOT_CMP_OP 0xffffffff
1136static void print_vector_fp_result(unsigned int cc, vx_fp_test_t * test_group, int i)
1137{
1138   int a_idx, b_idx, k;
1139   char * name = malloc(20);
1140   int dp = test_group->precision == DOUBLE_TEST ? 1 : 0;
1141   int loops = dp ? 2 : 4;
1142   fp_test_args_t * targs = &test_group->targs[i];
1143   unsigned long long * frA_dp, * frB_dp, * dst_dp;
1144   unsigned int * frA_sp, *frB_sp, * dst_sp;
1145   strcpy(name, test_group->name);
1146   printf("#%d: %s%s ", dp? i/2 : i/4, name, (do_dot ? "." : ""));
1147   for (k = 0; k < loops; k++) {
1148      a_idx = targs->fra_idx;
1149      b_idx = targs->frb_idx;
1150      if (k)
1151         printf(" AND ");
1152      if (dp) {
1153         frA_dp = (unsigned long long *)&spec_fargs[a_idx];
1154         frB_dp = (unsigned long long *)&spec_fargs[b_idx];
1155         printf("%016llx %s %016llx", *frA_dp, test_group->op, *frB_dp);
1156      } else {
1157         frA_sp = (unsigned int *)&spec_sp_fargs[a_idx];
1158         frB_sp = (unsigned int *)&spec_sp_fargs[b_idx];
1159         printf("%08x %s %08x", *frA_sp, test_group->op, *frB_sp);
1160      }
1161      targs++;
1162   }
1163   if (cc != VX_NOT_CMP_OP)
1164      printf(" ? cc=%x", cc);
1165
1166   if (dp) {
1167      dst_dp = (unsigned long long *) &vec_out;
1168      printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]);
1169   } else {
1170      dst_sp = (unsigned int *) &vec_out;
1171      printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]);
1172   }
1173   free(name);
1174}
1175
1176
1177static void print_vx_aORm_fp_result(unsigned long long * XT_arg, unsigned long long * XB_arg,
1178                                    vx_fp_test_t * test_group, int i)
1179{
1180   int a_idx, k;
1181   char * name = malloc(20);
1182   int dp = test_group->precision == DOUBLE_TEST ? 1 : 0;
1183   int loops = dp ? 2 : 4;
1184   fp_test_args_t * targs = &test_group->targs[i];
1185   unsigned long long frA_dp, * dst_dp;
1186   unsigned int frA_sp, * dst_sp;
1187
1188   strcpy(name, test_group->name);
1189   if (do_aXp)
1190      if (dp)
1191         strcat(name, "adp");
1192      else
1193         strcat(name, "asp");
1194   else
1195      if (dp)
1196         strcat(name, "mdp");
1197      else
1198         strcat(name, "msp");
1199
1200   printf("#%d: %s ", dp? i/2 : i/4, name);
1201   for (k = 0; k < loops; k++) {
1202      a_idx = targs->fra_idx;
1203      if (k)
1204         printf(" AND ");
1205      if (dp) {
1206         frA_dp = *((unsigned long long *)&spec_fargs[a_idx]);
1207         printf("%s(%016llx,%016llx,%016llx)", test_group->op, XT_arg[k], frA_dp, XB_arg[k]);
1208      } else {
1209         unsigned int * xt_sp = (unsigned int *)XT_arg;
1210         unsigned int * xb_sp = (unsigned int *)XB_arg;
1211         frA_sp = *((unsigned int *)&spec_sp_fargs[a_idx]);
1212         printf("%s(%08x,%08x,%08x)", test_group->op, xt_sp[k], frA_sp, xb_sp[k]);
1213      }
1214      targs++;
1215   }
1216
1217   if (dp) {
1218      dst_dp = (unsigned long long *) &vec_out;
1219      printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]);
1220   } else {
1221      dst_sp = (unsigned int *) &vec_out;
1222      printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]);
1223   }
1224   free(name);
1225}
1226
1227/* This function currently only supports double precision input arguments. */
1228static void test_vx_simple_scalar_fp_ops(void)
1229{
1230   test_func_t func;
1231   int k = 0;
1232
1233   build_special_fargs_table();
1234   while ((func = vx_simple_scalar_fp_tests[k].test_func)) {
1235      unsigned long long * frap, * frbp, * dst;
1236      unsigned int * pv;
1237      int idx;
1238      vx_fp_test_t test_group = vx_simple_scalar_fp_tests[k];
1239      Bool convToWord = (test_group.type == VX_CONV_WORD);
1240      if (test_group.precision != DOUBLE_TEST) {
1241         fprintf(stderr, "Unsupported single precision for scalar op in test_vx_aORm_fp_ops\n");
1242         exit(1);
1243      }
1244      pv = (unsigned int *)&vec_out;
1245      // clear vec_out
1246      for (idx = 0; idx < 4; idx++, pv++)
1247         *pv = 0;
1248
1249      /* If num_tests is exactly equal to nb_special_fargs, this implies the
1250       * instruction being tested only requires one floating point argument
1251       * (e.g. xssqrtdp).
1252       */
1253      if (test_group.num_tests == nb_special_fargs && !test_group.targs) {
1254         void * inB, * vec_void_ptr = (void *)&vec_inB;
1255         int i;
1256         if (isLE)
1257            vec_void_ptr += 8;
1258         for (i = 0; i < nb_special_fargs; i++) {
1259            inB = (void *)&spec_fargs[i];
1260            frbp = (unsigned long long *)&spec_fargs[i];
1261            memcpy(vec_void_ptr, inB, 8);
1262            (*func)();
1263            dst = (unsigned long long *) &vec_out;
1264            if (isLE)
1265               dst++;
1266            printf("#%d: %s %016llx => %016llx\n", i, test_group.name, *frbp,
1267                   convToWord ? (*dst & 0x00000000ffffffffULL) : *dst);
1268         }
1269      } else {
1270         void * inA, * inB, * vecA_void_ptr, * vecB_void_ptr;
1271         unsigned int condreg, flags;
1272         int isTdiv = (strstr(test_group.name, "xstdivdp") != NULL) ? 1 : 0;
1273         int i;
1274         if (isLE) {
1275            vecA_void_ptr = (void *)&vec_inA + 8;
1276            vecB_void_ptr = (void *)&vec_inB + 8;
1277         } else {
1278            vecA_void_ptr = (void *)&vec_inA;
1279            vecB_void_ptr = (void *)&vec_inB;
1280         }
1281         for (i = 0; i < test_group.num_tests; i++) {
1282            fp_test_args_t aTest = test_group.targs[i];
1283            inA = (void *)&spec_fargs[aTest.fra_idx];
1284            inB = (void *)&spec_fargs[aTest.frb_idx];
1285            frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
1286            frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
1287            // Only need to copy one doubleword into each vector's element 0
1288            memcpy(vecA_void_ptr, inA, 8);
1289            memcpy(vecB_void_ptr, inB, 8);
1290            SET_FPSCR_ZERO;
1291            SET_CR_XER_ZERO;
1292            (*func)();
1293            GET_CR(flags);
1294            if (isTdiv) {
1295               condreg = (flags & 0x000000f0) >> 4;
1296               printf("#%d: %s %016llx,%016llx => cr %x\n", i, test_group.name, *frap, *frbp, condreg);
1297            } else {
1298               dst = (unsigned long long *) &vec_out;
1299               if (isLE)
1300                  dst++;
1301               printf("#%d: %s %016llx,%016llx => %016llx\n", i, test_group.name,
1302                      *frap, *frbp, *dst);
1303            }
1304         }
1305      }
1306      printf( "\n" );
1307      k++;
1308   }
1309}
1310
1311static void test_vx_aORm_fp_ops(void)
1312{
1313   /* These ops need a third src argument, which is stored in element 0 of
1314    * VSX[XT] -- i.e., vec_out.  For the xs<ZZZ>m{d|s}p cases, VSX[XT] holds
1315    * src3 and VSX[XB] holds src2; for the xs<ZZZ>a{d|s}p cases, VSX[XT] holds
1316    * src2 and VSX[XB] holds src3.  The fp_test_args_t that holds the test
1317    * data (input args, result) contain only two inputs, so I arbitrarily
1318    * choose some spec_fargs elements for the third source argument.
1319    * Note that that by using the same input data for a given pair of
1320    * a{d|s}p/m{d|s}p-type instructions (by swapping the src2 and src3
1321    * arguments), the expected result should be the same.
1322    */
1323
1324   test_func_t func;
1325   int k;
1326   char * test_name = (char *)malloc(20);
1327   k = 0;
1328   do_dot = False;
1329
1330   build_special_fargs_table();
1331   while ((func = vx_aORm_fp_tests[k].test_func)) {
1332      int i, stride;
1333      Bool repeat = False;
1334      Bool scalar = False;
1335      unsigned long long * frap, * frbp, * dst;
1336      vx_fp_test_t test_group = vx_aORm_fp_tests[k];
1337      vx_fp_test_type test_type = test_group.type;
1338      do_dp = test_group.precision == DOUBLE_TEST ? True : False;
1339      frap = frbp = NULL;
1340
1341      if (test_type < VX_VECTOR_FP_MULT_AND_OP2) {
1342            scalar = True;
1343            strcpy(test_name, test_group.name);
1344            if (!repeat) {
1345               repeat = 1;
1346               stride = 1;
1347               // Only support double precision scalar ops in this function
1348               if (do_dp) {
1349                  strcat(test_name, "adp");
1350               } else {
1351                  fprintf(stderr, "Unsupported single precision for scalar op in test_vx_aORm_fp_ops\n");
1352                  exit(1);
1353               }
1354               do_aXp = True;
1355            }
1356      } else if (test_type < VX_BASIC_CMP) {
1357         // Then it must be a VX_VECTOR_xxx type
1358            stride = do_dp ? 2 : 4;
1359            if (!repeat) {
1360               // No need to work up the testcase name here, since that will be done in
1361               // the print_vx_aORm_fp_result() function we'll call for vector-type ops.
1362               repeat = 1;
1363               do_aXp = True;
1364            }
1365      } else {
1366            printf("ERROR:  Invalid VX FP test type %d\n", test_type);
1367            exit(1);
1368      }
1369
1370again:
1371      for (i = 0; i < test_group.num_tests; i+=stride) {
1372         void  * inA, * inB;
1373         int m, fp_idx[4];
1374         unsigned long long vsr_XT[2];
1375         unsigned long long vsr_XB[2];
1376         fp_test_args_t aTest = test_group.targs[i];
1377         for (m = 0; m < stride; m++)
1378            fp_idx[m] = i % (nb_special_fargs - stride) + m;
1379
1380         /* When repeat == True, we're on the first time through of one of the VX_FP_SMx
1381          * test types, meaning we're testing a xs<ZZZ>adp case, thus we have to swap
1382          * inputs as described above:
1383          *    src2 <= VSX[XT]
1384          *    src3 <= VSX[XB]
1385          */
1386         if (scalar) {
1387#ifdef VGP_ppc64le_linux
1388#define VECTOR_ADDR(_v) ((void *)&_v) + 8
1389#else
1390#define VECTOR_ADDR(_v) ((void *)&_v)
1391#endif
1392            // For scalar op, only need to copy one doubleword into each vector's element 0
1393            inA = (void *)&spec_fargs[aTest.fra_idx];
1394            inB = (void *)&spec_fargs[aTest.frb_idx];
1395            frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
1396            memcpy(VECTOR_ADDR(vec_inA), inA, 8);
1397            if (repeat) {
1398               memcpy(VECTOR_ADDR(vec_out), inB, 8);  // src2
1399               memcpy(VECTOR_ADDR(vec_inB), &spec_fargs[fp_idx[0]], 8);  //src3
1400               frbp = (unsigned long long *)&spec_fargs[fp_idx[0]];
1401            } else {
1402               frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
1403               memcpy(VECTOR_ADDR(vec_inB), inB, 8);  // src2
1404               memcpy(VECTOR_ADDR(vec_out), &spec_fargs[fp_idx[0]], 8);  //src3
1405            }
1406            memcpy(vsr_XT, VECTOR_ADDR(vec_out), 8);
1407         } else {
1408            int j, loops = do_dp ? 2 : 4;
1409            size_t len = do_dp ? 8 : 4;
1410            void * vec_src = repeat ? (void *)&vec_inB : (void *)&vec_out;
1411            for (j = 0; j < loops; j++) {
1412               if (do_dp)
1413                  memcpy(vec_src + (j * len), &spec_fargs[fp_idx[j]], len);
1414               else
1415                  memcpy(vec_src + (j * len), &spec_sp_fargs[fp_idx[j]], len);
1416            }
1417            if (do_dp)
1418               setup_dp_fp_args(&test_group.targs[i], repeat);
1419            else
1420               setup_sp_fp_args(&test_group.targs[i], repeat);
1421
1422            memcpy(vsr_XT, &vec_out, 16);
1423            memcpy(vsr_XB, &vec_inB, 16);
1424         }
1425
1426         (*func)();
1427         dst = (unsigned long long *) &vec_out;
1428         if (isLE)
1429            dst++;
1430         if (test_type < VX_VECTOR_FP_MULT_AND_OP2)
1431            printf( "#%d: %s %s(%016llx,%016llx,%016llx) = %016llx\n", i,
1432                    test_name, test_group.op, vsr_XT[0], *frap, *frbp, *dst );
1433         else
1434            print_vx_aORm_fp_result(vsr_XT, vsr_XB, &test_group, i);
1435      }
1436      printf( "\n" );
1437
1438      if (repeat) {
1439         repeat = 0;
1440         if (test_type < VX_VECTOR_FP_MULT_AND_OP2) {
1441               strcpy(test_name, test_group.name);
1442               strcat(test_name, "mdp");
1443         }
1444         do_aXp = False;
1445         goto again;
1446      }
1447      k++;
1448   }
1449   printf( "\n" );
1450   free(test_name);
1451}
1452
1453static void test_vx_vector_one_fp_arg(void)
1454{
1455   test_func_t func;
1456   int k;
1457   k = 0;
1458   build_special_fargs_table();
1459
1460   while ((func = vx_vector_one_fp_arg_tests[k].test_func)) {
1461      int idx, i;
1462      vx_fp_test_t test_group = vx_vector_one_fp_arg_tests[k];
1463      Bool convToWord = (test_group.type == VX_CONV_WORD);
1464      Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1465      Bool xvrespTest = (strstr(test_group.name , "xvresp") != NULL) ? True: False;
1466      int stride = dp ? 2 : 4;
1467
1468      for (i = 0; i < test_group.num_tests; i+=stride) {
1469         unsigned int * pv;
1470         void * inB;
1471
1472         pv = (unsigned int *)&vec_out;
1473         // clear vec_out
1474         for (idx = 0; idx < 4; idx++, pv++)
1475            *pv = 0;
1476
1477         if (dp) {
1478            int j;
1479            unsigned long long * frB_dp, *dst_dp;
1480            for (j = 0; j < 2; j++) {
1481               inB = (void *)&spec_fargs[i + j];
1482               // copy double precision FP into vector element i
1483               memcpy(((void *)&vec_inB) + (j * 8), inB, 8);
1484            }
1485            // execute test insn
1486            (*func)();
1487            dst_dp = (unsigned long long *) &vec_out;
1488            printf("#%d: %s ", i/2, test_group.name);
1489            for (j = 0; j < 2; j++) {
1490               if (j)
1491                  printf("; ");
1492               frB_dp = (unsigned long long *)&spec_fargs[i + j];
1493               printf("%s(%016llx)", test_group.op, *frB_dp);
1494               printf(" = %016llx", convToWord ? (dst_dp[j] & 0x00000000ffffffffULL) : dst_dp[j]);
1495            }
1496            printf("\n");
1497         } else {
1498            int j;
1499            unsigned int * frB_sp, * dst_sp;
1500
1501            for (j = 0; j < 4; j++) {
1502               inB = (void *)&spec_sp_fargs[i + j];
1503               // copy single precision FP into vector element i
1504               memcpy(((void *)&vec_inB) + (j * 4), inB, 4);
1505            }
1506            // execute test insn
1507            (*func)();
1508            dst_sp = (unsigned int *) &vec_out;
1509            // print result
1510            printf("#%d: %s ", i/4, test_group.name);
1511            for (j = 0; j < 4; j++) {
1512               if (j)
1513                  printf("; ");
1514               frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
1515               printf("%s(%08x)", test_group.op, *frB_sp);
1516               if (xvrespTest) {
1517                  float calc_diff = fabs(spec_sp_fargs[i + j]/256);
1518                  float sp_res;
1519                  memcpy(&sp_res, &dst_sp[j], 4);
1520                  float div_result = 1/spec_sp_fargs[i + j];
1521                  float real_diff = fabs(sp_res - div_result);
1522                  printf( " ==> %s",
1523                          ( ( sp_res == div_result )
1524                                   || ( isnan(sp_res) && isnan(div_result) )
1525                                   || ( real_diff <= calc_diff ) ) ? "PASS"
1526                                                                     : "FAIL");
1527               } else {
1528                  printf(" = %08x", dst_sp[j]);
1529               }
1530            }
1531            printf("\n");
1532         }
1533      }
1534      k++;
1535      printf( "\n" );
1536   }
1537
1538}
1539
1540/* This function assumes the instruction being tested requires two args. */
1541static void test_vx_vector_fp_ops(void)
1542{
1543   test_func_t func;
1544   int k;
1545   k = 0;
1546   build_special_fargs_table();
1547
1548   while ((func = vx_vector_fp_tests[k].test_func)) {
1549      int idx, i, repeat = 1;
1550      vx_fp_test_t test_group = vx_vector_fp_tests[k];
1551      int stride = test_group.precision == DOUBLE_TEST ? 2 : 4;
1552      do_dot = False;
1553
1554again:
1555      for (i = 0; i < test_group.num_tests; i+=stride) {
1556         unsigned int * pv, condreg;
1557         unsigned int flags;
1558
1559         pv = (unsigned int *)&vec_out;
1560         if (test_group.precision == DOUBLE_TEST)
1561            setup_dp_fp_args(&test_group.targs[i], False);
1562         else
1563            setup_sp_fp_args(&test_group.targs[i], False);
1564
1565         // clear vec_out
1566         for (idx = 0; idx < 4; idx++, pv++)
1567            *pv = 0;
1568
1569         // execute test insn
1570         SET_FPSCR_ZERO;
1571         SET_CR_XER_ZERO;
1572         (*func)();
1573         GET_CR(flags);
1574         if (test_group.type == VX_BASIC_CMP) {
1575            condreg = (flags & 0x000000f0) >> 4;
1576         } else {
1577            condreg = VX_NOT_CMP_OP;
1578         }
1579         print_vector_fp_result(condreg, &test_group, i);
1580      }
1581      printf("\n");
1582      if (repeat && test_group.type == VX_BASIC_CMP) {
1583         repeat = 0;
1584         do_dot = True;
1585         goto again;
1586      }
1587      k++;
1588      printf( "\n" );
1589   }
1590}
1591
1592
// Doubleword divide operands: each row holds the two values that
// test_div_extensions() loads into r14 and r15, in that order.
signed long long div_dw_tdata[13][2] = {
   { 4,                     -4         },
   { 4,                     -3         },
   { 4,                     4          },
   { 4,                     -5         },
   { 3,                     8          },
   { 0x8000000000000000ULL, 0xa        },
   { 0x50c,                 -1         },
   { 0x50c,                 -4096      },
   { 0x1234fedc,            0x8000a873 },
   { 0xabcd87651234fedcULL, 0xa123b893 },
   { 0x123456789abdcULL,    0          },
   { 0,                     2          },
   { 0x77,                  0xa3499    }
};
// Number of rows in div_dw_tdata.
#define dw_tdata_len (sizeof(div_dw_tdata) / sizeof(div_dw_tdata[0]))
1610
// Word divide operands: each row holds the two values that
// test_div_extensions() loads into r14 and r15, in that order.
unsigned int div_w_tdata[6][2] = {
   { 0,          2          },
   { 2,          0          },
   { 0x7abc1234, 0xf0000000 },
   { 0xfabc1234, 5          },
   { 77,         66         },
   { 5,          0xfabc1234 },
};
// Number of rows in div_w_tdata.
#define w_tdata_len (sizeof(div_w_tdata) / sizeof(div_w_tdata[0]))
1621
1622typedef struct div_ext_test
1623{
1624   test_func_t test_func;
1625   const char *name;
1626   int num_tests;
1627   div_type_t div_type;
1628   precision_type_t precision;
1629} div_ext_test_t;
1630
1631static div_ext_test_t div_tests[] = {
1632#ifdef __powerpc64__
1633                                   { &test_divde, "divde", dw_tdata_len, DIV_BASE, DOUBLE_TEST },
1634                                   { &test_divde, "divdeo", dw_tdata_len, DIV_OE, DOUBLE_TEST },
1635#endif
1636                                   { &test_divweu, "divweu", w_tdata_len, DIV_BASE, SINGLE_TEST },
1637                                   { &test_divweu, "divweuo", w_tdata_len, DIV_OE, SINGLE_TEST },
1638                                   { NULL, NULL, 0, 0, 0 }
1639};
1640
1641static void test_div_extensions(void)
1642{
1643   test_func_t func;
1644   int k;
1645   k = 0;
1646
1647   while ((func = div_tests[k].test_func)) {
1648      int i, repeat = 1;
1649      div_ext_test_t test_group = div_tests[k];
1650      do_dot = False;
1651
1652again:
1653      for (i = 0; i < test_group.num_tests; i++) {
1654         unsigned int condreg;
1655
1656         if (test_group.div_type == DIV_OE)
1657            do_OE = True;
1658         else
1659            do_OE = False;
1660
1661         if (test_group.precision == DOUBLE_TEST) {
1662            r14 = div_dw_tdata[i][0];
1663            r15 = div_dw_tdata[i][1];
1664         } else {
1665            r14 = div_w_tdata[i][0];
1666            r15 = div_w_tdata[i][1];
1667         }
1668         // execute test insn
1669         (*func)();
1670         condreg = (div_flags & 0xf0000000) >> 28;
1671         printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : "");
1672         if (test_group.precision == DOUBLE_TEST) {
1673            printf("0x%016llx / 0x%016llx = 0x%016llx;",
1674                   div_dw_tdata[i][0], div_dw_tdata[i][1], (signed long long) r17);
1675         } else {
1676            printf("0x%08x / 0x%08x = 0x%08x;",
1677                   div_w_tdata[i][0], div_w_tdata[i][1], (unsigned int) r17);
1678         }
1679         printf(" CR=%x; XER=%x\n", condreg, div_xer);
1680      }
1681      printf("\n");
1682      if (repeat) {
1683         repeat = 0;
1684         do_dot = True;
1685         goto again;
1686      }
1687      k++;
1688      printf( "\n" );
1689   }
1690
1691}
1692
1693static void test_fct_ops(void)
1694{
1695   test_func_t func;
1696   int k;
1697   k = 0;
1698
1699   while ((func = fct_tests[k].test_func)) {
1700      int i, repeat = 1;
1701      simple_test_t test_group = fct_tests[k];
1702      do_dot = False;
1703
1704again:
1705      for (i = 0; i < nb_special_fargs; i++) {
1706         double result;
1707#define SINGLE_MASK 0x00000000FFFFFFFFULL
1708
1709         f14 = spec_fargs[i];
1710         // execute test insn
1711         SET_FPSCR_ZERO;
1712         (*func)();
1713         result = f17;
1714         printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : "");
1715         printf("0x%016llx (%e) ==> 0x%016llx\n",
1716                *((unsigned long long *)(&spec_fargs[i])), spec_fargs[i],
1717                test_group.precision == SINGLE_TEST ? (SINGLE_MASK &
1718                         *((unsigned long long *)(&result))) :
1719                         *((unsigned long long *)(&result)));
1720      }
1721      printf("\n");
1722      if (repeat) {
1723         repeat = 0;
1724         do_dot = True;
1725         goto again;
1726      }
1727      k++;
1728      printf( "\n" );
1729   }
1730}
1731
1732#ifdef __powerpc64__
1733void test_stdbrx(void)
1734{
1735   unsigned long long store, val = 0xdeadbacf12345678ULL;
1736   printf("stdbrx: 0x%llx ==> ", val);
1737   r17 = (HWord_t)val;
1738   r14 = (HWord_t)&store;
1739   __asm__ __volatile__ ("stdbrx %0, 0, %1" : : "r"(r17), "r"(r14));
1740   printf("0x%llx\n", store);
1741   printf( "\n" );
1742}
1743#endif
1744
1745static test_table_t
1746         all_tests[] =
1747{
1748                    { &test_vx_vector_one_fp_arg,
1749                      "Test VSX vector single arg instructions", OTHER_INST },
1750                    { &test_vx_vector_fp_ops,
1751                      "Test VSX floating point compare and basic arithmetic instructions", OTHER_INST },
1752#ifdef __powerpc64__
1753                     { &test_bpermd,
1754                       "Test bit permute double", OTHER_INST },
1755#endif
1756                     { &test_xxsel,
1757                         "Test xxsel instruction", OTHER_INST },
1758                     { &test_xxspltw,
1759                         "Test xxspltw instruction", OTHER_INST },
1760                     { &test_div_extensions,
1761                       "Test div extensions", SCALAR_DIV_INST },
1762                     { &test_fct_ops,
1763                       "Test floating point convert [word | doubleword] unsigned, with round toward zero", OTHER_INST },
1764#ifdef __powerpc64__
1765                     { &test_stdbrx,
1766                      "Test stdbrx instruction", OTHER_INST },
1767#endif
1768                     { &test_vx_aORm_fp_ops,
1769		       "Test floating point arithmetic instructions -- with a{d|s}p or m{d|s}p", OTHER_INST },
1770                     { &test_vx_simple_scalar_fp_ops,
1771                      "Test scalar floating point arithmetic instructions", OTHER_INST },
1772                     { NULL, NULL }
1773};
1774#endif // HAS_VSX
1775
/*
 * Print the command-line help text to stderr.
 *
 * The option descriptions no longer claim that -d, -o and -A are defaults:
 * main() starts with an empty selection mask, so nothing runs unless one
 * of them is given.
 *
 * NOTE(review): the program name in the first line is hard-coded and may
 * not match this binary's real name -- confirm against the build.
 */
static void usage (void)
{
   fprintf(stderr,
           "Usage: test_isa_3_0 [OPTIONS]\n"
           "\t-d: test scalar division instructions\n"
           "\t-o: test non scalar division instructions\n"
           "\t-A: test all instructions\n"
           "\t-h: display this help and exit\n"
           "No tests are run unless at least one of -d, -o or -A is given.\n"
           );
}
1786
/*
 * Program entry point.
 *
 * Parses the command line to build a mask of test categories (-d scalar
 * division, -o everything else, -A all), then walks all_tests[] and runs
 * every entry whose category bit is in the mask, printing the entry's
 * banner first.  The selection mask starts empty, so without any of
 * -d/-o/-A no test is run.
 *
 * Returns 0 on success (and after -h), 1 on an unrecognised option.
 * When HAS_VSX is not defined the program does nothing and returns 0.
 */
int main(int argc, char *argv[])
{
#ifdef HAS_VSX

   test_table_t aTest;
   test_func_t func;
   int i;
   int c;
   unsigned int test_run_mask = 0;

   /* NOTE: ISA 3.0 introduces the OV32 and CA32 bits in the XER.  These
    * bits are set by various arithmetic instructions, which means this
    * test generates different XER output on pre-ISA 3.0 versus ISA 3.0
    * hardware.  The tests have been grouped so that the tests that
    * generate different results are in one group and the rest are in
    * another.  This minimizes the size of the expected-result files for
    * the two cases.
    */

   while ((c = getopt(argc, argv, "doAh")) != -1) {
      switch (c) {
      case 'd':
         test_run_mask |= SCALAR_DIV_INST;
         break;
      case 'o':
         test_run_mask |= OTHER_INST;
         break;
      case 'A':
         test_run_mask = 0xFFFF;
         break;
      case 'h':
         usage();
         return 0;

      default:
         usage();
         /* For an unrecognised option getopt() returns '?' and leaves the
          * offending character in optopt, so report that, not 'c'. */
         fprintf(stderr, "Unknown argument: '%c'\n", optopt);
         return 1;
      }
   }

   /* all_tests[] ends with an entry whose function pointer is NULL. */
   for (i = 0; (func = all_tests[i].test_category) != NULL; i++) {
      aTest = all_tests[i];
      if (test_run_mask & aTest.test_group) {
         /* Test group specified on command line */
         printf( "%s\n", aTest.name );
         (*func)();
      }
   }

   /* free(NULL) is a no-op, so no guards are needed. */
   free(spec_fargs);
   free(spec_sp_fargs);

#endif // HAS_VSX

   return 0;
}
1846