/*  Copyright (C) 2011 IBM

 Author: Maynard Johnson <maynardj@us.ibm.com>

 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License as
 published by the Free Software Foundation; either version 2 of the
 License, or (at your option) any later version.

 This program is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 02111-1307, USA.

 The GNU General Public License is contained in the file COPYING.
 */
22
23#ifdef HAS_VSX
24
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include <altivec.h>
31
/* HWord_t: unsigned integer type used for the r14-r17 register variables.
 * It is 32 bits wide unless the compiler defines __powerpc64__, in which
 * case it is 64 bits wide.
 */
#ifndef __powerpc64__
typedef uint32_t HWord_t;
#else
typedef uint64_t HWord_t;
#endif /* __powerpc64__ */
37
/* isLE: 1 when VGP_ppc64le_linux is defined, 0 otherwise.
 * NOTE(review): VGP_ppc64le_linux is presumably supplied by the build
 * system for little-endian ppc64 Linux targets -- confirm.
 */
#ifdef VGP_ppc64le_linux
#define isLE 1
#else
#define isLE 0
#endif
43
44register HWord_t r14 __asm__ ("r14");
45register HWord_t r15 __asm__ ("r15");
46register HWord_t r16 __asm__ ("r16");
47register HWord_t r17 __asm__ ("r17");
48register double f14 __asm__ ("fr14");
49register double f15 __asm__ ("fr15");
50register double f16 __asm__ ("fr16");
51register double f17 __asm__ ("fr17");
52
/* File-scope scratch word; volatile so that every access is really
 * performed.  Its readers and writers are outside this chunk.
 */
static volatile unsigned int cond_reg;
54
/* Clobber list naming all eight condition-register fields. */
#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"

/* Write _arg to the condition register (mtcr), clobbering every CR field.
 * NOTE: the expansion already ends in ';'.  It is kept that way because
 * callers outside this chunk may rely on it; do not use this macro as the
 * sole body of an if/else without braces.
 */
#define SET_CR(_arg) \
      __asm__ __volatile__ ("mtcr  %0" : : "b"(_arg) : ALLCR );

/* Write _arg to XER (mtxer).  Same trailing-';' caveat as SET_CR. */
#define SET_XER(_arg) \
      __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );

/* Read the condition register into _lval (mfcr).  No trailing ';'. */
#define GET_CR(_lval) \
      __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )

/* Read XER into _lval (mfxer).  No trailing ';'. */
#define GET_XER(_lval) \
      __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )

/* Read CR and then XER into the two given lvalues. */
#define GET_CR_XER(_lval_cr,_lval_xer) \
   do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)

/* Clear the condition register. */
#define SET_CR_ZERO \
      SET_CR(0)

/* Clear XER. */
#define SET_XER_ZERO \
      SET_XER(0)

/* Clear CR and then XER. */
#define SET_CR_XER_ZERO \
   do { SET_CR_ZERO; SET_XER_ZERO; } while (0)

/* Clear the FPSCR by moving 0.0 into all eight of its fields
 * (mtfsf with field mask 0xFF).
 */
#define SET_FPSCR_ZERO \
   do { double _d = 0.0; \
        __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
   } while (0)
85
86
/* Signature shared by the test stubs: no arguments, no return value. */
typedef void (*test_func_t)(void);

/* Short aliases for the test-descriptor structs.  The struct bodies are
 * not defined in this part of the file; these typedefs only introduce the
 * names so that pointers to them can be declared.
 */
typedef struct ldst_test ldst_test_t;
typedef struct vsx_logic_test logic_test_t;
typedef struct xs_conv_test xs_conv_test_t;
typedef struct p7_fp_test fp_test_t;
typedef struct vx_fp_test vx_fp_test_t;
typedef struct vsx_move_test move_test_t;
typedef struct vsx_permute_test permute_test_t;
typedef struct test_table test_table_t;
96
/* Table of double-precision test operands and the number of valid entries
 * in it.  Both are filled in by build_fargs_table(); nb_fargs == 0 means
 * the table has not been built yet.
 */
static double *fargs = NULL;
static int nb_fargs;
99
/* The functions below, which construct a table of floating-point
 * values, were lifted from none/tests/ppc32/jm-insns.c.
 */
103
/* AB_DPRINTF: printf-style trace to stderr, compiled in only when
 * DEBUG_ARGS_BUILD is defined; otherwise it expands to an empty statement
 * and its arguments are not evaluated.
 */
#if defined (DEBUG_ARGS_BUILD)
#define AB_DPRINTF(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
#else
#define AB_DPRINTF(...) do { } while (0)
#endif

/*
 * Assemble a 64-bit IEEE-754 double-precision bit pattern and store it
 * in the 8 bytes at farg.
 *
 *   farg  destination; must point to at least 8 writable bytes
 *         (callers in this file pass the address of a double)
 *   s     sign bit, placed at bit 63 (expected to be 0 or 1)
 *   _exp  biased exponent, placed at bits 62..52 (not masked)
 *   mant  mantissa, OR-ed into the low bits (not masked to 52 bits)
 *
 * The pattern is written with memcpy rather than through a uint64_t
 * lvalue so that storing into a double object does not violate the
 * strict-aliasing rules.
 */
static inline void register_farg (void *farg,
                                  int s, uint16_t _exp, uint64_t mant)
{
   uint64_t bits;

   bits = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;
   memcpy(farg, &bits, sizeof bits);
   AB_DPRINTF("%d %03x %013" PRIx64 " => %016" PRIx64 " %0e\n",
              s, _exp, mant, bits, *(double *)farg);
}
120
121static void build_fargs_table(void)
122/*
123 * Double precision:
124 * Sign goes from zero to one               (1 bit)
125 * Exponent goes from 0 to ((1 << 12) - 1)  (11 bits)
126 * Mantissa goes from 1 to ((1 << 52) - 1)  (52 bits)
127 * + special values:
128 * +0.0      : 0 0x000 0x0000000000000 => 0x0000000000000000
129 * -0.0      : 1 0x000 0x0000000000000 => 0x8000000000000000
130 * +infinity : 0 0x7FF 0x0000000000000 => 0x7FF0000000000000
131 * -infinity : 1 0x7FF 0x0000000000000 => 0xFFF0000000000000
132 * +QNaN     : 0 0x7FF 0x8000000000000 => 0x7FF8000000000000
133 * -QNaN     : 1 0x7FF 0x8000000000000 => 0xFFF8000000000000
134 * +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF => 0x7FF7FFFFFFFFFFFF
135 * -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF => 0xFFF7FFFFFFFFFFFF
136 * (8 values)
137 *
138 * Single precision
139 * Sign:     1 bit
140 * Exponent: 8 bits
141 * Mantissa: 23 bits
142 * +0.0      : 0 0x00 0x000000 => 0x00000000
143 * -0.0      : 1 0x00 0x000000 => 0x80000000
144 * +infinity : 0 0xFF 0x000000 => 0x7F800000
145 * -infinity : 1 0xFF 0x000000 => 0xFF800000
146 * +QNaN     : 0 0xFF 0x400000 => 0x7FC00000
147 * -QNaN     : 1 0xFF 0x400000 => 0xFFC00000
148 * +SNaN     : 0 0xFF 0x3FFFFF => 0x7FBFFFFF
149 * -SNaN     : 1 0xFF 0x3FFFFF => 0xFFBFFFFF
150*/
151{
152   uint64_t mant;
153   uint16_t _exp, e1;
154   int s;
155   int i=0;
156
157   if (nb_fargs)
158      return;
159
160   fargs = malloc( 16 * sizeof(double) );
161   for (s = 0; s < 2; s++) {
162      for (e1 = 0x001;; e1 = ((e1 + 1) << 13) + 7) {
163         if (e1 >= 0x400)
164            e1 = 0x3fe;
165         _exp = e1;
166         for (mant = 0x0000000000001ULL; mant < (1ULL << 52);
167         /* Add 'random' bits */
168         mant = ((mant + 0x4A6) << 29) + 0x359) {
169            register_farg( &fargs[i++], s, _exp, mant );
170         }
171         if (e1 == 0x3fe)
172            break;
173      }
174   }
175   // add a few smaller values to fargs . . .
176   s = 0;
177   _exp = 0x002;
178   mant = 0x0000000000b01ULL;
179   register_farg(&fargs[i++], s, _exp, mant);
180
181   _exp = 0x000;
182   mant = 0x00000203f0b3dULL;
183   register_farg(&fargs[i++], s, _exp, mant);
184
185   mant = 0x00000005a203dULL;
186   register_farg(&fargs[i++], s, _exp, mant);
187
188   s = 1;
189   _exp = 0x002;
190   mant = 0x0000000000b01ULL;
191   register_farg(&fargs[i++], s, _exp, mant);
192
193   _exp = 0x000;
194   mant = 0x00000203f0b3dULL;
195   register_farg(&fargs[i++], s, _exp, mant);
196
197   nb_fargs = i;
198}
199
200
/* One entry of a floating-point test-vector table. */
typedef struct fp_test_args {
   int fra_idx;   /* index selecting the first operand  */
   int frb_idx;   /* index selecting the second operand */
   int cr_flags;  /* flag value stored with the pair; NOTE(review):
                     presumably the expected CR field -- confirm */
} fp_test_args_t;
206
207
208fp_test_args_t ftdiv_tests[] = {
209                              {0, 1, 0x8},
210                              {9, 1, 0xa},
211                              {1, 12, 0xa},
212                              {0, 2, 0xa},
213                              {1, 3, 0xa},
214                              {3, 0, 0xa},
215                              {0, 3, 0xa},
216                              {4, 0, 0xa},
217                              {7, 1, 0xe},
218                              {8, 1, 0xe},
219                              {1, 7, 0xe},
220                              {0, 13, 0xe},
221                              {5, 5, 0xe},
222                              {5, 6, 0xe},
223};
224
225fp_test_args_t xscmpX_tests[] = {
226                                   {8, 8, 0x2},
227                                   {8, 14, 0x8},
228                                   {8, 6, 0x8},
229                                   {8, 5, 0x8},
230                                   {8, 4, 0x8},
231                                   {8, 7, 0x8},
232                                   {8, 9, 0x1},
233                                   {8, 11, 0x1},
234                                   {14, 8, 0x4},
235                                   {14, 14, 0x2},
236                                   {14, 6, 0x8},
237                                   {14, 5, 0x8},
238                                   {14, 4, 0x8},
239                                   {14, 7, 0x8},
240                                   {14, 9, 0x1},
241                                   {14, 11, 0x1},
242                                   {6, 8, 0x4},
243                                   {6, 14, 0x4},
244                                   {6, 6, 0x2},
245                                   {6, 5, 0x2},
246                                   {6, 4, 0x8},
247                                   {6, 7, 0x8},
248                                   {6, 9, 0x1},
249                                   {6, 11, 0x1},
250                                   {5, 8, 0x4},
251                                   {5, 14, 0x4},
252                                   {5, 6, 0x2},
253                                   {5, 5, 0x2},
254                                   {5, 4, 0x8},
255                                   {5, 7, 0x8},
256                                   {5, 9, 0x1},
257                                   {5, 11, 0x1},
258                                   {4, 8, 0x4},
259                                   {4, 14, 0x4},
260                                   {4, 6, 0x4},
261                                   {4, 5, 0x4},
262                                   {4, 1, 0x8},
263                                   {4, 7, 0x8},
264                                   {4, 9, 0x1},
265                                   {4, 11, 0x1},
266                                   {7, 8, 0x4},
267                                   {7, 14, 0x4},
268                                   {7, 6, 0x4},
269                                   {7, 5, 0x4},
270                                   {7, 4, 0x4},
271                                   {7, 7, 0x2},
272                                   {7, 9, 0x1},
273                                   {7, 11, 0x1},
274                                   {10, 8, 0x1},
275                                   {10, 14, 0x1},
276                                   {10, 6, 0x1},
277                                   {10, 5, 0x1},
278                                   {10, 4, 0x1},
279                                   {10, 7, 0x1},
280                                   {10, 9, 0x1},
281                                   {10, 11, 0x1},
282                                   {12, 8, 0x1},
283                                   {12, 14, 0x1},
284                                   {12, 6, 0x1},
285                                   {12, 5, 0x1},
286                                   {12, 4, 0x1},
287                                   {12, 7, 0x1},
288                                   {12, 9, 0x1},
289                                   {12, 11, 0x1},
290};
291
292fp_test_args_t xsadddp_tests[] = {
293                                   {8, 8, 0x0},
294                                   {8, 14, 0x0},
295                                   {8, 6, 0x0},
296                                   {8, 5, 0x0},
297                                   {8, 4, 0x0},
298                                   {8, 7, 0x0},
299                                   {8, 9, 0x0},
300                                   {8, 11, 0x0},
301                                   {14, 8, 0x0},
302                                   {14, 14, 0x0},
303                                   {14, 6, 0x0},
304                                   {14, 5, 0x0},
305                                   {14, 4, 0x0},
306                                   {14, 7, 0x0},
307                                   {14, 9, 0x0},
308                                   {14, 11, 0x0},
309                                   {6, 8, 0x0},
310                                   {6, 14, 0x0},
311                                   {6, 6, 0x0},
312                                   {6, 5, 0x0},
313                                   {6, 4, 0x0},
314                                   {6, 7, 0x0},
315                                   {6, 9, 0x0},
316                                   {6, 11, 0x0},
317                                   {5, 8, 0x0},
318                                   {5, 14, 0x0},
319                                   {5, 6, 0x0},
320                                   {5, 5, 0x0},
321                                   {5, 4, 0x0},
322                                   {5, 7, 0x0},
323                                   {5, 9, 0x0},
324                                   {5, 11, 0x0},
325                                   {4, 8, 0x0},
326                                   {4, 14, 0x0},
327                                   {4, 6, 0x0},
328                                   {4, 5, 0x0},
329                                   {4, 1, 0x0},
330                                   {4, 7, 0x0},
331                                   {4, 9, 0x0},
332                                   {4, 11, 0x0},
333                                   {7, 8, 0x0},
334                                   {7, 14, 0x0},
335                                   {7, 6, 0x0},
336                                   {7, 5, 0x0},
337                                   {7, 4, 0x0},
338                                   {7, 7, 0x0},
339                                   {7, 9, 0x0},
340                                   {7, 11, 0x0},
341                                   {10, 8, 0x0},
342                                   {10, 14, 0x0},
343                                   {10, 6, 0x0},
344                                   {10, 5, 0x0},
345                                   {10, 4, 0x0},
346                                   {10, 7, 0x0},
347                                   {10, 9, 0x0},
348                                   {10, 11, 0x0},
349                                   {12, 8, 0x0},
350                                   {12, 14, 0x0},
351                                   {12, 6, 0x0},
352                                   {12, 5, 0x0},
353                                   {12, 4, 0x0},
354                                   {12, 7, 0x0},
355                                   {12, 9, 0x0},
356                                   {12, 11, 0x0},
357};
358
359fp_test_args_t xsdivdp_tests[] = {
360                                   {8, 8, 0x0},
361                                   {8, 14, 0x0},
362                                   {8, 6, 0x0},
363                                   {8, 5, 0x0},
364                                   {8, 4, 0x0},
365                                   {8, 7, 0x0},
366                                   {8, 9, 0x0},
367                                   {8, 11, 0x0},
368                                   {14, 8, 0x0},
369                                   {14, 14, 0x0},
370                                   {14, 6, 0x0},
371                                   {14, 5, 0x0},
372                                   {14, 4, 0x0},
373                                   {14, 7, 0x0},
374                                   {14, 9, 0x0},
375                                   {14, 11, 0x0},
376                                   {6, 8, 0x0},
377                                   {6, 14, 0x0},
378                                   {6, 6, 0x0},
379                                   {6, 5, 0x0},
380                                   {6, 4, 0x0},
381                                   {6, 7, 0x0},
382                                   {6, 9, 0x0},
383                                   {6, 11, 0x0},
384                                   {5, 8, 0x0},
385                                   {5, 14, 0x0},
386                                   {5, 6, 0x0},
387                                   {5, 5, 0x0},
388                                   {5, 4, 0x0},
389                                   {5, 7, 0x0},
390                                   {5, 9, 0x0},
391                                   {5, 11, 0x0},
392                                   {4, 8, 0x0},
393                                   {4, 14, 0x0},
394                                   {4, 6, 0x0},
395                                   {4, 5, 0x0},
396                                   {4, 1, 0x0},
397                                   {4, 7, 0x0},
398                                   {4, 9, 0x0},
399                                   {4, 11, 0x0},
400                                   {7, 8, 0x0},
401                                   {7, 14, 0x0},
402                                   {7, 6, 0x0},
403                                   {7, 5, 0x0},
404                                   {7, 4, 0x0},
405                                   {7, 7, 0x0},
406                                   {7, 9, 0x0},
407                                   {7, 11, 0x0},
408                                   {10, 8, 0x0},
409                                   {10, 14, 0x0},
410                                   {10, 6, 0x0},
411                                   {10, 5, 0x0},
412                                   {10, 4, 0x0},
413                                   {10, 7, 0x0},
414                                   {10, 9, 0x0},
415                                   {10, 11, 0x0},
416                                   {12, 8, 0x0},
417                                   {12, 14, 0x0},
418                                   {12, 6, 0x0},
419                                   {12, 5, 0x0},
420                                   {12, 4, 0x0},
421                                   {12, 7, 0x0},
422                                   {12, 9, 0x0},
423                                   {12, 11, 0x0},
424};
425
426fp_test_args_t xsmaddXdp_tests[] = {
427                                   {8, 8, 0x0},
428                                   {8, 14, 0x0},
429                                   {8, 6, 0x0},
430                                   {8, 5, 0x0},
431                                   {8, 4, 0x0},
432                                   {8, 7, 0x0},
433                                   {8, 9, 0x0},
434                                   {8, 11, 0x0},
435                                   {14, 8, 0x0},
436                                   {14, 14, 0x0},
437                                   {14, 6, 0x0},
438                                   {14, 5, 0x0},
439                                   {14, 4, 0x0},
440                                   {14, 7, 0x0},
441                                   {14, 9, 0x0},
442                                   {14, 11, 0x0},
443                                   {6, 8, 0x0},
444                                   {6, 14, 0x0},
445                                   {6, 6, 0x0},
446                                   {6, 5, 0x0},
447                                   {6, 4, 0x0},
448                                   {6, 7, 0x0},
449                                   {6, 9, 0x0},
450                                   {6, 11, 0x0},
451                                   {5, 8, 0x0},
452                                   {5, 14, 0x0},
453                                   {5, 6, 0x0},
454                                   {5, 5, 0x0},
455                                   {5, 4, 0x0},
456                                   {5, 7, 0x0},
457                                   {5, 9, 0x0},
458                                   {5, 11, 0x0},
459                                   {4, 8, 0x0},
460                                   {4, 14, 0x0},
461                                   {4, 6, 0x0},
462                                   {4, 5, 0x0},
463                                   {4, 1, 0x0},
464                                   {4, 7, 0x0},
465                                   {4, 9, 0x0},
466                                   {4, 11, 0x0},
467                                   {7, 8, 0x0},
468                                   {7, 14, 0x0},
469                                   {7, 6, 0x0},
470                                   {7, 5, 0x0},
471                                   {7, 4, 0x0},
472                                   {7, 7, 0x0},
473                                   {7, 9, 0x0},
474                                   {7, 11, 0x0},
475                                   {10, 8, 0x0},
476                                   {10, 14, 0x0},
477                                   {10, 6, 0x0},
478                                   {10, 5, 0x0},
479                                   {10, 4, 0x0},
480                                   {10, 7, 0x0},
481                                   {10, 9, 0x0},
482                                   {10, 11, 0x0},
483                                   {12, 8, 0x0},
484                                   {12, 14, 0x0},
485                                   {12, 6, 0x0},
486                                   {12, 5, 0x0},
487                                   {12, 4, 0x0},
488                                   {12, 7, 0x0},
489                                   {12, 9, 0x0},
490                                   {12, 11, 0x0},
491};
492
493fp_test_args_t xsmsubXdp_tests[] = {
494                                   {8, 8, 0x0},
495                                   {8, 14, 0x0},
496                                   {8, 6, 0x0},
497                                   {8, 5, 0x0},
498                                   {8, 4, 0x0},
499                                   {8, 7, 0x0},
500                                   {8, 9, 0x0},
501                                   {8, 11, 0x0},
502                                   {14, 8, 0x0},
503                                   {14, 14, 0x0},
504                                   {14, 6, 0x0},
505                                   {14, 5, 0x0},
506                                   {14, 4, 0x0},
507                                   {14, 7, 0x0},
508                                   {14, 9, 0x0},
509                                   {14, 11, 0x0},
510                                   {6, 8, 0x0},
511                                   {6, 14, 0x0},
512                                   {6, 6, 0x0},
513                                   {6, 5, 0x0},
514                                   {6, 4, 0x0},
515                                   {6, 7, 0x0},
516                                   {6, 9, 0x0},
517                                   {6, 11, 0x0},
518                                   {5, 8, 0x0},
519                                   {5, 14, 0x0},
520                                   {5, 6, 0x0},
521                                   {5, 5, 0x0},
522                                   {5, 4, 0x0},
523                                   {5, 7, 0x0},
524                                   {5, 9, 0x0},
525                                   {5, 11, 0x0},
526                                   {4, 8, 0x0},
527                                   {4, 14, 0x0},
528                                   {4, 6, 0x0},
529                                   {4, 5, 0x0},
530                                   {4, 1, 0x0},
531                                   {4, 7, 0x0},
532                                   {4, 9, 0x0},
533                                   {4, 11, 0x0},
534                                   {7, 8, 0x0},
535                                   {7, 14, 0x0},
536                                   {7, 6, 0x0},
537                                   {7, 5, 0x0},
538                                   {7, 4, 0x0},
539                                   {7, 7, 0x0},
540                                   {7, 9, 0x0},
541                                   {7, 11, 0x0},
542                                   {10, 8, 0x0},
543                                   {10, 14, 0x0},
544                                   {10, 6, 0x0},
545                                   {10, 5, 0x0},
546                                   {10, 4, 0x0},
547                                   {10, 7, 0x0},
548                                   {10, 9, 0x0},
549                                   {10, 11, 0x0},
550                                   {12, 8, 0x0},
551                                   {12, 14, 0x0},
552                                   {12, 6, 0x0},
553                                   {12, 5, 0x0},
554                                   {12, 4, 0x0},
555                                   {12, 7, 0x0},
556                                   {12, 9, 0x0},
557                                   {12, 11, 0x0},
558};
559
560fp_test_args_t xsnmaddXdp_tests[] = {
561                                     {8, 8, 0x0},
562                                     {8, 14, 0x0},
563                                     {8, 6, 0x0},
564                                     {8, 5, 0x0},
565                                     {8, 4, 0x0},
566                                     {8, 7, 0x0},
567                                     {8, 9, 0x0},
568                                     {8, 11, 0x0},
569                                     {14, 8, 0x0},
570                                     {14, 14, 0x0},
571                                     {14, 6, 0x0},
572                                     {14, 5, 0x0},
573                                     {14, 4, 0x0},
574                                     {14, 7, 0x0},
575                                     {14, 9, 0x0},
576                                     {14, 11, 0x0},
577                                     {6, 8, 0x0},
578                                     {6, 14, 0x0},
579                                     {6, 6, 0x0},
580                                     {6, 5, 0x0},
581                                     {6, 4, 0x0},
582                                     {6, 7, 0x0},
583                                     {6, 9, 0x0},
584                                     {6, 11, 0x0},
585                                     {5, 8, 0x0},
586                                     {5, 14, 0x0},
587                                     {5, 6, 0x0},
588                                     {5, 5, 0x0},
589                                     {5, 4, 0x0},
590                                     {5, 7, 0x0},
591                                     {5, 9, 0x0},
592                                     {5, 11, 0x0},
593                                     {4, 8, 0x0},
594                                     {4, 14, 0x0},
595                                     {4, 6, 0x0},
596                                     {4, 5, 0x0},
597                                     {4, 1, 0x0},
598                                     {4, 7, 0x0},
599                                     {4, 9, 0x0},
600                                     {4, 11, 0x0},
601                                     {7, 8, 0x0},
602                                     {7, 14, 0x0},
603                                     {7, 6, 0x0},
604                                     {7, 5, 0x0},
605                                     {7, 4, 0x0},
606                                     {7, 7, 0x0},
607                                     {7, 9, 0x0},
608                                     {7, 11, 0x0},
609                                     {10, 8, 0x0},
610                                     {10, 14, 0x0},
611                                     {10, 6, 0x0},
612                                     {10, 5, 0x0},
613                                     {10, 4, 0x0},
614                                     {10, 7, 0x0},
615                                     {10, 9, 0x0},
616                                     {10, 11, 0x0},
617                                     {12, 8, 0x0},
618                                     {12, 14, 0x0},
619                                     {12, 6, 0x0},
620                                     {12, 5, 0x0},
621                                     {12, 4, 0x0},
622                                     {12, 7, 0x0},
623                                     {12, 9, 0x0},
624                                     {12, 11, 0x0},
625};
626
627fp_test_args_t xsmuldp_tests[] = {
628                                  {8, 8, 0x0},
629                                  {8, 14, 0x0},
630                                  {8, 6, 0x0},
631                                  {8, 5, 0x0},
632                                  {8, 4, 0x0},
633                                  {8, 7, 0x0},
634                                  {8, 9, 0x0},
635                                  {8, 11, 0x0},
636                                  {14, 8, 0x0},
637                                  {14, 14, 0x0},
638                                  {14, 6, 0x0},
639                                  {14, 5, 0x0},
640                                  {14, 4, 0x0},
641                                  {14, 7, 0x0},
642                                  {14, 9, 0x0},
643                                  {14, 11, 0x0},
644                                  {6, 8, 0x0},
645                                  {6, 14, 0x0},
646                                  {6, 6, 0x0},
647                                  {6, 5, 0x0},
648                                  {6, 4, 0x0},
649                                  {6, 7, 0x0},
650                                  {6, 9, 0x0},
651                                  {6, 11, 0x0},
652                                  {5, 8, 0x0},
653                                  {5, 14, 0x0},
654                                  {5, 6, 0x0},
655                                  {5, 5, 0x0},
656                                  {5, 4, 0x0},
657                                  {5, 7, 0x0},
658                                  {5, 9, 0x0},
659                                  {5, 11, 0x0},
660                                  {4, 8, 0x0},
661                                  {4, 14, 0x0},
662                                  {4, 6, 0x0},
663                                  {4, 5, 0x0},
664                                  {4, 1, 0x0},
665                                  {4, 7, 0x0},
666                                  {4, 9, 0x0},
667                                  {4, 11, 0x0},
668                                  {7, 8, 0x0},
669                                  {7, 14, 0x0},
670                                  {7, 6, 0x0},
671                                  {7, 5, 0x0},
672                                  {7, 4, 0x0},
673                                  {7, 7, 0x0},
674                                  {7, 9, 0x0},
675                                  {7, 11, 0x0},
676                                  {10, 8, 0x0},
677                                  {10, 14, 0x0},
678                                  {10, 6, 0x0},
679                                  {10, 5, 0x0},
680                                  {10, 4, 0x0},
681                                  {10, 7, 0x0},
682                                  {10, 9, 0x0},
683                                  {10, 11, 0x0},
684                                  {12, 8, 0x0},
685                                  {12, 14, 0x0},
686                                  {12, 6, 0x0},
687                                  {12, 5, 0x0},
688                                  {12, 4, 0x0},
689                                  {12, 7, 0x0},
690                                  {12, 9, 0x0},
691                                  {12, 11, 0x0},
692};
693
694fp_test_args_t xssubdp_tests[] = {
695                                  {8, 8, 0x0},
696                                  {8, 14, 0x0},
697                                  {8, 6, 0x0},
698                                  {8, 5, 0x0},
699                                  {8, 4, 0x0},
700                                  {8, 7, 0x0},
701                                  {8, 9, 0x0},
702                                  {8, 11, 0x0},
703                                  {14, 8, 0x0},
704                                  {14, 14, 0x0},
705                                  {14, 6, 0x0},
706                                  {14, 5, 0x0},
707                                  {14, 4, 0x0},
708                                  {14, 7, 0x0},
709                                  {14, 9, 0x0},
710                                  {14, 11, 0x0},
711                                  {6, 8, 0x0},
712                                  {6, 14, 0x0},
713                                  {6, 6, 0x0},
714                                  {6, 5, 0x0},
715                                  {6, 4, 0x0},
716                                  {6, 7, 0x0},
717                                  {6, 9, 0x0},
718                                  {6, 11, 0x0},
719                                  {5, 8, 0x0},
720                                  {5, 14, 0x0},
721                                  {5, 6, 0x0},
722                                  {5, 5, 0x0},
723                                  {5, 4, 0x0},
724                                  {5, 7, 0x0},
725                                  {5, 9, 0x0},
726                                  {5, 11, 0x0},
727                                  {4, 8, 0x0},
728                                  {4, 14, 0x0},
729                                  {4, 6, 0x0},
730                                  {4, 5, 0x0},
731                                  {4, 1, 0x0},
732                                  {4, 7, 0x0},
733                                  {4, 9, 0x0},
734                                  {4, 11, 0x0},
735                                  {7, 8, 0x0},
736                                  {7, 14, 0x0},
737                                  {7, 6, 0x0},
738                                  {7, 5, 0x0},
739                                  {7, 4, 0x0},
740                                  {7, 7, 0x0},
741                                  {7, 9, 0x0},
742                                  {7, 11, 0x0},
743                                  {10, 8, 0x0},
744                                  {10, 14, 0x0},
745                                  {10, 6, 0x0},
746                                  {10, 5, 0x0},
747                                  {10, 4, 0x0},
748                                  {10, 7, 0x0},
749                                  {10, 9, 0x0},
750                                  {10, 11, 0x0},
751                                  {12, 8, 0x0},
752                                  {12, 14, 0x0},
753                                  {12, 6, 0x0},
754                                  {12, 5, 0x0},
755                                  {12, 4, 0x0},
756                                  {12, 7, 0x0},
757                                  {12, 9, 0x0},
758                                  {12, 11, 0x0},
759};
760
761
762
/* Number of valid entries in spec_fargs[]; set by build_special_fargs_table(). */
static int nb_special_fargs;
/* Heap-allocated table of special double-precision values; stays NULL until
 * build_special_fargs_table() has run. */
static double * spec_fargs;

/* Allocate and fill spec_fargs[] the first time it is called; later calls
 * return immediately.  Each entry is produced by register_farg() from a
 * sign, an exponent field and a fraction field (register_farg() is defined
 * elsewhere in this file; presumably it packs the three fields into an
 * IEEE double).  On allocation failure an error is reported on stderr and
 * the program exits with status 1.
 */
static void build_special_fargs_table(void)
{
   /* The special floating point values created below are for
    * use in the ftdiv tests for setting the fe_flag and fg_flag,
    * but they can also be used for other tests (e.g., xscmpudp).
    *
    * Note that fl_flag is always '1' on ppc64 Linux.
    *
  Entry  Sign Exp   fraction                  Special value
   0      0   3fd   0x8000000000000ULL         Positive finite number
   1      0   404   0xf000000000000ULL         ...
   2      0   001   0x8000000b77501ULL         ...
   3      0   7fe   0x800000000051bULL         ...
   4      0   012   0x3214569900000ULL         ...
   5      0   000   0x0000000000000ULL         +0.0 (+zero)
   6      1   000   0x0000000000000ULL         -0.0 (-zero)
   7      0   7ff   0x0000000000000ULL         +infinity
   8      1   7ff   0x0000000000000ULL         -infinity
   9      0   7ff   0x7FFFFFFFFFFFFULL         +SNaN
   10     1   7ff   0x7FFFFFFFFFFFFULL         -SNaN
   11     0   7ff   0x8000000000000ULL         +QNaN
   12     1   7ff   0x8000000000000ULL         -QNaN
   13     1   000   0x8340000078000ULL         Denormalized val (zero exp and non-zero fraction)
   14     1   40d   0x0650f5a07b353ULL         Negative finite number
    *
    * How the entries combine in the ftdiv tests.  Flags are written as
    * fl_flag || fg_flag || fe_flag.
    *
    * No condition met:
    *    FRA = #0,  FRB = #1     e_a = -2;    e_b = 5       => 100
    *
    * fe_flag tests (each => 101):
    *    FRA is a NaN                        FRA = #9,  FRB = #1
    *                                        e_a = 1024;  e_b = 5
    *    FRB is a NaN                        FRA = #1,  FRB = #12
    *                                        e_a = 5;     e_b = 1024
    *    e_b <= -1022                        FRA = #0,  FRB = #2
    *                                        e_a = -2;    e_b = -1022
    *    e_b >= 1021                         FRA = #1,  FRB = #3
    *                                        e_a = 5;     e_b = 1023
    *    FRA != 0 && e_a - e_b >= 1023       FRA = #3,  FRB = #0
    *                                        e_a = 1023;  e_b = -2
    *    FRA != 0 && e_a - e_b <= -1023      FRA = #0,  FRB = #3
    *                                        e_a = -2;    e_b = 1023
    *    FRA != 0 && e_a <= -970             FRA = #4,  FRB = #0
    *                                        e_a = -1005; e_b = -2
    *
    * fg_flag tests (each => 111):
    *    FRA is an Infinity (FRA = Inf also sets fe_flag); two tests,
    *    using +/- Inf for FRA:
    *                                        FRA = #7,  FRB = #1
    *                                        FRA = #8,  FRB = #1
    *                                        e_a = 1024;  e_b = 5
    *    FRB is an Infinity (FRB = Inf also sets fe_flag)
    *                                        FRA = #1,  FRB = #7
    *                                        e_a = 5;     e_b = 1024
    *    FRB is denormalized (e_b < -1022 ==> fe_flag <- 1)
    *                                        FRA = #0,  FRB = #13
    *                                        e_a = -2;    e_b = -1023
    *    FRB is +zero                        FRA = #5,  FRB = #5
    *                                        e_a = -1023; e_b = -1023
    *    FRB is -zero                        FRA = #5,  FRB = #6
    *                                        e_a = -1023; e_b = -1023
    *    NOTE(review): for the two zero cases the earlier comment said
    *    "FRA = Inf also sets fe_flag", which looks like a copy/paste slip;
    *    presumably it is FRB = zero that sets fe_flag here -- confirm
    *    against the ftdiv description in the Power ISA.
    */
   static const struct {
      int      sign;         /* 0 = positive, 1 = negative        */
      uint16_t biased_exp;   /* raw exponent field                 */
      uint64_t fraction;     /* raw fraction (mantissa) field      */
   } special_vals[] = {
      { 0, 0x3fd, 0x8000000000000ULL },   /* #0  positive finite number */
      { 0, 0x404, 0xf000000000000ULL },   /* #1  positive finite number */
      { 0, 0x001, 0x8000000b77501ULL },   /* #2  positive finite number */
      { 0, 0x7fe, 0x800000000051bULL },   /* #3  positive finite number */
      { 0, 0x012, 0x3214569900000ULL },   /* #4  positive finite number */
      { 0, 0x000, 0x0000000000000ULL },   /* #5  +0.0                   */
      { 1, 0x000, 0x0000000000000ULL },   /* #6  -0.0                   */
      { 0, 0x7FF, 0x0000000000000ULL },   /* #7  +infinity              */
      { 1, 0x7FF, 0x0000000000000ULL },   /* #8  -infinity              */
      { 0, 0x7FF, 0x7FFFFFFFFFFFFULL },   /* #9  +SNaN                  */
      { 1, 0x7FF, 0x7FFFFFFFFFFFFULL },   /* #10 -SNaN                  */
      { 0, 0x7FF, 0x8000000000000ULL },   /* #11 +QNaN                  */
      { 1, 0x7FF, 0x8000000000000ULL },   /* #12 -QNaN                  */
      { 1, 0x000, 0x8340000078000ULL },   /* #13 denormalized value     */
      { 1, 0x40d, 0x0650f5a07b353ULL },   /* #14 negative finite number */
   };
   /* The table has always been allocated with room for 16 doubles even
    * though only 15 are filled; the size is kept so nothing elsewhere in
    * the file sees a smaller allocation. */
   enum { SPEC_FARGS_CAPACITY = 16 };
   const int nb_vals = (int)(sizeof special_vals / sizeof special_vals[0]);
   int i;

   if (spec_fargs)
      return;

   spec_fargs = malloc( SPEC_FARGS_CAPACITY * sizeof *spec_fargs );
   if (!spec_fargs) {
      /* Without this check register_farg() would write through NULL. */
      fprintf(stderr, "ERROR: unable to allocate the special fargs table\n");
      exit(1);
   }

   for (i = 0; i < nb_vals; i++)
      register_farg(&spec_fargs[i], special_vals[i].sign,
                    special_vals[i].biased_exp, special_vals[i].fraction);

   nb_special_fargs = nb_vals;
}
1003
1004
/* Pairs a test-category function with a name.
 * NOTE(review): no user of this struct is visible in this part of the file;
 * the field roles below are read off the field names -- confirm. */
struct test_table
{
   test_func_t test_category;   // presumably the driver run for the category
   char * name;                 // presumably the category's printable name
};
1010
/* Describes one floating point test: the function to call, the name to
 * report, and the precision of the result.  The fp_tests[] initializers
 * ({function, "mnemonic", 1}) have this shape; the typedef tying the two
 * together is outside this view. */
struct p7_fp_test
{
   test_func_t test_func;   // function that issues the instruction
   const char *name;        // instruction mnemonic, e.g. "fcfids"
   int single;  // 1=single precision result; 0=double precision result
};
1017
/* Kind of VSX scalar FP test; test_vx_fp_ops() switches on it to pick the
 * printed name and how the operands are set up. */
typedef enum {
   VX_FP_CMP,    // compare: reported as "xscmp" + "udp"/"odp"
   VX_FP_SMA,    // multiply-add: reported as "xsmadd" + suffix
   VX_FP_SMS,    // multiply-subtract: reported as "xsmsub" + suffix
   VX_FP_SNMA,   // negative multiply-add: reported as "xsnmadd" + suffix
   VX_FP_OTHER   // everything else: the table entry's own name is used
} vx_fp_test_type;
1025
/* One group of VSX scalar FP tests: the function to invoke plus the table
 * of operand selections it is run against. */
struct vx_fp_test
{
   test_func_t test_func;       // invoked once per entry of targs
   const char *name;            // printed name (used as-is for VX_FP_OTHER)
   fp_test_args_t * targs;      // operand-index table, e.g. xssubdp_tests
   int num_tests;               // number of targs entries to run
   vx_fp_test_type test_type;   // selects the handling in test_vx_fp_ops()
};
1034
/* One scalar conversion test (see xs_conv_tests[]): function, mnemonic and
 * the number of inputs to run it on. */
struct xs_conv_test
{
   test_func_t test_func;   // function that issues the instruction
   const char *name;        // instruction mnemonic, e.g. "xscvdpsxds"
   int num_tests;           // iteration count (15 for every entry in xs_conv_tests[])
};
1041
/* Classifies a load/store test.  Values start at 1, so the all-zero
 * terminator entry of ldst_tests[] matches none of them. */
typedef enum {
   VSX_LOAD =1,      // lxsdx, lxvd2x, lxvw4x entries
   VSX_LOAD_SPLAT,   // lxvdsx entries
   VSX_STORE         // stxsdx, stxvd2x, stxvw4x entries
} vsx_ldst_type;
1047
/* One VSX load/store test case (see ldst_tests[]). */
struct ldst_test
{
   test_func_t test_func;      // function that issues the load/store
   const char *name;           // instruction mnemonic
   void * base_addr;           // buffer used: viargs for loads, vstg for stores
   uint32_t offset;            // 0 or 4 in ldst_tests[]; presumably a byte offset from base_addr -- confirm
   int num_words_to_process;   // 2 or 4 in ldst_tests[]; presumably 32-bit words checked/printed -- confirm
   vsx_ldst_type type;         // load, load-and-splat, or store
};
1057
/* Logical operation exercised by a logic test.  Values start at 1, so the
 * zero in the logic_tests[] terminator matches none of them. */
typedef enum {
   VSX_AND = 1,   // xxland
   VSX_XOR,       // xxlxor
   VSX_ANDC,      // xxlandc
   VSX_OR,        // xxlor
   VSX_NOR        // xxlnor
} vsx_log_op;
1065
/* One VSX logical-instruction test (see logic_tests[]). */
struct vsx_logic_test
{
   test_func_t test_func;   // function that issues the instruction
   const char *name;        // instruction mnemonic
   vsx_log_op op;           // which logical operation the instruction performs
};
1072
/* One VSX scalar move/sign-manipulation test (see move_tests[]). */
struct vsx_move_test
{
   test_func_t test_func;   // function that issues the instruction
   const char *name;        // instruction mnemonic
};
1078
/* One VSX permute/merge/shift test together with its two input vectors
 * (see permute_tests[]). */
struct vsx_permute_test
{
   test_func_t test_func;   // function that issues the instruction
   const char *name;        // mnemonic, with the DM/SHW immediate where relevant
   unsigned int xa[4];      // XA input, four 32-bit words
   unsigned int xb[4];      // XB input, four 32-bit words
};
1086
/* Vector operands shared by the test_* functions below: vec_inA and vec_inB
 * are the sources handed to the inline asm, vec_out receives the result
 * (and is also an input for the "+wa" multiply-add style tests). */
static vector unsigned int vec_out, vec_inA, vec_inB;
1088
1089static void test_lxsdx(void)
1090{
1091   __asm__ __volatile__ ("lxsdx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
1092}
1093
1094static void
1095test_lxvd2x(void)
1096{
1097   __asm__ __volatile__ ("lxvd2x          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
1098}
1099
1100static void test_lxvdsx(void)
1101{
1102   __asm__ __volatile__ ("lxvdsx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
1103}
1104
1105static void test_lxvw4x(void)
1106{
1107   __asm__ __volatile__ ("lxvw4x          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
1108}
1109
1110static void test_stxsdx(void)
1111{
1112   __asm__ __volatile__ ("stxsdx          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
1113}
1114
1115static void test_stxvd2x(void)
1116{
1117   __asm__ __volatile__ ("stxvd2x          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
1118}
1119
1120static void test_stxvw4x(void)
1121{
1122   __asm__ __volatile__ ("stxvw4x          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
1123}
1124
1125static void test_xxlxor(void)
1126{
1127   __asm__ __volatile__ ("xxlxor          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1128}
1129
1130static void test_xxlor(void)
1131{
1132   __asm__ __volatile__ ("xxlor          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1133}
1134
1135static void test_xxlnor(void)
1136{
1137   __asm__ __volatile__ ("xxlnor          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1138}
1139
1140static void test_xxland(void)
1141{
1142   __asm__ __volatile__ ("xxland          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1143}
1144
1145static void test_xxlandc(void)
1146{
1147   __asm__ __volatile__ ("xxlandc          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1148}
1149
1150static void test_xxmrghw(void)
1151{
1152   __asm__ __volatile__ ("xxmrghw          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1153}
1154
1155static void test_xxmrglw(void)
1156{
1157   __asm__ __volatile__ ("xxmrglw          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1158}
1159
1160static void test_xxpermdi_00(void)
1161{
1162   __asm__ __volatile__ ("xxpermdi         %x0, %x1, %x2, 0x0" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1163}
1164
1165static void test_xxpermdi_01(void)
1166{
1167   __asm__ __volatile__ ("xxpermdi         %x0, %x1, %x2, 0x1" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1168}
1169
1170static void test_xxpermdi_10(void)
1171{
1172   __asm__ __volatile__ ("xxpermdi         %x0, %x1, %x2, 0x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1173}
1174
1175static void test_xxpermdi_11(void)
1176{
1177   __asm__ __volatile__ ("xxpermdi         %x0, %x1, %x2, 0x3" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1178}
1179
1180static void test_xxsldwi_0(void)
1181{
1182   __asm__ __volatile__ ("xxsldwi         %x0, %x1, %x2, 0" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1183}
1184
1185static void test_xxsldwi_1(void)
1186{
1187   __asm__ __volatile__ ("xxsldwi         %x0, %x1, %x2, 1" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1188}
1189
1190static void test_xxsldwi_2(void)
1191{
1192   __asm__ __volatile__ ("xxsldwi         %x0, %x1, %x2, 2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1193}
1194
1195static void test_xxsldwi_3(void)
1196{
1197   __asm__ __volatile__ ("xxsldwi         %x0, %x1, %x2, 3" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1198}
1199
1200static void test_fcfids (void)
1201{
1202    __asm__ __volatile__ ("fcfids          %0, %1" : "=f" (f17): "d" (f14));
1203}
1204
1205static void test_fcfidus (void)
1206{
1207    __asm__ __volatile__ ("fcfidus          %0, %1" : "=f" (f17): "d" (f14));
1208}
1209
1210static void test_fcfidu (void)
1211{
1212    __asm__ __volatile__ ("fcfidu          %0, %1" : "=f" (f17): "d" (f14));
1213}
1214
1215static void test_xsabsdp (void)
1216{
1217   __asm__ __volatile__ ("xsabsdp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
1218}
1219
1220static void test_xscpsgndp (void)
1221{
1222   __asm__ __volatile__ ("xscpsgndp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1223}
1224
1225static void test_xsnabsdp (void)
1226{
1227   __asm__ __volatile__ ("xsnabsdp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
1228}
1229
1230static void test_xsnegdp (void)
1231{
1232   __asm__ __volatile__ ("xsnegdp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
1233}
1234
1235static int do_cmpudp;
1236static void test_xscmp (void)
1237{
1238   if (do_cmpudp)
1239      __asm__ __volatile__ ("xscmpudp          cr1, %x0, %x1" : : "wa" (vec_inA),"wa" (vec_inB));
1240   else
1241      __asm__ __volatile__ ("xscmpodp          cr1, %x0, %x1" : : "wa" (vec_inA),"wa" (vec_inB));
1242}
1243
1244static void test_xsadddp(void)
1245{
1246   __asm__ __volatile__ ("xsadddp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1247}
1248
1249static void test_xsdivdp(void)
1250{
1251   __asm__ __volatile__ ("xsdivdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1252}
1253
1254static int do_adp;
1255static void test_xsmadd(void)
1256{
1257   if (do_adp)
1258      __asm__ __volatile__ ("xsmaddadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1259   else
1260      __asm__ __volatile__ ("xsmaddmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1261}
1262
1263static void test_xsmsub(void)
1264{
1265   if (do_adp)
1266      __asm__ __volatile__ ("xsmsubadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1267   else
1268      __asm__ __volatile__ ("xsmsubmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1269}
1270
1271static void test_xsnmadd(void)
1272{
1273   if (do_adp)
1274      __asm__ __volatile__ ("xsnmaddadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1275   else
1276      __asm__ __volatile__ ("xsnmaddmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1277}
1278
1279static void test_xsmuldp(void)
1280{
1281   __asm__ __volatile__ ("xsmuldp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1282}
1283
1284static void test_xssubdp(void)
1285{
1286   __asm__ __volatile__ ("xssubdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
1287}
1288
1289static void test_xscvdpsxds (void)
1290{
1291   __asm__ __volatile__ ("xscvdpsxds          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
1292}
1293
1294static void test_xscvsxddp (void)
1295{
1296   __asm__ __volatile__ ("xscvsxddp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
1297}
1298
1299static void test_xscvuxddp (void)
1300{
1301   __asm__ __volatile__ ("xscvuxddp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
1302}
1303
/* 16-byte aligned scratch area of eight zeroed words; the store tests in
 * ldst_tests[] use it as their base address. */
static unsigned int vstg[8] __attribute__ ((aligned (16))) = { 0 };

/* Element count of vstg, and the number of whole 4-word vectors it holds. */
#define NUM_VSTG_INTS (sizeof(vstg) / sizeof(vstg[0]))
#define NUM_VSTG_VECS (NUM_VSTG_INTS / 4)
1309
/* 16-byte aligned pool of eleven input words; the load tests and
 * test_ldbrx()/test_lfiwzx() read from it. */
static unsigned int viargs[] __attribute__ ((aligned (16))) = {
   0x01234567, 0x89abcdef, 0x00112233, 0x44556677,
   0x8899aabb, 0x91929394, 0xa1a2a3a4, 0xb1b2b3b4,
   0xc1c2c3c4, 0xd1d2d3d4, 0x7a6b5d3e
};
/* Element count of viargs, and the number of whole 4-word vectors it holds
 * (integer division, so the three trailing words are not counted). */
#define NUM_VIARGS_INTS (sizeof(viargs) / sizeof(viargs[0]))
#define NUM_VIARGS_VECS (NUM_VIARGS_INTS / 4)
1324
1325static ldst_test_t ldst_tests[] = { { &test_lxsdx, "lxsdx", viargs, 0, 2, VSX_LOAD },
1326                                     { &test_lxsdx, "lxsdx", viargs, 4, 2, VSX_LOAD },
1327                                     { &test_lxvd2x, "lxvd2x", viargs, 0, 4, VSX_LOAD },
1328                                     { &test_lxvd2x, "lxvd2x", viargs, 4, 4, VSX_LOAD },
1329                                     { &test_lxvdsx, "lxvdsx", viargs, 0, 4, VSX_LOAD_SPLAT },
1330                                     { &test_lxvdsx, "lxvdsx", viargs, 4, 4, VSX_LOAD_SPLAT },
1331                                     { &test_lxvw4x, "lxvw4x", viargs, 0, 4, VSX_LOAD },
1332                                     { &test_lxvw4x, "lxvw4x", viargs, 4, 4, VSX_LOAD },
1333                                     { &test_stxsdx, "stxsdx", vstg, 0, 2, VSX_STORE },
1334                                     { &test_stxsdx, "stxsdx", vstg, 4, 2, VSX_STORE },
1335                                     { &test_stxvd2x, "stxvd2x", vstg, 0, 4, VSX_STORE },
1336                                     { &test_stxvd2x, "stxvd2x", vstg, 4, 4, VSX_STORE },
1337                                     { &test_stxvw4x, "stxvw4x", vstg, 0, 4, VSX_STORE },
1338                                     { &test_stxvw4x, "stxvw4x", vstg, 4, 4, VSX_STORE },
1339                                     { NULL, NULL, NULL, 0, 0, 0 } };
1340
1341static logic_test_t logic_tests[] = { { &test_xxlxor, "xxlxor", VSX_XOR },
1342                                      { &test_xxlor, "xxlor", VSX_OR } ,
1343                                      { &test_xxlnor, "xxlnor", VSX_NOR },
1344                                      { &test_xxland, "xxland", VSX_AND },
1345                                      { &test_xxlandc, "xxlandc", VSX_ANDC },
1346                                      { NULL, NULL, 0}};
1347
1348static move_test_t move_tests[] = { { &test_xsabsdp, "xsabsdp" },
1349                                    { &test_xscpsgndp, "xscpsgndp" },
1350                                    { &test_xsnabsdp, "xsnabsdp" },
1351                                    { &test_xsnegdp, "xsnegdp" },
1352                                    { NULL, NULL }
1353
1354};
1355
1356static permute_test_t permute_tests[] =
1357{
1358  { &test_xxmrghw, "xxmrghw",
1359    { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1360    { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1361  },
1362  { &test_xxmrghw, "xxmrghw",
1363    { 0x00112233, 0x44556677, 0x8899aabb, 0xccddeeff }, /* XA input */
1364    { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XB input */
1365  },
1366  { &test_xxmrglw, "xxmrglw",
1367    { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1368    { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1369  },
1370  { &test_xxmrglw, "xxmrglw",
1371    { 0x00112233, 0x44556677, 0x8899aabb, 0xccddeeff}, /* XA input */
1372    { 0x11111111, 0x22222222, 0x33333333, 0x44444444}, /* XB input */
1373  },
1374  { &test_xxpermdi_00, "xxpermdi DM=00",
1375    { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1376    { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1377  },
1378  { &test_xxpermdi_01, "xxpermdi DM=01",
1379    { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1380    { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1381  },
1382  { &test_xxpermdi_10, "xxpermdi DM=10",
1383    { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1384    { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1385  },
1386  { &test_xxpermdi_11, "xxpermdi DM=11",
1387    { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1388    { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1389  },
1390  { &test_xxsldwi_0, "xxsldwi SHW=0",
1391    { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1392    { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1393  },
1394  { &test_xxsldwi_1, "xxsldwi SHW=1",
1395    { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1396    { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1397  },
1398  { &test_xxsldwi_2, "xxsldwi SHW=2",
1399    { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1400    { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1401  },
1402  { &test_xxsldwi_3, "xxsldwi SHW=3",
1403    { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1404    { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1405  },
1406  { NULL, NULL }
1407};
1408
1409static fp_test_t fp_tests[] = { { &test_fcfids, "fcfids", 1 },
1410                                { &test_fcfidus, "fcfidus", 1 },
1411                                { &test_fcfidu, "fcfidu", 1 },
1412                                { NULL, NULL, 0 },
1413
1414};
1415
1416static vx_fp_test_t vx_fp_tests[] = {
1417                                     { &test_xscmp, "xscmp", xscmpX_tests, 64, VX_FP_CMP},
1418                                     { &test_xsadddp, "xsadddp", xsadddp_tests, 64, VX_FP_OTHER},
1419                                     { &test_xsdivdp, "xsdivdp", xsdivdp_tests, 64, VX_FP_OTHER},
1420                                     { &test_xsmadd, "xsmadd", xsmaddXdp_tests, 64, VX_FP_SMA},
1421                                     { &test_xsmsub, "xsmsub", xsmsubXdp_tests, 64, VX_FP_SMS},
1422                                     { &test_xsnmadd, "xsnmadd", xsnmaddXdp_tests, 64, VX_FP_SNMA},
1423                                     { & test_xsmuldp, "xsmuldp", xsmuldp_tests, 64, VX_FP_OTHER},
1424                                     { & test_xssubdp, "xssubdp", xssubdp_tests, 64, VX_FP_OTHER},
1425                                     { NULL, NULL, NULL, 0, 0 }
1426};
1427
1428static xs_conv_test_t xs_conv_tests[] = {
1429                                         { &test_xscvdpsxds, "xscvdpsxds", 15},
1430                                         { &test_xscvsxddp, "xscvsxddp", 15},
1431                                         { &test_xscvuxddp, "xscvuxddp", 15},
1432                                         { NULL, NULL, 0}
1433};
1434
1435#ifdef __powerpc64__
1436static void test_ldbrx(void)
1437{
1438   int i;
1439   HWord_t reg_out;
1440   unsigned char * byteIn, * byteOut;
1441   r14 = (HWord_t)viargs;
1442   // Just try the instruction an arbitrary number of times at different r15 offsets.
1443   for (i = 0; i < 3; i++) {
1444      int j, k;
1445      reg_out = 0;
1446      r15 = i * 4;
1447      __asm__ __volatile__ ("ldbrx          %0, %1, %2" : "=r" (reg_out): "b" (r14),"r" (r15));
1448      byteIn = ((unsigned char *)(r14 + r15));
1449      byteOut = (unsigned char *)&reg_out;
1450
1451      printf("ldbrx:");
1452      for (k = 0; k < 8; k++) {
1453         printf( " %02x", (byteIn[k]));
1454      }
1455      printf(" (reverse) =>");
1456      for (j = 0; j < 8; j++) {
1457         printf( " %02x", (byteOut[j]));
1458      }
1459      printf("\n");
1460   }
1461   printf( "\n" );
1462}
1463
1464static void
1465test_popcntd(void)
1466{
1467   uint64_t res;
1468   unsigned long long src = 0x9182736405504536ULL;
1469   r14 = src;
1470   __asm__ __volatile__ ("popcntd          %0, %1" : "=r" (res): "r" (r14));
1471   printf("popcntd: 0x%llx => %d\n", src, (int)res);
1472   printf( "\n" );
1473}
1474#endif
1475
1476static void
1477test_lfiwzx(void)
1478{
1479   unsigned int i;
1480   unsigned int * src;
1481   uint64_t reg_out;
1482   r14 = (HWord_t)viargs;
1483   // Just try the instruction an arbitrary number of times at different r15 offsets.
1484   for (i = 0; i < 3; i++) {
1485      reg_out = 0;
1486      r15 = i * 4;
1487      __asm__ __volatile__ ("lfiwzx          %0, %1, %2" : "=d" (reg_out): "b" (r14),"r" (r15));
1488      src = ((unsigned int *)(r14 + r15));
1489      printf("lfiwzx: %u => %llu.00\n", *src, (unsigned long long)reg_out);
1490
1491   }
1492   printf( "\n" );
1493}
1494
1495static void test_vx_fp_ops(void)
1496{
1497
1498   test_func_t func;
1499   int k;
1500   char * test_name = (char *)malloc(20);
1501   k = 0;
1502
1503   build_special_fargs_table();
1504   while ((func = vx_fp_tests[k].test_func)) {
1505      int i, condreg, repeat = 0;
1506      unsigned int flags;
1507      unsigned long long * frap, * frbp, * dst;
1508      vx_fp_test_t test_group = vx_fp_tests[k];
1509      vx_fp_test_type test_type = test_group.test_type;
1510
1511      switch (test_type) {
1512         case VX_FP_CMP:
1513            strcpy(test_name, "xscmp");
1514            if (!repeat) {
1515               repeat = 1;
1516               strcat(test_name, "udp");
1517               do_cmpudp = 1;
1518            }
1519            break;
1520         case VX_FP_SMA:
1521         case VX_FP_SMS:
1522         case VX_FP_SNMA:
1523            if (test_type == VX_FP_SMA)
1524               strcpy(test_name, "xsmadd");
1525            else if (test_type == VX_FP_SMS)
1526               strcpy(test_name, "xsmsub");
1527            else
1528               strcpy(test_name, "xsnmadd");
1529            if (!repeat) {
1530               repeat = 1;
1531               strcat(test_name, "adp");
1532               do_adp = 1;
1533            }
1534            break;
1535         case VX_FP_OTHER:
1536            strcpy(test_name, test_group.name);
1537            break;
1538         default:
1539            printf("ERROR:  Invalid VX FP test type %d\n", test_type);
1540            exit(1);
1541      }
1542
1543again:
1544      for (i = 0; i < test_group.num_tests; i++) {
1545         unsigned int * inA, * inB, * pv;
1546         double * dpA = (double *)&vec_inA;
1547         double * dpB = (double *)&vec_inB;
1548         double * dpT = (double *)&vec_out;
1549
1550         fp_test_args_t aTest = test_group.targs[i];
1551         inA = (unsigned int *)&spec_fargs[aTest.fra_idx];
1552         inB = (unsigned int *)&spec_fargs[aTest.frb_idx];
1553         frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
1554         frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
1555         // Only need to copy one doubleword into each vector's element 0
1556         if (isLE) {
1557            // With LE, vector element 0 is the second doubleword from the left
1558            memset(dpA, 0, 8);
1559            memset(dpB, 0, 8);
1560            dpA++;
1561            dpB++;
1562         }
1563         memcpy(dpA, inA, 8);
1564         memcpy(dpB, inB, 8);
1565
1566         switch (test_type) {
1567            case VX_FP_CMP:
1568               SET_FPSCR_ZERO;
1569               SET_CR_XER_ZERO;
1570               (*func)();
1571               GET_CR(flags);
1572               condreg = (flags & 0x0f000000) >> 24;
1573               printf("#%d: %s %016llx <=> %016llx ? %x (CRx)\n", i, test_name, *frap, *frbp, condreg);
1574              // printf("\tFRA: %e;  FRB: %e\n", spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx]);
1575               if ( condreg != aTest.cr_flags) {
1576                  printf("Error: Expected CR flags 0x%x; actual flags: 0x%x\n", aTest.cr_flags, condreg);
1577               }
1578               break;
1579            case VX_FP_SMA:
1580            case VX_FP_SMS:
1581            case VX_FP_SNMA:
1582            case VX_FP_OTHER:
1583            {
1584               int idx;
1585               unsigned long long vsr_XT;
1586               pv = (unsigned int *)&vec_out;
1587               // clear vec_out
1588               for (idx = 0; idx < 4; idx++, pv++)
1589                  *pv = 0;
1590
1591               if (test_type != VX_FP_OTHER) {
1592                  /* Then we need a third src argument, which is stored in element 0 of
1593                   * VSX[XT] -- i.e., vec_out.  For the xs<ZZZ>mdp cases, VSX[XT] holds
1594                   * src3 and VSX[XB] holds src2; for the xs<ZZZ>adp cases, VSX[XT] holds
1595                   * src2 and VSX[XB] holds src3.  The fp_test_args_t that holds the test
1596                   * data (input args) contain only two inputs, so I arbitrarily
1597                   * use spec_fargs elements 4 and 14 (alternating) for the third source
1598                   * argument.  We can use the same input data for a given pair of
1599                   * adp/mdp-type instructions by swapping the src2 and src3 arguments; thus
1600                   * the expected result should be the same.
1601                   */
1602                  int extra_arg_idx;
1603                  if (i % 2)
1604                     extra_arg_idx = 4;
1605                  else
1606                     extra_arg_idx = 14;
1607
1608                  if (repeat) {
1609                     /* We're on the first time through of one of the VX_FP_SMx
1610                      * test types, meaning we're testing a xs<ZZZ>adp case, thus we
1611                      * have to swap inputs as described above:
1612                      *    src2 <= VSX[XT]
1613                      *    src3 <= VSX[XB]
1614                      */
1615                     if (isLE)
1616                        dpT++;
1617                     memcpy(dpT, inB, 8);  // src2
1618                     memcpy(dpB, &spec_fargs[extra_arg_idx], 8);  //src3
1619                     frbp = (unsigned long long *)&spec_fargs[extra_arg_idx];
1620                  } else {
1621                     // Don't need to init src2, as it's done before the switch()
1622                     if (isLE)
1623                        dpT++;
1624                     memcpy(dpT, &spec_fargs[extra_arg_idx], 8);  //src3
1625                  }
1626                  memcpy(&vsr_XT, dpT, 8);
1627               }
1628
1629               (*func)();
1630               dst = (unsigned long long *) &vec_out;
1631               if (isLE)
1632                  dst++;
1633               if (test_type == VX_FP_OTHER)
1634                  printf("#%d: %s %016llx %016llx = %016llx\n", i, test_name, *frap, *frbp, *dst);
1635               else
1636                  printf( "#%d: %s %016llx %016llx %016llx = %016llx\n", i,
1637                          test_name, vsr_XT, *frap, *frbp, *dst );
1638
1639               /*
1640              {
1641                  // Debug code.  Keep this block commented out except when debugging.
1642                  double result, expected;
1643                  memcpy(&result, dst, 8);
1644                  memcpy(&expected, &aTest.dp_bin_result, 8);
1645                  printf( "\tFRA + FRB: %e + %e: Expected = %e; Actual = %e\n",
1646                          spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx],
1647                          expected, result );
1648               }
1649              */
1650               break;
1651            }
1652         }
1653
1654
1655      }
1656      printf( "\n" );
1657
1658      if (repeat) {
1659         repeat = 0;
1660         switch (test_type) {
1661            case VX_FP_CMP:
1662               strcpy(test_name, "xscmp");
1663               strcat(test_name, "odp");
1664               do_cmpudp = 0;
1665               break;
1666            case VX_FP_SMA:
1667            case VX_FP_SMS:
1668            case VX_FP_SNMA:
1669               if (test_type == VX_FP_SMA)
1670                  strcpy(test_name, "xsmadd");
1671               else if (test_type == VX_FP_SMS)
1672                  strcpy(test_name, "xsmsub");
1673               else
1674                  strcpy(test_name, "xsnmadd");
1675               strcat(test_name, "mdp");
1676               do_adp = 0;
1677               break;
1678            case VX_FP_OTHER:
1679               break;
1680         }
1681         goto again;
1682      }
1683      k++;
1684   }
1685   printf( "\n" );
1686   free(test_name);
1687}
1688
1689static void test_xs_conv_ops(void)
1690{
1691
1692   test_func_t func;
1693   int k = 0;
1694   double * dpB = (double *)&vec_inB;
1695   if (isLE) {
1696      memset(dpB, 0, 8);
1697      dpB++;
1698   }
1699
1700   build_special_fargs_table();
1701   while ((func = xs_conv_tests[k].test_func)) {
1702      int i;
1703      unsigned long long * frbp, * dst;
1704      xs_conv_test_t test_group = xs_conv_tests[k];
1705      for (i = 0; i < test_group.num_tests; i++) {
1706         unsigned int * inB, * pv;
1707         int idx;
1708         inB = (unsigned int *)&spec_fargs[i];
1709         frbp = (unsigned long long *)&spec_fargs[i];
1710
1711         memcpy(dpB, inB, 8);
1712         pv = (unsigned int *)&vec_out;
1713         // clear vec_out
1714         for (idx = 0; idx < 4; idx++, pv++)
1715            *pv = 0;
1716         (*func)();
1717         dst = (unsigned long long *) &vec_out;
1718         if (isLE)
1719            dst++;
1720         printf("#%d: %s %016llx => %016llx\n", i, test_group.name, *frbp, *dst);
1721
1722      }
1723      k++;
1724      printf("\n");
1725   }
1726   printf( "\n" );
1727}
1728
1729static void do_load_test(ldst_test_t loadTest)
1730{
1731   test_func_t func;
1732   unsigned int *src, *dst;
1733   int splat = loadTest.type == VSX_LOAD_SPLAT ? 1: 0;
1734   int i, j, m, k;
1735   i = j = 0;
1736
1737   func = loadTest.test_func;
1738   for (i = 0, r14 = (HWord_t) loadTest.base_addr; i < NUM_VIARGS_VECS; i++) {
1739      int again;
1740      j = 0;
1741       r14 += i * 16;
1742      do {
1743         unsigned int * pv = (unsigned int *)&vec_out;
1744         int idx;
1745         // clear vec_out
1746         for (idx = 0; idx < 4; idx++, pv+=idx)
1747            *pv = 0;
1748
1749         again = 0;
1750         r15 = j;
1751
1752         // execute test insn
1753         (*func)();
1754
1755         src = (unsigned int*) (((unsigned char *)r14) + j);
1756         dst = (unsigned int*) &vec_out;
1757
1758         printf( "%s:", loadTest.name);
1759         for (m = 0; m < loadTest.num_words_to_process; m++) {
1760            printf( " %08x", src[splat ? m % 2 : m]);
1761         }
1762         printf( " =>");
1763         m = 0;
1764         k = loadTest.num_words_to_process;
1765         if (isLE) {
1766            if (loadTest.num_words_to_process == 2) {
1767               m = 2;
1768               k += 2;
1769            }
1770         }
1771
1772         for (; m < k; m++) {
1773            printf( " %08x", dst[m]);
1774         }
1775         printf("\n");
1776         if (j == 0 && loadTest.offset) {
1777            again = 1;
1778            j += loadTest.offset;
1779         }
1780      }
1781      while (again);
1782   }
1783}
1784
1785static void
1786do_store_test ( ldst_test_t storeTest )
1787{
1788   test_func_t func;
1789   unsigned int *src, *dst;
1790   int m;
1791
1792   func = storeTest.test_func;
1793   r14 = (HWord_t) storeTest.base_addr;
1794   r15 = (HWord_t) storeTest.offset;
1795   unsigned int * pv = (unsigned int *) storeTest.base_addr;
1796   int idx;
1797   // clear out storage destination
1798   for (idx = 0; idx < 4; idx++, pv += idx)
1799      *pv = 0;
1800
1801   memcpy(&vec_inA, &viargs[0], sizeof(vector unsigned char));
1802
1803   // execute test insn
1804   (*func)();
1805   src = &viargs[0];
1806   dst = (unsigned int*) (((unsigned char *) r14) + storeTest.offset);
1807
1808   printf( "%s:", storeTest.name );
1809   for (m = 0; m < storeTest.num_words_to_process; m++) {
1810      printf( " %08x", src[m] );
1811   }
1812   printf( " =>" );
1813   for (m = 0; m < storeTest.num_words_to_process; m++) {
1814      printf( " %08x", dst[m] );
1815   }
1816   printf( "\n" );
1817}
1818
1819
1820static void test_ldst(void)
1821{
1822   int k = 0;
1823
1824   while (ldst_tests[k].test_func) {
1825      if (ldst_tests[k].type == VSX_STORE)
1826         do_store_test(ldst_tests[k]);
1827      else
1828         do_load_test(ldst_tests[k]);
1829      k++;
1830      printf("\n");
1831   }
1832}
1833
1834static void test_ftdiv(void)
1835{
1836   int i, num_tests, crx;
1837   unsigned int flags;
1838   unsigned long long * frap, * frbp;
1839   build_special_fargs_table();
1840
1841   num_tests = sizeof ftdiv_tests/sizeof ftdiv_tests[0];
1842
1843   for (i = 0; i < num_tests; i++) {
1844      fp_test_args_t aTest = ftdiv_tests[i];
1845      f14 = spec_fargs[aTest.fra_idx];
1846      f15 = spec_fargs[aTest.frb_idx];
1847      frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
1848      frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
1849      SET_FPSCR_ZERO;
1850      SET_CR_XER_ZERO;
1851      __asm__ __volatile__ ("ftdiv           cr1, %0, %1" : : "d" (f14), "d" (f15));
1852      GET_CR(flags);
1853      crx = (flags & 0x0f000000) >> 24;
1854      printf( "ftdiv: %016llx <=> %016llx ? %x (CRx)\n", *frap, *frbp, crx);
1855//      printf("\tFRA: %e;  FRB: %e\n", f14, f15);
1856      if ( crx != aTest.cr_flags) {
1857         printf("Error: Expected CR flags 0x%x; actual flags: 0x%x\n", aTest.cr_flags, crx);
1858      }
1859   }
1860   printf( "\n" );
1861}
1862
1863
1864static void test_p7_fpops ( void )
1865{
1866   int k = 0;
1867   test_func_t func;
1868
1869   build_fargs_table();
1870   while ((func = fp_tests[k].test_func)) {
1871      float res;
1872      double resd;
1873      unsigned long long u0;
1874      int i;
1875      int res32 = strcmp(fp_tests[k].name, "fcfidu");
1876
1877      for (i = 0; i < nb_fargs; i++) {
1878         u0 = *(unsigned long long *) (&fargs[i]);
1879         f14 = fargs[i];
1880         (*func)();
1881         if (res32) {
1882            res = f17;
1883            printf( "%s %016llx => (raw sp) %08x)",
1884                    fp_tests[k].name, u0, *((unsigned int *)&res));
1885         } else {
1886            resd = f17;
1887            printf( "%s %016llx => (raw sp) %016llx)",
1888                    fp_tests[k].name, u0, *(unsigned long long *)(&resd));
1889         }
1890         printf( "\n" );
1891      }
1892
1893      k++;
1894      printf( "\n" );
1895   }
1896}
1897
1898static void test_vsx_logic(void)
1899{
1900   logic_test_t aTest;
1901   test_func_t func;
1902   int k;
1903   k = 0;
1904
1905   while ((func = logic_tests[k].test_func)) {
1906      unsigned int * pv;
1907      int startA, startB;
1908      unsigned int * inA, * inB, * dst;
1909      int idx, i;
1910      startA = 0;
1911      aTest = logic_tests[k];
1912      for (i = 0; i <= (NUM_VIARGS_INTS - (NUM_VIARGS_VECS * sizeof(int))); i++, startA++) {
1913         startB = startA + 4;
1914         pv = (unsigned int *)&vec_out;
1915         inA = &viargs[startA];
1916         inB = &viargs[startB];
1917         memcpy(&vec_inA, inA, sizeof(vector unsigned char));
1918         memcpy(&vec_inB, inB, sizeof(vector unsigned char));
1919         // clear vec_out
1920         for (idx = 0; idx < 4; idx++, pv++)
1921            *pv = 0;
1922
1923         // execute test insn
1924         (*func)();
1925         dst = (unsigned int*) &vec_out;
1926
1927         printf( "%s:", aTest.name);
1928         printf( " %08x %08x %08x %08x %s", inA[0], inA[1], inA[2], inA[3], aTest.name);
1929         printf( " %08x %08x %08x %08x", inB[0], inB[1], inB[2], inB[3]);
1930         printf(" => %08x %08x %08x %08x\n", dst[0], dst[1], dst[2], dst[3]);
1931
1932      }
1933      k++;
1934   }
1935   printf( "\n" );
1936}
1937
1938static vector unsigned long long vec_args[] __attribute__ ((aligned (16))) =
1939{
1940 { 0x0123456789abcdefULL, 0x0011223344556677ULL},
1941 { 0x8899aabb19293942ULL, 0xa1a2a3a4b1b2b3b4ULL},
1942 { 0xc1c2c3c4d1d2d3d4ULL, 0x7a6b5d3efc032778ULL}
1943};
1944#define NUM_VEC_ARGS_LONGS (sizeof vec_args/sizeof vec_args[0])
1945
1946static void test_move_ops (void)
1947{
1948   move_test_t aTest;
1949   test_func_t func;
1950   int k;
1951   k = 0;
1952
1953   while ((func = move_tests[k].test_func)) {
1954      unsigned int * pv;
1955      int startA, startB;
1956      unsigned long long * inA, * inB, * dst;
1957      int use_vecA = (strcmp(move_tests[k].name, "xscpsgndp") == 0);
1958      int idx;
1959      inA = NULL;
1960      aTest = move_tests[k];
1961      for (startB = 0; startB < NUM_VEC_ARGS_LONGS; startB++) {
1962         inB = (unsigned long long *)&vec_args[startB];
1963         memcpy(&vec_inB, inB, sizeof(vector unsigned char));
1964         if (isLE)
1965            inB++;
1966         startA = 0;
1967repeat:
1968         if (use_vecA) {
1969            inA = (unsigned long long *)&vec_args[startA];
1970            memcpy(&vec_inA, inA, sizeof(vector unsigned char));
1971            startA++;
1972         }
1973         pv = (unsigned int *)&vec_out;
1974         // clear vec_out
1975         for (idx = 0; idx < 4; idx++, pv++)
1976            *pv = 0;
1977
1978         // execute test insn
1979         (*func)();
1980         dst = (unsigned long long *) &vec_out;
1981         if (isLE) {
1982            dst++;
1983            inA++;
1984         }
1985
1986         printf( "%s:", aTest.name);
1987         if (use_vecA)
1988            printf( " X[A]: %016llx ", *inA);
1989         printf( " X[B]: %016llx", *inB);
1990         printf(" => %016llx\n", *dst);
1991
1992         if (use_vecA && startA < NUM_VEC_ARGS_LONGS)
1993            goto repeat;
1994      }
1995      k++;
1996      printf( "\n" );
1997   }
1998}
1999
2000static void test_permute_ops (void)
2001{
2002  permute_test_t *aTest;
2003  unsigned int *dst = (unsigned int *) &vec_out;
2004
2005  for (aTest = &(permute_tests[0]); aTest->test_func != NULL; aTest++)
2006    {
2007      /* Grab test input and clear output vector.  */
2008      memcpy(&vec_inA, aTest->xa, sizeof(vec_inA));
2009      memcpy(&vec_inB, aTest->xb, sizeof(vec_inB));
2010      memset(dst, 0, sizeof(vec_out));
2011
2012      /* execute test insn */
2013      aTest->test_func();
2014
2015      printf( "%s:\n", aTest->name);
2016      printf( "        XA[%08x,%08x,%08x,%08x]\n",
2017              aTest->xa[0], aTest->xa[1], aTest->xa[2], aTest->xa[3]);
2018      printf( "        XB[%08x,%08x,%08x,%08x]\n",
2019              aTest->xb[0], aTest->xb[1], aTest->xb[2], aTest->xb[3]);
2020      printf( "   =>   XT[%08x,%08x,%08x,%08x]\n",
2021              dst[0], dst[1], dst[2], dst[3]);
2022
2023    }
2024  printf( "\n" );
2025}
2026
2027static test_table_t all_tests[] = { { &test_ldst,
2028                                       "Test VSX load/store instructions" },
2029                                     { &test_vsx_logic,
2030                                       "Test VSX logic instructions" },
2031#ifdef __powerpc64__
2032                                     { &test_ldbrx,
2033                                       "Test ldbrx instruction" },
2034                                     { &test_popcntd,
2035                                       "Test popcntd instruction" },
2036#endif
2037                                     { &test_lfiwzx,
2038                                       "Test lfiwzx instruction" },
2039                                     { &test_p7_fpops,
2040                                       "Test P7 floating point convert instructions"},
2041                                     { &test_ftdiv,
2042                                       "Test ftdiv instruction" },
2043                                     { &test_move_ops,
2044                                       "Test VSX move instructions"},
2045                                     { &test_permute_ops,
2046                                       "Test VSX permute instructions"},
2047                                     { &test_vx_fp_ops,
2048                                       "Test VSX floating point instructions"},
2049                                     { &test_xs_conv_ops,
2050                                       "Test VSX scalar integer conversion instructions" },
2051                                     { NULL, NULL }
2052};
2053#endif // HAS_VSX
2054
/* Entry point.  When HAS_VSX is defined, prints the heading of each entry in
 * all_tests and runs its test function, stopping at the first entry whose
 * test_category is NULL.  Without HAS_VSX nothing is run.  Always returns 0;
 * argc and argv are not used.
 */
int main(int argc, char *argv[])
{
#ifdef HAS_VSX

   int i;

   for (i = 0; all_tests[i].test_category; i++) {
      test_table_t aTest = all_tests[i];
      test_func_t func = aTest.test_category;

      printf( "%s\n", aTest.name );
      (*func)();
   }

#endif // HAS_VSX

   return 0;
}
2074