1/*  Copyright (C) 2012 IBM
2
3 Author: Maynard Johnson <maynardj@us.ibm.com>
4         Carl Love <carll@us.ibm.com>
5
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307, USA.
20
21 The GNU General Public License is contained in the file COPYING.
22 */
23
24#include <stdio.h>
25#include <stdlib.h>
26#include <stdint.h>
27#include <string.h>
28#include <elf.h>
29#include <link.h>
30
/* AT_HWCAP bit tested by get_vsx(): Vector Scalar Extension available.
 * Guarded so that a C library header which already provides the macro
 * does not trigger a redefinition diagnostic.
 */
#ifndef PPC_FEATURE_HAS_VSX
#define PPC_FEATURE_HAS_VSX  0x00000080 /* Vector Scalar Extension. */
#endif
32
33#if defined(HAS_DFP)
34
35register double f14 __asm__ ("fr14");
36register double f15 __asm__ ("fr15");
37register double f16 __asm__ ("fr16");
38register double f17 __asm__ ("fr17");
39register double f18 __asm__ ("fr18");
40register double f19 __asm__ ("fr19");
41
/* Minimal boolean type; used for the cr_supported field of dfp_test_t. */
typedef unsigned char Bool;

#define False 0
#define True 1
45
/* FPSCR access helpers (inline asm).
 *
 * SET_FPSCR_ZERO  - issues "mtfsf 0xFF" with a register holding 0.0.
 * GET_FPSCR(_arg) - issues "mffs" and stores the result in _arg, which must
 *                   be an lvalue usable with an "=f" constraint.
 * SET_FPSCR_DRN   - issues the four-operand "mtfsf 1, <f14>, 0, 1" using the
 *                   current contents of the global register variable f14.
 *                   NOTE(review): per the macro name this updates the decimal
 *                   rounding mode (DRN) field; confirm the operand encoding
 *                   against the Power ISA.
 */
#define SET_FPSCR_ZERO                                         \
   do {                                                        \
      double _d = 0.0;                                         \
      __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) );    \
   } while (0)

#define GET_FPSCR(_arg)                                        \
   __asm__ __volatile__ ("mffs %0"  : "=f"(_arg) )

#define SET_FPSCR_DRN                                          \
   __asm__ __volatile__ ("mtfsf  1, %0, 0, 1" :  : "f"(f14) )
56
/* Shift counts exercised by the dscri/dscli/dscriq/dscliq tests.  They are
 * used both as switch labels and as immediate asm operands.
 */
#define SH_0  0
#define SH_1  1
#define SH_2  15
#define SH_3  63

/* Number of rounding-mode values each test group is run with (0..7). */
#define NUM_RND_MODES  8

/* Condition-register field mask and shift; not referenced in this part of
 * the file.
 */
#define CONDREG_MASK  0x0f000000
#define CONDREG_SHIFT 24
65
/* Environment vector handed to main(); must be set before __auxv_find()
 * is called.
 */
static char ** my_envp;

/* Walk my_envp to its terminating NULL entry and return the address of the
 * slot immediately after it.  The caller treats that address as the start
 * of the auxiliary vector.
 */
static inline char** __auxv_find(void)
{
   char **cursor = my_envp;

   while (*cursor != NULL)
      cursor++;

   /* Step over the NULL terminator itself. */
   return cursor + 1;
}
76
/* Return the AT_HWCAP value from the auxiliary vector located by
 * __auxv_find(), or 0 if no AT_HWCAP entry is present.
 *
 * A non-zero result is cached in a function-local static, so the vector is
 * scanned again only while the cached value is still 0.
 */
static unsigned long fetch_at_hwcap(void)
{
   static unsigned long auxv_hwcap = 0;
   ElfW(auxv_t) *entry;

   if (auxv_hwcap != 0)
      return auxv_hwcap;

   /* Scan entries until the AT_NULL terminator. */
   for (entry = (ElfW(auxv_t) *) __auxv_find();
        entry->a_type != AT_NULL;
        entry++) {
      if (entry->a_type == AT_HWCAP) {
         auxv_hwcap = entry->a_un.a_val;
         break;
      }
   }

   return auxv_hwcap;
}
95
96int get_vsx(void)
97{
98   /* Check to see if the AUX vector has the bit set indicating the HW
99    * supports the vsx instructions.  This implies the processor is
100    * at least a POWER 7.
101    */
102   unsigned long hwcap;
103
104   hwcap = fetch_at_hwcap();
105   if ((hwcap & PPC_FEATURE_HAS_VSX) == PPC_FEATURE_HAS_VSX)
106      return 1;
107
108   return 0;
109}
110
111/* The assembly-level instructions being tested */
112static void _test_dscri (int shift)
113{
114   switch(shift) {
115   case SH_0:
116      __asm__ __volatile__ ("dscri  %0, %1, %2" : "=f" (f18) : "f" (f14), "i" (SH_0));
117      break;
118
119   case SH_1:
120      __asm__ __volatile__ ("dscri  %0, %1, %2" : "=f" (f18) : "f" (f14), "i" (SH_1));
121      break;
122
123   case SH_2:
124      __asm__ __volatile__ ("dscri  %0, %1, %2" : "=f" (f18) : "f" (f14), "i" (SH_2));
125      break;
126
127   case SH_3:
128      __asm__ __volatile__ ("dscri  %0, %1, %2" : "=f" (f18) : "f" (f14), "i" (SH_3));
129      break;
130   default:
131      printf(" dscri, unsupported shift case %d\n", shift);
132   }
133}
134
135static void _test_dscli (int shift)
136{
137   switch(shift) {
138   case SH_0:
139      __asm__ __volatile__ ("dscli  %0, %1, %2" : "=f" (f18) : "f" (f14), "i" (SH_0));
140      break;
141
142   case SH_1:
143      __asm__ __volatile__ ("dscli  %0, %1, %2" : "=f" (f18) : "f" (f14), "i" (SH_1));
144      break;
145
146   case SH_2:
147      __asm__ __volatile__ ("dscli  %0, %1, %2" : "=f" (f18) : "f" (f14), "i" (SH_2));
148      break;
149
150   case SH_3:
151      __asm__ __volatile__ ("dscli  %0, %1, %2" : "=f" (f18) : "f" (f14), "i" (SH_3));
152      break;
153   default:
154      printf(" dscli, unsupported shift case %d\n", shift);
155   }
156}
157
158static void _test_dctdp (void)
159{
160   __asm__ __volatile__ ("dctdp  %0, %1" : "=f" (f18) : "f" (f14));
161}
162
163static void _test_drsp (void)
164{
165   __asm__ __volatile__ ("drsp  %0, %1" : "=f" (f18) : "f" (f14));
166}
167
168static void _test_dctfix (void)
169{
170   __asm__ __volatile__ ("dctfix  %0, %1" : "=f" (f18) : "f" (f14));
171}
172
173/* Power 7 and newer processors support this instruction */
174static void _test_dcffix (void)
175{
176   __asm__ __volatile__ ("dcffix  %0, %1" : "=f" (f18) : "f" (f14));
177}
178
179static void _test_dscriq (int shift)
180{
181   switch(shift) {
182   case SH_0:
183      __asm__ __volatile__ ("dscriq  %0, %1, %2" : "=f" (f18) : "f" (f14), "i" (SH_0));
184      break;
185   case SH_1:
186      __asm__ __volatile__ ("dscriq  %0, %1, %2" : "=f" (f18) : "f" (f14), "i" (SH_1));
187      break;
188   case SH_2:
189      __asm__ __volatile__ ("dscriq  %0, %1, %2" : "=f" (f18) : "f" (f14), "i" (SH_2));
190      break;
191   case SH_3:
192      __asm__ __volatile__ ("dscriq  %0, %1, %2" : "=f" (f18) : "f" (f14), "i" (SH_3));
193      break;
194   default:
195      printf(" dscriq, unsupported shift case %d\n", shift);
196   }
197}
198
199static void _test_dscliq (int shift)
200{
201   switch(shift) {
202   case SH_0:
203      __asm__ __volatile__ ("dscliq  %0, %1, %2" : "=f" (f18) : "f" (f14), "i" (SH_0));
204      break;
205   case SH_1:
206      __asm__ __volatile__ ("dscliq  %0, %1, %2" : "=f" (f18) : "f" (f14), "i" (SH_1));
207      break;
208   case SH_2:
209      __asm__ __volatile__ ("dscliq  %0, %1, %2" : "=f" (f18) : "f" (f14), "i" (SH_2));
210      break;
211   case SH_3:
212      __asm__ __volatile__ ("dscliq  %0, %1, %2" : "=f" (f18) : "f" (f14), "i" (SH_3));
213      break;
214   default:
215      printf(" dscliq, unsupported shift case %d\n", shift);
216   }
217}
218
219static void _test_dctqpq (void)
220{
221   __asm__ __volatile__ ("dctqpq  %0, %1" : "=f" (f18) : "f" (f14));
222}
223
224static void _test_dctfixq (void)
225{
226   __asm__ __volatile__ ("dctfixq  %0, %1" : "=f" (f18) : "f" (f14));
227}
228
229static void _test_drdpq (void)
230{
231   __asm__ __volatile__ ("drdpq  %0, %1" : "=f" (f18) : "f" (f14));
232}
233
234static void _test_dcffixq (void)
235{
236   __asm__ __volatile__ ("dcffixq  %0, %1" : "=f" (f18) : "f" (f14));
237}
238
/* Pointer to an instruction wrapper.  Deliberately declared without a
 * parameter list: the test tables store both the no-argument wrappers and
 * the shift wrappers that take an int.
 */
typedef void (*test_func_t)();

/* Pointer to a test-category driver as called from main(). */
typedef void (*test_func_main_t)(int);

/* Pointer to a shift-instruction wrapper taking the shift count. */
typedef void (*test_func_shift_t)(int);

/* One entry of the top-level test list: a driver plus the heading that is
 * printed before it runs.
 */
struct test_table
{
   test_func_main_t test_category;
   char * name;
};
typedef struct test_table test_table_t;
247
/* 128-bit DFP operands, stored as consecutive pairs of 64-bit words.
 * For operand index n the drivers load element [2*n] into f14 and element
 * [2*n + 1] into f15.
 */
static unsigned long long dfp128_vals[] = {
   /* Some finite numbers */
   0x2207c00000000000ULL, 0x0000000000000e50ULL,
   0x2f07c00000000000ULL, 0x000000000014c000ULL,   /* large number */
   0xa207c00000000000ULL, 0x00000000000000e0ULL,
   0x2206c00000000000ULL, 0x00000000000000cfULL,
   0xa205c00000000000ULL, 0x000000010a395bcfULL,
   0x6209400000fd0000ULL, 0x00253f1f534acdd4ULL,   /* a small number */
   0x000400000089b000ULL, 0x0a6000d000000049ULL,   /* very small number */

   /* Flavors of zero */
   0x2208000000000000ULL, 0x0000000000000000ULL,
   0xa208000000000000ULL, 0x0000000000000000ULL,   /* negative */
   0xa248000000000000ULL, 0x0000000000000000ULL,

   /* Flavors of NaN */
   0x7c00000000000000ULL, 0x0000000000000000ULL,   /* quiet */
   0xfc00000000000000ULL, 0xc00100035b007700ULL,
   0x7e00000000000000ULL, 0xfe000000d0e0a0d0ULL,   /* signaling */

   /* Flavors of Infinity */
   0x7800000000000000ULL, 0x0000000000000000ULL,
   0xf800000000000000ULL, 0x0000000000000000ULL,   /* negative */
   0xf900000000000000ULL, 0x0000000000000000ULL
};
270
/* 64-bit integer inputs for the dcffix/dcffixq tests, stored as raw bit
 * patterns (negative values are in two's complement).
 */
static unsigned long long int64_vals[] = {
   0x0ULL,                   /* zero */
   0x1ULL,                   /* one */
   0xffffffffffffffffULL,    /* minus one */
   0x2386f26fc0ffffULL,      /*  9999999999999999 */
   0xffdc790d903f0001ULL,    /* -9999999999999999 */
   0x462d53c8abac0ULL,       /*  1234567890123456 */
   0xfffb9d2ac3754540ULL,    /* -1234567890123456 */
};
281
/* 64-bit DFP operands as raw bit patterns.  The drivers load the selected
 * element into f14 for LONG_TEST cases.
 */
static unsigned long long dfp64_vals[] = {
   /* Various finite numbers */
   0x2234000000000e50ULL,
   0x223400000014c000ULL,
   0xa2340000000000e0ULL,   /* negative */
   0x22240000000000cfULL,
   0xa21400010a395bcfULL,   /* negative */
   0x6e4d3f1f534acdd4ULL,   /* large number */
   0x000400000089b000ULL,   /* very small number */

   /* Flavors of zero */
   0x2238000000000000ULL,
   0xa238000000000000ULL,
   0x4248000000000000ULL,

   /* Flavors of NaN */
   0x7e34000000000111ULL,
   0xfe000000d0e0a0d0ULL,   /* signaling */
   0xfc00000000000000ULL,   /* quiet */

   /* Flavors of Infinity */
   0x7800000000000000ULL,
   0xf800000000000000ULL,   /* negative */
   0x7a34000000000000ULL,
};
304
305
/* One test case.  fra_idx selects the input value from the value table the
 * driver uses; frb_idx carries the shift count for the shift tests and is
 * unused by the other drivers.
 */
struct dfp_test_args {
   int fra_idx;
   int frb_idx;
};
typedef struct dfp_test_args dfp_test_args_t;
310
311
312/* Index pairs from dfp64_vals or dfp128_vals array to be used with
313 * dfp_two_arg_tests */
314static dfp_test_args_t int64_args_x1[] = {
315  /*                        {int64 input val, unused } */
316                                          {0, 0},
317                                          {1, 0},
318                                          {2, 0},
319                                          {3, 0},
320                                          {4, 0},
321                                          {5, 0},
322                                          {6, 0},
323};
324
325static dfp_test_args_t dfp_2args_x1[] = {
326  /*                               {dfp_arg, shift_arg} */
327                                         {0, SH_0},
328                                         {0, SH_1},
329                                         {0, SH_2},
330                                         {0, SH_3},
331                                         {5, SH_0},
332                                         {5, SH_1},
333                                         {5, SH_2},
334                                         {5, SH_3},
335                                         {6, SH_0},
336                                         {6, SH_1},
337                                         {6, SH_2},
338                                         {6, SH_3},
339                                         {7, SH_0},
340                                         {7, SH_1},
341                                         {7, SH_2},
342                                         {7, SH_3},
343                                         {10, SH_0},
344                                         {10, SH_1},
345                                         {10, SH_2},
346                                         {10, SH_3},
347                                         {13, SH_0},
348                                         {13, SH_1},
349                                         {13, SH_2},
350                                         {13, SH_3},
351};
352
353/* Index pairs from dfp64_vals array to be used with dfp_one_arg_tests */
354static dfp_test_args_t dfp_1args_x1[] = {
355  /*                               {dfp_arg, unused} */
356                                         {0, 0},
357                                         {1, 0},
358                                         {2, 0},
359                                         {3, 0},
360                                         {4, 0},
361                                         {5, 0},
362                                         {6, 0},
363                                         {7, 0},
364                                         {8, 0},
365                                         {9, 0},
366                                         {10, 0},
367                                         {11, 0},
368                                         {12, 0},
369                                         {13, 0},
370                                         {14, 0},
371};
372
/* Operand width of a test group.  LONG_TEST has the value 0 and QUAD_TEST
 * the value 1; the drivers pick the 64-bit or the 128-bit value table
 * accordingly.
 */
typedef enum {
   LONG_TEST = 0,
   QUAD_TEST = 1
} precision_type_t;
377
378typedef struct dfp_test
379{
380   test_func_t test_func;
381   const char * name;
382   dfp_test_args_t * targs;
383   int num_tests;
384   precision_type_t precision;
385   const char * op;
386   Bool cr_supported;
387} dfp_test_t;
388
389/* The dcffix and dcffixq tests are a little different in that they both take
390 * an I64 input.
391 */
392static dfp_test_t
393dfp_dcffix_dcffixq_tests[] = {
394                              { &_test_dcffixq,"dcffixq", int64_args_x1, 7, QUAD_TEST, "I64S->D128", True},
395                              /* Power 7 instruction */
396                              { &_test_dcffix, "dcffix",  int64_args_x1, 7, LONG_TEST, "I64S->D64", True},
397                              { NULL, NULL, NULL, 0, 0, NULL}
398};
399
400static dfp_test_t
401dfp_one_arg_tests[] = {
402                       { &_test_dctdp,  "dctdp",   dfp_1args_x1, 15, LONG_TEST, "D32->D64", True},
403                       { &_test_drsp,   "drsp",    dfp_1args_x1, 15, LONG_TEST, "D64->D32", True},
404                       { &_test_dctfix, "dctfix",  dfp_1args_x1, 15, LONG_TEST, "D64->I64S", True},
405                       { &_test_dctqpq, "dctqpq",  dfp_1args_x1, 15, QUAD_TEST, "D64->D128", True},
406                       { &_test_dctfixq,"dctfixq", dfp_1args_x1, 15, QUAD_TEST, "D128->I64S", True},
407                       { &_test_drdpq,  "drdpq",   dfp_1args_x1, 15, QUAD_TEST, "D128->D64", True},
408                       { NULL, NULL, NULL, 0, 0, NULL}
409};
410
411
412static dfp_test_t
413dfp_two_arg_tests[] = {
414                       { &_test_dscri,  "dscri",   dfp_2args_x1, 20, LONG_TEST, ">>", True},
415                       { &_test_dscli,  "dscli",   dfp_2args_x1, 20, LONG_TEST, "<<", True},
416                       { &_test_dscriq, "dscriq",  dfp_2args_x1, 20, QUAD_TEST, ">>", True},
417                       { &_test_dscliq, "dscliq",  dfp_2args_x1, 20, QUAD_TEST, "<<", True},
418                       { NULL, NULL, NULL, 0, 0, NULL}
419};
420
421void set_rounding_mode(unsigned long long rnd_mode)
422{
423   double fpscr;
424   unsigned long long * hex_fpscr = (unsigned long long *)&fpscr;
425
426   *hex_fpscr = 0ULL;
427   __asm__ __volatile__ ("mffs %0"  : "=f"(f14));
428   fpscr = f14;
429   *hex_fpscr &= 0xFFFFFFF0FFFFFFFFULL;
430   *hex_fpscr |= (rnd_mode << 32);
431   f14 = fpscr;
432   SET_FPSCR_DRN;
433}
434
435static void test_dfp_one_arg_ops(int unused)
436{
437   test_func_t func;
438   unsigned long long u0, u0x;
439   double res, d0, *d0p;
440   double d0x, *d0xp;
441   unsigned long round_mode;
442   int k = 0;
443
444   u0x = 0;
445   d0p = &d0;
446   d0xp = &d0x;
447
448   while ((func = dfp_one_arg_tests[k].test_func)) {
449      int i;
450
451      for (round_mode = 0; round_mode < NUM_RND_MODES; round_mode++) {
452         /* Do each test with each of the possible rounding modes */
453         dfp_test_t test_group = dfp_one_arg_tests[k];
454
455         printf("\ntest with rounding mode %lu \n", round_mode);
456         /* The set_rounding_mode() uses the global value f14. Call the
457          * function before setting up the test for the specific instruction
458          * to avoid avoid conflicts using f14.
459          */
460         set_rounding_mode(round_mode);
461
462         for (i = 0; i < test_group.num_tests; i++) {
463
464            if (test_group.precision == LONG_TEST) {
465               u0 = dfp64_vals[test_group.targs[i].fra_idx];
466            } else {
467               u0 = dfp128_vals[test_group.targs[i].fra_idx * 2];
468               u0x = dfp128_vals[(test_group.targs[i].fra_idx * 2) + 1];
469            }
470
471            *(unsigned long long *)d0p = u0;
472            f14 = d0;
473            if (test_group.precision == QUAD_TEST) {
474	       *(unsigned long long *)d0xp = u0x;
475                f15 = d0x;
476            }
477
478            (*func)();
479            res = f18;
480
481            printf("%s %016llx", test_group.name, u0);
482
483            if (test_group.precision == LONG_TEST) {
484               printf(" %s  => %016llx",
485                      test_group.op, *((unsigned long long *)(&res)));
486            } else {
487               double resx = f19;
488               printf(" %016llx %s ==> %016llx %016llx",
489                      u0x, test_group.op,
490                      *((unsigned long long *)(&res)),
491                      *((unsigned long long *)(&resx)));
492            }
493            printf("\n");
494         }
495      }
496
497      k++;
498      printf( "\n" );
499   }
500}
501
502static void test_dfp_two_arg_ops(int unused)
503/* Shift instructions: first argument is the DFP source, second argument
504 * is 6 bit shift amount.
505 */
506{
507   test_func_shift_t func;
508   unsigned long long u0, u0x;
509   unsigned int shift_by;
510   double res, d0, *d0p;
511   double d0x, *d0xp;
512   unsigned long round_mode;
513   int k = 0;
514
515   u0x = 0;
516   d0p = &d0;
517   d0xp = &d0x;
518
519   while ((func = dfp_two_arg_tests[k].test_func)) {
520      int i;
521
522      for (round_mode = 0; round_mode < NUM_RND_MODES; round_mode++) {
523         /* Do each test with each of the possible rounding modes */
524         dfp_test_t test_group = dfp_two_arg_tests[k];
525
526         printf("\ntest with rounding mode %lu \n", round_mode);
527
528         /* The set_rounding_mode() uses the global value f14. Call the
529          * function before setting up the test for the specific instruction
530          * to avoid avoid conflicts using f14.
531          */
532         set_rounding_mode(round_mode);
533
534         for (i = 0; i < test_group.num_tests; i++) {
535
536            shift_by = test_group.targs[i].frb_idx;
537
538            if (test_group.precision == LONG_TEST) {
539               u0 = dfp64_vals[test_group.targs[i].fra_idx];
540            } else {
541               u0 = dfp128_vals[test_group.targs[i].fra_idx * 2];
542               u0x = dfp128_vals[(test_group.targs[i].fra_idx * 2) + 1];
543            }
544
545            *(unsigned long long *)d0p = u0;
546            f14 = d0;
547            if (test_group.precision == QUAD_TEST) {
548               *(unsigned long long *)d0xp = u0x;
549               f15 = d0x;
550            }
551
552            (*func)(shift_by);
553            res = f18;
554
555            printf("%s %016llx", test_group.name, u0);
556
557            if (test_group.precision) {
558               printf(" %s %-3d => %016llx",
559                      test_group.op, shift_by, *((unsigned long long *)(&res)));
560            } else {
561               double resx = f19;
562               printf(" %016llx %s %-3d  ==> %016llx %016llx",
563                      u0x, test_group.op, shift_by,
564                      *((unsigned long long *)(&res)),
565                      *((unsigned long long *)(&resx)));
566            }
567            printf("\n" );
568         }
569      }
570
571      k++;
572      printf( "\n" );
573   }
574}
575
576static void test_dcffix_dcffixq(int has_vsx)
577{
578   test_func_t func;
579   unsigned long long u0;
580   double res, d0, *d0p;
581   int k = 0, round_mode;
582
583   d0p = &d0;
584
585
586   while ((func = dfp_dcffix_dcffixq_tests[k].test_func)) {
587      int i;
588
589      if ((!has_vsx) && (!strcmp("dcffix", dfp_dcffix_dcffixq_tests[k].name))) {
590         k++;
591         /* The test instruction is dcffix it is supported on POWER 7
592          * and newer processors.  Skip if not POWER 7 or newer.
593          */
594         continue;
595      }
596
597      for (round_mode = 0; round_mode < NUM_RND_MODES; round_mode++) {
598         /* Do each test with each of the possible rounding modes */
599         dfp_test_t test_group = dfp_dcffix_dcffixq_tests[k];
600
601         printf("\ntest with rounding mode %u \n", round_mode);
602
603         /* The set_rounding_mode() uses the global value f14. Call the
604          * function before setting up the test for the specific instruction
605          * to avoid avoid conflicts using f14.
606          */
607         set_rounding_mode(round_mode);
608
609         for (i = 0; i < test_group.num_tests; i++) {
610
611            /* The instructions take I64 inputs */
612            u0 = int64_vals[test_group.targs[i].fra_idx];
613
614            *(unsigned long long *)d0p = u0;
615            f14 = d0;
616
617            (*func)();
618            res = f18;
619
620            printf("%s %016llx", test_group.name, u0);
621
622            if (test_group.precision) {
623               printf(" %s  => %016llx",
624                      test_group.op, *((unsigned long long *)(&res)));
625            } else {
626               double resx = f19;
627               printf(" %s ==> %016llx %016llx",
628                      test_group.op,
629                      *((unsigned long long *)(&res)),
630                      *((unsigned long long *)(&resx)));
631            }
632            printf("\n" );
633         }
634      }
635
636      k++;
637      printf( "\n" );
638   }
639}
640
641static test_table_t
642all_tests[] =
643{
644   { &test_dfp_one_arg_ops,
645   "Test DFP fomat conversion instructions" },
646   { &test_dfp_two_arg_ops,
647   "Test DFP shift instructions" },
648   { test_dcffix_dcffixq,
649   "Test DCFFIX and DCFFIXQ instructions" },
650   { NULL, NULL }
651};
652#endif // HAS_DFP
653
/* Program entry point.
 *
 * When built with HAS_DFP, records envp (needed to locate the auxiliary
 * vector), determines whether the processor has VSX, and then runs every
 * category in all_tests, printing its heading first.  Each driver receives
 * the VSX flag as its argument.  Without HAS_DFP the program does nothing.
 *
 * Always returns 0.
 *
 * The driver pointer is declared as test_func_main_t, the type actually
 * stored in all_tests, so the call with an argument goes through a
 * prototyped function pointer instead of the unprototyped test_func_t.
 */
int main(int argc, char ** argv, char ** envp) {
#if defined(HAS_DFP)
   test_func_main_t func;
   int i, has_vsx;

   (void)argc;
   (void)argv;

   /* If the processor has the VSX functionality then it is POWER 7
    * or newer.
    */
   my_envp = envp;
   has_vsx = get_vsx();

   for (i = 0; (func = all_tests[i].test_category) != NULL; i++) {
      printf( "%s\n", all_tests[i].name );
      (*func)(has_vsx);
   }
#else
   (void)argc;
   (void)argv;
   (void)envp;
#endif // HAS_DFP
   return 0;
}
676