1
2#ifndef USED_AS_INCLUDE
3
4#include "../pub/libvex_basictypes.h"
5#include <stdio.h>
6#include <malloc.h>
7#include <stdlib.h>
8#include <string.h>
9#include <assert.h>
10
11
12/* Test program for developing code for conversions between
13   x87 64-bit and 80-bit floats.
14
15   80-bit format exists only for x86/x86-64, and so the routines
16   hardwire it as little-endian.  The 64-bit format (IEEE double)
17   could exist on any platform, little or big-endian and so we
18   have to take that into account.  IOW, these routines have to
19   work correctly when compiled on both big- and little-endian
20   targets, but the 80-bit images only ever have to exist in
21   little-endian format.
22*/
23static void show_f80 ( UChar* );
24static void show_f64 ( UChar* );
25
26static inline
27UInt read_bit_array ( UChar* arr, UInt n )
28{
29   UChar c = arr[n >> 3];
30   c >>= (n&7);
31   return c & 1;
32}
33
34static inline
35void write_bit_array ( UChar* arr, UInt n, UInt b )
36{
37   UChar c = arr[n >> 3];
38   c &= ~(1 << (n&7));
39   c |= ((b&1) << (n&7));
40   arr[n >> 3] = c;
41}
42
43
44static void convert_f80le_to_f64le_HW ( /*IN*/UChar* f80, /*OUT*/UChar* f64 )
45{
46  asm volatile ("ffree %%st(7); fldt (%0); fstpl (%1)"
47                :
48                : "r" (&f80[0]), "r" (&f64[0])
49                : "memory" );
50}
51
52static void convert_f64le_to_f80le_HW ( /*IN*/UChar* f64, /*OUT*/UChar* f80 )
53{
54  asm volatile ("ffree %%st(7); fldl (%0); fstpt (%1)"
55                :
56                : "r" (&f64[0]), "r" (&f80[0])
57                : "memory" );
58}
59
60#endif /* ndef USED_AS_INCLUDE */
61
62
63
64/* 80 and 64-bit floating point formats:
65
66   80-bit:
67
68    S  0       0-------0      zero
69    S  0       0X------X      denormals
70    S  1-7FFE  1X------X      normals (all normals have leading 1)
71    S  7FFF    10------0      infinity
72    S  7FFF    10X-----X      snan
73    S  7FFF    11X-----X      qnan
74
75   S is the sign bit.  For runs X----X, at least one of the Xs must be
76   nonzero.  Exponent is 15 bits, fractional part is 63 bits, and
77   there is an explicitly represented leading 1, and a sign bit,
78   giving 80 in total.
79
80   64-bit avoids the confusion of an explicitly represented leading 1
81   and so is simpler:
82
83    S  0      0------0   zero
84    S  0      X------X   denormals
85    S  1-7FE  any        normals
86    S  7FF    0------0   infinity
87    S  7FF    0X-----X   snan
88    S  7FF    1X-----X   qnan
89
90   Exponent is 11 bits, fractional part is 52 bits, and there is a
91   sign bit, giving 64 in total.
92*/
93
94/* Convert a IEEE754 double (64-bit) into an x87 extended double
95   (80-bit), mimicing the hardware fairly closely.  Both numbers are
96   stored little-endian.  Limitations, all of which could be fixed,
97   given some level of hassle:
98
99   * Identity of NaNs is not preserved.
100
101   See comments in the code for more details.
102*/
103static void convert_f64le_to_f80le ( /*IN*/UChar* f64, /*OUT*/UChar* f80 )
104{
105   Bool  mantissaIsZero;
106   Int   bexp, i, j, shift;
107   UChar sign;
108
109   sign = toUChar( (f64[7] >> 7) & 1 );
110   bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
111   bexp &= 0x7FF;
112
113   mantissaIsZero = False;
114   if (bexp == 0 || bexp == 0x7FF) {
115      /* We'll need to know whether or not the mantissa (bits 51:0) is
116         all zeroes in order to handle these cases.  So figure it
117         out. */
118      mantissaIsZero
119         = toBool(
120              (f64[6] & 0x0F) == 0
121              && f64[5] == 0 && f64[4] == 0 && f64[3] == 0
122              && f64[2] == 0 && f64[1] == 0 && f64[0] == 0
123           );
124   }
125
126   /* If the exponent is zero, either we have a zero or a denormal.
127      Produce a zero.  This is a hack in that it forces denormals to
128      zero.  Could do better. */
129   if (bexp == 0) {
130      f80[9] = toUChar( sign << 7 );
131      f80[8] = f80[7] = f80[6] = f80[5] = f80[4]
132             = f80[3] = f80[2] = f80[1] = f80[0] = 0;
133
134      if (mantissaIsZero)
135         /* It really is zero, so that's all we can do. */
136         return;
137
138      /* There is at least one 1-bit in the mantissa.  So it's a
139         potentially denormalised double -- but we can produce a
140         normalised long double.  Count the leading zeroes in the
141         mantissa so as to decide how much to bump the exponent down
142         by.  Note, this is SLOW. */
143      shift = 0;
144      for (i = 51; i >= 0; i--) {
145        if (read_bit_array(f64, i))
146           break;
147        shift++;
148      }
149
150      /* and copy into place as many bits as we can get our hands on. */
151      j = 63;
152      for (i = 51 - shift; i >= 0; i--) {
153         write_bit_array( f80, j,
154     	 read_bit_array( f64, i ) );
155         j--;
156      }
157
158      /* Set the exponent appropriately, and we're done. */
159      bexp -= shift;
160      bexp += (16383 - 1023);
161      f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) );
162      f80[8] = toUChar( bexp & 0xFF );
163      return;
164   }
165
166   /* If the exponent is 7FF, this is either an Infinity, a SNaN or
167      QNaN, as determined by examining bits 51:0, thus:
168          0  ... 0    Inf
169          0X ... X    SNaN
170          1X ... X    QNaN
171      where at least one of the Xs is not zero.
172   */
173   if (bexp == 0x7FF) {
174      if (mantissaIsZero) {
175         /* Produce an appropriately signed infinity:
176            S 1--1 (15)  1  0--0 (63)
177         */
178         f80[9] = toUChar( (sign << 7) | 0x7F );
179         f80[8] = 0xFF;
180         f80[7] = 0x80;
181         f80[6] = f80[5] = f80[4] = f80[3]
182                = f80[2] = f80[1] = f80[0] = 0;
183         return;
184      }
185      /* So it's either a QNaN or SNaN.  Distinguish by considering
186         bit 51.  Note, this destroys all the trailing bits
187         (identity?) of the NaN.  IEEE754 doesn't require preserving
188         these (it only requires that there be one QNaN value and one
189         SNaN value), but x87 does seem to have some ability to
190         preserve them.  Anyway, here, the NaN's identity is
191         destroyed.  Could be improved. */
192      if (f64[6] & 8) {
193         /* QNaN.  Make a QNaN:
194            S 1--1 (15)  1  1--1 (63)
195         */
196         f80[9] = toUChar( (sign << 7) | 0x7F );
197         f80[8] = 0xFF;
198         f80[7] = 0xFF;
199         f80[6] = f80[5] = f80[4] = f80[3]
200                = f80[2] = f80[1] = f80[0] = 0xFF;
201      } else {
202         /* SNaN.  Make a SNaN:
203            S 1--1 (15)  0  1--1 (63)
204         */
205         f80[9] = toUChar( (sign << 7) | 0x7F );
206         f80[8] = 0xFF;
207         f80[7] = 0x7F;
208         f80[6] = f80[5] = f80[4] = f80[3]
209                = f80[2] = f80[1] = f80[0] = 0xFF;
210      }
211      return;
212   }
213
214   /* It's not a zero, denormal, infinity or nan.  So it must be a
215      normalised number.  Rebias the exponent and build the new
216      number.  */
217   bexp += (16383 - 1023);
218
219   f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) );
220   f80[8] = toUChar( bexp & 0xFF );
221   f80[7] = toUChar( (1 << 7) | ((f64[6] << 3) & 0x78)
222                              | ((f64[5] >> 5) & 7) );
223   f80[6] = toUChar( ((f64[5] << 3) & 0xF8) | ((f64[4] >> 5) & 7) );
224   f80[5] = toUChar( ((f64[4] << 3) & 0xF8) | ((f64[3] >> 5) & 7) );
225   f80[4] = toUChar( ((f64[3] << 3) & 0xF8) | ((f64[2] >> 5) & 7) );
226   f80[3] = toUChar( ((f64[2] << 3) & 0xF8) | ((f64[1] >> 5) & 7) );
227   f80[2] = toUChar( ((f64[1] << 3) & 0xF8) | ((f64[0] >> 5) & 7) );
228   f80[1] = toUChar( ((f64[0] << 3) & 0xF8) );
229   f80[0] = toUChar( 0 );
230}
231
232
233/* Convert a x87 extended double (80-bit) into an IEEE 754 double
234   (64-bit), mimicking the hardware fairly closely.  Both numbers are
235   stored little-endian.  Limitations, both of which could be fixed,
236   given some level of hassle:
237
238   * Rounding following truncation could be a bit better.
239
240   * Identity of NaNs is not preserved.
241
242   See comments in the code for more details.
243*/
244static void convert_f80le_to_f64le ( /*IN*/UChar* f80, /*OUT*/UChar* f64 )
245{
246   Bool  isInf;
247   Int   bexp, i, j;
248   UChar sign;
249
250   sign = toUChar((f80[9] >> 7) & 1);
251   bexp = (((UInt)f80[9]) << 8) | (UInt)f80[8];
252   bexp &= 0x7FFF;
253
254   /* If the exponent is zero, either we have a zero or a denormal.
255      But an extended precision denormal becomes a double precision
256      zero, so in either case, just produce the appropriately signed
257      zero. */
258   if (bexp == 0) {
259      f64[7] = toUChar(sign << 7);
260      f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
261      return;
262   }
263
264   /* If the exponent is 7FFF, this is either an Infinity, a SNaN or
265      QNaN, as determined by examining bits 62:0, thus:
266          0  ... 0    Inf
267          0X ... X    SNaN
268          1X ... X    QNaN
269      where at least one of the Xs is not zero.
270   */
271   if (bexp == 0x7FFF) {
272      isInf = toBool(
273                 (f80[7] & 0x7F) == 0
274                 && f80[6] == 0 && f80[5] == 0 && f80[4] == 0
275                 && f80[3] == 0 && f80[2] == 0 && f80[1] == 0
276                 && f80[0] == 0
277              );
278      if (isInf) {
279         if (0 == (f80[7] & 0x80))
280            goto wierd_NaN;
281         /* Produce an appropriately signed infinity:
282            S 1--1 (11)  0--0 (52)
283         */
284         f64[7] = toUChar((sign << 7) | 0x7F);
285         f64[6] = 0xF0;
286         f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
287         return;
288      }
289      /* So it's either a QNaN or SNaN.  Distinguish by considering
290         bit 62.  Note, this destroys all the trailing bits
291         (identity?) of the NaN.  IEEE754 doesn't require preserving
292         these (it only requires that there be one QNaN value and one
293         SNaN value), but x87 does seem to have some ability to
294         preserve them.  Anyway, here, the NaN's identity is
295         destroyed.  Could be improved. */
296      if (f80[8] & 0x40) {
297         /* QNaN.  Make a QNaN:
298            S 1--1 (11)  1  1--1 (51)
299         */
300         f64[7] = toUChar((sign << 7) | 0x7F);
301         f64[6] = 0xFF;
302         f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF;
303      } else {
304         /* SNaN.  Make a SNaN:
305            S 1--1 (11)  0  1--1 (51)
306         */
307         f64[7] = toUChar((sign << 7) | 0x7F);
308         f64[6] = 0xF7;
309         f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF;
310      }
311      return;
312   }
313
314   /* If it's not a Zero, NaN or Inf, and the integer part (bit 62) is
315      zero, the x87 FPU appears to consider the number denormalised
316      and converts it to a QNaN. */
317   if (0 == (f80[7] & 0x80)) {
318      wierd_NaN:
319      /* Strange hardware QNaN:
320         S 1--1 (11)  1  0--0 (51)
321      */
322      /* On a PIII, these QNaNs always appear with sign==1.  I have
323         no idea why. */
324      f64[7] = (1 /*sign*/ << 7) | 0x7F;
325      f64[6] = 0xF8;
326      f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
327      return;
328   }
329
330   /* It's not a zero, denormal, infinity or nan.  So it must be a
331      normalised number.  Rebias the exponent and consider. */
332   bexp -= (16383 - 1023);
333   if (bexp >= 0x7FF) {
334      /* It's too big for a double.  Construct an infinity. */
335      f64[7] = toUChar((sign << 7) | 0x7F);
336      f64[6] = 0xF0;
337      f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
338      return;
339   }
340
341   if (bexp <= 0) {
342      /* It's too small for a normalised double.  First construct a
343         zero and then see if it can be improved into a denormal.  */
344      f64[7] = toUChar(sign << 7);
345      f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
346
347      if (bexp < -52)
348         /* Too small even for a denormal. */
349         return;
350
351      /* Ok, let's make a denormal.  Note, this is SLOW. */
352      /* Copy bits 63, 62, 61, etc of the src mantissa into the dst,
353         indexes 52+bexp, 51+bexp, etc, until k+bexp < 0. */
354      /* bexp is in range -52 .. 0 inclusive */
355      for (i = 63; i >= 0; i--) {
356         j = i - 12 + bexp;
357         if (j < 0) break;
358         /* We shouldn't really call vassert from generated code. */
359         assert(j >= 0 && j < 52);
360         write_bit_array ( f64,
361                           j,
362                           read_bit_array ( f80, i ) );
363      }
364      /* and now we might have to round ... */
365      if (read_bit_array(f80, 10+1 - bexp) == 1)
366         goto do_rounding;
367
368      return;
369   }
370
371   /* Ok, it's a normalised number which is representable as a double.
372      Copy the exponent and mantissa into place. */
373   /*
374   for (i = 0; i < 52; i++)
375      write_bit_array ( f64,
376                        i,
377                        read_bit_array ( f80, i+11 ) );
378   */
379   f64[0] = toUChar( (f80[1] >> 3) | (f80[2] << 5) );
380   f64[1] = toUChar( (f80[2] >> 3) | (f80[3] << 5) );
381   f64[2] = toUChar( (f80[3] >> 3) | (f80[4] << 5) );
382   f64[3] = toUChar( (f80[4] >> 3) | (f80[5] << 5) );
383   f64[4] = toUChar( (f80[5] >> 3) | (f80[6] << 5) );
384   f64[5] = toUChar( (f80[6] >> 3) | (f80[7] << 5) );
385
386   f64[6] = toUChar( ((bexp << 4) & 0xF0) | ((f80[7] >> 3) & 0x0F) );
387
388   f64[7] = toUChar( (sign << 7) | ((bexp >> 4) & 0x7F) );
389
390   /* Now consider any rounding that needs to happen as a result of
391      truncating the mantissa. */
392   if (f80[1] & 4) /* read_bit_array(f80, 10) == 1) */ {
393
394      /* If the bottom bits of f80 are "100 0000 0000", then the
395         infinitely precise value is deemed to be mid-way between the
396         two closest representable values.  Since we're doing
397         round-to-nearest (the default mode), in that case it is the
398         bit immediately above which indicates whether we should round
399         upwards or not -- if 0, we don't.  All that is encapsulated
400         in the following simple test. */
401      if ((f80[1] & 0xF) == 4/*0100b*/ && f80[0] == 0)
402         return;
403
404      do_rounding:
405      /* Round upwards.  This is a kludge.  Once in every 2^24
406         roundings (statistically) the bottom three bytes are all 0xFF
407         and so we don't round at all.  Could be improved. */
408      if (f64[0] != 0xFF) {
409         f64[0]++;
410      }
411      else
412      if (f64[0] == 0xFF && f64[1] != 0xFF) {
413         f64[0] = 0;
414         f64[1]++;
415      }
416      else
417      if (f64[0] == 0xFF && f64[1] == 0xFF && f64[2] != 0xFF) {
418         f64[0] = 0;
419         f64[1] = 0;
420         f64[2]++;
421      }
422      /* else we don't round, but we should. */
423   }
424}
425
426
427#ifndef USED_AS_INCLUDE
428
429//////////////
430
431static void show_f80 ( UChar* f80 )
432{
433  Int i;
434  printf("%d ", read_bit_array(f80, 79));
435
436  for (i = 78; i >= 64; i--)
437    printf("%d", read_bit_array(f80, i));
438
439  printf(" %d ", read_bit_array(f80, 63));
440
441  for (i = 62; i >= 0; i--)
442    printf("%d", read_bit_array(f80, i));
443}
444
445static void show_f64le ( UChar* f64 )
446{
447  Int i;
448  printf("%d     ", read_bit_array(f64, 63));
449
450  for (i = 62; i >= 52; i--)
451    printf("%d", read_bit_array(f64, i));
452
453  printf("   ");
454  for (i = 51; i >= 0; i--)
455    printf("%d", read_bit_array(f64, i));
456}
457
458//////////////
459
460
461/* Convert f80 to a 64-bit IEEE double using both the hardware and the
462   soft version, and compare the results.  If they differ, print
463   details and return 1.  If they are identical, return 0.
464*/
465int do_80_to_64_test ( Int test_no, UChar* f80, UChar* f64h, UChar* f64s)
466{
467   Char buf64s[100], buf64h[100];
468   Bool same;
469   Int k;
470   convert_f80le_to_f64le_HW(f80, f64h);
471   convert_f80le_to_f64le(f80, f64s);
472   same = True;
473   for (k = 0; k < 8; k++) {
474      if (f64s[k] != f64h[k]) {
475         same = False; break;
476      }
477   }
478   /* bitwise identical */
479   if (same)
480      return 0;
481
482   sprintf(buf64s, "%.16e", *(double*)f64s);
483   sprintf(buf64h, "%.16e", *(double*)f64h);
484
485   /* Not bitwise identical, but pretty darn close */
486   if (0 == strcmp(buf64s, buf64h))
487      return 0;
488
489    printf("\n");
490    printf("f80:  "); show_f80(f80); printf("\n");
491    printf("f64h: "); show_f64le(f64h); printf("\n");
492    printf("f64s: "); show_f64le(f64s); printf("\n");
493
494    printf("[test %d]  %.16Le -> (hw %s, sw %s)\n",
495           test_no, *(long double*)f80,
496           buf64h, buf64s );
497
498    return 1;
499}
500
501
502/* Convert an IEEE 64-bit double to a x87 extended double (80 bit)
503   using both the hardware and the soft version, and compare the
504   results.  If they differ, print details and return 1.  If they are
505   identical, return 0.
506*/
507int do_64_to_80_test ( Int test_no, UChar* f64, UChar* f80h, UChar* f80s)
508{
509   Char buf80s[100], buf80h[100];
510   Bool same;
511   Int k;
512   convert_f64le_to_f80le_HW(f64, f80h);
513   convert_f64le_to_f80le(f64, f80s);
514   same = True;
515   for (k = 0; k < 10; k++) {
516      if (f80s[k] != f80h[k]) {
517         same = False; break;
518      }
519   }
520   /* bitwise identical */
521   if (same)
522      return 0;
523
524   sprintf(buf80s, "%.20Le", *(long double*)f80s);
525   sprintf(buf80h, "%.20Le", *(long double*)f80h);
526
527   /* Not bitwise identical, but pretty darn close */
528   if (0 == strcmp(buf80s, buf80h))
529      return 0;
530
531    printf("\n");
532    printf("f64:  "); show_f64le(f64); printf("\n");
533    printf("f80h: "); show_f80(f80h); printf("\n");
534    printf("f80s: "); show_f80(f80s); printf("\n");
535
536    printf("[test %d]  %.16e -> (hw %s, sw %s)\n",
537           test_no, *(double*)f64,
538           buf80h, buf80s );
539
540    return 1;
541}
542
543
544
545void do_80_to_64_tests ( void )
546{
547   UInt b9,b8,b7,i, j;
548   Int fails=0, tests=0;
549   UChar* f64h = malloc(8);
550   UChar* f64s = malloc(8);
551   UChar* f80  = malloc(10);
552   int STEP = 1;
553
554   srandom(4343);
555
556   /* Ten million random bit patterns */
557   for (i = 0; i < 10000000; i++) {
558     tests++;
559     for (j = 0; j < 10; j++)
560       f80[j] = (random() >> 7) & 255;
561
562     fails += do_80_to_64_test(tests, f80, f64h, f64s);
563   }
564
565   /* 2^24 numbers in which the first 24 bits are tested exhaustively
566      -- this covers the sign, exponent and leading part of the
567      mantissa. */
568   for (b9 = 0; b9 < 256; b9 += STEP) {
569      for (b8 = 0; b8 < 256; b8 += STEP) {
570         for (b7 = 0; b7 < 256; b7 += STEP) {
571           tests++;
572            for (i = 0; i < 10; i++)
573               f80[i] = 0;
574            for (i = 0; i < 8; i++)
575               f64h[i] = f64s[i] = 0;
576            f80[9] = b9;
577            f80[8] = b8;
578            f80[7] = b7;
579
580    fails += do_80_to_64_test(tests, f80, f64h, f64s);
581   }}}
582
583   printf("\n80 -> 64:  %d tests, %d fails\n\n", tests, fails);
584}
585
586
587void do_64_to_80_tests ( void )
588{
589   UInt b7,b6,b5,i, j;
590   Int fails=0, tests=0;
591   UChar* f80h = malloc(10);
592   UChar* f80s = malloc(10);
593   UChar* f64  = malloc(8);
594   int STEP = 1;
595
596   srandom(2323);
597
598   /* Ten million random bit patterns */
599   for (i = 0; i < 10000000; i++) {
600     tests++;
601     for (j = 0; j < 8; j++)
602       f64[j] = (random() >> 13) & 255;
603
604     fails += do_64_to_80_test(tests, f64, f80h, f80s);
605   }
606
607   /* 2^24 numbers in which the first 24 bits are tested exhaustively
608      -- this covers the sign, exponent and leading part of the
609      mantissa. */
610   for (b7 = 0; b7 < 256; b7 += STEP) {
611      for (b6 = 0; b6 < 256; b6 += STEP) {
612         for (b5 = 0; b5 < 256; b5 += STEP) {
613           tests++;
614            for (i = 0; i < 8; i++)
615               f64[i] = 0;
616            for (i = 0; i < 10; i++)
617               f80h[i] = f80s[i] = 0;
618            f64[7] = b7;
619            f64[6] = b6;
620            f64[5] = b5;
621
622    fails += do_64_to_80_test(tests, f64, f80h, f80s);
623   }}}
624
625   printf("\n64 -> 80:  %d tests, %d fails\n\n", tests, fails);
626}
627
628
629int main ( void )
630{
631   do_80_to_64_tests();
632   do_64_to_80_tests();
633   return 0;
634}
635
636#endif /* ndef USED_AS_INCLUDE */
637