1
2/*  Copyright (C) 2006 Dave Nomura
3       dcnltc@us.ibm.com
4
5    This program is free software; you can redistribute it and/or
6    modify it under the terms of the GNU General Public License as
7    published by the Free Software Foundation; either version 2 of the
8    License, or (at your option) any later version.
9
10    This program is distributed in the hope that it will be useful, but
11    WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    General Public License for more details.
14
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software
17    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18    02111-1307, USA.
19
20    The GNU General Public License is contained in the file COPYING.
21*/
22
23#include <stdio.h>
24#include <stdlib.h>
25#include <limits.h>
26
27typedef enum { FALSE=0, TRUE } bool_t;
28
29typedef enum {
30	FADDS, FSUBS, FMULS, FDIVS,
31	FMADDS, FMSUBS, FNMADDS, FNMSUBS,
32	FADD, FSUB, FMUL, FDIV, FMADD,
33	FMSUB, FNMADD, FNMSUB, FSQRT
34} flt_op_t;
35
36typedef enum {
37	TO_NEAREST=0, TO_ZERO, TO_PLUS_INFINITY, TO_MINUS_INFINITY } round_mode_t;
38char *round_mode_name[] = { "near", "zero", "+inf", "-inf" };
39
40const char *flt_op_names[] = {
41	"fadds", "fsubs", "fmuls", "fdivs",
42	"fmadds", "fmsubs", "fnmadds", "fnmsubs",
43	"fadd", "fsub", "fmul", "fdiv", "fmadd", "fmsub", "fnmadd",
44	"fnmsub", "fsqrt"
45};
46
47typedef unsigned int fpscr_t;
48
49typedef union {
50	float flt;
51	struct {
52#if defined(VGP_ppc64le_linux)
53      unsigned int frac:23;
54      unsigned int exp:8;
55      unsigned int sign:1;
56#else
57		unsigned int sign:1;
58		unsigned int exp:8;
59		unsigned int frac:23;
60#endif
61	} layout;
62} flt_overlay;
63
64typedef union {
65	double dbl;
66	struct {
67#if defined(VGP_ppc64le_linux)
68      unsigned int frac_lo:32;
69      unsigned int frac_hi:20;
70      unsigned int exp:11;
71      unsigned int sign:1;
72#else
73		unsigned int sign:1;
74		unsigned int exp:11;
75		unsigned int frac_hi:20;
76		unsigned int frac_lo:32;
77#endif
78	} layout;
79	struct {
80		unsigned int hi;
81		unsigned int lo;
82	} dbl_pair;
83} dbl_overlay;
84
85void assert_fail(const char *msg,
86	const char* expr, const char* file, int line, const char*fn);
87
88#define STRING(__str)  #__str
89#define assert(msg, expr)                                           \
90  ((void) ((expr) ? 0 :                                         \
91           (assert_fail (msg, STRING(expr),                  \
92                             __FILE__, __LINE__,                \
93                             __PRETTY_FUNCTION__), 0)))
94float denorm_small;
95double dbl_denorm_small;
96float norm_small;
97bool_t debug = FALSE;
98bool_t long_is_64_bits = sizeof(long) == 8;
99
100void assert_fail (msg, expr, file, line, fn)
101const char* msg;
102const char* expr;
103const char* file;
104int line;
105const char*fn;
106{
107   printf( "\n%s: %s:%d (%s): Assertion `%s' failed.\n",
108               msg, file, line, fn, expr );
109   exit( 1 );
110}
111void set_rounding_mode(round_mode_t mode)
112{
113	switch(mode) {
114	case TO_NEAREST:
115		asm volatile("mtfsfi 7, 0");
116		break;
117	case TO_ZERO:
118		asm volatile("mtfsfi 7, 1");
119		break;
120	case TO_PLUS_INFINITY:
121		asm volatile("mtfsfi 7, 2");
122		break;
123	case TO_MINUS_INFINITY:
124		asm volatile("mtfsfi 7, 3");
125		break;
126	}
127}
128
129void print_double(char *msg, double dbl)
130{
131	dbl_overlay D;
132	D.dbl = dbl;
133
134	printf("%15s : dbl %-20a = %c(%4d, %05x%08x)\n",
135			msg, D.dbl, (D.layout.sign == 0 ? '+' : '-'),
136			D.layout.exp, D.layout.frac_hi, D.layout.frac_lo);
137}
138
139void print_single(char *msg, float *flt)
140{
141	flt_overlay F;
142	F.flt = *flt;
143
144	/* NOTE: for the purposes of comparing the fraction of a single with
145	**       a double left shift the .frac so that hex digits are grouped
146	**	     from left to right.  this is necessary because the size of a
147	**		 single mantissa (23) bits is not a multiple of 4
148	*/
149	printf("%15s : flt %-20a = %c(%4d, %06x)\n",
150		msg, F.flt, (F.layout.sign == 0 ? '+' : '-'), F.layout.exp, F.layout.frac << 1);
151}
152
153int check_dbl_to_flt_round(round_mode_t mode, double dbl, float *expected)
154{
155	int status = 0;
156	flt_overlay R, E;
157	char *result;
158
159	set_rounding_mode(mode);
160
161	E.flt = *expected;
162	R.flt = (float)dbl;
163
164	if ((R.layout.sign != E.layout.sign) ||
165		(R.layout.exp != E.layout.exp) ||
166		(R.layout.frac != E.layout.frac)) {
167		result = "FAILED";
168		status = 1;
169	} else {
170		result = "PASSED";
171		status = 0;
172	}
173	printf("%s:%s:(double)(%-20a) = %20a",
174		round_mode_name[mode], result, R.flt, dbl);
175	if (status) {
176		print_single("\n\texpected", &E.flt);
177		print_single("\n\trounded ", &R.flt);
178	}
179	putchar('\n');
180	return status;
181}
182
183int test_dbl_to_float_convert(char *msg, float *base)
184{
185	int status = 0;
186	double half = (double)denorm_small/2;
187	double qtr = half/2;
188	double D_hi = (double)*base + half + qtr;
189	double D_lo = (double)*base + half - qtr;
190	float F_lo = *base;
191	float F_hi = F_lo + denorm_small;
192
193
194	/*
195	** .....+-----+-----+-----+-----+---....
196	**      ^F_lo ^           ^     ^
197	**            D_lo
198	**                        D_hi
199	**                              F_hi
200	** F_lo and F_hi are two consecutive single float model numbers
201	** denorm_small distance apart. D_lo and D_hi are two numbers
202	** within that range that are not representable as single floats
203	** and will be rounded to either F_lo or F_hi.
204	*/
205	printf("-------------------------- %s --------------------------\n", msg);
206	if (debug) {
207		print_double("D_lo", D_lo);
208		print_double("D_hi", D_hi);
209		print_single("F_lo", &F_lo);
210		print_single("F_hi", &F_hi);
211	}
212
213	/* round to nearest */
214	status |= check_dbl_to_flt_round(TO_NEAREST, D_hi, &F_hi);
215	status |= check_dbl_to_flt_round(TO_NEAREST, D_lo, &F_lo);
216
217	/* round to zero */
218	status |= check_dbl_to_flt_round(TO_ZERO, D_hi, (D_hi > 0 ? &F_lo : &F_hi));
219	status |= check_dbl_to_flt_round(TO_ZERO, D_lo, (D_hi > 0 ? &F_lo : &F_hi));
220
221	/* round to +inf */
222	status |= check_dbl_to_flt_round(TO_PLUS_INFINITY, D_hi, &F_hi);
223	status |= check_dbl_to_flt_round(TO_PLUS_INFINITY, D_lo, &F_hi);
224
225	/* round to -inf */
226	status |= check_dbl_to_flt_round(TO_MINUS_INFINITY, D_hi, &F_lo);
227	status |= check_dbl_to_flt_round(TO_MINUS_INFINITY, D_lo, &F_lo);
228	return status;
229}
230
231void
232init()
233{
234	flt_overlay F;
235	dbl_overlay D;
236
237	/* small is the smallest denormalized single float number */
238	F.layout.sign = 0;
239	F.layout.exp = 0;
240	F.layout.frac = 1;
241	denorm_small = F.flt;	/* == 2^(-149) */
242	if (debug) {
243		print_single("float small", &F.flt);
244	}
245
246	D.layout.sign = 0;
247	D.layout.exp = 0;
248	D.layout.frac_hi = 0;
249	D.layout.frac_lo = 1;
250	dbl_denorm_small = D.dbl;	/* == 2^(-1022) */
251	if (debug) {
252		print_double("double small", D.dbl);
253	}
254
255	/* n_small is the smallest normalized single precision float */
256	F.layout.exp = 1;
257	norm_small = F.flt;
258}
259
260int check_int_to_flt_round(round_mode_t mode, long L, float *expected)
261{
262	int status = 0;
263	int I = L;
264	char *int_name = "int";
265	flt_overlay R, E;
266	char *result;
267	int iter;
268
269	set_rounding_mode(mode);
270	E.flt = *expected;
271
272	for (iter = 0; iter < 2; iter++) {
273		int stat = 0;
274		R.flt = (iter == 0 ? (float)I : (float)L);
275
276		if ((R.layout.sign != E.layout.sign) ||
277			(R.layout.exp != E.layout.exp) ||
278			(R.layout.frac != E.layout.frac)) {
279			result = "FAILED";
280			stat = 1;
281		} else {
282			result = "PASSED";
283			stat = 0;
284		}
285		printf("%s:%s:(float)(%4s)%9d = %11.1f",
286			round_mode_name[mode], result, int_name, I, R.flt);
287		if (stat) {
288			print_single("\n\texpected: %.1f ", &E.flt);
289			print_single("\n\trounded ", &R.flt);
290		}
291		putchar('\n');
292		status |= stat;
293
294		if (!long_is_64_bits) break;
295		int_name = "long";
296	}
297	return status;
298}
299
300int check_long_to_dbl_round(round_mode_t mode, long L, double *expected)
301{
302	int status = 0;
303	dbl_overlay R, E;
304	char *result;
305
306	set_rounding_mode(mode);
307	E.dbl = *expected;
308
309	R.dbl = (double)L;
310
311	if ((R.layout.sign != E.layout.sign) ||
312		(R.layout.exp != E.layout.exp) ||
313		(R.layout.frac_lo != E.layout.frac_lo) ||
314		(R.layout.frac_hi != E.layout.frac_hi)) {
315		result = "FAILED";
316		status = 1;
317	} else {
318		result = "PASSED";
319		status = 0;
320	}
321	printf("%s:%s:(double)(%18ld) = %20.1f",
322		round_mode_name[mode], result, L, R.dbl);
323	if (status) {
324		printf("\n\texpected %.1f : ", E.dbl);
325	}
326	putchar('\n');
327	return status;
328}
329
330int test_int_to_float_convert(char *msg)
331{
332	int status = 0;
333	int int24_hi = 0x03ff0fff;
334	int int24_lo = 0x03ff0ffd;
335	float pos_flt_lo = 67047420.0;
336	float pos_flt_hi = 67047424.0;
337	float neg_flt_lo = -67047420.0;
338	float neg_flt_hi = -67047424.0;
339
340	printf("-------------------------- %s --------------------------\n", msg);
341	status |= check_int_to_flt_round(TO_NEAREST, int24_lo, &pos_flt_lo);
342	status |= check_int_to_flt_round(TO_NEAREST, int24_hi, &pos_flt_hi);
343	status |= check_int_to_flt_round(TO_ZERO, int24_lo, &pos_flt_lo);
344	status |= check_int_to_flt_round(TO_ZERO, int24_hi, &pos_flt_lo);
345	status |= check_int_to_flt_round(TO_PLUS_INFINITY, int24_lo, &pos_flt_hi);
346	status |= check_int_to_flt_round(TO_PLUS_INFINITY, int24_hi, &pos_flt_hi);
347	status |= check_int_to_flt_round(TO_MINUS_INFINITY, int24_lo, &pos_flt_lo);
348	status |= check_int_to_flt_round(TO_MINUS_INFINITY, int24_hi, &pos_flt_lo);
349
350	status |= check_int_to_flt_round(TO_NEAREST, -int24_lo, &neg_flt_lo);
351	status |= check_int_to_flt_round(TO_NEAREST, -int24_hi, &neg_flt_hi);
352	status |= check_int_to_flt_round(TO_ZERO, -int24_lo, &neg_flt_lo);
353	status |= check_int_to_flt_round(TO_ZERO, -int24_hi, &neg_flt_lo);
354	status |= check_int_to_flt_round(TO_PLUS_INFINITY, -int24_lo, &neg_flt_lo);
355	status |= check_int_to_flt_round(TO_PLUS_INFINITY, -int24_hi, &neg_flt_lo);
356	status |= check_int_to_flt_round(TO_MINUS_INFINITY, -int24_lo, &neg_flt_hi);
357	status |= check_int_to_flt_round(TO_MINUS_INFINITY, -int24_hi, &neg_flt_hi);
358	return status;
359}
360
361#ifdef __powerpc64__
362int test_long_to_double_convert(char *msg)
363{
364	int status = 0;
365	long long55_hi = 0x07ff0ffffffffff;
366	long long55_lo = 0x07ff0fffffffffd;
367	double pos_dbl_lo = 36012304344547324.0;
368	double pos_dbl_hi = 36012304344547328.0;
369	double neg_dbl_lo = -36012304344547324.0;
370	double neg_dbl_hi = -36012304344547328.0;
371
372	printf("-------------------------- %s --------------------------\n", msg);
373	status |= check_long_to_dbl_round(TO_NEAREST, long55_lo, &pos_dbl_lo);
374	status |= check_long_to_dbl_round(TO_NEAREST, long55_hi, &pos_dbl_hi);
375	status |= check_long_to_dbl_round(TO_ZERO, long55_lo, &pos_dbl_lo);
376	status |= check_long_to_dbl_round(TO_ZERO, long55_hi, &pos_dbl_lo);
377	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, long55_lo, &pos_dbl_hi);
378	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, long55_hi, &pos_dbl_hi);
379	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, long55_lo, &pos_dbl_lo);
380	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, long55_hi, &pos_dbl_lo);
381
382	status |= check_long_to_dbl_round(TO_NEAREST, -long55_lo, &neg_dbl_lo);
383	status |= check_long_to_dbl_round(TO_NEAREST, -long55_hi, &neg_dbl_hi);
384	status |= check_long_to_dbl_round(TO_ZERO, -long55_lo, &neg_dbl_lo);
385	status |= check_long_to_dbl_round(TO_ZERO, -long55_hi, &neg_dbl_lo);
386	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, -long55_lo, &neg_dbl_lo);
387	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, -long55_hi, &neg_dbl_lo);
388	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, -long55_lo, &neg_dbl_hi);
389	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, -long55_hi, &neg_dbl_hi);
390	return status;
391}
392#endif
393
394int check_single_arithmetic_op(flt_op_t op)
395{
396		char *result;
397        int status = 0;
398        dbl_overlay R, E;
399        double qtr, half, fA, fB, fD;
400		round_mode_t mode;
401		int q, s;
402		bool_t two_args = TRUE;
403		float whole = denorm_small;
404
405#define BINOP(op) \
406        __asm__ volatile( \
407					op" %0, %1, %2\n\t" \
408					: "=f"(fD) : "f"(fA) , "f"(fB));
409#define UNOP(op) \
410        __asm__ volatile( \
411					op" %0, %1\n\t" \
412					: "=f"(fD) : "f"(fA));
413
414		half = (double)whole/2;
415		qtr = half/2;
416
417		if (debug) {
418			print_double("qtr", qtr);
419			print_double("whole", whole);
420			print_double("2*whole", 2*whole);
421		}
422
423		for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
424		for (s = -1; s < 2; s += 2)
425		for (q = 1; q < 4; q += 2) {
426			double expected;
427			double lo = s*whole;
428			double hi = s*2*whole;
429
430			switch(op) {
431			case FADDS:
432				fA = s*whole;
433				fB = s*q*qtr;
434				break;
435			case FSUBS:
436				fA = s*2*whole;
437				fB = s*(q == 1 ? 3 : 1)*qtr;
438				break;
439			case FMULS:
440				fA = 0.5;
441				fB = s*(4+q)*half;
442				break;
443			case FDIVS:
444				fA = s*(4+q)*half;
445				fB = 2.0;
446				break;
447			default:
448				assert("check_single_arithmetic_op: unexpected op",
449					FALSE);
450				break;
451			}
452
453			switch(mode) {
454			case TO_NEAREST:
455				expected = (q == 1 ? lo : hi);
456				break;
457			case TO_ZERO:
458				expected = lo;
459				break;
460			case TO_PLUS_INFINITY:
461				expected = (s == 1 ? hi : lo);
462				break;
463			case TO_MINUS_INFINITY:
464				expected = (s == 1 ? lo : hi);
465				break;
466			}
467
468			set_rounding_mode(mode);
469
470			/*
471			** do the double precision dual operation just for comparison
472			** when debugging
473			*/
474			switch(op) {
475			case FADDS:
476				BINOP("fadds");
477				R.dbl = fD;
478				BINOP("fadd");
479				break;
480			case FSUBS:
481				BINOP("fsubs");
482				R.dbl = fD;
483				BINOP("fsub");
484				break;
485			case FMULS:
486				BINOP("fmuls");
487				R.dbl = fD;
488				BINOP("fmul");
489				break;
490			case FDIVS:
491				BINOP("fdivs");
492				R.dbl = fD;
493				BINOP("fdiv");
494				break;
495			default:
496				assert("check_single_arithmetic_op: unexpected op",
497					FALSE);
498				break;
499			}
500#undef UNOP
501#undef BINOP
502
503			E.dbl = expected;
504
505			if ((R.layout.sign != E.layout.sign) ||
506				(R.layout.exp != E.layout.exp) ||
507				(R.layout.frac_lo != E.layout.frac_lo) ||
508				(R.layout.frac_hi != E.layout.frac_hi)) {
509				result = "FAILED";
510				status = 1;
511			} else {
512				result = "PASSED";
513				status = 0;
514			}
515
516			printf("%s:%s:%s(%-13a",
517				round_mode_name[mode], result, flt_op_names[op], fA);
518			if (two_args) printf(", %-13a", fB);
519			printf(") = %-13a", R.dbl);
520			if (status) printf("\n\texpected %a", E.dbl);
521			putchar('\n');
522
523			if (debug) {
524				print_double("hi", hi);
525				print_double("lo", lo);
526				print_double("expected", expected);
527				print_double("got", R.dbl);
528				print_double("double result", fD);
529			}
530		}
531
532		return status;
533}
534
535int check_single_guarded_arithmetic_op(flt_op_t op)
536{
537		typedef struct {
538			int num, den, frac;
539		} fdivs_t;
540
541		char *result;
542        int status = 0;
543        flt_overlay A, B, Z;
544        dbl_overlay Res, Exp;
545        double fA, fB, fC, fD;
546		round_mode_t mode;
547		int g, s;
548		int arg_count;
549
550		fdivs_t divs_guard_cases[16] = {
551			{ 105, 56, 0x700000 },  /* : 0 */
552			{ 100, 57, 0x608FB8 },  /* : 1 */
553			{ 000, 00, 0x000000 },  /* : X */
554			{ 100, 52, 0x762762 },  /* : 3 */
555			{ 000, 00, 0x000000 },  /* : X */
556			{ 100, 55, 0x68BA2E },  /* : 5 */
557			{ 000, 00, 0x000000 },  /* : X */
558			{ 100, 51, 0x7AFAFA },  /* : 7 */
559			{ 000, 00, 0x000000 },  /* : X */
560			{ 100, 56, 0x649249 },  /* : 9 */
561			{ 000, 00, 0x000000 },  /* : X */
562			{ 100, 54, 0x6D097B },  /* : B */
563			{ 000, 00, 0x000000 },  /* : X */
564			{ 100, 59, 0x58F2FB },  /* : D */
565			{ 000, 00, 0x000000 },  /* : X */
566			{ 101, 52, 0x789D89 }  /* : F */
567		};
568
569		/*	0x1.00000 00000000p-3 */
570		/* set up the invariant fields of B, the arg to cause rounding */
571		B.flt = 0.0;
572		B.layout.exp = 124;  /* -3 */
573
574		/* set up args so result is always Z = 1.200000000000<g>p+0 */
575		Z.flt = 1.0;
576		Z.layout.sign = 0;
577
578#define TERNOP(op) \
579		arg_count = 3; \
580        __asm__ volatile( \
581					op" %0, %1, %2, %3\n\t" \
582					: "=f"(fD) : "f"(fA) , "f"(fB), "f"(fC));
583#define BINOP(op) \
584		arg_count = 2; \
585        __asm__ volatile( \
586					op" %0, %1, %2\n\t" \
587					: "=f"(fD) : "f"(fA) , "f"(fB));
588#define UNOP(op) \
589		arg_count = 1; \
590        __asm__ volatile( \
591					op" %0, %1\n\t" \
592					: "=f"(fD) : "f"(fA));
593
594	for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
595	for (s = -1; s < 2; s += 2)
596	for (g = 0; g < 16; g += 1) {
597		double lo, hi, expected;
598		int LSB;
599		int guard = 0;
600		int z_sign = s;
601
602		/*
603		** one argument will have exponent = 0 as will the result (by
604		** design) so choose the other argument with exponent -3 to
605		** force a 3 bit shift for scaling leaving us with 3 guard bits
606		** and the LSB bit at the bottom of the manitssa.
607		*/
608		switch(op) {
609		case FADDS:
610			/* 1p+0 + 1.00000<g>p-3 */
611			B.layout.frac = g;
612
613			fB = s*B.flt;
614			fA = s*1.0;
615
616			/* set up Z to be truncated result */
617
618			/* mask off LSB from resulting guard bits */
619			guard = g & 7;
620
621			Z.layout.frac = 0x100000 | (g >> 3);
622			break;
623		case FSUBS:
624			/* 1.200002p+0 - 1.000000000000<g>p-3 */
625			A.flt = 1.125;
626			/* add enough to avoid scaling of the result */
627			A.layout.frac |= 0x2;
628			fA = s*A.flt;
629
630			B.layout.frac = g;
631			fB = s*B.flt;
632
633			/* set up Z to be truncated result */
634			guard = (0x10-g);
635			Z.layout.frac = guard>>3;
636
637			/* mask off LSB from resulting guard bits */
638			guard &= 7;
639			break;
640		case FMULS:
641			/* 1 + g*2^-23 */
642			A.flt = 1.0;
643			A.layout.frac = g;
644			fA = s*A.flt;
645			fB = 1.125;
646
647			/* set up Z to be truncated result */
648			Z.flt = 1.0;
649			Z.layout.frac = 0x100000;
650			Z.layout.frac |= g + (g>>3);
651			guard = g & 7;
652			break;
653		case FDIVS:
654			/* g >> 3 == LSB, g & 7 == guard bits */
655			guard = g & 7;
656			if ((guard & 1) == 0) {
657				/* special case: guard bit X = 0 */
658				A.flt = denorm_small;
659				A.layout.frac = g;
660				fA = A.flt;
661				fB = s*8.0;
662				Z.flt = 0.0;
663				Z.layout.frac |= (g >> 3);
664			} else {
665				fA = s*divs_guard_cases[g].num;
666				fB = divs_guard_cases[g].den;
667
668				Z.flt = 1.0;
669				Z.layout.frac = divs_guard_cases[g].frac;
670			}
671			break;
672		case FMADDS:
673		case FMSUBS:
674		case FNMADDS:
675		case FNMSUBS:
676			/* 1 + g*2^-23 */
677			A.flt = 1.0;
678			A.layout.frac = g;
679			fA = s*A.flt;
680			fB = 1.125;
681
682			/* 1.000001p-1 */
683			A.flt = 0.5;
684			A.layout.frac = 1;
685			fC = (op == FMADDS || op == FNMADDS ? s : -s)*A.flt;
686
687			/* set up Z to be truncated result */
688			z_sign = (op == FNMADDS || op == FNMSUBS ? -s : s);
689			guard = ((g & 7) + 0x4) & 7;
690			Z.flt = 1.0;
691			Z.layout.frac = 0x500000;
692			Z.layout.frac |= g + (g>>3) + ((g & 7)>> 2 ? 1 : 0);
693			break;
694		default:
695			assert("check_single_arithmetic_op: unexpected op",
696				FALSE);
697			break;
698		}
699
700		/* get LSB for tie breaking */
701		LSB = Z.layout.frac & 1;
702
703		/* set up hi and lo */
704		lo = z_sign*Z.flt;
705		Z.layout.frac += 1;
706		hi = z_sign*Z.flt;
707
708		switch(mode) {
709		case TO_NEAREST:
710			/* look at 3 guard bits to determine expected rounding */
711			switch(guard) {
712			case 0:
713			case 1: case 2: case 3:
714				expected = lo;
715				break;
716			case 4:	/* tie: round to even */
717				if (debug) printf("tie: LSB = %d\n", LSB);
718				expected = (LSB == 0 ? lo : hi);
719				break;
720			case 5: case 6: case 7:
721				expected = hi;
722				break;
723			default:
724				assert("check_single_guarded_arithmetic_op: unexpected guard",
725					FALSE);
726			}
727			break;
728		case TO_ZERO:
729			expected = lo;
730			break;
731		case TO_PLUS_INFINITY:
732			if (guard == 0) {
733				/* no rounding */
734				expected = lo;
735			} else {
736				expected = (s == 1 ? hi : lo);
737			}
738			break;
739		case TO_MINUS_INFINITY:
740			if (guard == 0) {
741				/* no rounding */
742				expected = lo;
743			} else {
744				expected = (s == 1 ? lo : hi);
745			}
746			break;
747		}
748
749		set_rounding_mode(mode);
750
751		/*
752		** do the double precision dual operation just for comparison
753		** when debugging
754		*/
755		switch(op) {
756		case FADDS:
757			BINOP("fadds");
758			Res.dbl = fD;
759			break;
760		case FSUBS:
761			BINOP("fsubs");
762			Res.dbl = fD;
763			break;
764		case FMULS:
765			BINOP("fmuls");
766			Res.dbl = fD;
767			break;
768		case FDIVS:
769			BINOP("fdivs");
770			Res.dbl = fD;
771			break;
772		case FMADDS:
773			TERNOP("fmadds");
774			Res.dbl = fD;
775			break;
776		case FMSUBS:
777			TERNOP("fmsubs");
778			Res.dbl = fD;
779			break;
780		case FNMADDS:
781			TERNOP("fnmadds");
782			Res.dbl = fD;
783			break;
784		case FNMSUBS:
785			TERNOP("fnmsubs");
786			Res.dbl = fD;
787			break;
788		default:
789			assert("check_single_guarded_arithmetic_op: unexpected op",
790				FALSE);
791			break;
792		}
793#undef UNOP
794#undef BINOP
795#undef TERNOP
796
797		Exp.dbl = expected;
798
799		if ((Res.layout.sign != Exp.layout.sign) ||
800			(Res.layout.exp != Exp.layout.exp) ||
801			(Res.layout.frac_lo != Exp.layout.frac_lo) ||
802			(Res.layout.frac_hi != Exp.layout.frac_hi)) {
803			result = "FAILED";
804			status = 1;
805		} else {
806			result = "PASSED";
807			status = 0;
808		}
809
810		/* There seems to be some noise in the lower bits. The value
811		* on the least significant digit seems to vary when printing
812		* based on the rounding mode of the compiler.  Just trying
813		* to get rid of the noise in the least significant bits when
814		* printing the operand.
815		*/
816
817		fA = ((long int)(fA*10000))/10000.0;
818		/* Change -0.0 to a positive 0.0.  Some compilers print -0.0
819		 * others do not.  Make it consistent.
820		 */
821		if (fA == -0.0)
822		  fA = 0.0;
823
824		printf("%s:%s:%s(%-13.6f",
825			round_mode_name[mode], result, flt_op_names[op], fA);
826		if (arg_count > 1) printf(", %-13a", fB);
827		if (arg_count > 2) printf(", %-13a", fC);
828		printf(") = %-13a", Res.dbl);
829		if (status) printf("\n\texpected %a", Exp.dbl);
830		putchar('\n');
831
832		if (debug) {
833			print_double("hi", hi);
834			print_double("lo", lo);
835			print_double("expected", expected);
836			print_double("got", Res.dbl);
837		}
838	}
839
840	return status;
841}
842
843int check_double_guarded_arithmetic_op(flt_op_t op)
844{
845	typedef struct {
846		int num, den, hi, lo;
847	} fdiv_t;
848	typedef struct {
849		double arg;
850		int exp, hi, lo;
851	} fsqrt_t;
852
853	char *result;
854	int status = 0;
855	dbl_overlay A, B, Z;
856	dbl_overlay Res, Exp;
857	double fA, fB, fC, fD;
858	round_mode_t mode;
859	int g, s;
860	int arg_count;
861	fdiv_t div_guard_cases[16] = {
862		{ 62, 62, 0x00000, 0x00000000 },	/* 0 */
863		{ 64, 62, 0x08421, 0x08421084 },	/* 1 */
864		{ 66, 62, 0x10842, 0x10842108 },	/* 2 */
865		{ 100, 62, 0x9ce73, 0x9ce739ce },	/* 3 */
866		{ 100, 62, 0x9ce73, 0x9ce739ce },	/* X */
867		{ 102, 62, 0xa5294, 0xa5294a52 },	/* 5 */
868		{ 106, 62, 0xb5ad6, 0xb5ad6b5a },	/* 6 */
869		{ 108, 62, 0xbdef7, 0xbdef7bde },	/* 7 */
870		{ 108, 108, 0x00000, 0x00000000 },	/* 8 */
871		{ 112, 62, 0xce739, 0xce739ce7 },	/* 9 */
872		{ 114, 62, 0xd6b5a, 0xd6b5ad6b },	/* A */
873		{ 116, 62, 0xdef7b, 0xdef7bdef },	/* B */
874		{ 84, 62, 0x5ad6b, 0x5ad6b5ad },	/* X */
875		{ 118, 62, 0xe739c, 0xe739ce73 },	/* D */
876		{ 90, 62, 0x739ce, 0x739ce739 },	/* E */
877		{ 92, 62, 0x7bdef, 0x7bdef7bd }		/* F */
878	};
879
880
881	fsqrt_t sqrt_guard_cases[16] = {
882		{ 0x1.08800p0,  0, 0x04371, 0xd9ab72fb}, /* :0 B8.8440  */
883		{ 0x0.D2200p0, -1, 0xcfdca, 0xf353049e}, /* :1 A4.6910  */
884		{ 0x1.A8220p0,  0, 0x49830, 0x2b49cd6d}, /* :2 E9.D411  */
885		{ 0x1.05A20p0,  0, 0x02cd1, 0x3b44f3bf}, /* :3 B7.82D1  */
886		{ 0x0.CA820p0, -1, 0xc7607, 0x3cec0937}, /* :4 A1.6541  */
887		{ 0x1.DCA20p0,  0, 0x5d4f8, 0xd4e4c2b2}, /* :5 F7.EE51  */
888		{ 0x1.02C80p0,  0, 0x01630, 0x9cde7483}, /* :6 B6.8164  */
889		{ 0x0.DC800p0, -1, 0xdb2cf, 0xe686fe7c}, /* :7 A8.6E40  */
890		{ 0x0.CF920p0, -1, 0xcd089, 0xb6860626}, /* :8 A3.67C9  */
891		{ 0x1.1D020p0,  0, 0x0e1d6, 0x2e78ed9d}, /* :9 BF.8E81  */
892		{ 0x0.E1C80p0, -1, 0xe0d52, 0x6020fb6b}, /* :A AA.70E4  */
893		{ 0x0.C8000p0, -1, 0xc48c6, 0x001f0abf}, /* :B A0.6400  */
894		{ 0x1.48520p0,  0, 0x21e9e, 0xd813e2e2}, /* :C CD.A429  */
895		{ 0x0.F4C20p0, -1, 0xf4a1b, 0x09bbf0b0}, /* :D B1.7A61  */
896		{ 0x0.CD080p0, -1, 0xca348, 0x79b907ae}, /* :E A2.6684  */
897		{ 0x1.76B20p0,  0, 0x35b67, 0x81aed827}  /* :F DB.BB59  */
898	};
899
900	/*	0x1.00000 00000000p-3 */
901	/* set up the invariant fields of B, the arg to cause rounding */
902	B.dbl = 0.0;
903	B.layout.exp = 1020;
904
905	/* set up args so result is always Z = 1.200000000000<g>p+0 */
906	Z.dbl = 1.0;
907	Z.layout.sign = 0;
908
909#define TERNOP(op) \
910		arg_count = 3; \
911        __asm__ volatile( \
912					op" %0, %1, %2, %3\n\t" \
913					: "=f"(fD) : "f"(fA) , "f"(fB), "f"(fC));
914#define BINOP(op) \
915		arg_count = 2; \
916        __asm__ volatile( \
917					op" %0, %1, %2\n\t" \
918					: "=f"(fD) : "f"(fA) , "f"(fB));
919#define UNOP(op) \
920		arg_count = 1; \
921        __asm__ volatile( \
922					op" %0, %1\n\t" \
923					: "=f"(fD) : "f"(fA));
924
925	for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
926	for (s = (op != FSQRT ? -1 : 1); s < 2; s += 2)
927	for (g = 0; g < 16; g += 1) {
928		double lo, hi, expected;
929		int LSB;
930		int guard;
931		int z_sign = s;
932
933		/*
934		** one argument will have exponent = 0 as will the result (by
935		** design) so choose the other argument with exponent -3 to
936		** force a 3 bit shift for scaling leaving us with 3 guard bits
937		** and the LSB bit at the bottom of the manitssa.
938		*/
939		switch(op) {
940		case FADD:
941			/* 1p+0 + 1.000000000000<g>p-3 */
942			B.layout.frac_lo = g;
943
944			fB = s*B.dbl;
945			fA = s*1.0;
946
947			/* set up Z to be truncated result */
948
949			/* mask off LSB from resulting guard bits */
950			guard = g & 7;
951
952			Z.layout.frac_hi = 0x20000;
953			Z.layout.frac_lo = g >> 3;
954
955			break;
956		case FSUB:
957			/* 1.2000000000002p+0 - 1.000000000000<g>p-3 */
958			A.dbl = 1.125;
959			/* add enough to avoid scaling of the result */
960			A.layout.frac_lo = 0x2;
961			fA = s*A.dbl;
962
963			B.layout.frac_lo = g;
964			fB = s*B.dbl;
965
966			/* set up Z to be truncated result */
967			guard = (0x10-g);
968			Z.layout.frac_hi = 0x0;
969			Z.layout.frac_lo = guard>>3;
970
971			/* mask off LSB from resulting guard bits */
972			guard &= 7;
973			break;
974		case FMUL:
975			/* 1 + g*2^-52 */
976			A.dbl = 1.0;
977			A.layout.frac_lo = g;
978			fA = s*A.dbl;
979			fB = 1.125;
980
981			/* set up Z to be truncated result */
982			Z.dbl = 1.0;
983			Z.layout.frac_hi = 0x20000;
984			Z.layout.frac_lo = g + (g>>3);
985			guard = g & 7;
986			break;
987		case FMADD:
988		case FMSUB:
989		case FNMADD:
990		case FNMSUB:
991			/* 1 + g*2^-52 */
992			A.dbl = 1.0;
993			A.layout.frac_lo = g;
994			fA = s*A.dbl;
995			fB = 1.125;
996
997			/* 1.0000000000001p-1 */
998			A.dbl = 0.5;
999			A.layout.frac_lo = 1;
1000			fC = (op == FMADD || op == FNMADD ? s : -s)*A.dbl;
1001
1002			/* set up Z to be truncated result */
1003			z_sign = (op == FNMADD || op == FNMSUB ? -s : s);
1004			guard = ((g & 7) + 0x4) & 7;
1005			Z.dbl = 1.0;
1006			Z.layout.frac_hi = 0xa0000;
1007			Z.layout.frac_lo = g + (g>>3) + ((g & 7)>> 2 ? 1 : 0);
1008			break;
1009		case FDIV:
1010			/* g >> 3 == LSB, g & 7 == guard bits */
1011			guard = g & 7;
1012			if (guard == 0x4) {
1013				/* special case guard bits == 4, inexact tie */
1014				fB = s*2.0;
1015				Z.dbl = 0.0;
1016				if (g >> 3) {
1017					fA = dbl_denorm_small + 2*dbl_denorm_small;
1018					Z.layout.frac_lo = 0x1;
1019				} else {
1020					fA = dbl_denorm_small;
1021				}
1022			} else {
1023				fA = s*div_guard_cases[g].num;
1024				fB = div_guard_cases[g].den;
1025
1026				printf("%d/%d\n",
1027					s*div_guard_cases[g].num,
1028					div_guard_cases[g].den);
1029				Z.dbl = 1.0;
1030				Z.layout.frac_hi = div_guard_cases[g].hi;
1031				Z.layout.frac_lo = div_guard_cases[g].lo;
1032			}
1033			break;
1034		case FSQRT:
1035			fA = s*sqrt_guard_cases[g].arg;
1036			Z.dbl = 1.0;
1037			Z.layout.exp = sqrt_guard_cases[g].exp + 1023;
1038			Z.layout.frac_hi = sqrt_guard_cases[g].hi;
1039			Z.layout.frac_lo = sqrt_guard_cases[g].lo;
1040			guard = g >> 1;
1041			if (g & 1) guard |= 1;
1042			/* don't have test cases for when X bit = 0 */
1043			if (guard == 0 || guard == 4) continue;
1044			break;
1045		default:
1046			assert("check_double_guarded_arithmetic_op: unexpected op",
1047				FALSE);
1048			break;
1049		}
1050
1051		/* get LSB for tie breaking */
1052		LSB = Z.layout.frac_lo & 1;
1053
1054		/* set up hi and lo */
1055		lo = z_sign*Z.dbl;
1056		Z.layout.frac_lo += 1;
1057		hi = z_sign*Z.dbl;
1058
1059		switch(mode) {
1060		case TO_NEAREST:
1061			/* look at 3 guard bits to determine expected rounding */
1062			switch(guard) {
1063			case 0:
1064			case 1: case 2: case 3:
1065				expected = lo;
1066				break;
1067			case 4:	/* tie: round to even */
1068				if (debug) printf("tie: LSB = %d\n", LSB);
1069				expected = (LSB == 0 ? lo : hi);
1070				break;
1071			case 5: case 6: case 7:
1072				expected = hi;
1073				break;
1074			default:
1075				assert("check_double_guarded_arithmetic_op: unexpected guard",
1076					FALSE);
1077			}
1078			break;
1079		case TO_ZERO:
1080			expected = lo;
1081			break;
1082		case TO_PLUS_INFINITY:
1083			if (guard == 0) {
1084				/* no rounding */
1085				expected = lo;
1086			} else {
1087				expected = (s == 1 ? hi : lo);
1088			}
1089			break;
1090		case TO_MINUS_INFINITY:
1091			if (guard == 0) {
1092				/* no rounding */
1093				expected = lo;
1094			} else {
1095				expected = (s == 1 ? lo : hi);
1096			}
1097			break;
1098		}
1099
1100		set_rounding_mode(mode);
1101
1102		/*
1103		** do the double precision dual operation just for comparison
1104		** when debugging
1105		*/
1106		switch(op) {
1107		case FADD:
1108			BINOP("fadd");
1109			Res.dbl = fD;
1110			break;
1111		case FSUB:
1112			BINOP("fsub");
1113			Res.dbl = fD;
1114			break;
1115		case FMUL:
1116			BINOP("fmul");
1117			Res.dbl = fD;
1118			break;
1119		case FMADD:
1120			TERNOP("fmadd");
1121			Res.dbl = fD;
1122			break;
1123		case FMSUB:
1124			TERNOP("fmsub");
1125			Res.dbl = fD;
1126			break;
1127		case FNMADD:
1128			TERNOP("fnmadd");
1129			Res.dbl = fD;
1130			break;
1131		case FNMSUB:
1132			TERNOP("fnmsub");
1133			Res.dbl = fD;
1134			break;
1135		case FDIV:
1136			BINOP("fdiv");
1137			Res.dbl = fD;
1138			break;
1139		case FSQRT:
1140			UNOP("fsqrt");
1141			Res.dbl = fD;
1142			break;
1143		default:
1144			assert("check_double_guarded_arithmetic_op: unexpected op",
1145				FALSE);
1146			break;
1147		}
1148#undef UNOP
1149#undef BINOP
1150#undef TERNOP
1151
1152		Exp.dbl = expected;
1153
1154		if ((Res.layout.sign != Exp.layout.sign) ||
1155			(Res.layout.exp != Exp.layout.exp) ||
1156			(Res.layout.frac_lo != Exp.layout.frac_lo) ||
1157			(Res.layout.frac_hi != Exp.layout.frac_hi)) {
1158			result = "FAILED";
1159			status = 1;
1160		} else {
1161			result = "PASSED";
1162			status = 0;
1163		}
1164
1165		printf("%s:%s:%s(%-13a",
1166			round_mode_name[mode], result, flt_op_names[op], fA);
1167		if (arg_count > 1) printf(", %-13a", fB);
1168		if (arg_count > 2) printf(", %-13a", fC);
1169		printf(") = %-13a", Res.dbl);
1170		if (status) printf("\n\texpected %a", Exp.dbl);
1171		putchar('\n');
1172
1173		if (debug) {
1174			print_double("hi", hi);
1175			print_double("lo", lo);
1176			print_double("expected", expected);
1177			print_double("got", Res.dbl);
1178		}
1179	}
1180
1181	return status;
1182}
1183
1184int test_float_arithmetic_ops()
1185{
1186	int status = 0;
1187	flt_op_t op;
1188
1189	/*
1190	** choose FP operands whose result should be rounded to either
1191	** lo or hi.
1192	*/
1193
1194	printf("-------------------------- %s --------------------------\n",
1195		"test rounding of float operators without guard bits");
1196	for (op = FADDS; op <= FDIVS; op++) {
1197		status |= check_single_arithmetic_op(op);
1198	}
1199
1200	printf("-------------------------- %s --------------------------\n",
1201		"test rounding of float operators with guard bits");
1202	for (op = FADDS; op <= FNMSUBS; op++) {
1203		status |= check_single_guarded_arithmetic_op(op);
1204	}
1205
1206	printf("-------------------------- %s --------------------------\n",
1207		"test rounding of double operators with guard bits");
1208	for (op = FADD; op <= FSQRT; op++) {
1209		status |= check_double_guarded_arithmetic_op(op);
1210	}
1211	return status;
1212}
1213
1214
1215int
1216main()
1217{
1218	int status = 0;
1219
1220	init();
1221
1222	status |= test_dbl_to_float_convert("test denormalized convert", &denorm_small);
1223	status |= test_dbl_to_float_convert("test normalized convert", &norm_small);
1224	status |= test_int_to_float_convert("test (float)int convert");
1225	status |= test_int_to_float_convert("test (float)int convert");
1226
1227#ifdef __powerpc64__
1228	status |= test_long_to_double_convert("test (double)long convert");
1229#endif
1230	status |= test_float_arithmetic_ops();
1231	return status;
1232}
1233