1
2/*  Copyright (C) 2006 Dave Nomura
3       dcnltc@us.ibm.com
4
5    This program is free software; you can redistribute it and/or
6    modify it under the terms of the GNU General Public License as
7    published by the Free Software Foundation; either version 2 of the
8    License, or (at your option) any later version.
9
10    This program is distributed in the hope that it will be useful, but
11    WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    General Public License for more details.
14
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software
17    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18    02111-1307, USA.
19
20    The GNU General Public License is contained in the file COPYING.
21*/
22
23#include <stdio.h>
24#include <stdlib.h>
25#include <limits.h>
26
27typedef enum { FALSE=0, TRUE } bool_t;
28
29typedef enum {
30	FADDS, FSUBS, FMULS, FDIVS,
31	FMADDS, FMSUBS, FNMADDS, FNMSUBS,
32	FADD, FSUB, FMUL, FDIV, FMADD,
33	FMSUB, FNMADD, FNMSUB, FSQRT
34} flt_op_t;
35
36typedef enum {
37	TO_NEAREST=0, TO_ZERO, TO_PLUS_INFINITY, TO_MINUS_INFINITY } round_mode_t;
38char *round_mode_name[] = { "near", "zero", "+inf", "-inf" };
39
40const char *flt_op_names[] = {
41	"fadds", "fsubs", "fmuls", "fdivs",
42	"fmadds", "fmsubs", "fnmadds", "fnmsubs",
43	"fadd", "fsub", "fmul", "fdiv", "fmadd", "fmsub", "fnmadd",
44	"fnmsub", "fsqrt"
45};
46
47typedef unsigned int fpscr_t;
48
49typedef union {
50	float flt;
51	struct {
52		unsigned int sign:1;
53		unsigned int exp:8;
54		unsigned int frac:23;
55	} layout;
56} flt_overlay;
57
58typedef union {
59	double dbl;
60	struct {
61		unsigned int sign:1;
62		unsigned int exp:11;
63		unsigned int frac_hi:20;
64		unsigned int frac_lo:32;
65	} layout;
66	struct {
67		unsigned int hi;
68		unsigned int lo;
69	} dbl_pair;
70} dbl_overlay;
71
72void assert_fail(const char *msg,
73	const char* expr, const char* file, int line, const char*fn);
74
75#define STRING(__str)  #__str
76#define assert(msg, expr)                                           \
77  ((void) ((expr) ? 0 :                                         \
78           (assert_fail (msg, STRING(expr),                  \
79                             __FILE__, __LINE__,                \
80                             __PRETTY_FUNCTION__), 0)))
81float denorm_small;
82double dbl_denorm_small;
83float norm_small;
84bool_t debug = FALSE;
85bool_t long_is_64_bits = sizeof(long) == 8;
86
87void assert_fail (msg, expr, file, line, fn)
88const char* msg;
89const char* expr;
90const char* file;
91int line;
92const char*fn;
93{
94   printf( "\n%s: %s:%d (%s): Assertion `%s' failed.\n",
95               msg, file, line, fn, expr );
96   exit( 1 );
97}
98void set_rounding_mode(round_mode_t mode)
99{
100	switch(mode) {
101	case TO_NEAREST:
102		asm volatile("mtfsfi 7, 0");
103		break;
104	case TO_ZERO:
105		asm volatile("mtfsfi 7, 1");
106		break;
107	case TO_PLUS_INFINITY:
108		asm volatile("mtfsfi 7, 2");
109		break;
110	case TO_MINUS_INFINITY:
111		asm volatile("mtfsfi 7, 3");
112		break;
113	}
114}
115
116void print_double(char *msg, double dbl)
117{
118	dbl_overlay D;
119	D.dbl = dbl;
120
121	printf("%15s : dbl %-20a = %c(%4d, %05x%08x)\n",
122			msg, D.dbl, (D.layout.sign == 0 ? '+' : '-'),
123			D.layout.exp, D.layout.frac_hi, D.layout.frac_lo);
124}
125
126void print_single(char *msg, float *flt)
127{
128	flt_overlay F;
129	F.flt = *flt;
130
131	/* NOTE: for the purposes of comparing the fraction of a single with
132	**       a double left shift the .frac so that hex digits are grouped
133	**	     from left to right.  this is necessary because the size of a
134	**		 single mantissa (23) bits is not a multiple of 4
135	*/
136	printf("%15s : flt %-20a = %c(%4d, %06x)\n",
137		msg, F.flt, (F.layout.sign == 0 ? '+' : '-'), F.layout.exp, F.layout.frac << 1);
138}
139
140int check_dbl_to_flt_round(round_mode_t mode, double dbl, float *expected)
141{
142	int status = 0;
143	flt_overlay R, E;
144	char *result;
145
146	set_rounding_mode(mode);
147
148	E.flt = *expected;
149	R.flt = (float)dbl;
150
151	if ((R.layout.sign != E.layout.sign) ||
152		(R.layout.exp != E.layout.exp) ||
153		(R.layout.frac != E.layout.frac)) {
154		result = "FAILED";
155		status = 1;
156	} else {
157		result = "PASSED";
158		status = 0;
159	}
160	printf("%s:%s:(double)(%-20a) = %20a",
161		round_mode_name[mode], result, R.flt, dbl);
162	if (status) {
163		print_single("\n\texpected", &E.flt);
164		print_single("\n\trounded ", &R.flt);
165	}
166	putchar('\n');
167	return status;
168}
169
170int test_dbl_to_float_convert(char *msg, float *base)
171{
172	int status = 0;
173	double half = (double)denorm_small/2;
174	double qtr = half/2;
175	double D_hi = (double)*base + half + qtr;
176	double D_lo = (double)*base + half - qtr;
177	float F_lo = *base;
178	float F_hi = F_lo + denorm_small;
179
180
181	/*
182	** .....+-----+-----+-----+-----+---....
183	**      ^F_lo ^           ^     ^
184	**            D_lo
185	**                        D_hi
186	**                              F_hi
187	** F_lo and F_hi are two consecutive single float model numbers
188	** denorm_small distance apart. D_lo and D_hi are two numbers
189	** within that range that are not representable as single floats
190	** and will be rounded to either F_lo or F_hi.
191	*/
192	printf("-------------------------- %s --------------------------\n", msg);
193	if (debug) {
194		print_double("D_lo", D_lo);
195		print_double("D_hi", D_hi);
196		print_single("F_lo", &F_lo);
197		print_single("F_hi", &F_hi);
198	}
199
200	/* round to nearest */
201	status |= check_dbl_to_flt_round(TO_NEAREST, D_hi, &F_hi);
202	status |= check_dbl_to_flt_round(TO_NEAREST, D_lo, &F_lo);
203
204	/* round to zero */
205	status |= check_dbl_to_flt_round(TO_ZERO, D_hi, (D_hi > 0 ? &F_lo : &F_hi));
206	status |= check_dbl_to_flt_round(TO_ZERO, D_lo, (D_hi > 0 ? &F_lo : &F_hi));
207
208	/* round to +inf */
209	status |= check_dbl_to_flt_round(TO_PLUS_INFINITY, D_hi, &F_hi);
210	status |= check_dbl_to_flt_round(TO_PLUS_INFINITY, D_lo, &F_hi);
211
212	/* round to -inf */
213	status |= check_dbl_to_flt_round(TO_MINUS_INFINITY, D_hi, &F_lo);
214	status |= check_dbl_to_flt_round(TO_MINUS_INFINITY, D_lo, &F_lo);
215	return status;
216}
217
218void
219init()
220{
221	flt_overlay F;
222	dbl_overlay D;
223
224	/* small is the smallest denormalized single float number */
225	F.layout.sign = 0;
226	F.layout.exp = 0;
227	F.layout.frac = 1;
228	denorm_small = F.flt;	/* == 2^(-149) */
229	if (debug) {
230		print_double("float small", F.flt);
231	}
232
233	D.layout.sign = 0;
234	D.layout.exp = 0;
235	D.layout.frac_hi = 0;
236	D.layout.frac_lo = 1;
237	dbl_denorm_small = D.dbl;	/* == 2^(-1022) */
238	if (debug) {
239		print_double("double small", D.dbl);
240	}
241
242	/* n_small is the smallest normalized single precision float */
243	F.layout.exp = 1;
244	norm_small = F.flt;
245}
246
247int check_int_to_flt_round(round_mode_t mode, long L, float *expected)
248{
249	int status = 0;
250	int I = L;
251	char *int_name = "int";
252	flt_overlay R, E;
253	char *result;
254	int iter;
255
256	set_rounding_mode(mode);
257	E.flt = *expected;
258
259	for (iter = 0; iter < 2; iter++) {
260		int stat = 0;
261		R.flt = (iter == 0 ? (float)I : (float)L);
262
263		if ((R.layout.sign != E.layout.sign) ||
264			(R.layout.exp != E.layout.exp) ||
265			(R.layout.frac != E.layout.frac)) {
266			result = "FAILED";
267			stat = 1;
268		} else {
269			result = "PASSED";
270			stat = 0;
271		}
272		printf("%s:%s:(float)(%4s)%9d = %11.1f",
273			round_mode_name[mode], result, int_name, I, R.flt);
274		if (stat) {
275			print_single("\n\texpected: %.1f ", &E.flt);
276			print_single("\n\trounded ", &R.flt);
277		}
278		putchar('\n');
279		status |= stat;
280
281		if (!long_is_64_bits) break;
282		int_name = "long";
283	}
284	return status;
285}
286
287int check_long_to_dbl_round(round_mode_t mode, long L, double *expected)
288{
289	int status = 0;
290	dbl_overlay R, E;
291	char *result;
292
293	set_rounding_mode(mode);
294	E.dbl = *expected;
295
296	R.dbl = (double)L;
297
298	if ((R.layout.sign != E.layout.sign) ||
299		(R.layout.exp != E.layout.exp) ||
300		(R.layout.frac_lo != E.layout.frac_lo) ||
301		(R.layout.frac_hi != E.layout.frac_hi)) {
302		result = "FAILED";
303		status = 1;
304	} else {
305		result = "PASSED";
306		status = 0;
307	}
308	printf("%s:%s:(double)(%18ld) = %20.1f",
309		round_mode_name[mode], result, L, R.dbl);
310	if (status) {
311		printf("\n\texpected %.1f : ", E.dbl);
312	}
313	putchar('\n');
314	return status;
315}
316
317int test_int_to_float_convert(char *msg)
318{
319	int status = 0;
320	int int24_hi = 0x03ff0fff;
321	int int24_lo = 0x03ff0ffd;
322	float pos_flt_lo = 67047420.0;
323	float pos_flt_hi = 67047424.0;
324	float neg_flt_lo = -67047420.0;
325	float neg_flt_hi = -67047424.0;
326
327	printf("-------------------------- %s --------------------------\n", msg);
328	status |= check_int_to_flt_round(TO_NEAREST, int24_lo, &pos_flt_lo);
329	status |= check_int_to_flt_round(TO_NEAREST, int24_hi, &pos_flt_hi);
330	status |= check_int_to_flt_round(TO_ZERO, int24_lo, &pos_flt_lo);
331	status |= check_int_to_flt_round(TO_ZERO, int24_hi, &pos_flt_lo);
332	status |= check_int_to_flt_round(TO_PLUS_INFINITY, int24_lo, &pos_flt_hi);
333	status |= check_int_to_flt_round(TO_PLUS_INFINITY, int24_hi, &pos_flt_hi);
334	status |= check_int_to_flt_round(TO_MINUS_INFINITY, int24_lo, &pos_flt_lo);
335	status |= check_int_to_flt_round(TO_MINUS_INFINITY, int24_hi, &pos_flt_lo);
336
337	status |= check_int_to_flt_round(TO_NEAREST, -int24_lo, &neg_flt_lo);
338	status |= check_int_to_flt_round(TO_NEAREST, -int24_hi, &neg_flt_hi);
339	status |= check_int_to_flt_round(TO_ZERO, -int24_lo, &neg_flt_lo);
340	status |= check_int_to_flt_round(TO_ZERO, -int24_hi, &neg_flt_lo);
341	status |= check_int_to_flt_round(TO_PLUS_INFINITY, -int24_lo, &neg_flt_lo);
342	status |= check_int_to_flt_round(TO_PLUS_INFINITY, -int24_hi, &neg_flt_lo);
343	status |= check_int_to_flt_round(TO_MINUS_INFINITY, -int24_lo, &neg_flt_hi);
344	status |= check_int_to_flt_round(TO_MINUS_INFINITY, -int24_hi, &neg_flt_hi);
345	return status;
346}
347
348#ifdef __powerpc64__
349int test_long_to_double_convert(char *msg)
350{
351	int status = 0;
352	long long55_hi = 0x07ff0ffffffffff;
353	long long55_lo = 0x07ff0fffffffffd;
354	double pos_dbl_lo = 36012304344547324.0;
355	double pos_dbl_hi = 36012304344547328.0;
356	double neg_dbl_lo = -36012304344547324.0;
357	double neg_dbl_hi = -36012304344547328.0;
358
359	printf("-------------------------- %s --------------------------\n", msg);
360	status |= check_long_to_dbl_round(TO_NEAREST, long55_lo, &pos_dbl_lo);
361	status |= check_long_to_dbl_round(TO_NEAREST, long55_hi, &pos_dbl_hi);
362	status |= check_long_to_dbl_round(TO_ZERO, long55_lo, &pos_dbl_lo);
363	status |= check_long_to_dbl_round(TO_ZERO, long55_hi, &pos_dbl_lo);
364	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, long55_lo, &pos_dbl_hi);
365	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, long55_hi, &pos_dbl_hi);
366	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, long55_lo, &pos_dbl_lo);
367	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, long55_hi, &pos_dbl_lo);
368
369	status |= check_long_to_dbl_round(TO_NEAREST, -long55_lo, &neg_dbl_lo);
370	status |= check_long_to_dbl_round(TO_NEAREST, -long55_hi, &neg_dbl_hi);
371	status |= check_long_to_dbl_round(TO_ZERO, -long55_lo, &neg_dbl_lo);
372	status |= check_long_to_dbl_round(TO_ZERO, -long55_hi, &neg_dbl_lo);
373	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, -long55_lo, &neg_dbl_lo);
374	status |= check_long_to_dbl_round(TO_PLUS_INFINITY, -long55_hi, &neg_dbl_lo);
375	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, -long55_lo, &neg_dbl_hi);
376	status |= check_long_to_dbl_round(TO_MINUS_INFINITY, -long55_hi, &neg_dbl_hi);
377	return status;
378}
379#endif
380
381int check_single_arithmetic_op(flt_op_t op)
382{
383		char *result;
384        int status = 0;
385        dbl_overlay R, E;
386        double qtr, half, fA, fB, fD;
387		round_mode_t mode;
388		int q, s;
389		bool_t two_args = TRUE;
390		float whole = denorm_small;
391
392#define BINOP(op) \
393        __asm__ volatile( \
394					op" %0, %1, %2\n\t" \
395					: "=f"(fD) : "f"(fA) , "f"(fB));
396#define UNOP(op) \
397        __asm__ volatile( \
398					op" %0, %1\n\t" \
399					: "=f"(fD) : "f"(fA));
400
401		half = (double)whole/2;
402		qtr = half/2;
403
404		if (debug) {
405			print_double("qtr", qtr);
406			print_double("whole", whole);
407			print_double("2*whole", 2*whole);
408		}
409
410		for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
411		for (s = -1; s < 2; s += 2)
412		for (q = 1; q < 4; q += 2) {
413			double expected;
414			double lo = s*whole;
415			double hi = s*2*whole;
416
417			switch(op) {
418			case FADDS:
419				fA = s*whole;
420				fB = s*q*qtr;
421				break;
422			case FSUBS:
423				fA = s*2*whole;
424				fB = s*(q == 1 ? 3 : 1)*qtr;
425				break;
426			case FMULS:
427				fA = 0.5;
428				fB = s*(4+q)*half;
429				break;
430			case FDIVS:
431				fA = s*(4+q)*half;
432				fB = 2.0;
433				break;
434			default:
435				assert("check_single_arithmetic_op: unexpected op",
436					FALSE);
437				break;
438			}
439
440			switch(mode) {
441			case TO_NEAREST:
442				expected = (q == 1 ? lo : hi);
443				break;
444			case TO_ZERO:
445				expected = lo;
446				break;
447			case TO_PLUS_INFINITY:
448				expected = (s == 1 ? hi : lo);
449				break;
450			case TO_MINUS_INFINITY:
451				expected = (s == 1 ? lo : hi);
452				break;
453			}
454
455			set_rounding_mode(mode);
456
457			/*
458			** do the double precision dual operation just for comparison
459			** when debugging
460			*/
461			switch(op) {
462			case FADDS:
463				BINOP("fadds");
464				R.dbl = fD;
465				BINOP("fadd");
466				break;
467			case FSUBS:
468				BINOP("fsubs");
469				R.dbl = fD;
470				BINOP("fsub");
471				break;
472			case FMULS:
473				BINOP("fmuls");
474				R.dbl = fD;
475				BINOP("fmul");
476				break;
477			case FDIVS:
478				BINOP("fdivs");
479				R.dbl = fD;
480				BINOP("fdiv");
481				break;
482			default:
483				assert("check_single_arithmetic_op: unexpected op",
484					FALSE);
485				break;
486			}
487#undef UNOP
488#undef BINOP
489
490			E.dbl = expected;
491
492			if ((R.layout.sign != E.layout.sign) ||
493				(R.layout.exp != E.layout.exp) ||
494				(R.layout.frac_lo != E.layout.frac_lo) ||
495				(R.layout.frac_hi != E.layout.frac_hi)) {
496				result = "FAILED";
497				status = 1;
498			} else {
499				result = "PASSED";
500				status = 0;
501			}
502
503			printf("%s:%s:%s(%-13a",
504				round_mode_name[mode], result, flt_op_names[op], fA);
505			if (two_args) printf(", %-13a", fB);
506			printf(") = %-13a", R.dbl);
507			if (status) printf("\n\texpected %a", E.dbl);
508			putchar('\n');
509
510			if (debug) {
511				print_double("hi", hi);
512				print_double("lo", lo);
513				print_double("expected", expected);
514				print_double("got", R.dbl);
515				print_double("double result", fD);
516			}
517		}
518
519		return status;
520}
521
522int check_single_guarded_arithmetic_op(flt_op_t op)
523{
524		typedef struct {
525			int num, den, frac;
526		} fdivs_t;
527
528		char *result;
529        int status = 0;
530        flt_overlay A, B, Z;
531        dbl_overlay Res, Exp;
532        double fA, fB, fC, fD;
533		round_mode_t mode;
534		int g, s;
535		int arg_count;
536
537		fdivs_t divs_guard_cases[16] = {
538			{ 105, 56, 0x700000 },  /* : 0 */
539			{ 100, 57, 0x608FB8 },  /* : 1 */
540			{ 000, 00, 0x000000 },  /* : X */
541			{ 100, 52, 0x762762 },  /* : 3 */
542			{ 000, 00, 0x000000 },  /* : X */
543			{ 100, 55, 0x68BA2E },  /* : 5 */
544			{ 000, 00, 0x000000 },  /* : X */
545			{ 100, 51, 0x7AFAFA },  /* : 7 */
546			{ 000, 00, 0x000000 },  /* : X */
547			{ 100, 56, 0x649249 },  /* : 9 */
548			{ 000, 00, 0x000000 },  /* : X */
549			{ 100, 54, 0x6D097B },  /* : B */
550			{ 000, 00, 0x000000 },  /* : X */
551			{ 100, 59, 0x58F2FB },  /* : D */
552			{ 000, 00, 0x000000 },  /* : X */
553			{ 101, 52, 0x789D89 }  /* : F */
554		};
555
556		/*	0x1.00000 00000000p-3 */
557		/* set up the invariant fields of B, the arg to cause rounding */
558		B.flt = 0.0;
559		B.layout.exp = 124;  /* -3 */
560
561		/* set up args so result is always Z = 1.200000000000<g>p+0 */
562		Z.flt = 1.0;
563		Z.layout.sign = 0;
564
565#define TERNOP(op) \
566		arg_count = 3; \
567        __asm__ volatile( \
568					op" %0, %1, %2, %3\n\t" \
569					: "=f"(fD) : "f"(fA) , "f"(fB), "f"(fC));
570#define BINOP(op) \
571		arg_count = 2; \
572        __asm__ volatile( \
573					op" %0, %1, %2\n\t" \
574					: "=f"(fD) : "f"(fA) , "f"(fB));
575#define UNOP(op) \
576		arg_count = 1; \
577        __asm__ volatile( \
578					op" %0, %1\n\t" \
579					: "=f"(fD) : "f"(fA));
580
581	for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
582	for (s = -1; s < 2; s += 2)
583	for (g = 0; g < 16; g += 1) {
584		double lo, hi, expected;
585		int LSB;
586		int guard = 0;
587		int z_sign = s;
588
589		/*
590		** one argument will have exponent = 0 as will the result (by
591		** design) so choose the other argument with exponent -3 to
592		** force a 3 bit shift for scaling leaving us with 3 guard bits
593		** and the LSB bit at the bottom of the manitssa.
594		*/
595		switch(op) {
596		case FADDS:
597			/* 1p+0 + 1.00000<g>p-3 */
598			B.layout.frac = g;
599
600			fB = s*B.flt;
601			fA = s*1.0;
602
603			/* set up Z to be truncated result */
604
605			/* mask off LSB from resulting guard bits */
606			guard = g & 7;
607
608			Z.layout.frac = 0x100000 | (g >> 3);
609			break;
610		case FSUBS:
611			/* 1.200002p+0 - 1.000000000000<g>p-3 */
612			A.flt = 1.125;
613			/* add enough to avoid scaling of the result */
614			A.layout.frac |= 0x2;
615			fA = s*A.flt;
616
617			B.layout.frac = g;
618			fB = s*B.flt;
619
620			/* set up Z to be truncated result */
621			guard = (0x10-g);
622			Z.layout.frac = guard>>3;
623
624			/* mask off LSB from resulting guard bits */
625			guard &= 7;
626			break;
627		case FMULS:
628			/* 1 + g*2^-23 */
629			A.flt = 1.0;
630			A.layout.frac = g;
631			fA = s*A.flt;
632			fB = 1.125;
633
634			/* set up Z to be truncated result */
635			Z.flt = 1.0;
636			Z.layout.frac = 0x100000;
637			Z.layout.frac |= g + (g>>3);
638			guard = g & 7;
639			break;
640		case FDIVS:
641			/* g >> 3 == LSB, g & 7 == guard bits */
642			guard = g & 7;
643			if ((guard & 1) == 0) {
644				/* special case: guard bit X = 0 */
645				A.flt = denorm_small;
646				A.layout.frac = g;
647				fA = A.flt;
648				fB = s*8.0;
649				Z.flt = 0.0;
650				Z.layout.frac |= (g >> 3);
651			} else {
652				fA = s*divs_guard_cases[g].num;
653				fB = divs_guard_cases[g].den;
654
655				Z.flt = 1.0;
656				Z.layout.frac = divs_guard_cases[g].frac;
657			}
658			break;
659		case FMADDS:
660		case FMSUBS:
661		case FNMADDS:
662		case FNMSUBS:
663			/* 1 + g*2^-23 */
664			A.flt = 1.0;
665			A.layout.frac = g;
666			fA = s*A.flt;
667			fB = 1.125;
668
669			/* 1.000001p-1 */
670			A.flt = 0.5;
671			A.layout.frac = 1;
672			fC = (op == FMADDS || op == FNMADDS ? s : -s)*A.flt;
673
674			/* set up Z to be truncated result */
675			z_sign = (op == FNMADDS || op == FNMSUBS ? -s : s);
676			guard = ((g & 7) + 0x4) & 7;
677			Z.flt = 1.0;
678			Z.layout.frac = 0x500000;
679			Z.layout.frac |= g + (g>>3) + ((g & 7)>> 2 ? 1 : 0);
680			break;
681		default:
682			assert("check_single_arithmetic_op: unexpected op",
683				FALSE);
684			break;
685		}
686
687		/* get LSB for tie breaking */
688		LSB = Z.layout.frac & 1;
689
690		/* set up hi and lo */
691		lo = z_sign*Z.flt;
692		Z.layout.frac += 1;
693		hi = z_sign*Z.flt;
694
695		switch(mode) {
696		case TO_NEAREST:
697			/* look at 3 guard bits to determine expected rounding */
698			switch(guard) {
699			case 0:
700			case 1: case 2: case 3:
701				expected = lo;
702				break;
703			case 4:	/* tie: round to even */
704				if (debug) printf("tie: LSB = %d\n", LSB);
705				expected = (LSB == 0 ? lo : hi);
706				break;
707			case 5: case 6: case 7:
708				expected = hi;
709				break;
710			default:
711				assert("check_single_guarded_arithmetic_op: unexpected guard",
712					FALSE);
713			}
714			break;
715		case TO_ZERO:
716			expected = lo;
717			break;
718		case TO_PLUS_INFINITY:
719			if (guard == 0) {
720				/* no rounding */
721				expected = lo;
722			} else {
723				expected = (s == 1 ? hi : lo);
724			}
725			break;
726		case TO_MINUS_INFINITY:
727			if (guard == 0) {
728				/* no rounding */
729				expected = lo;
730			} else {
731				expected = (s == 1 ? lo : hi);
732			}
733			break;
734		}
735
736		set_rounding_mode(mode);
737
738		/*
739		** do the double precision dual operation just for comparison
740		** when debugging
741		*/
742		switch(op) {
743		case FADDS:
744			BINOP("fadds");
745			Res.dbl = fD;
746			break;
747		case FSUBS:
748			BINOP("fsubs");
749			Res.dbl = fD;
750			break;
751		case FMULS:
752			BINOP("fmuls");
753			Res.dbl = fD;
754			break;
755		case FDIVS:
756			BINOP("fdivs");
757			Res.dbl = fD;
758			break;
759		case FMADDS:
760			TERNOP("fmadds");
761			Res.dbl = fD;
762			break;
763		case FMSUBS:
764			TERNOP("fmsubs");
765			Res.dbl = fD;
766			break;
767		case FNMADDS:
768			TERNOP("fnmadds");
769			Res.dbl = fD;
770			break;
771		case FNMSUBS:
772			TERNOP("fnmsubs");
773			Res.dbl = fD;
774			break;
775		default:
776			assert("check_single_guarded_arithmetic_op: unexpected op",
777				FALSE);
778			break;
779		}
780#undef UNOP
781#undef BINOP
782#undef TERNOP
783
784		Exp.dbl = expected;
785
786		if ((Res.layout.sign != Exp.layout.sign) ||
787			(Res.layout.exp != Exp.layout.exp) ||
788			(Res.layout.frac_lo != Exp.layout.frac_lo) ||
789			(Res.layout.frac_hi != Exp.layout.frac_hi)) {
790			result = "FAILED";
791			status = 1;
792		} else {
793			result = "PASSED";
794			status = 0;
795		}
796
797		printf("%s:%s:%s(%-13f",
798			round_mode_name[mode], result, flt_op_names[op], fA);
799		if (arg_count > 1) printf(", %-13a", fB);
800		if (arg_count > 2) printf(", %-13a", fC);
801		printf(") = %-13a", Res.dbl);
802		if (status) printf("\n\texpected %a", Exp.dbl);
803		putchar('\n');
804
805		if (debug) {
806			print_double("hi", hi);
807			print_double("lo", lo);
808			print_double("expected", expected);
809			print_double("got", Res.dbl);
810		}
811	}
812
813	return status;
814}
815
816int check_double_guarded_arithmetic_op(flt_op_t op)
817{
818	typedef struct {
819		int num, den, hi, lo;
820	} fdiv_t;
821	typedef struct {
822		double arg;
823		int exp, hi, lo;
824	} fsqrt_t;
825
826	char *result;
827	int status = 0;
828	dbl_overlay A, B, Z;
829	dbl_overlay Res, Exp;
830	double fA, fB, fC, fD;
831	round_mode_t mode;
832	int g, s;
833	int arg_count;
834	fdiv_t div_guard_cases[16] = {
835		{ 62, 62, 0x00000, 0x00000000 },	/* 0 */
836		{ 64, 62, 0x08421, 0x08421084 },	/* 1 */
837		{ 66, 62, 0x10842, 0x10842108 },	/* 2 */
838		{ 100, 62, 0x9ce73, 0x9ce739ce },	/* 3 */
839		{ 100, 62, 0x9ce73, 0x9ce739ce },	/* X */
840		{ 102, 62, 0xa5294, 0xa5294a52 },	/* 5 */
841		{ 106, 62, 0xb5ad6, 0xb5ad6b5a },	/* 6 */
842		{ 108, 62, 0xbdef7, 0xbdef7bde },	/* 7 */
843		{ 108, 108, 0x00000, 0x00000000 },	/* 8 */
844		{ 112, 62, 0xce739, 0xce739ce7 },	/* 9 */
845		{ 114, 62, 0xd6b5a, 0xd6b5ad6b },	/* A */
846		{ 116, 62, 0xdef7b, 0xdef7bdef },	/* B */
847		{ 84, 62, 0x5ad6b, 0x5ad6b5ad },	/* X */
848		{ 118, 62, 0xe739c, 0xe739ce73 },	/* D */
849		{ 90, 62, 0x739ce, 0x739ce739 },	/* E */
850		{ 92, 62, 0x7bdef, 0x7bdef7bd }		/* F */
851	};
852
853
854	fsqrt_t sqrt_guard_cases[16] = {
855		{ 0x1.08800p0,  0, 0x04371, 0xd9ab72fb}, /* :0 B8.8440  */
856		{ 0x0.D2200p0, -1, 0xcfdca, 0xf353049e}, /* :1 A4.6910  */
857		{ 0x1.A8220p0,  0, 0x49830, 0x2b49cd6d}, /* :2 E9.D411  */
858		{ 0x1.05A20p0,  0, 0x02cd1, 0x3b44f3bf}, /* :3 B7.82D1  */
859		{ 0x0.CA820p0, -1, 0xc7607, 0x3cec0937}, /* :4 A1.6541  */
860		{ 0x1.DCA20p0,  0, 0x5d4f8, 0xd4e4c2b2}, /* :5 F7.EE51  */
861		{ 0x1.02C80p0,  0, 0x01630, 0x9cde7483}, /* :6 B6.8164  */
862		{ 0x0.DC800p0, -1, 0xdb2cf, 0xe686fe7c}, /* :7 A8.6E40  */
863		{ 0x0.CF920p0, -1, 0xcd089, 0xb6860626}, /* :8 A3.67C9  */
864		{ 0x1.1D020p0,  0, 0x0e1d6, 0x2e78ed9d}, /* :9 BF.8E81  */
865		{ 0x0.E1C80p0, -1, 0xe0d52, 0x6020fb6b}, /* :A AA.70E4  */
866		{ 0x0.C8000p0, -1, 0xc48c6, 0x001f0abf}, /* :B A0.6400  */
867		{ 0x1.48520p0,  0, 0x21e9e, 0xd813e2e2}, /* :C CD.A429  */
868		{ 0x0.F4C20p0, -1, 0xf4a1b, 0x09bbf0b0}, /* :D B1.7A61  */
869		{ 0x0.CD080p0, -1, 0xca348, 0x79b907ae}, /* :E A2.6684  */
870		{ 0x1.76B20p0,  0, 0x35b67, 0x81aed827}  /* :F DB.BB59  */
871	};
872
873	/*	0x1.00000 00000000p-3 */
874	/* set up the invariant fields of B, the arg to cause rounding */
875	B.dbl = 0.0;
876	B.layout.exp = 1020;
877
878	/* set up args so result is always Z = 1.200000000000<g>p+0 */
879	Z.dbl = 1.0;
880	Z.layout.sign = 0;
881
882#define TERNOP(op) \
883		arg_count = 3; \
884        __asm__ volatile( \
885					op" %0, %1, %2, %3\n\t" \
886					: "=f"(fD) : "f"(fA) , "f"(fB), "f"(fC));
887#define BINOP(op) \
888		arg_count = 2; \
889        __asm__ volatile( \
890					op" %0, %1, %2\n\t" \
891					: "=f"(fD) : "f"(fA) , "f"(fB));
892#define UNOP(op) \
893		arg_count = 1; \
894        __asm__ volatile( \
895					op" %0, %1\n\t" \
896					: "=f"(fD) : "f"(fA));
897
898	for (mode = TO_NEAREST; mode <= TO_MINUS_INFINITY; mode++)
899	for (s = (op != FSQRT ? -1 : 1); s < 2; s += 2)
900	for (g = 0; g < 16; g += 1) {
901		double lo, hi, expected;
902		int LSB;
903		int guard;
904		int z_sign = s;
905
906		/*
907		** one argument will have exponent = 0 as will the result (by
908		** design) so choose the other argument with exponent -3 to
909		** force a 3 bit shift for scaling leaving us with 3 guard bits
910		** and the LSB bit at the bottom of the manitssa.
911		*/
912		switch(op) {
913		case FADD:
914			/* 1p+0 + 1.000000000000<g>p-3 */
915			B.layout.frac_lo = g;
916
917			fB = s*B.dbl;
918			fA = s*1.0;
919
920			/* set up Z to be truncated result */
921
922			/* mask off LSB from resulting guard bits */
923			guard = g & 7;
924
925			Z.layout.frac_hi = 0x20000;
926			Z.layout.frac_lo = g >> 3;
927
928			break;
929		case FSUB:
930			/* 1.2000000000002p+0 - 1.000000000000<g>p-3 */
931			A.dbl = 1.125;
932			/* add enough to avoid scaling of the result */
933			A.layout.frac_lo = 0x2;
934			fA = s*A.dbl;
935
936			B.layout.frac_lo = g;
937			fB = s*B.dbl;
938
939			/* set up Z to be truncated result */
940			guard = (0x10-g);
941			Z.layout.frac_hi = 0x0;
942			Z.layout.frac_lo = guard>>3;
943
944			/* mask off LSB from resulting guard bits */
945			guard &= 7;
946			break;
947		case FMUL:
948			/* 1 + g*2^-52 */
949			A.dbl = 1.0;
950			A.layout.frac_lo = g;
951			fA = s*A.dbl;
952			fB = 1.125;
953
954			/* set up Z to be truncated result */
955			Z.dbl = 1.0;
956			Z.layout.frac_hi = 0x20000;
957			Z.layout.frac_lo = g + (g>>3);
958			guard = g & 7;
959			break;
960		case FMADD:
961		case FMSUB:
962		case FNMADD:
963		case FNMSUB:
964			/* 1 + g*2^-52 */
965			A.dbl = 1.0;
966			A.layout.frac_lo = g;
967			fA = s*A.dbl;
968			fB = 1.125;
969
970			/* 1.0000000000001p-1 */
971			A.dbl = 0.5;
972			A.layout.frac_lo = 1;
973			fC = (op == FMADD || op == FNMADD ? s : -s)*A.dbl;
974
975			/* set up Z to be truncated result */
976			z_sign = (op == FNMADD || op == FNMSUB ? -s : s);
977			guard = ((g & 7) + 0x4) & 7;
978			Z.dbl = 1.0;
979			Z.layout.frac_hi = 0xa0000;
980			Z.layout.frac_lo = g + (g>>3) + ((g & 7)>> 2 ? 1 : 0);
981			break;
982		case FDIV:
983			/* g >> 3 == LSB, g & 7 == guard bits */
984			guard = g & 7;
985			if (guard == 0x4) {
986				/* special case guard bits == 4, inexact tie */
987				fB = s*2.0;
988				Z.dbl = 0.0;
989				if (g >> 3) {
990					fA = dbl_denorm_small + 2*dbl_denorm_small;
991					Z.layout.frac_lo = 0x1;
992				} else {
993					fA = dbl_denorm_small;
994				}
995			} else {
996				fA = s*div_guard_cases[g].num;
997				fB = div_guard_cases[g].den;
998
999				printf("%d/%d\n",
1000					s*div_guard_cases[g].num,
1001					div_guard_cases[g].den);
1002				Z.dbl = 1.0;
1003				Z.layout.frac_hi = div_guard_cases[g].hi;
1004				Z.layout.frac_lo = div_guard_cases[g].lo;
1005			}
1006			break;
1007		case FSQRT:
1008			fA = s*sqrt_guard_cases[g].arg;
1009			Z.dbl = 1.0;
1010			Z.layout.exp = sqrt_guard_cases[g].exp + 1023;
1011			Z.layout.frac_hi = sqrt_guard_cases[g].hi;
1012			Z.layout.frac_lo = sqrt_guard_cases[g].lo;
1013			guard = g >> 1;
1014			if (g & 1) guard |= 1;
1015			/* don't have test cases for when X bit = 0 */
1016			if (guard == 0 || guard == 4) continue;
1017			break;
1018		default:
1019			assert("check_double_guarded_arithmetic_op: unexpected op",
1020				FALSE);
1021			break;
1022		}
1023
1024		/* get LSB for tie breaking */
1025		LSB = Z.layout.frac_lo & 1;
1026
1027		/* set up hi and lo */
1028		lo = z_sign*Z.dbl;
1029		Z.layout.frac_lo += 1;
1030		hi = z_sign*Z.dbl;
1031
1032		switch(mode) {
1033		case TO_NEAREST:
1034			/* look at 3 guard bits to determine expected rounding */
1035			switch(guard) {
1036			case 0:
1037			case 1: case 2: case 3:
1038				expected = lo;
1039				break;
1040			case 4:	/* tie: round to even */
1041				if (debug) printf("tie: LSB = %d\n", LSB);
1042				expected = (LSB == 0 ? lo : hi);
1043				break;
1044			case 5: case 6: case 7:
1045				expected = hi;
1046				break;
1047			default:
1048				assert("check_double_guarded_arithmetic_op: unexpected guard",
1049					FALSE);
1050			}
1051			break;
1052		case TO_ZERO:
1053			expected = lo;
1054			break;
1055		case TO_PLUS_INFINITY:
1056			if (guard == 0) {
1057				/* no rounding */
1058				expected = lo;
1059			} else {
1060				expected = (s == 1 ? hi : lo);
1061			}
1062			break;
1063		case TO_MINUS_INFINITY:
1064			if (guard == 0) {
1065				/* no rounding */
1066				expected = lo;
1067			} else {
1068				expected = (s == 1 ? lo : hi);
1069			}
1070			break;
1071		}
1072
1073		set_rounding_mode(mode);
1074
1075		/*
1076		** do the double precision dual operation just for comparison
1077		** when debugging
1078		*/
1079		switch(op) {
1080		case FADD:
1081			BINOP("fadd");
1082			Res.dbl = fD;
1083			break;
1084		case FSUB:
1085			BINOP("fsub");
1086			Res.dbl = fD;
1087			break;
1088		case FMUL:
1089			BINOP("fmul");
1090			Res.dbl = fD;
1091			break;
1092		case FMADD:
1093			TERNOP("fmadd");
1094			Res.dbl = fD;
1095			break;
1096		case FMSUB:
1097			TERNOP("fmsub");
1098			Res.dbl = fD;
1099			break;
1100		case FNMADD:
1101			TERNOP("fnmadd");
1102			Res.dbl = fD;
1103			break;
1104		case FNMSUB:
1105			TERNOP("fnmsub");
1106			Res.dbl = fD;
1107			break;
1108		case FDIV:
1109			BINOP("fdiv");
1110			Res.dbl = fD;
1111			break;
1112		case FSQRT:
1113			UNOP("fsqrt");
1114			Res.dbl = fD;
1115			break;
1116		default:
1117			assert("check_double_guarded_arithmetic_op: unexpected op",
1118				FALSE);
1119			break;
1120		}
1121#undef UNOP
1122#undef BINOP
1123#undef TERNOP
1124
1125		Exp.dbl = expected;
1126
1127		if ((Res.layout.sign != Exp.layout.sign) ||
1128			(Res.layout.exp != Exp.layout.exp) ||
1129			(Res.layout.frac_lo != Exp.layout.frac_lo) ||
1130			(Res.layout.frac_hi != Exp.layout.frac_hi)) {
1131			result = "FAILED";
1132			status = 1;
1133		} else {
1134			result = "PASSED";
1135			status = 0;
1136		}
1137
1138		printf("%s:%s:%s(%-13a",
1139			round_mode_name[mode], result, flt_op_names[op], fA);
1140		if (arg_count > 1) printf(", %-13a", fB);
1141		if (arg_count > 2) printf(", %-13a", fC);
1142		printf(") = %-13a", Res.dbl);
1143		if (status) printf("\n\texpected %a", Exp.dbl);
1144		putchar('\n');
1145
1146		if (debug) {
1147			print_double("hi", hi);
1148			print_double("lo", lo);
1149			print_double("expected", expected);
1150			print_double("got", Res.dbl);
1151		}
1152	}
1153
1154	return status;
1155}
1156
1157int test_float_arithmetic_ops()
1158{
1159	int status = 0;
1160	flt_op_t op;
1161
1162	/*
1163	** choose FP operands whose result should be rounded to either
1164	** lo or hi.
1165	*/
1166
1167	printf("-------------------------- %s --------------------------\n",
1168		"test rounding of float operators without guard bits");
1169	for (op = FADDS; op <= FDIVS; op++) {
1170		status |= check_single_arithmetic_op(op);
1171	}
1172
1173	printf("-------------------------- %s --------------------------\n",
1174		"test rounding of float operators with guard bits");
1175	for (op = FADDS; op <= FNMSUBS; op++) {
1176		status |= check_single_guarded_arithmetic_op(op);
1177	}
1178
1179	printf("-------------------------- %s --------------------------\n",
1180		"test rounding of double operators with guard bits");
1181	for (op = FADD; op <= FSQRT; op++) {
1182		status |= check_double_guarded_arithmetic_op(op);
1183	}
1184	return status;
1185}
1186
1187
1188int
1189main()
1190{
1191	int status = 0;
1192
1193	init();
1194
1195	status |= test_dbl_to_float_convert("test denormalized convert", &denorm_small);
1196	status |= test_dbl_to_float_convert("test normalized convert", &norm_small);
1197	status |= test_int_to_float_convert("test (float)int convert");
1198	status |= test_int_to_float_convert("test (float)int convert");
1199
1200#ifdef __powerpc64__
1201	status |= test_long_to_double_convert("test (double)long convert");
1202#endif
1203	status |= test_float_arithmetic_ops();
1204	return status;
1205}
1206