vfpsingle.c revision 42d3fb5a8771b840e0bd6dbcd0c734883dd90b6f
/*
 *  linux/arch/arm/vfp/vfpsingle.c
 *
 * This code is derived in part from John R. Hauser's SoftFloat library, which
 * carries the following notice:
 *
 * ===========================================================================
 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
 * Arithmetic Package, Release 2.
 *
 * Written by John R. Hauser.  This work was made possible in part by the
 * International Computer Science Institute, located at Suite 600, 1947 Center
 * Street, Berkeley, California 94704.  Funding was partially provided by the
 * National Science Foundation under grant MIP-9311980.  The original version
 * of this code was written as part of a project to build a fixed-point vector
 * processor in collaboration with the University of California at Berkeley,
 * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
 * arithmetic/softfloat.html'.
 *
 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
 * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
 *
 * Derivative works are acceptable, even for commercial purposes, so long as
 * (1) they include prominent notice that the work is derivative, and (2) they
 * include prominent notice akin to these three paragraphs for those parts of
 * this code that are retained.
 * ===========================================================================
 */
#include <linux/kernel.h>
#include <linux/bitops.h>

#include <asm/div64.h>
#include <asm/ptrace.h>
#include <asm/vfp.h>

#include "vfpinstr.h"
#include "vfp.h"

static struct vfp_single vfp_single_default_qnan = {
	.exponent	= 255,
	.sign		= 0,
	.significand	= VFP_SINGLE_SIGNIFICAND_QNAN,
};

static void vfp_single_dump(const char *str, struct vfp_single *s)
{
	pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n",
		 str, s->sign != 0, s->exponent, s->significand);
}

static void vfp_single_normalise_denormal(struct vfp_single *vs)
{
	int bits = 31 - fls(vs->significand);

	vfp_single_dump("normalise_denormal: in", vs);

	if (bits) {
		vs->exponent -= bits - 1;
		vs->significand <<= bits;
	}

	vfp_single_dump("normalise_denormal: out", vs);
}

#ifndef DEBUG
#define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except)
u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions)
#else
u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func)
#endif
{
	u32 significand, incr, rmode;
	int exponent, shift, underflow;

	vfp_single_dump("pack: in", vs);

	/*
	 * Infinities and NaNs are a special case.
	 */
	if (vs->exponent == 255 && (vs->significand == 0 || exceptions))
		goto pack;

	/*
	 * Special-case zero.
	 */
	if (vs->significand == 0) {
		vs->exponent = 0;
		goto pack;
	}

	exponent = vs->exponent;
	significand = vs->significand;

	/*
	 * Normalise first.  Note that we shift the significand up to
	 * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 bits below the
	 * least significant bit of the result.
	 */
	shift = 32 - fls(significand);
	if (shift < 32 && shift) {
		exponent -= shift;
		significand <<= shift;
	}
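	/*
	 * Layout at this point (for illustration): vfp.h defines
	 * VFP_SINGLE_LOW_BITS as 32 - VFP_SINGLE_MANTISSA_BITS - 2 = 7,
	 * so the leading 1 is at bit 31, the 23 result mantissa bits
	 * occupy bits 30..8, bit 7 is the round bit and bits 6..0 are
	 * sticky - together the VFP_SINGLE_LOW_BITS + 1 bits that the
	 * rounding code below examines.
	 */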

#ifdef DEBUG
	vs->exponent = exponent;
	vs->significand = significand;
	vfp_single_dump("pack: normalised", vs);
#endif

	/*
	 * Tiny number?
	 */
	underflow = exponent < 0;
	if (underflow) {
		significand = vfp_shiftright32jamming(significand, -exponent);
		exponent = 0;
#ifdef DEBUG
		vs->exponent = exponent;
		vs->significand = significand;
		vfp_single_dump("pack: tiny number", vs);
#endif
		if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)))
			underflow = 0;
	}

	/*
	 * Select rounding increment.
	 */
	incr = 0;
	rmode = fpscr & FPSCR_RMODE_MASK;

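	/*
	 * For round-to-nearest the increment is half of the result ULP
	 * (1 << VFP_SINGLE_LOW_BITS = 0x80 against an ULP of 0x100);
	 * dropping it to 0x7f when the result LSB is already clear
	 * stops an exact half-way value from carrying, so ties round
	 * to even.  When rounding towards the infinity of the same
	 * sign as the result, the increment is a full ULP minus one,
	 * which carries whenever any low bit is set.
	 */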
	if (rmode == FPSCR_ROUND_NEAREST) {
		incr = 1 << VFP_SINGLE_LOW_BITS;
		if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0)
			incr -= 1;
	} else if (rmode == FPSCR_ROUND_TOZERO) {
		incr = 0;
	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0))
		incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1;

	pr_debug("VFP: rounding increment = 0x%08x\n", incr);

	/*
	 * Is our rounding going to overflow?
	 */
	if ((significand + incr) < significand) {
		exponent += 1;
		significand = (significand >> 1) | (significand & 1);
		incr >>= 1;
#ifdef DEBUG
		vs->exponent = exponent;
		vs->significand = significand;
		vfp_single_dump("pack: overflow", vs);
#endif
	}

	/*
	 * If any of the low bits (which will be shifted out of the
	 * number) are non-zero, the result is inexact.
	 */
	if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))
		exceptions |= FPSCR_IXC;

	/*
	 * Do our rounding.
	 */
	significand += incr;

	/*
	 * Infinity?
	 */
	if (exponent >= 254) {
		exceptions |= FPSCR_OFC | FPSCR_IXC;
		if (incr == 0) {
			vs->exponent = 253;
			vs->significand = 0x7fffffff;
		} else {
			vs->exponent = 255;		/* infinity */
			vs->significand = 0;
		}
	} else {
		if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0)
			exponent = 0;
		if (exponent || significand > 0x80000000)
			underflow = 0;
		if (underflow)
			exceptions |= FPSCR_UFC;
		vs->exponent = exponent;
		vs->significand = significand >> 1;
	}

 pack:
	vfp_single_dump("pack: final", vs);
	{
		s32 d = vfp_single_pack(vs);
#ifdef DEBUG
		pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func,
			 sd, d, exceptions);
#endif
		vfp_put_float(d, sd);
	}

	return exceptions;
}

/*
 * Propagate the NaN, setting exceptions if it is signalling.
 * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
 */
static u32
vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn,
		  struct vfp_single *vsm, u32 fpscr)
{
	struct vfp_single *nan;
	int tn, tm = 0;

	tn = vfp_single_type(vsn);

	if (vsm)
		tm = vfp_single_type(vsm);

	if (fpscr & FPSCR_DEFAULT_NAN)
		/*
		 * Default NaN mode - always returns a quiet NaN
		 */
		nan = &vfp_single_default_qnan;
	else {
		/*
		 * Contemporary mode - select the first signalling
		 * NAN, or if neither are signalling, the first
		 * quiet NAN.
		 */
		if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
			nan = vsn;
		else
			nan = vsm;
		/*
		 * Make the NaN quiet.
		 */
		nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
	}

	*vsd = *nan;

	/*
	 * If one was a signalling NAN, raise invalid operation.
	 */
	return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
}


/*
 * Extended operations
 */
static u32 vfp_single_fabs(int sd, int unused, s32 m, u32 fpscr)
{
	vfp_put_float(vfp_single_packed_abs(m), sd);
	return 0;
}

static u32 vfp_single_fcpy(int sd, int unused, s32 m, u32 fpscr)
{
	vfp_put_float(m, sd);
	return 0;
}

static u32 vfp_single_fneg(int sd, int unused, s32 m, u32 fpscr)
{
	vfp_put_float(vfp_single_packed_negate(m), sd);
	return 0;
}

static const u16 sqrt_oddadjust[] = {
	0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0,
	0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67
};

static const u16 sqrt_evenadjust[] = {
	0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e,
	0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002
};

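/*
 * Estimate the square root of a significand in [0x40000000, 0x7fffffff]
 * (leading 1 at bit 30, as produced by vfp_single_unpack()).  This
 * appears to follow estimateSqrt32() from the SoftFloat library noted
 * in the header above: the tables seed a ~16-bit first approximation,
 * and a Newton-Raphson style step (z' = a/z + z, suitably scaled, with
 * a final a.2^31/z + z/2) refines it to roughly 32 bits.
 */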
u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand)
{
	int index;
	u32 z, a;

	if ((significand & 0xc0000000) != 0x40000000) {
		printk(KERN_WARNING "VFP: estimate_sqrt: invalid significand\n");
	}

	a = significand << 1;
	index = (a >> 27) & 15;
	if (exponent & 1) {
		z = 0x4000 + (a >> 17) - sqrt_oddadjust[index];
		z = ((a / z) << 14) + (z << 15);
		a >>= 1;
	} else {
		z = 0x8000 + (a >> 17) - sqrt_evenadjust[index];
		z = a / z + z;
		z = (z >= 0x20000) ? 0xffff8000 : (z << 15);
		if (z <= a)
			return (s32)a >> 1;
	}
	{
		u64 v = (u64)a << 31;
		do_div(v, z);
		return v + (z >> 1);
	}
}

static u32 vfp_single_fsqrt(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm, vsd;
	int ret, tm;

	vfp_single_unpack(&vsm, m);
	tm = vfp_single_type(&vsm);
	if (tm & (VFP_NAN|VFP_INFINITY)) {
		struct vfp_single *vsp = &vsd;

		if (tm & VFP_NAN)
			ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr);
		else if (vsm.sign == 0) {
 sqrt_copy:
			vsp = &vsm;
			ret = 0;
		} else {
 sqrt_invalid:
			vsp = &vfp_single_default_qnan;
			ret = FPSCR_IOC;
		}
		vfp_put_float(vfp_single_pack(vsp), sd);
		return ret;
	}

	/*
	 * sqrt(+/- 0) == +/- 0
	 */
	if (tm & VFP_ZERO)
		goto sqrt_copy;

	/*
	 * Normalise a denormalised number
	 */
	if (tm & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsm);

	/*
	 * sqrt(<0) = invalid
	 */
	if (vsm.sign)
		goto sqrt_invalid;

	vfp_single_dump("sqrt", &vsm);

	/*
	 * Estimate the square root.
	 */
	vsd.sign = 0;
	vsd.exponent = ((vsm.exponent - 127) >> 1) + 127;
	vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2;

	vfp_single_dump("sqrt estimate", &vsd);

	/*
	 * And now adjust.
	 */
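	/*
	 * A low-bit pattern of 5 or less means the estimate may sit
	 * on a rounding boundary, so compute the exact remainder
	 * m.2^32 - z^2, step z down until the remainder is
	 * non-negative, and fold any non-zero remainder into the
	 * sticky bit; estimates further from a boundary already
	 * round correctly.
	 */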
	if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) {
		if (vsd.significand < 2) {
			vsd.significand = 0xffffffff;
		} else {
			u64 term;
			s64 rem;
			vsm.significand <<= !(vsm.exponent & 1);
			term = (u64)vsd.significand * vsd.significand;
			rem = ((u64)vsm.significand << 32) - term;

			pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem);

			while (rem < 0) {
				vsd.significand -= 1;
				rem += ((u64)vsd.significand << 1) | 1;
			}
			vsd.significand |= rem != 0;
		}
	}
	vsd.significand = vfp_shiftright32jamming(vsd.significand, 1);

	return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fsqrt");
}

/*
 * Equal	:= ZC
 * Less than	:= N
 * Greater than	:= C
 * Unordered	:= CV
 */
static u32 vfp_compare(int sd, int signal_on_qnan, s32 m, u32 fpscr)
{
	s32 d;
	u32 ret = 0;

	d = vfp_get_float(sd);
	if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) {
		ret |= FPSCR_C | FPSCR_V;
		if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
			/*
			 * Signalling NaN, or signalling on quiet NaN
			 */
			ret |= FPSCR_IOC;
	}

	if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) {
		ret |= FPSCR_C | FPSCR_V;
		if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
			/*
			 * Signalling NaN, or signalling on quiet NaN
			 */
			ret |= FPSCR_IOC;
	}

	if (ret == 0) {
		if (d == m || vfp_single_packed_abs(d | m) == 0) {
			/*
			 * equal
			 */
			ret |= FPSCR_Z | FPSCR_C;
		} else if (vfp_single_packed_sign(d ^ m)) {
			/*
			 * different signs
			 */
			if (vfp_single_packed_sign(d))
				/*
				 * d is negative, so d < m
				 */
				ret |= FPSCR_N;
			else
				/*
				 * d is positive, so d > m
				 */
				ret |= FPSCR_C;
		} else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) {
			/*
			 * d < m
			 */
			ret |= FPSCR_N;
		} else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) {
			/*
			 * d > m
			 */
			ret |= FPSCR_C;
		}
	}
	return ret;
}

static u32 vfp_single_fcmp(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_compare(sd, 0, m, fpscr);
}

static u32 vfp_single_fcmpe(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_compare(sd, 1, m, fpscr);
}

static u32 vfp_single_fcmpz(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_compare(sd, 0, 0, fpscr);
}

static u32 vfp_single_fcmpez(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_compare(sd, 1, 0, fpscr);
}

static u32 vfp_single_fcvtd(int dd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm;
	struct vfp_double vdd;
	int tm;
	u32 exceptions = 0;

	vfp_single_unpack(&vsm, m);

	tm = vfp_single_type(&vsm);

	/*
	 * If we have a signalling NaN, signal invalid operation.
	 */
	if (tm == VFP_SNAN)
		exceptions = FPSCR_IOC;

	if (tm & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsm);

	vdd.sign = vsm.sign;
	vdd.significand = (u64)vsm.significand << 32;

	/*
	 * If we have an infinity or NaN, the exponent must be 2047.
	 */
	if (tm & (VFP_INFINITY|VFP_NAN)) {
		vdd.exponent = 2047;
		if (tm == VFP_QNAN)
			vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
		goto pack_nan;
	} else if (tm & VFP_ZERO)
		vdd.exponent = 0;
	else
		vdd.exponent = vsm.exponent + (1023 - 127);

	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fcvtd");

 pack_nan:
	vfp_put_double(vfp_double_pack(&vdd), dd);
	return exceptions;
}

static u32 vfp_single_fuito(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vs;

	vs.sign = 0;
	vs.exponent = 127 + 31 - 1;
	vs.significand = (u32)m;

	return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fuito");
}

static u32 vfp_single_fsito(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vs;

	vs.sign = (m & 0x80000000) >> 16;
	vs.exponent = 127 + 31 - 1;
	vs.significand = vs.sign ? -m : m;
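	/*
	 * The sign bit of m moves from bit 31 down to bit 15, the
	 * position vfp_single_pack() expects in the u16 sign field.
	 * With the magnitude in the significand, a biased exponent of
	 * 127 + 31 - 1 is what makes vfp_single_normaliseround()
	 * produce (float)m under its significand conventions; values
	 * that do not fit the 24-bit significand are rounded there
	 * and flagged inexact.
	 */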

	return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fsito");
}

static u32 vfp_single_ftoui(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm;
	u32 d, exceptions = 0;
	int rmode = fpscr & FPSCR_RMODE_MASK;
	int tm;

	vfp_single_unpack(&vsm, m);
	vfp_single_dump("VSM", &vsm);

	/*
	 * Do we have a denormalised number?
	 */
	tm = vfp_single_type(&vsm);
	if (tm & VFP_DENORMAL)
		exceptions |= FPSCR_IDC;

	if (tm & VFP_NAN)
		vsm.sign = 0;

	if (vsm.exponent >= 127 + 32) {
		d = vsm.sign ? 0 : 0xffffffff;
		exceptions = FPSCR_IOC;
	} else if (vsm.exponent >= 127 - 1) {
		int shift = 127 + 31 - vsm.exponent;
		u32 rem, incr = 0;

		/*
		 * 2^0 <= m < 2^32-2^8
		 */
		d = (vsm.significand << 1) >> shift;
		rem = vsm.significand << (33 - shift);

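		/*
		 * rem holds the discarded fraction scaled so that half
		 * of an integer ULP is 0x80000000.  The increments
		 * mirror vfp_single_normaliseround(): half an ULP
		 * (minus one for ties-to-even) for round-to-nearest,
		 * zero for round-to-zero, and all-ones when rounding
		 * in the direction that increases the magnitude.
		 */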
		if (rmode == FPSCR_ROUND_NEAREST) {
			incr = 0x80000000;
			if ((d & 1) == 0)
				incr -= 1;
		} else if (rmode == FPSCR_ROUND_TOZERO) {
			incr = 0;
		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
			incr = ~0;
		}

		if ((rem + incr) < rem) {
			if (d < 0xffffffff)
				d += 1;
			else
				exceptions |= FPSCR_IOC;
		}

		if (d && vsm.sign) {
			d = 0;
			exceptions |= FPSCR_IOC;
		} else if (rem)
			exceptions |= FPSCR_IXC;
	} else {
		d = 0;
		if (vsm.exponent | vsm.significand) {
			exceptions |= FPSCR_IXC;
			if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
				d = 1;
			else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) {
				d = 0;
				exceptions |= FPSCR_IOC;
			}
		}
	}

	pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

	vfp_put_float(d, sd);

	return exceptions;
}

static u32 vfp_single_ftouiz(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_single_ftoui(sd, unused, m, FPSCR_ROUND_TOZERO);
}

static u32 vfp_single_ftosi(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm;
	u32 d, exceptions = 0;
	int rmode = fpscr & FPSCR_RMODE_MASK;
	int tm;

	vfp_single_unpack(&vsm, m);
	vfp_single_dump("VSM", &vsm);

	/*
	 * Do we have a denormalised number?
	 */
	tm = vfp_single_type(&vsm);
	if (tm & VFP_DENORMAL)
		exceptions |= FPSCR_IDC;

	if (tm & VFP_NAN) {
		d = 0;
		exceptions |= FPSCR_IOC;
	} else if (vsm.exponent >= 127 + 32) {
		/*
		 * m >= 2^31-2^7: invalid
		 */
		d = 0x7fffffff;
		if (vsm.sign)
			d = ~d;
		exceptions |= FPSCR_IOC;
	} else if (vsm.exponent >= 127 - 1) {
		int shift = 127 + 31 - vsm.exponent;
		u32 rem, incr = 0;

		/* 2^0 <= m <= 2^31-2^7 */
		d = (vsm.significand << 1) >> shift;
		rem = vsm.significand << (33 - shift);

		if (rmode == FPSCR_ROUND_NEAREST) {
			incr = 0x80000000;
			if ((d & 1) == 0)
				incr -= 1;
		} else if (rmode == FPSCR_ROUND_TOZERO) {
			incr = 0;
		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
			incr = ~0;
		}

		if ((rem + incr) < rem && d < 0xffffffff)
			d += 1;
		if (d > 0x7fffffff + (vsm.sign != 0)) {
			d = 0x7fffffff + (vsm.sign != 0);
			exceptions |= FPSCR_IOC;
		} else if (rem)
			exceptions |= FPSCR_IXC;

		if (vsm.sign)
			d = -d;
	} else {
		d = 0;
		if (vsm.exponent | vsm.significand) {
			exceptions |= FPSCR_IXC;
			if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
				d = 1;
			else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign)
				d = -1;
		}
	}

	pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

	vfp_put_float((s32)d, sd);

	return exceptions;
}

static u32 vfp_single_ftosiz(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_single_ftosi(sd, unused, m, FPSCR_ROUND_TOZERO);
}

static struct op fops_ext[32] = {
	[FEXT_TO_IDX(FEXT_FCPY)]	= { vfp_single_fcpy,   0 },
	[FEXT_TO_IDX(FEXT_FABS)]	= { vfp_single_fabs,   0 },
	[FEXT_TO_IDX(FEXT_FNEG)]	= { vfp_single_fneg,   0 },
	[FEXT_TO_IDX(FEXT_FSQRT)]	= { vfp_single_fsqrt,  0 },
	[FEXT_TO_IDX(FEXT_FCMP)]	= { vfp_single_fcmp,   OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCMPE)]	= { vfp_single_fcmpe,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCMPZ)]	= { vfp_single_fcmpz,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCMPEZ)]	= { vfp_single_fcmpez, OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCVT)]	= { vfp_single_fcvtd,  OP_SCALAR|OP_DD },
	[FEXT_TO_IDX(FEXT_FUITO)]	= { vfp_single_fuito,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FSITO)]	= { vfp_single_fsito,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FTOUI)]	= { vfp_single_ftoui,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FTOUIZ)]	= { vfp_single_ftouiz, OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FTOSI)]	= { vfp_single_ftosi,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FTOSIZ)]	= { vfp_single_ftosiz, OP_SCALAR },
};

static u32
vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn,
			  struct vfp_single *vsm, u32 fpscr)
{
	struct vfp_single *vsp;
	u32 exceptions = 0;
	int tn, tm;

	tn = vfp_single_type(vsn);
	tm = vfp_single_type(vsm);

	if (tn & tm & VFP_INFINITY) {
		/*
		 * Two infinities.  Are they different signs?
		 */
		if (vsn->sign ^ vsm->sign) {
			/*
			 * different signs -> invalid
			 */
			exceptions = FPSCR_IOC;
			vsp = &vfp_single_default_qnan;
		} else {
			/*
			 * same signs -> valid
			 */
			vsp = vsn;
		}
	} else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
		/*
		 * One infinity and one number -> infinity
		 */
		vsp = vsn;
	} else {
		/*
		 * 'n' is a NaN of some type
		 */
		return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
	}
	*vsd = *vsp;
	return exceptions;
}

static u32
vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn,
	       struct vfp_single *vsm, u32 fpscr)
{
	u32 exp_diff, m_sig;

	if (vsn->significand & 0x80000000 ||
	    vsm->significand & 0x80000000) {
		pr_info("VFP: bad FP values in %s\n", __func__);
		vfp_single_dump("VSN", vsn);
		vfp_single_dump("VSM", vsm);
	}

	/*
	 * Ensure that 'n' is the largest magnitude number.  Note that
	 * if 'n' and 'm' have equal exponents, we do not swap them.
	 * This ensures that NaN propagation works correctly.
	 */
	if (vsn->exponent < vsm->exponent) {
		struct vfp_single *t = vsn;
		vsn = vsm;
		vsm = t;
	}

	/*
	 * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
	 * infinity or a NaN here.
	 */
	if (vsn->exponent == 255)
		return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr);

	/*
	 * We have two proper numbers, where 'vsn' is the larger magnitude.
	 *
	 * Copy 'n' to 'd' before doing the arithmetic.
	 */
	*vsd = *vsn;

	/*
	 * Align both numbers.
	 */
	exp_diff = vsn->exponent - vsm->exponent;
	m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff);

	/*
	 * If the signs are different, we are really subtracting.
	 */
	if (vsn->sign ^ vsm->sign) {
		m_sig = vsn->significand - m_sig;
		if ((s32)m_sig < 0) {
			vsd->sign = vfp_sign_negate(vsd->sign);
			m_sig = -m_sig;
		} else if (m_sig == 0) {
			vsd->sign = (fpscr & FPSCR_RMODE_MASK) ==
				      FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
		}
	} else {
		m_sig = vsn->significand + m_sig;
	}
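	/*
	 * Note the signed-zero rule above: an exact zero difference
	 * is +0 in every rounding mode except round-towards-minus-
	 * infinity, where it is -0; 0x8000 is bit 15, the sign bit
	 * of the u16 sign field.
	 */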
	vsd->significand = m_sig;

	return 0;
}

static u32
vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr)
{
	vfp_single_dump("VSN", vsn);
	vfp_single_dump("VSM", vsm);

	/*
	 * Ensure that 'n' is the largest magnitude number.  Note that
	 * if 'n' and 'm' have equal exponents, we do not swap them.
	 * This ensures that NaN propagation works correctly.
	 */
	if (vsn->exponent < vsm->exponent) {
		struct vfp_single *t = vsn;
		vsn = vsm;
		vsm = t;
		pr_debug("VFP: swapping M <-> N\n");
	}

	vsd->sign = vsn->sign ^ vsm->sign;

	/*
	 * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
	 */
	if (vsn->exponent == 255) {
		if (vsn->significand || (vsm->exponent == 255 && vsm->significand))
			return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
		if ((vsm->exponent | vsm->significand) == 0) {
			*vsd = vfp_single_default_qnan;
			return FPSCR_IOC;
		}
		vsd->exponent = vsn->exponent;
		vsd->significand = 0;
		return 0;
	}

	/*
	 * If 'm' is zero, the result is always zero.  In this case,
	 * 'n' may be zero or a number, but it doesn't matter which.
	 */
	if ((vsm->exponent | vsm->significand) == 0) {
		vsd->exponent = 0;
		vsd->significand = 0;
		return 0;
	}

	/*
	 * We add 2 to the destination exponent for the same reason as
	 * the addition case - though this time we have +1 from each
	 * input operand.
	 */
	vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2;
	vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand);
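	/*
	 * Both significands have their leading 1 at bit 30, so the
	 * 64-bit product has its leading 1 at bit 60 or 61.
	 * vfp_hi64to32jamming() keeps the high 32 bits and ORs any
	 * non-zero low bits into bit 0, preserving inexactness as a
	 * sticky bit for the final rounding.
	 */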

	vfp_single_dump("VSD", vsd);
	return 0;
}

#define NEG_MULTIPLY	(1 << 0)
#define NEG_SUBTRACT	(1 << 1)

static u32
vfp_single_multiply_accumulate(int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func)
{
	struct vfp_single vsd, vsp, vsn, vsm;
	u32 exceptions;
	s32 v;

	v = vfp_get_float(sn);
	pr_debug("VFP: s%u = %08x\n", sn, v);
	vfp_single_unpack(&vsn, v);
	if (vsn.exponent == 0 && vsn.significand)
		vfp_single_normalise_denormal(&vsn);

	vfp_single_unpack(&vsm, m);
	if (vsm.exponent == 0 && vsm.significand)
		vfp_single_normalise_denormal(&vsm);

	exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr);
	if (negate & NEG_MULTIPLY)
		vsp.sign = vfp_sign_negate(vsp.sign);

	v = vfp_get_float(sd);
	pr_debug("VFP: s%u = %08x\n", sd, v);
	vfp_single_unpack(&vsn, v);
	if (negate & NEG_SUBTRACT)
		vsn.sign = vfp_sign_negate(vsn.sign);

	exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr);

	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, func);
}

/*
 * Standard operations
 */

/*
 * sd = sd + (sn * sm)
 */
static u32 vfp_single_fmac(int sd, int sn, s32 m, u32 fpscr)
{
	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, 0, "fmac");
}

/*
 * sd = sd - (sn * sm)
 */
static u32 vfp_single_fnmac(int sd, int sn, s32 m, u32 fpscr)
{
	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac");
}

/*
 * sd = -sd + (sn * sm)
 */
static u32 vfp_single_fmsc(int sd, int sn, s32 m, u32 fpscr)
{
	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc");
}

/*
 * sd = -sd - (sn * sm)
 */
static u32 vfp_single_fnmsc(int sd, int sn, s32 m, u32 fpscr)
{
	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
}

/*
 * sd = sn * sm
 */
static u32 vfp_single_fmul(int sd, int sn, s32 m, u32 fpscr)
{
	struct vfp_single vsd, vsn, vsm;
	u32 exceptions;
	s32 n = vfp_get_float(sn);

	pr_debug("VFP: s%u = %08x\n", sn, n);

	vfp_single_unpack(&vsn, n);
	if (vsn.exponent == 0 && vsn.significand)
		vfp_single_normalise_denormal(&vsn);

	vfp_single_unpack(&vsm, m);
	if (vsm.exponent == 0 && vsm.significand)
		vfp_single_normalise_denormal(&vsm);

	exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fmul");
}

/*
 * sd = -(sn * sm)
 */
static u32 vfp_single_fnmul(int sd, int sn, s32 m, u32 fpscr)
{
	struct vfp_single vsd, vsn, vsm;
	u32 exceptions;
	s32 n = vfp_get_float(sn);

	pr_debug("VFP: s%u = %08x\n", sn, n);

	vfp_single_unpack(&vsn, n);
	if (vsn.exponent == 0 && vsn.significand)
		vfp_single_normalise_denormal(&vsn);

	vfp_single_unpack(&vsm, m);
	if (vsm.exponent == 0 && vsm.significand)
		vfp_single_normalise_denormal(&vsm);

	exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
	vsd.sign = vfp_sign_negate(vsd.sign);
	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fnmul");
}

/*
 * sd = sn + sm
 */
static u32 vfp_single_fadd(int sd, int sn, s32 m, u32 fpscr)
{
	struct vfp_single vsd, vsn, vsm;
	u32 exceptions;
	s32 n = vfp_get_float(sn);

	pr_debug("VFP: s%u = %08x\n", sn, n);

	/*
	 * Unpack and normalise denormals.
	 */
	vfp_single_unpack(&vsn, n);
	if (vsn.exponent == 0 && vsn.significand)
		vfp_single_normalise_denormal(&vsn);

	vfp_single_unpack(&vsm, m);
	if (vsm.exponent == 0 && vsm.significand)
		vfp_single_normalise_denormal(&vsm);

	exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr);

	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fadd");
}

/*
 * sd = sn - sm
 */
static u32 vfp_single_fsub(int sd, int sn, s32 m, u32 fpscr)
{
	/*
	 * Subtraction is addition with one sign inverted.
	 */
	return vfp_single_fadd(sd, sn, vfp_single_packed_negate(m), fpscr);
}

/*
 * sd = sn / sm
 */
static u32 vfp_single_fdiv(int sd, int sn, s32 m, u32 fpscr)
{
	struct vfp_single vsd, vsn, vsm;
	u32 exceptions = 0;
	s32 n = vfp_get_float(sn);
	int tm, tn;

	pr_debug("VFP: s%u = %08x\n", sn, n);

	vfp_single_unpack(&vsn, n);
	vfp_single_unpack(&vsm, m);

	vsd.sign = vsn.sign ^ vsm.sign;

	tn = vfp_single_type(&vsn);
	tm = vfp_single_type(&vsm);

	/*
	 * Is n a NAN?
	 */
	if (tn & VFP_NAN)
		goto vsn_nan;

	/*
	 * Is m a NAN?
	 */
	if (tm & VFP_NAN)
		goto vsm_nan;

	/*
	 * If n and m are infinity, the result is invalid
	 * If n and m are zero, the result is invalid
	 */
	if (tm & tn & (VFP_INFINITY|VFP_ZERO))
		goto invalid;

	/*
	 * If n is infinity, the result is infinity
	 */
	if (tn & VFP_INFINITY)
		goto infinity;

	/*
	 * If m is zero, raise div0 exception
	 */
	if (tm & VFP_ZERO)
		goto divzero;

	/*
	 * If m is infinity, or n is zero, the result is zero
	 */
	if (tm & VFP_INFINITY || tn & VFP_ZERO)
		goto zero;

	if (tn & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsn);
	if (tm & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsm);

	/*
	 * Ok, we have two numbers, we can perform division.
	 */
	vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1;
	vsm.significand <<= 1;
	if (vsm.significand <= (2 * vsn.significand)) {
		vsn.significand >>= 1;
		vsd.exponent++;
	}
	{
		u64 significand = (u64)vsn.significand << 32;
		do_div(significand, vsm.significand);
		vsd.significand = significand;
	}
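	/*
	 * do_div() truncates, so a quotient with all-zero low bits
	 * may be hiding a non-zero remainder.  Multiplying back and
	 * comparing with the dividend sets bit 0 as a sticky bit in
	 * that case, keeping the result visibly inexact for rounding.
	 */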
	if ((vsd.significand & 0x3f) == 0)
		vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32);

	return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fdiv");

 vsn_nan:
	exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr);
 pack:
	vfp_put_float(vfp_single_pack(&vsd), sd);
	return exceptions;

 vsm_nan:
	exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr);
	goto pack;

 zero:
	vsd.exponent = 0;
	vsd.significand = 0;
	goto pack;

 divzero:
	exceptions = FPSCR_DZC;
 infinity:
	vsd.exponent = 255;
	vsd.significand = 0;
	goto pack;

 invalid:
	vfp_put_float(vfp_single_pack(&vfp_single_default_qnan), sd);
	return FPSCR_IOC;
}

static struct op fops[16] = {
	[FOP_TO_IDX(FOP_FMAC)]	= { vfp_single_fmac,  0 },
	[FOP_TO_IDX(FOP_FNMAC)]	= { vfp_single_fnmac, 0 },
	[FOP_TO_IDX(FOP_FMSC)]	= { vfp_single_fmsc,  0 },
	[FOP_TO_IDX(FOP_FNMSC)]	= { vfp_single_fnmsc, 0 },
	[FOP_TO_IDX(FOP_FMUL)]	= { vfp_single_fmul,  0 },
	[FOP_TO_IDX(FOP_FNMUL)]	= { vfp_single_fnmul, 0 },
	[FOP_TO_IDX(FOP_FADD)]	= { vfp_single_fadd,  0 },
	[FOP_TO_IDX(FOP_FSUB)]	= { vfp_single_fsub,  0 },
	[FOP_TO_IDX(FOP_FDIV)]	= { vfp_single_fdiv,  0 },
};

#define FREG_BANK(x)	((x) & 0x18)
#define FREG_IDX(x)	((x) & 7)

u32 vfp_single_cpdo(u32 inst, u32 fpscr)
{
	u32 op = inst & FOP_MASK;
	u32 exceptions = 0;
	unsigned int dest;
	unsigned int sn = vfp_get_sn(inst);
	unsigned int sm = vfp_get_sm(inst);
	unsigned int vecitr, veclen, vecstride;
	struct op *fop;

	vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK);

	fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)];

	/*
	 * fcvtsd takes a dN register number as destination, not sN.
	 * Technically, if bit 0 of dd is set, this is an invalid
	 * instruction.  However, we ignore this for efficiency.
	 * It also only operates on scalars.
	 */
	if (fop->flags & OP_DD)
		dest = vfp_get_dd(inst);
	else
		dest = vfp_get_sd(inst);

	/*
	 * If destination bank is zero, vector length is always '1'.
	 * ARM DDI0100F C5.1.3, C5.3.2.
	 */
	if ((fop->flags & OP_SCALAR) || FREG_BANK(dest) == 0)
		veclen = 0;
	else
		veclen = fpscr & FPSCR_LENGTH_MASK;

	pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
		 (veclen >> FPSCR_LENGTH_BIT) + 1);

	if (!fop->fn)
		goto invalid;

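	/*
	 * The 32 single registers are viewed as four banks of eight
	 * (FREG_BANK/FREG_IDX).  Vector operands step through their
	 * bank by 'vecstride', wrapping modulo 8, while an 'sm' in
	 * bank 0 is not stepped and behaves as a scalar operand
	 * (cf. the ARM DDI0100F sections cited above).
	 */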
	for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
		s32 m = vfp_get_float(sm);
		u32 except;
		char type;

		type = fop->flags & OP_DD ? 'd' : 's';
		if (op == FOP_EXT)
			pr_debug("VFP: itr%d (%c%u) = op[%u] (s%u=%08x)\n",
				 vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
				 sm, m);
		else
			pr_debug("VFP: itr%d (%c%u) = (s%u) op[%u] (s%u=%08x)\n",
				 vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
				 FOP_TO_IDX(op), sm, m);

		except = fop->fn(dest, sn, m, fpscr);
		pr_debug("VFP: itr%d: exceptions=%08x\n",
			 vecitr >> FPSCR_LENGTH_BIT, except);

		exceptions |= except;

		/*
		 * CHECK: It appears to be undefined whether we stop when
		 * we encounter an exception.  We continue.
		 */
		dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 7);
		sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7);
		if (FREG_BANK(sm) != 0)
			sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7);
	}
	return exceptions;

 invalid:
	return (u32)-1;
}
1246