vfpsingle.c revision 438a76167959061e371025f727fabec2ad9e70a7
/*
 *  linux/arch/arm/vfp/vfpsingle.c
 *
 * This code is derived in part from John R. Hauser's SoftFloat library, which
 * carries the following notice:
 *
 * ===========================================================================
 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
 * Arithmetic Package, Release 2.
 *
 * Written by John R. Hauser.  This work was made possible in part by the
 * International Computer Science Institute, located at Suite 600, 1947 Center
 * Street, Berkeley, California 94704.  Funding was partially provided by the
 * National Science Foundation under grant MIP-9311980.  The original version
 * of this code was written as part of a project to build a fixed-point vector
 * processor in collaboration with the University of California at Berkeley,
 * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
 * arithmetic/softfloat.html'.
 *
 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
 * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
 *
 * Derivative works are acceptable, even for commercial purposes, so long as
 * (1) they include prominent notice that the work is derivative, and (2) they
 * include prominent notice akin to these three paragraphs for those parts of
 * this code that are retained.
 * ===========================================================================
 */
#include <linux/kernel.h>
#include <linux/bitops.h>

#include <asm/div64.h>
#include <asm/ptrace.h>
#include <asm/vfp.h>

#include "vfpinstr.h"
#include "vfp.h"

static struct vfp_single vfp_single_default_qnan = {
	.exponent	= 255,
	.sign		= 0,
	.significand	= VFP_SINGLE_SIGNIFICAND_QNAN,
};

static void vfp_single_dump(const char *str, struct vfp_single *s)
{
	pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n",
		 str, s->sign != 0, s->exponent, s->significand);
}

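/*
 * Shift a denormal's significand up so that its leading bit lands at
 * bit 30, the unit-bit position used for normalised values in the
 * unpacked format.  A denormal unpacks with a stored exponent of 0
 * but carries the weight of exponent 1, hence the exponent drops by
 * one less than the shift count.
 */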
static void vfp_single_normalise_denormal(struct vfp_single *vs)
{
	int bits = 31 - fls(vs->significand);

	vfp_single_dump("normalise_denormal: in", vs);

	if (bits) {
		vs->exponent -= bits - 1;
		vs->significand <<= bits;
	}

	vfp_single_dump("normalise_denormal: out", vs);
}

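/*
 * Normalise, round and pack the intermediate result *vs into register
 * sd according to the FPSCR rounding mode, raising the appropriate
 * inexact/underflow/overflow flags.  Returns the final exception mask
 * for the operation.
 */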
#ifndef DEBUG
#define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except)
u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions)
#else
u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func)
#endif
{
	u32 significand, incr, rmode;
	int exponent, shift, underflow;

	vfp_single_dump("pack: in", vs);

	/*
	 * Infinities and NaNs are a special case.
	 */
	if (vs->exponent == 255 && (vs->significand == 0 || exceptions))
		goto pack;

	/*
	 * Special-case zero.
	 */
	if (vs->significand == 0) {
		vs->exponent = 0;
		goto pack;
	}

	exponent = vs->exponent;
	significand = vs->significand;

	/*
	 * Normalise first.  Note that we shift the significand up to
	 * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 bits below the
	 * least significant bit.
	 */
	shift = 32 - fls(significand);
	if (shift < 32 && shift) {
		exponent -= shift;
		significand <<= shift;
	}

#ifdef DEBUG
	vs->exponent = exponent;
	vs->significand = significand;
	vfp_single_dump("pack: normalised", vs);
#endif

	/*
	 * Tiny number?
	 */
	underflow = exponent < 0;
	if (underflow) {
		significand = vfp_shiftright32jamming(significand, -exponent);
		exponent = 0;
#ifdef DEBUG
		vs->exponent = exponent;
		vs->significand = significand;
		vfp_single_dump("pack: tiny number", vs);
#endif
		if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)))
			underflow = 0;
	}

	/*
	 * Select rounding increment.
	 */
	incr = 0;
	rmode = fpscr & FPSCR_RMODE_MASK;

	if (rmode == FPSCR_ROUND_NEAREST) {
		incr = 1 << VFP_SINGLE_LOW_BITS;
		if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0)
			incr -= 1;
	} else if (rmode == FPSCR_ROUND_TOZERO) {
		incr = 0;
	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0))
		incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1;

	pr_debug("VFP: rounding increment = 0x%08x\n", incr);

	/*
	 * Is our rounding going to overflow?
	 */
	if ((significand + incr) < significand) {
		exponent += 1;
		significand = (significand >> 1) | (significand & 1);
		incr >>= 1;
#ifdef DEBUG
		vs->exponent = exponent;
		vs->significand = significand;
		vfp_single_dump("pack: overflow", vs);
#endif
	}

	/*
	 * If any of the low bits (which will be shifted out of the
	 * number) are non-zero, the result is inexact.
	 */
	if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))
		exceptions |= FPSCR_IXC;

	/*
	 * Do our rounding.
	 */
	significand += incr;

	/*
	 * Infinity?
	 */
	if (exponent >= 254) {
		exceptions |= FPSCR_OFC | FPSCR_IXC;
		if (incr == 0) {
			vs->exponent = 253;
			vs->significand = 0x7fffffff;
		} else {
			vs->exponent = 255;		/* infinity */
			vs->significand = 0;
		}
	} else {
		if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0)
			exponent = 0;
		if (exponent || significand > 0x80000000)
			underflow = 0;
		if (underflow)
			exceptions |= FPSCR_UFC;
		vs->exponent = exponent;
		vs->significand = significand >> 1;
	}

 pack:
	vfp_single_dump("pack: final", vs);
	{
		s32 d = vfp_single_pack(vs);
		pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func,
			 sd, d, exceptions);
		vfp_put_float(sd, d);
	}

	return exceptions & ~VFP_NAN_FLAG;
}

/*
 * Propagate the NaN, setting exceptions if it is signalling.
 * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
 */
static u32
vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn,
		  struct vfp_single *vsm, u32 fpscr)
{
	struct vfp_single *nan;
	int tn, tm = 0;

	tn = vfp_single_type(vsn);

	if (vsm)
		tm = vfp_single_type(vsm);

	if (fpscr & FPSCR_DEFAULT_NAN)
		/*
		 * Default NaN mode - always returns a quiet NaN
		 */
		nan = &vfp_single_default_qnan;
	else {
		/*
		 * Contemporary mode - select the first signalling
		 * NaN, or if neither is signalling, the first
		 * quiet NaN.
		 */
		if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
			nan = vsn;
		else
			nan = vsm;
		/*
		 * Make the NaN quiet.
		 */
		nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
	}

	*vsd = *nan;

	/*
	 * If one was a signalling NaN, raise invalid operation.
	 */
	return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
}

/*
 * Extended operations
 */
static u32 vfp_single_fabs(int sd, int unused, s32 m, u32 fpscr)
{
	vfp_put_float(sd, vfp_single_packed_abs(m));
	return 0;
}

static u32 vfp_single_fcpy(int sd, int unused, s32 m, u32 fpscr)
{
	vfp_put_float(sd, m);
	return 0;
}

static u32 vfp_single_fneg(int sd, int unused, s32 m, u32 fpscr)
{
	vfp_put_float(sd, vfp_single_packed_negate(m));
	return 0;
}

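/*
 * Lookup tables for the initial square root estimate, one for odd
 * and one for even exponents; these appear to come from SoftFloat's
 * estimateSqrt32().
 */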
static const u16 sqrt_oddadjust[] = {
	0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0,
	0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67
};

static const u16 sqrt_evenadjust[] = {
	0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e,
	0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002
};

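/*
 * Produce an initial estimate of the square root for a significand
 * normalised to 01.xx form (0x40000000..0x7fffffff); the low-order
 * bit of the exponent selects the odd/even scaling.
 */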
u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand)
{
	int index;
	u32 z, a;

	if ((significand & 0xc0000000) != 0x40000000) {
		printk(KERN_WARNING "VFP: estimate_sqrt: invalid significand\n");
	}

	a = significand << 1;
	index = (a >> 27) & 15;
	if (exponent & 1) {
		z = 0x4000 + (a >> 17) - sqrt_oddadjust[index];
		z = ((a / z) << 14) + (z << 15);
		a >>= 1;
	} else {
		z = 0x8000 + (a >> 17) - sqrt_evenadjust[index];
		z = a / z + z;
		z = (z >= 0x20000) ? 0xffff8000 : (z << 15);
		if (z <= a)
			return (s32)a >> 1;
	}
	{
		u64 v = (u64)a << 31;
		do_div(v, z);
		return v + (z >> 1);
	}
}

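/*
 * sd = sqrt(sm).
 *
 * NaNs, infinities, zeroes and negative operands are dealt with
 * first; for anything else the table-driven estimate above is
 * refined against the exact remainder before rounding.
 */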
static u32 vfp_single_fsqrt(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm, vsd;
	int ret, tm;

	vfp_single_unpack(&vsm, m);
	tm = vfp_single_type(&vsm);
	if (tm & (VFP_NAN|VFP_INFINITY)) {
		struct vfp_single *vsp = &vsd;

		if (tm & VFP_NAN)
			ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr);
		else if (vsm.sign == 0) {
 sqrt_copy:
			vsp = &vsm;
			ret = 0;
		} else {
 sqrt_invalid:
			vsp = &vfp_single_default_qnan;
			ret = FPSCR_IOC;
		}
		vfp_put_float(sd, vfp_single_pack(vsp));
		return ret;
	}

	/*
	 * sqrt(+/- 0) == +/- 0
	 */
	if (tm & VFP_ZERO)
		goto sqrt_copy;

	/*
	 * Normalise a denormalised number
	 */
	if (tm & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsm);

	/*
	 * sqrt(<0) = invalid
	 */
	if (vsm.sign)
		goto sqrt_invalid;

	vfp_single_dump("sqrt", &vsm);

	/*
	 * Estimate the square root.
	 */
	vsd.sign = 0;
	vsd.exponent = ((vsm.exponent - 127) >> 1) + 127;
	vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2;

	vfp_single_dump("sqrt estimate", &vsd);

	/*
	 * And now adjust.  Only when the estimate is close to a
	 * rounding boundary do we compute the exact remainder
	 * (m << 32) - d * d and step the estimate down until the
	 * remainder is non-negative, folding any leftover into a
	 * sticky bit.
	 */
	if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) {
		if (vsd.significand < 2) {
			vsd.significand = 0xffffffff;
		} else {
			u64 term;
			s64 rem;
			vsm.significand <<= !(vsm.exponent & 1);
			term = (u64)vsd.significand * vsd.significand;
			rem = ((u64)vsm.significand << 32) - term;

			pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem);

			while (rem < 0) {
				vsd.significand -= 1;
				rem += ((u64)vsd.significand << 1) | 1;
			}
			vsd.significand |= rem != 0;
		}
	}
	vsd.significand = vfp_shiftright32jamming(vsd.significand, 1);

	return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fsqrt");
}

/*
 * Equal	:= ZC
 * Less than	:= N
 * Greater than	:= C
 * Unordered	:= CV
 */
static u32 vfp_compare(int sd, int signal_on_qnan, s32 m, u32 fpscr)
{
	s32 d;
	u32 ret = 0;

	d = vfp_get_float(sd);
	if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) {
		ret |= FPSCR_C | FPSCR_V;
		if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
			/*
			 * Signalling NaN, or signalling on quiet NaN
			 */
			ret |= FPSCR_IOC;
	}

	if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) {
		ret |= FPSCR_C | FPSCR_V;
		if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
			/*
			 * Signalling NaN, or signalling on quiet NaN
			 */
			ret |= FPSCR_IOC;
	}

	if (ret == 0) {
		if (d == m || vfp_single_packed_abs(d | m) == 0) {
			/*
			 * equal
			 */
			ret |= FPSCR_Z | FPSCR_C;
		} else if (vfp_single_packed_sign(d ^ m)) {
			/*
			 * different signs
			 */
			if (vfp_single_packed_sign(d))
				/*
				 * d is negative, so d < m
				 */
				ret |= FPSCR_N;
			else
				/*
				 * d is positive, so d > m
				 */
				ret |= FPSCR_C;
		} else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) {
			/*
			 * d < m
			 */
			ret |= FPSCR_N;
		} else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) {
			/*
			 * d > m
			 */
			ret |= FPSCR_C;
		}
	}
	return ret;
}

static u32 vfp_single_fcmp(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_compare(sd, 0, m, fpscr);
}

static u32 vfp_single_fcmpe(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_compare(sd, 1, m, fpscr);
}

static u32 vfp_single_fcmpz(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_compare(sd, 0, 0, fpscr);
}

static u32 vfp_single_fcmpez(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_compare(sd, 1, 0, fpscr);
}

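/*
 * dd = (double)sm: widen to double precision, rebiasing the exponent
 * from 127 to 1023.
 */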
static u32 vfp_single_fcvtd(int dd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm;
	struct vfp_double vdd;
	int tm;
	u32 exceptions = 0;

	vfp_single_unpack(&vsm, m);

	tm = vfp_single_type(&vsm);

	/*
	 * If we have a signalling NaN, signal invalid operation.
	 */
	if (tm == VFP_SNAN)
		exceptions = FPSCR_IOC;

	if (tm & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsm);

	vdd.sign = vsm.sign;
	vdd.significand = (u64)vsm.significand << 32;

	/*
	 * If we have an infinity or NaN, the exponent must be 2047.
	 */
	if (tm & (VFP_INFINITY|VFP_NAN)) {
		vdd.exponent = 2047;
		if (tm & VFP_NAN)
			vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
		goto pack_nan;
	} else if (tm & VFP_ZERO)
		vdd.exponent = 0;
	else
		vdd.exponent = vsm.exponent + (1023 - 127);

	/*
	 * Technically, if bit 0 of dd is set, this is an invalid
	 * instruction.  However, we ignore this for efficiency.
	 */
	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fcvtd");

 pack_nan:
	vfp_put_double(dd, vfp_double_pack(&vdd));
	return exceptions;
}

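/*
 * sd = (float)m, with m taken as an unsigned 32-bit integer.  In the
 * unpacked format a value is significand * 2^(exponent - 127 - 30),
 * so an exponent of 127 + 31 - 1 gives the raw integer its exact
 * value and vfp_single_normaliseround() does the rest.
 * vfp_single_fsito() below does the same for a signed integer via
 * its sign and magnitude.
 */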
static u32 vfp_single_fuito(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vs;

	vs.sign = 0;
	vs.exponent = 127 + 31 - 1;
	vs.significand = (u32)m;

	return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fuito");
}

static u32 vfp_single_fsito(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vs;

	vs.sign = (m & 0x80000000) >> 16;
	vs.exponent = 127 + 31 - 1;
	vs.significand = vs.sign ? -m : m;

	return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fsito");
}

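/*
 * sd = (unsigned int)sm, rounded according to the FPSCR rounding
 * mode.  NaNs, out-of-range values and negative results raise
 * FPSCR_IOC; a discarded non-zero fraction raises FPSCR_IXC.
 */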
static u32 vfp_single_ftoui(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm;
	u32 d, exceptions = 0;
	int rmode = fpscr & FPSCR_RMODE_MASK;
	int tm;

	vfp_single_unpack(&vsm, m);
	vfp_single_dump("VSM", &vsm);

	/*
	 * Do we have a denormalised number?
	 */
	tm = vfp_single_type(&vsm);
	if (tm & VFP_DENORMAL)
		exceptions |= FPSCR_IDC;

	if (tm & VFP_NAN)
		vsm.sign = 0;

	if (vsm.exponent >= 127 + 32) {
		d = vsm.sign ? 0 : 0xffffffff;
		exceptions = FPSCR_IOC;
	} else if (vsm.exponent >= 127 - 1) {
		int shift = 127 + 31 - vsm.exponent;
		u32 rem, incr = 0;

		/*
		 * 2^0 <= m < 2^32-2^8
		 */
		d = (vsm.significand << 1) >> shift;
		rem = vsm.significand << (33 - shift);

		if (rmode == FPSCR_ROUND_NEAREST) {
			incr = 0x80000000;
			if ((d & 1) == 0)
				incr -= 1;
		} else if (rmode == FPSCR_ROUND_TOZERO) {
			incr = 0;
		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
			incr = ~0;
		}

		if ((rem + incr) < rem) {
			if (d < 0xffffffff)
				d += 1;
			else
				exceptions |= FPSCR_IOC;
		}

		if (d && vsm.sign) {
			d = 0;
			exceptions |= FPSCR_IOC;
		} else if (rem)
			exceptions |= FPSCR_IXC;
	} else {
		d = 0;
		if (vsm.exponent | vsm.significand) {
			exceptions |= FPSCR_IXC;
			if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
				d = 1;
			else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) {
				d = 0;
				exceptions |= FPSCR_IOC;
			}
		}
	}

	pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

	vfp_put_float(sd, d);

	return exceptions;
}

static u32 vfp_single_ftouiz(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_single_ftoui(sd, unused, m, FPSCR_ROUND_TOZERO);
}

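/*
 * sd = (int)sm, rounded according to the FPSCR rounding mode.
 * Results outside the signed 32-bit range saturate and raise
 * FPSCR_IOC; a discarded non-zero fraction raises FPSCR_IXC.
 */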
static u32 vfp_single_ftosi(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm;
	u32 d, exceptions = 0;
	int rmode = fpscr & FPSCR_RMODE_MASK;

	vfp_single_unpack(&vsm, m);
	vfp_single_dump("VSM", &vsm);

	/*
	 * Do we have a denormalised number?
	 */
	if (vfp_single_type(&vsm) & VFP_DENORMAL)
		exceptions |= FPSCR_IDC;

	if (vsm.exponent >= 127 + 32) {
		/*
		 * m >= 2^31-2^7: invalid
		 */
		d = 0x7fffffff;
		if (vsm.sign)
			d = ~d;
		exceptions |= FPSCR_IOC;
	} else if (vsm.exponent >= 127 - 1) {
		int shift = 127 + 31 - vsm.exponent;
		u32 rem, incr = 0;

		/* 2^0 <= m <= 2^31-2^7 */
		d = (vsm.significand << 1) >> shift;
		rem = vsm.significand << (33 - shift);

		if (rmode == FPSCR_ROUND_NEAREST) {
			incr = 0x80000000;
			if ((d & 1) == 0)
				incr -= 1;
		} else if (rmode == FPSCR_ROUND_TOZERO) {
			incr = 0;
		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
			incr = ~0;
		}

		if ((rem + incr) < rem && d < 0xffffffff)
			d += 1;
		if (d > 0x7fffffff + (vsm.sign != 0)) {
			d = 0x7fffffff + (vsm.sign != 0);
			exceptions |= FPSCR_IOC;
		} else if (rem)
			exceptions |= FPSCR_IXC;

		if (vsm.sign)
			d = -d;
	} else {
		d = 0;
		if (vsm.exponent | vsm.significand) {
			exceptions |= FPSCR_IXC;
			if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
				d = 1;
			else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign)
				d = -1;
		}
	}

	pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

	vfp_put_float(sd, (s32)d);

	return exceptions;
}

static u32 vfp_single_ftosiz(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_single_ftosi(sd, unused, m, FPSCR_ROUND_TOZERO);
}

static u32 (* const fop_extfns[32])(int sd, int unused, s32 m, u32 fpscr) = {
	[FEXT_TO_IDX(FEXT_FCPY)]	= vfp_single_fcpy,
	[FEXT_TO_IDX(FEXT_FABS)]	= vfp_single_fabs,
	[FEXT_TO_IDX(FEXT_FNEG)]	= vfp_single_fneg,
	[FEXT_TO_IDX(FEXT_FSQRT)]	= vfp_single_fsqrt,
	[FEXT_TO_IDX(FEXT_FCMP)]	= vfp_single_fcmp,
	[FEXT_TO_IDX(FEXT_FCMPE)]	= vfp_single_fcmpe,
	[FEXT_TO_IDX(FEXT_FCMPZ)]	= vfp_single_fcmpz,
	[FEXT_TO_IDX(FEXT_FCMPEZ)]	= vfp_single_fcmpez,
	[FEXT_TO_IDX(FEXT_FCVT)]	= vfp_single_fcvtd,
	[FEXT_TO_IDX(FEXT_FUITO)]	= vfp_single_fuito,
	[FEXT_TO_IDX(FEXT_FSITO)]	= vfp_single_fsito,
	[FEXT_TO_IDX(FEXT_FTOUI)]	= vfp_single_ftoui,
	[FEXT_TO_IDX(FEXT_FTOUIZ)]	= vfp_single_ftouiz,
	[FEXT_TO_IDX(FEXT_FTOSI)]	= vfp_single_ftosi,
	[FEXT_TO_IDX(FEXT_FTOSIZ)]	= vfp_single_ftosiz,
};

static u32
vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn,
			  struct vfp_single *vsm, u32 fpscr)
{
	struct vfp_single *vsp;
	u32 exceptions = 0;
	int tn, tm;

	tn = vfp_single_type(vsn);
	tm = vfp_single_type(vsm);

	if (tn & tm & VFP_INFINITY) {
		/*
		 * Two infinities.  Are they different signs?
		 */
		if (vsn->sign ^ vsm->sign) {
			/*
			 * different signs -> invalid
			 */
			exceptions = FPSCR_IOC;
			vsp = &vfp_single_default_qnan;
		} else {
			/*
			 * same signs -> valid
			 */
			vsp = vsn;
		}
	} else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
		/*
		 * One infinity and one number -> infinity
		 */
		vsp = vsn;
	} else {
		/*
		 * 'n' is a NaN of some type
		 */
		return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
	}
	*vsd = *vsp;
	return exceptions;
}

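/*
 * *vsd = *vsn + *vsm, on unpacked operands.  The result is left
 * unnormalised for the caller to round and pack.
 */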
static u32
vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn,
	       struct vfp_single *vsm, u32 fpscr)
{
	u32 exp_diff, m_sig;

	if (vsn->significand & 0x80000000 ||
	    vsm->significand & 0x80000000) {
		pr_info("VFP: bad FP values in %s\n", __func__);
		vfp_single_dump("VSN", vsn);
		vfp_single_dump("VSM", vsm);
	}

	/*
	 * Ensure that 'n' is the largest magnitude number.  Note that
	 * if 'n' and 'm' have equal exponents, we do not swap them.
	 * This ensures that NaN propagation works correctly.
	 */
	if (vsn->exponent < vsm->exponent) {
		struct vfp_single *t = vsn;
		vsn = vsm;
		vsm = t;
	}

	/*
	 * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
	 * infinity or a NaN here.
	 */
	if (vsn->exponent == 255)
		return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr);

	/*
	 * We have two proper numbers, where 'vsn' is the larger magnitude.
	 *
	 * Copy 'n' to 'd' before doing the arithmetic.
	 */
	*vsd = *vsn;

	/*
	 * Align both numbers.
	 */
	exp_diff = vsn->exponent - vsm->exponent;
	m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff);

	/*
	 * If the signs are different, we are really subtracting.
	 */
	if (vsn->sign ^ vsm->sign) {
		m_sig = vsn->significand - m_sig;
		if ((s32)m_sig < 0) {
			vsd->sign = vfp_sign_negate(vsd->sign);
			m_sig = -m_sig;
		} else if (m_sig == 0) {
			vsd->sign = (fpscr & FPSCR_RMODE_MASK) ==
				      FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
		}
	} else {
		m_sig = vsn->significand + m_sig;
	}
	vsd->significand = m_sig;

	return 0;
}

static u32
vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr)
{
	vfp_single_dump("VSN", vsn);
	vfp_single_dump("VSM", vsm);

	/*
	 * Ensure that 'n' is the largest magnitude number.  Note that
	 * if 'n' and 'm' have equal exponents, we do not swap them.
	 * This ensures that NaN propagation works correctly.
	 */
	if (vsn->exponent < vsm->exponent) {
		struct vfp_single *t = vsn;
		vsn = vsm;
		vsm = t;
		pr_debug("VFP: swapping M <-> N\n");
	}

	vsd->sign = vsn->sign ^ vsm->sign;

	/*
	 * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
	 */
	if (vsn->exponent == 255) {
		if (vsn->significand || (vsm->exponent == 255 && vsm->significand))
			return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
		if ((vsm->exponent | vsm->significand) == 0) {
			*vsd = vfp_single_default_qnan;
			return FPSCR_IOC;
		}
		vsd->exponent = vsn->exponent;
		vsd->significand = 0;
		return 0;
	}

	/*
	 * If 'm' is zero, the result is always zero.  In this case,
	 * 'n' may be zero or a number, but it doesn't matter which.
	 */
	if ((vsm->exponent | vsm->significand) == 0) {
		vsd->exponent = 0;
		vsd->significand = 0;
		return 0;
	}

	/*
	 * We add 2 to the destination exponent for the same reason as
	 * the addition case - though this time we have +1 from each
	 * input operand.  Both input significands have their unit bit
	 * at bit 30, so the high 32 bits of the 64-bit product carry
	 * the leading bit at bit 28 or 29; the +2 rebiases for this,
	 * and normalisation absorbs the remainder.
	 */
	vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2;
	vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand);

	vfp_single_dump("VSD", vsd);
	return 0;
}

#define NEG_MULTIPLY	(1 << 0)
#define NEG_SUBTRACT	(1 << 1)

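/*
 * Common helper for the multiply-accumulate family:
 * sd = (-)sd + (-)(sn * sm), with the two negations selected by the
 * NEG_SUBTRACT and NEG_MULTIPLY flags respectively.
 */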
static u32
vfp_single_multiply_accumulate(int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func)
{
	struct vfp_single vsd, vsp, vsn, vsm;
	u32 exceptions;
	s32 v;

	v = vfp_get_float(sn);
	pr_debug("VFP: s%u = %08x\n", sn, v);
	vfp_single_unpack(&vsn, v);
	if (vsn.exponent == 0 && vsn.significand)
		vfp_single_normalise_denormal(&vsn);

	vfp_single_unpack(&vsm, m);
	if (vsm.exponent == 0 && vsm.significand)
		vfp_single_normalise_denormal(&vsm);

	exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr);
	if (negate & NEG_MULTIPLY)
		vsp.sign = vfp_sign_negate(vsp.sign);

	v = vfp_get_float(sd);
	pr_debug("VFP: s%u = %08x\n", sd, v);
	vfp_single_unpack(&vsn, v);
	if (negate & NEG_SUBTRACT)
		vsn.sign = vfp_sign_negate(vsn.sign);

	exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr);

	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, func);
}

/*
 * Standard operations
 */

/*
 * sd = sd + (sn * sm)
 */
static u32 vfp_single_fmac(int sd, int sn, s32 m, u32 fpscr)
{
	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, 0, "fmac");
}

/*
 * sd = sd - (sn * sm)
 */
static u32 vfp_single_fnmac(int sd, int sn, s32 m, u32 fpscr)
{
	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac");
}

/*
 * sd = -sd + (sn * sm)
 */
static u32 vfp_single_fmsc(int sd, int sn, s32 m, u32 fpscr)
{
	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc");
}

/*
 * sd = -sd - (sn * sm)
 */
static u32 vfp_single_fnmsc(int sd, int sn, s32 m, u32 fpscr)
{
	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
}

/*
 * sd = sn * sm
 */
static u32 vfp_single_fmul(int sd, int sn, s32 m, u32 fpscr)
{
	struct vfp_single vsd, vsn, vsm;
	u32 exceptions;
	s32 n = vfp_get_float(sn);

	pr_debug("VFP: s%u = %08x\n", sn, n);

	vfp_single_unpack(&vsn, n);
	if (vsn.exponent == 0 && vsn.significand)
		vfp_single_normalise_denormal(&vsn);

	vfp_single_unpack(&vsm, m);
	if (vsm.exponent == 0 && vsm.significand)
		vfp_single_normalise_denormal(&vsm);

	exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fmul");
}

/*
 * sd = -(sn * sm)
 */
static u32 vfp_single_fnmul(int sd, int sn, s32 m, u32 fpscr)
{
	struct vfp_single vsd, vsn, vsm;
	u32 exceptions;
	s32 n = vfp_get_float(sn);

	pr_debug("VFP: s%u = %08x\n", sn, n);

	vfp_single_unpack(&vsn, n);
	if (vsn.exponent == 0 && vsn.significand)
		vfp_single_normalise_denormal(&vsn);

	vfp_single_unpack(&vsm, m);
	if (vsm.exponent == 0 && vsm.significand)
		vfp_single_normalise_denormal(&vsm);

	exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
	vsd.sign = vfp_sign_negate(vsd.sign);
	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fnmul");
}

/*
 * sd = sn + sm
 */
static u32 vfp_single_fadd(int sd, int sn, s32 m, u32 fpscr)
{
	struct vfp_single vsd, vsn, vsm;
	u32 exceptions;
	s32 n = vfp_get_float(sn);

	pr_debug("VFP: s%u = %08x\n", sn, n);

	/*
	 * Unpack and normalise denormals.
	 */
	vfp_single_unpack(&vsn, n);
	if (vsn.exponent == 0 && vsn.significand)
		vfp_single_normalise_denormal(&vsn);

	vfp_single_unpack(&vsm, m);
	if (vsm.exponent == 0 && vsm.significand)
		vfp_single_normalise_denormal(&vsm);

	exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr);

	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fadd");
}

/*
 * sd = sn - sm
 */
static u32 vfp_single_fsub(int sd, int sn, s32 m, u32 fpscr)
{
	/*
	 * Subtraction is addition with one sign inverted.
	 */
	return vfp_single_fadd(sd, sn, vfp_single_packed_negate(m), fpscr);
}

/*
 * sd = sn / sm
 */
static u32 vfp_single_fdiv(int sd, int sn, s32 m, u32 fpscr)
{
	struct vfp_single vsd, vsn, vsm;
	u32 exceptions = 0;
	s32 n = vfp_get_float(sn);
	int tm, tn;

	pr_debug("VFP: s%u = %08x\n", sn, n);

	vfp_single_unpack(&vsn, n);
	vfp_single_unpack(&vsm, m);

	vsd.sign = vsn.sign ^ vsm.sign;

	tn = vfp_single_type(&vsn);
	tm = vfp_single_type(&vsm);

	/*
	 * Is n a NAN?
	 */
	if (tn & VFP_NAN)
		goto vsn_nan;

	/*
	 * Is m a NAN?
	 */
	if (tm & VFP_NAN)
		goto vsm_nan;

	/*
	 * If n and m are infinity, the result is invalid
	 * If n and m are zero, the result is invalid
	 */
	if (tm & tn & (VFP_INFINITY|VFP_ZERO))
		goto invalid;

	/*
	 * If n is infinity, the result is infinity
	 */
	if (tn & VFP_INFINITY)
		goto infinity;

	/*
	 * If m is zero, raise div0 exception
	 */
	if (tm & VFP_ZERO)
		goto divzero;

	/*
	 * If m is infinity, or n is zero, the result is zero
	 */
	if (tm & VFP_INFINITY || tn & VFP_ZERO)
		goto zero;

	if (tn & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsn);
	if (tm & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsm);

	/*
	 * Ok, we have two numbers, we can perform division.
	 */
	vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1;
	vsm.significand <<= 1;
	if (vsm.significand <= (2 * vsn.significand)) {
		vsn.significand >>= 1;
		vsd.exponent++;
	}
	{
		u64 significand = (u64)vsn.significand << 32;
		do_div(significand, vsm.significand);
		vsd.significand = significand;
	}
	if ((vsd.significand & 0x3f) == 0)
		vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32);

	return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fdiv");

 vsn_nan:
	exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr);
 pack:
	vfp_put_float(sd, vfp_single_pack(&vsd));
	return exceptions;

 vsm_nan:
	exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr);
	goto pack;

 zero:
	vsd.exponent = 0;
	vsd.significand = 0;
	goto pack;

 divzero:
	exceptions = FPSCR_DZC;
 infinity:
	vsd.exponent = 255;
	vsd.significand = 0;
	goto pack;

 invalid:
	vfp_put_float(sd, vfp_single_pack(&vfp_single_default_qnan));
	return FPSCR_IOC;
}

static u32 (* const fop_fns[16])(int sd, int sn, s32 m, u32 fpscr) = {
	[FOP_TO_IDX(FOP_FMAC)]	= vfp_single_fmac,
	[FOP_TO_IDX(FOP_FNMAC)]	= vfp_single_fnmac,
	[FOP_TO_IDX(FOP_FMSC)]	= vfp_single_fmsc,
	[FOP_TO_IDX(FOP_FNMSC)]	= vfp_single_fnmsc,
	[FOP_TO_IDX(FOP_FMUL)]	= vfp_single_fmul,
	[FOP_TO_IDX(FOP_FNMUL)]	= vfp_single_fnmul,
	[FOP_TO_IDX(FOP_FADD)]	= vfp_single_fadd,
	[FOP_TO_IDX(FOP_FSUB)]	= vfp_single_fsub,
	[FOP_TO_IDX(FOP_FDIV)]	= vfp_single_fdiv,
};

#define FREG_BANK(x)	((x) & 0x18)
#define FREG_IDX(x)	((x) & 7)

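/*
 * Decode and execute one single-precision CPDO (data processing)
 * instruction, iterating as required over the vector length and
 * stride configured in FPSCR.  Returns the accumulated exception
 * flags, or (u32)-1 for an unrecognised opcode.
 */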
u32 vfp_single_cpdo(u32 inst, u32 fpscr)
{
	u32 op = inst & FOP_MASK;
	u32 exceptions = 0;
	unsigned int sd = vfp_get_sd(inst);
	unsigned int sn = vfp_get_sn(inst);
	unsigned int sm = vfp_get_sm(inst);
	unsigned int vecitr, veclen, vecstride;
	u32 (*fop)(int, int, s32, u32);

	veclen = fpscr & FPSCR_LENGTH_MASK;
	vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK);

	/*
	 * If destination bank is zero, vector length is always '1'.
	 * ARM DDI0100F C5.1.3, C5.3.2.
	 */
	if (FREG_BANK(sd) == 0)
		veclen = 0;

	pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
		 (veclen >> FPSCR_LENGTH_BIT) + 1);

	fop = (op == FOP_EXT) ? fop_extfns[sn] : fop_fns[FOP_TO_IDX(op)];
	if (!fop)
		goto invalid;

	for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
		s32 m = vfp_get_float(sm);
		u32 except;

		if (op == FOP_EXT)
			pr_debug("VFP: itr%d (s%u) = op[%u] (s%u=%08x)\n",
				 vecitr >> FPSCR_LENGTH_BIT, sd, sn, sm, m);
		else
			pr_debug("VFP: itr%d (s%u) = (s%u) op[%u] (s%u=%08x)\n",
				 vecitr >> FPSCR_LENGTH_BIT, sd, sn,
				 FOP_TO_IDX(op), sm, m);

		except = fop(sd, sn, m, fpscr);
		pr_debug("VFP: itr%d: exceptions=%08x\n",
			 vecitr >> FPSCR_LENGTH_BIT, except);

		exceptions |= except;

		/*
		 * This ensures that comparisons only operate on scalars;
		 * comparisons always return with one FPSCR status bit set.
		 */
		if (except & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V))
			break;

		/*
		 * CHECK: It appears to be undefined whether we stop when
		 * we encounter an exception.  We continue.
		 */

		sd = FREG_BANK(sd) + ((FREG_IDX(sd) + vecstride) & 7);
		sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7);
		if (FREG_BANK(sm) != 0)
			sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7);
	}
	return exceptions;

 invalid:
	return (u32)-1;
}