vfpsingle.c revision 438a76167959061e371025f727fabec2ad9e70a7
1/* 2 * linux/arch/arm/vfp/vfpsingle.c 3 * 4 * This code is derived in part from John R. Housers softfloat library, which 5 * carries the following notice: 6 * 7 * =========================================================================== 8 * This C source file is part of the SoftFloat IEC/IEEE Floating-point 9 * Arithmetic Package, Release 2. 10 * 11 * Written by John R. Hauser. This work was made possible in part by the 12 * International Computer Science Institute, located at Suite 600, 1947 Center 13 * Street, Berkeley, California 94704. Funding was partially provided by the 14 * National Science Foundation under grant MIP-9311980. The original version 15 * of this code was written as part of a project to build a fixed-point vector 16 * processor in collaboration with the University of California at Berkeley, 17 * overseen by Profs. Nelson Morgan and John Wawrzynek. More information 18 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ 19 * arithmetic/softfloat.html'. 20 * 21 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort 22 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT 23 * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO 24 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY 25 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 26 * 27 * Derivative works are acceptable, even for commercial purposes, so long as 28 * (1) they include prominent notice that the work is derivative, and (2) they 29 * include prominent notice akin to these three paragraphs for those parts of 30 * this code that are retained. 31 * =========================================================================== 32 */ 33#include <linux/kernel.h> 34#include <linux/bitops.h> 35 36#include <asm/div64.h> 37#include <asm/ptrace.h> 38#include <asm/vfp.h> 39 40#include "vfpinstr.h" 41#include "vfp.h" 42 43static struct vfp_single vfp_single_default_qnan = { 44 .exponent = 255, 45 .sign = 0, 46 .significand = VFP_SINGLE_SIGNIFICAND_QNAN, 47}; 48 49static void vfp_single_dump(const char *str, struct vfp_single *s) 50{ 51 pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n", 52 str, s->sign != 0, s->exponent, s->significand); 53} 54 55static void vfp_single_normalise_denormal(struct vfp_single *vs) 56{ 57 int bits = 31 - fls(vs->significand); 58 59 vfp_single_dump("normalise_denormal: in", vs); 60 61 if (bits) { 62 vs->exponent -= bits - 1; 63 vs->significand <<= bits; 64 } 65 66 vfp_single_dump("normalise_denormal: out", vs); 67} 68 69#ifndef DEBUG 70#define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except) 71u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions) 72#else 73u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func) 74#endif 75{ 76 u32 significand, incr, rmode; 77 int exponent, shift, underflow; 78 79 vfp_single_dump("pack: in", vs); 80 81 /* 82 * Infinities and NaNs are a special case. 83 */ 84 if (vs->exponent == 255 && (vs->significand == 0 || exceptions)) 85 goto pack; 86 87 /* 88 * Special-case zero. 89 */ 90 if (vs->significand == 0) { 91 vs->exponent = 0; 92 goto pack; 93 } 94 95 exponent = vs->exponent; 96 significand = vs->significand; 97 98 /* 99 * Normalise first. Note that we shift the significand up to 100 * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least 101 * significant bit. 102 */ 103 shift = 32 - fls(significand); 104 if (shift < 32 && shift) { 105 exponent -= shift; 106 significand <<= shift; 107 } 108 109#ifdef DEBUG 110 vs->exponent = exponent; 111 vs->significand = significand; 112 vfp_single_dump("pack: normalised", vs); 113#endif 114 115 /* 116 * Tiny number? 117 */ 118 underflow = exponent < 0; 119 if (underflow) { 120 significand = vfp_shiftright32jamming(significand, -exponent); 121 exponent = 0; 122#ifdef DEBUG 123 vs->exponent = exponent; 124 vs->significand = significand; 125 vfp_single_dump("pack: tiny number", vs); 126#endif 127 if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))) 128 underflow = 0; 129 } 130 131 /* 132 * Select rounding increment. 133 */ 134 incr = 0; 135 rmode = fpscr & FPSCR_RMODE_MASK; 136 137 if (rmode == FPSCR_ROUND_NEAREST) { 138 incr = 1 << VFP_SINGLE_LOW_BITS; 139 if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0) 140 incr -= 1; 141 } else if (rmode == FPSCR_ROUND_TOZERO) { 142 incr = 0; 143 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0)) 144 incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1; 145 146 pr_debug("VFP: rounding increment = 0x%08x\n", incr); 147 148 /* 149 * Is our rounding going to overflow? 150 */ 151 if ((significand + incr) < significand) { 152 exponent += 1; 153 significand = (significand >> 1) | (significand & 1); 154 incr >>= 1; 155#ifdef DEBUG 156 vs->exponent = exponent; 157 vs->significand = significand; 158 vfp_single_dump("pack: overflow", vs); 159#endif 160 } 161 162 /* 163 * If any of the low bits (which will be shifted out of the 164 * number) are non-zero, the result is inexact. 165 */ 166 if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)) 167 exceptions |= FPSCR_IXC; 168 169 /* 170 * Do our rounding. 171 */ 172 significand += incr; 173 174 /* 175 * Infinity? 176 */ 177 if (exponent >= 254) { 178 exceptions |= FPSCR_OFC | FPSCR_IXC; 179 if (incr == 0) { 180 vs->exponent = 253; 181 vs->significand = 0x7fffffff; 182 } else { 183 vs->exponent = 255; /* infinity */ 184 vs->significand = 0; 185 } 186 } else { 187 if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0) 188 exponent = 0; 189 if (exponent || significand > 0x80000000) 190 underflow = 0; 191 if (underflow) 192 exceptions |= FPSCR_UFC; 193 vs->exponent = exponent; 194 vs->significand = significand >> 1; 195 } 196 197 pack: 198 vfp_single_dump("pack: final", vs); 199 { 200 s32 d = vfp_single_pack(vs); 201 pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func, 202 sd, d, exceptions); 203 vfp_put_float(sd, d); 204 } 205 206 return exceptions & ~VFP_NAN_FLAG; 207} 208 209/* 210 * Propagate the NaN, setting exceptions if it is signalling. 211 * 'n' is always a NaN. 'm' may be a number, NaN or infinity. 212 */ 213static u32 214vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn, 215 struct vfp_single *vsm, u32 fpscr) 216{ 217 struct vfp_single *nan; 218 int tn, tm = 0; 219 220 tn = vfp_single_type(vsn); 221 222 if (vsm) 223 tm = vfp_single_type(vsm); 224 225 if (fpscr & FPSCR_DEFAULT_NAN) 226 /* 227 * Default NaN mode - always returns a quiet NaN 228 */ 229 nan = &vfp_single_default_qnan; 230 else { 231 /* 232 * Contemporary mode - select the first signalling 233 * NAN, or if neither are signalling, the first 234 * quiet NAN. 235 */ 236 if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN)) 237 nan = vsn; 238 else 239 nan = vsm; 240 /* 241 * Make the NaN quiet. 242 */ 243 nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN; 244 } 245 246 *vsd = *nan; 247 248 /* 249 * If one was a signalling NAN, raise invalid operation. 250 */ 251 return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG; 252} 253 254 255/* 256 * Extended operations 257 */ 258static u32 vfp_single_fabs(int sd, int unused, s32 m, u32 fpscr) 259{ 260 vfp_put_float(sd, vfp_single_packed_abs(m)); 261 return 0; 262} 263 264static u32 vfp_single_fcpy(int sd, int unused, s32 m, u32 fpscr) 265{ 266 vfp_put_float(sd, m); 267 return 0; 268} 269 270static u32 vfp_single_fneg(int sd, int unused, s32 m, u32 fpscr) 271{ 272 vfp_put_float(sd, vfp_single_packed_negate(m)); 273 return 0; 274} 275 276static const u16 sqrt_oddadjust[] = { 277 0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0, 278 0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67 279}; 280 281static const u16 sqrt_evenadjust[] = { 282 0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e, 283 0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002 284}; 285 286u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand) 287{ 288 int index; 289 u32 z, a; 290 291 if ((significand & 0xc0000000) != 0x40000000) { 292 printk(KERN_WARNING "VFP: estimate_sqrt: invalid significand\n"); 293 } 294 295 a = significand << 1; 296 index = (a >> 27) & 15; 297 if (exponent & 1) { 298 z = 0x4000 + (a >> 17) - sqrt_oddadjust[index]; 299 z = ((a / z) << 14) + (z << 15); 300 a >>= 1; 301 } else { 302 z = 0x8000 + (a >> 17) - sqrt_evenadjust[index]; 303 z = a / z + z; 304 z = (z >= 0x20000) ? 0xffff8000 : (z << 15); 305 if (z <= a) 306 return (s32)a >> 1; 307 } 308 { 309 u64 v = (u64)a << 31; 310 do_div(v, z); 311 return v + (z >> 1); 312 } 313} 314 315static u32 vfp_single_fsqrt(int sd, int unused, s32 m, u32 fpscr) 316{ 317 struct vfp_single vsm, vsd; 318 int ret, tm; 319 320 vfp_single_unpack(&vsm, m); 321 tm = vfp_single_type(&vsm); 322 if (tm & (VFP_NAN|VFP_INFINITY)) { 323 struct vfp_single *vsp = &vsd; 324 325 if (tm & VFP_NAN) 326 ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr); 327 else if (vsm.sign == 0) { 328 sqrt_copy: 329 vsp = &vsm; 330 ret = 0; 331 } else { 332 sqrt_invalid: 333 vsp = &vfp_single_default_qnan; 334 ret = FPSCR_IOC; 335 } 336 vfp_put_float(sd, vfp_single_pack(vsp)); 337 return ret; 338 } 339 340 /* 341 * sqrt(+/- 0) == +/- 0 342 */ 343 if (tm & VFP_ZERO) 344 goto sqrt_copy; 345 346 /* 347 * Normalise a denormalised number 348 */ 349 if (tm & VFP_DENORMAL) 350 vfp_single_normalise_denormal(&vsm); 351 352 /* 353 * sqrt(<0) = invalid 354 */ 355 if (vsm.sign) 356 goto sqrt_invalid; 357 358 vfp_single_dump("sqrt", &vsm); 359 360 /* 361 * Estimate the square root. 362 */ 363 vsd.sign = 0; 364 vsd.exponent = ((vsm.exponent - 127) >> 1) + 127; 365 vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2; 366 367 vfp_single_dump("sqrt estimate", &vsd); 368 369 /* 370 * And now adjust. 371 */ 372 if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) { 373 if (vsd.significand < 2) { 374 vsd.significand = 0xffffffff; 375 } else { 376 u64 term; 377 s64 rem; 378 vsm.significand <<= !(vsm.exponent & 1); 379 term = (u64)vsd.significand * vsd.significand; 380 rem = ((u64)vsm.significand << 32) - term; 381 382 pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem); 383 384 while (rem < 0) { 385 vsd.significand -= 1; 386 rem += ((u64)vsd.significand << 1) | 1; 387 } 388 vsd.significand |= rem != 0; 389 } 390 } 391 vsd.significand = vfp_shiftright32jamming(vsd.significand, 1); 392 393 return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fsqrt"); 394} 395 396/* 397 * Equal := ZC 398 * Less than := N 399 * Greater than := C 400 * Unordered := CV 401 */ 402static u32 vfp_compare(int sd, int signal_on_qnan, s32 m, u32 fpscr) 403{ 404 s32 d; 405 u32 ret = 0; 406 407 d = vfp_get_float(sd); 408 if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) { 409 ret |= FPSCR_C | FPSCR_V; 410 if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) 411 /* 412 * Signalling NaN, or signalling on quiet NaN 413 */ 414 ret |= FPSCR_IOC; 415 } 416 417 if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) { 418 ret |= FPSCR_C | FPSCR_V; 419 if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) 420 /* 421 * Signalling NaN, or signalling on quiet NaN 422 */ 423 ret |= FPSCR_IOC; 424 } 425 426 if (ret == 0) { 427 if (d == m || vfp_single_packed_abs(d | m) == 0) { 428 /* 429 * equal 430 */ 431 ret |= FPSCR_Z | FPSCR_C; 432 } else if (vfp_single_packed_sign(d ^ m)) { 433 /* 434 * different signs 435 */ 436 if (vfp_single_packed_sign(d)) 437 /* 438 * d is negative, so d < m 439 */ 440 ret |= FPSCR_N; 441 else 442 /* 443 * d is positive, so d > m 444 */ 445 ret |= FPSCR_C; 446 } else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) { 447 /* 448 * d < m 449 */ 450 ret |= FPSCR_N; 451 } else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) { 452 /* 453 * d > m 454 */ 455 ret |= FPSCR_C; 456 } 457 } 458 return ret; 459} 460 461static u32 vfp_single_fcmp(int sd, int unused, s32 m, u32 fpscr) 462{ 463 return vfp_compare(sd, 0, m, fpscr); 464} 465 466static u32 vfp_single_fcmpe(int sd, int unused, s32 m, u32 fpscr) 467{ 468 return vfp_compare(sd, 1, m, fpscr); 469} 470 471static u32 vfp_single_fcmpz(int sd, int unused, s32 m, u32 fpscr) 472{ 473 return vfp_compare(sd, 0, 0, fpscr); 474} 475 476static u32 vfp_single_fcmpez(int sd, int unused, s32 m, u32 fpscr) 477{ 478 return vfp_compare(sd, 1, 0, fpscr); 479} 480 481static u32 vfp_single_fcvtd(int dd, int unused, s32 m, u32 fpscr) 482{ 483 struct vfp_single vsm; 484 struct vfp_double vdd; 485 int tm; 486 u32 exceptions = 0; 487 488 vfp_single_unpack(&vsm, m); 489 490 tm = vfp_single_type(&vsm); 491 492 /* 493 * If we have a signalling NaN, signal invalid operation. 494 */ 495 if (tm == VFP_SNAN) 496 exceptions = FPSCR_IOC; 497 498 if (tm & VFP_DENORMAL) 499 vfp_single_normalise_denormal(&vsm); 500 501 vdd.sign = vsm.sign; 502 vdd.significand = (u64)vsm.significand << 32; 503 504 /* 505 * If we have an infinity or NaN, the exponent must be 2047. 506 */ 507 if (tm & (VFP_INFINITY|VFP_NAN)) { 508 vdd.exponent = 2047; 509 if (tm & VFP_NAN) 510 vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN; 511 goto pack_nan; 512 } else if (tm & VFP_ZERO) 513 vdd.exponent = 0; 514 else 515 vdd.exponent = vsm.exponent + (1023 - 127); 516 517 /* 518 * Technically, if bit 0 of dd is set, this is an invalid 519 * instruction. However, we ignore this for efficiency. 520 */ 521 return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fcvtd"); 522 523 pack_nan: 524 vfp_put_double(dd, vfp_double_pack(&vdd)); 525 return exceptions; 526} 527 528static u32 vfp_single_fuito(int sd, int unused, s32 m, u32 fpscr) 529{ 530 struct vfp_single vs; 531 532 vs.sign = 0; 533 vs.exponent = 127 + 31 - 1; 534 vs.significand = (u32)m; 535 536 return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fuito"); 537} 538 539static u32 vfp_single_fsito(int sd, int unused, s32 m, u32 fpscr) 540{ 541 struct vfp_single vs; 542 543 vs.sign = (m & 0x80000000) >> 16; 544 vs.exponent = 127 + 31 - 1; 545 vs.significand = vs.sign ? -m : m; 546 547 return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fsito"); 548} 549 550static u32 vfp_single_ftoui(int sd, int unused, s32 m, u32 fpscr) 551{ 552 struct vfp_single vsm; 553 u32 d, exceptions = 0; 554 int rmode = fpscr & FPSCR_RMODE_MASK; 555 int tm; 556 557 vfp_single_unpack(&vsm, m); 558 vfp_single_dump("VSM", &vsm); 559 560 /* 561 * Do we have a denormalised number? 562 */ 563 tm = vfp_single_type(&vsm); 564 if (tm & VFP_DENORMAL) 565 exceptions |= FPSCR_IDC; 566 567 if (tm & VFP_NAN) 568 vsm.sign = 0; 569 570 if (vsm.exponent >= 127 + 32) { 571 d = vsm.sign ? 0 : 0xffffffff; 572 exceptions = FPSCR_IOC; 573 } else if (vsm.exponent >= 127 - 1) { 574 int shift = 127 + 31 - vsm.exponent; 575 u32 rem, incr = 0; 576 577 /* 578 * 2^0 <= m < 2^32-2^8 579 */ 580 d = (vsm.significand << 1) >> shift; 581 rem = vsm.significand << (33 - shift); 582 583 if (rmode == FPSCR_ROUND_NEAREST) { 584 incr = 0x80000000; 585 if ((d & 1) == 0) 586 incr -= 1; 587 } else if (rmode == FPSCR_ROUND_TOZERO) { 588 incr = 0; 589 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) { 590 incr = ~0; 591 } 592 593 if ((rem + incr) < rem) { 594 if (d < 0xffffffff) 595 d += 1; 596 else 597 exceptions |= FPSCR_IOC; 598 } 599 600 if (d && vsm.sign) { 601 d = 0; 602 exceptions |= FPSCR_IOC; 603 } else if (rem) 604 exceptions |= FPSCR_IXC; 605 } else { 606 d = 0; 607 if (vsm.exponent | vsm.significand) { 608 exceptions |= FPSCR_IXC; 609 if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0) 610 d = 1; 611 else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) { 612 d = 0; 613 exceptions |= FPSCR_IOC; 614 } 615 } 616 } 617 618 pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); 619 620 vfp_put_float(sd, d); 621 622 return exceptions; 623} 624 625static u32 vfp_single_ftouiz(int sd, int unused, s32 m, u32 fpscr) 626{ 627 return vfp_single_ftoui(sd, unused, m, FPSCR_ROUND_TOZERO); 628} 629 630static u32 vfp_single_ftosi(int sd, int unused, s32 m, u32 fpscr) 631{ 632 struct vfp_single vsm; 633 u32 d, exceptions = 0; 634 int rmode = fpscr & FPSCR_RMODE_MASK; 635 636 vfp_single_unpack(&vsm, m); 637 vfp_single_dump("VSM", &vsm); 638 639 /* 640 * Do we have a denormalised number? 641 */ 642 if (vfp_single_type(&vsm) & VFP_DENORMAL) 643 exceptions |= FPSCR_IDC; 644 645 if (vsm.exponent >= 127 + 32) { 646 /* 647 * m >= 2^31-2^7: invalid 648 */ 649 d = 0x7fffffff; 650 if (vsm.sign) 651 d = ~d; 652 exceptions |= FPSCR_IOC; 653 } else if (vsm.exponent >= 127 - 1) { 654 int shift = 127 + 31 - vsm.exponent; 655 u32 rem, incr = 0; 656 657 /* 2^0 <= m <= 2^31-2^7 */ 658 d = (vsm.significand << 1) >> shift; 659 rem = vsm.significand << (33 - shift); 660 661 if (rmode == FPSCR_ROUND_NEAREST) { 662 incr = 0x80000000; 663 if ((d & 1) == 0) 664 incr -= 1; 665 } else if (rmode == FPSCR_ROUND_TOZERO) { 666 incr = 0; 667 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) { 668 incr = ~0; 669 } 670 671 if ((rem + incr) < rem && d < 0xffffffff) 672 d += 1; 673 if (d > 0x7fffffff + (vsm.sign != 0)) { 674 d = 0x7fffffff + (vsm.sign != 0); 675 exceptions |= FPSCR_IOC; 676 } else if (rem) 677 exceptions |= FPSCR_IXC; 678 679 if (vsm.sign) 680 d = -d; 681 } else { 682 d = 0; 683 if (vsm.exponent | vsm.significand) { 684 exceptions |= FPSCR_IXC; 685 if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0) 686 d = 1; 687 else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) 688 d = -1; 689 } 690 } 691 692 pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); 693 694 vfp_put_float(sd, (s32)d); 695 696 return exceptions; 697} 698 699static u32 vfp_single_ftosiz(int sd, int unused, s32 m, u32 fpscr) 700{ 701 return vfp_single_ftosi(sd, unused, m, FPSCR_ROUND_TOZERO); 702} 703 704static u32 (* const fop_extfns[32])(int sd, int unused, s32 m, u32 fpscr) = { 705 [FEXT_TO_IDX(FEXT_FCPY)] = vfp_single_fcpy, 706 [FEXT_TO_IDX(FEXT_FABS)] = vfp_single_fabs, 707 [FEXT_TO_IDX(FEXT_FNEG)] = vfp_single_fneg, 708 [FEXT_TO_IDX(FEXT_FSQRT)] = vfp_single_fsqrt, 709 [FEXT_TO_IDX(FEXT_FCMP)] = vfp_single_fcmp, 710 [FEXT_TO_IDX(FEXT_FCMPE)] = vfp_single_fcmpe, 711 [FEXT_TO_IDX(FEXT_FCMPZ)] = vfp_single_fcmpz, 712 [FEXT_TO_IDX(FEXT_FCMPEZ)] = vfp_single_fcmpez, 713 [FEXT_TO_IDX(FEXT_FCVT)] = vfp_single_fcvtd, 714 [FEXT_TO_IDX(FEXT_FUITO)] = vfp_single_fuito, 715 [FEXT_TO_IDX(FEXT_FSITO)] = vfp_single_fsito, 716 [FEXT_TO_IDX(FEXT_FTOUI)] = vfp_single_ftoui, 717 [FEXT_TO_IDX(FEXT_FTOUIZ)] = vfp_single_ftouiz, 718 [FEXT_TO_IDX(FEXT_FTOSI)] = vfp_single_ftosi, 719 [FEXT_TO_IDX(FEXT_FTOSIZ)] = vfp_single_ftosiz, 720}; 721 722 723 724 725 726static u32 727vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn, 728 struct vfp_single *vsm, u32 fpscr) 729{ 730 struct vfp_single *vsp; 731 u32 exceptions = 0; 732 int tn, tm; 733 734 tn = vfp_single_type(vsn); 735 tm = vfp_single_type(vsm); 736 737 if (tn & tm & VFP_INFINITY) { 738 /* 739 * Two infinities. Are they different signs? 740 */ 741 if (vsn->sign ^ vsm->sign) { 742 /* 743 * different signs -> invalid 744 */ 745 exceptions = FPSCR_IOC; 746 vsp = &vfp_single_default_qnan; 747 } else { 748 /* 749 * same signs -> valid 750 */ 751 vsp = vsn; 752 } 753 } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) { 754 /* 755 * One infinity and one number -> infinity 756 */ 757 vsp = vsn; 758 } else { 759 /* 760 * 'n' is a NaN of some type 761 */ 762 return vfp_propagate_nan(vsd, vsn, vsm, fpscr); 763 } 764 *vsd = *vsp; 765 return exceptions; 766} 767 768static u32 769vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn, 770 struct vfp_single *vsm, u32 fpscr) 771{ 772 u32 exp_diff, m_sig; 773 774 if (vsn->significand & 0x80000000 || 775 vsm->significand & 0x80000000) { 776 pr_info("VFP: bad FP values in %s\n", __func__); 777 vfp_single_dump("VSN", vsn); 778 vfp_single_dump("VSM", vsm); 779 } 780 781 /* 782 * Ensure that 'n' is the largest magnitude number. Note that 783 * if 'n' and 'm' have equal exponents, we do not swap them. 784 * This ensures that NaN propagation works correctly. 785 */ 786 if (vsn->exponent < vsm->exponent) { 787 struct vfp_single *t = vsn; 788 vsn = vsm; 789 vsm = t; 790 } 791 792 /* 793 * Is 'n' an infinity or a NaN? Note that 'm' may be a number, 794 * infinity or a NaN here. 795 */ 796 if (vsn->exponent == 255) 797 return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr); 798 799 /* 800 * We have two proper numbers, where 'vsn' is the larger magnitude. 801 * 802 * Copy 'n' to 'd' before doing the arithmetic. 803 */ 804 *vsd = *vsn; 805 806 /* 807 * Align both numbers. 808 */ 809 exp_diff = vsn->exponent - vsm->exponent; 810 m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff); 811 812 /* 813 * If the signs are different, we are really subtracting. 814 */ 815 if (vsn->sign ^ vsm->sign) { 816 m_sig = vsn->significand - m_sig; 817 if ((s32)m_sig < 0) { 818 vsd->sign = vfp_sign_negate(vsd->sign); 819 m_sig = -m_sig; 820 } else if (m_sig == 0) { 821 vsd->sign = (fpscr & FPSCR_RMODE_MASK) == 822 FPSCR_ROUND_MINUSINF ? 0x8000 : 0; 823 } 824 } else { 825 m_sig = vsn->significand + m_sig; 826 } 827 vsd->significand = m_sig; 828 829 return 0; 830} 831 832static u32 833vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr) 834{ 835 vfp_single_dump("VSN", vsn); 836 vfp_single_dump("VSM", vsm); 837 838 /* 839 * Ensure that 'n' is the largest magnitude number. Note that 840 * if 'n' and 'm' have equal exponents, we do not swap them. 841 * This ensures that NaN propagation works correctly. 842 */ 843 if (vsn->exponent < vsm->exponent) { 844 struct vfp_single *t = vsn; 845 vsn = vsm; 846 vsm = t; 847 pr_debug("VFP: swapping M <-> N\n"); 848 } 849 850 vsd->sign = vsn->sign ^ vsm->sign; 851 852 /* 853 * If 'n' is an infinity or NaN, handle it. 'm' may be anything. 854 */ 855 if (vsn->exponent == 255) { 856 if (vsn->significand || (vsm->exponent == 255 && vsm->significand)) 857 return vfp_propagate_nan(vsd, vsn, vsm, fpscr); 858 if ((vsm->exponent | vsm->significand) == 0) { 859 *vsd = vfp_single_default_qnan; 860 return FPSCR_IOC; 861 } 862 vsd->exponent = vsn->exponent; 863 vsd->significand = 0; 864 return 0; 865 } 866 867 /* 868 * If 'm' is zero, the result is always zero. In this case, 869 * 'n' may be zero or a number, but it doesn't matter which. 870 */ 871 if ((vsm->exponent | vsm->significand) == 0) { 872 vsd->exponent = 0; 873 vsd->significand = 0; 874 return 0; 875 } 876 877 /* 878 * We add 2 to the destination exponent for the same reason as 879 * the addition case - though this time we have +1 from each 880 * input operand. 881 */ 882 vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2; 883 vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand); 884 885 vfp_single_dump("VSD", vsd); 886 return 0; 887} 888 889#define NEG_MULTIPLY (1 << 0) 890#define NEG_SUBTRACT (1 << 1) 891 892static u32 893vfp_single_multiply_accumulate(int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func) 894{ 895 struct vfp_single vsd, vsp, vsn, vsm; 896 u32 exceptions; 897 s32 v; 898 899 v = vfp_get_float(sn); 900 pr_debug("VFP: s%u = %08x\n", sn, v); 901 vfp_single_unpack(&vsn, v); 902 if (vsn.exponent == 0 && vsn.significand) 903 vfp_single_normalise_denormal(&vsn); 904 905 vfp_single_unpack(&vsm, m); 906 if (vsm.exponent == 0 && vsm.significand) 907 vfp_single_normalise_denormal(&vsm); 908 909 exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr); 910 if (negate & NEG_MULTIPLY) 911 vsp.sign = vfp_sign_negate(vsp.sign); 912 913 v = vfp_get_float(sd); 914 pr_debug("VFP: s%u = %08x\n", sd, v); 915 vfp_single_unpack(&vsn, v); 916 if (negate & NEG_SUBTRACT) 917 vsn.sign = vfp_sign_negate(vsn.sign); 918 919 exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr); 920 921 return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, func); 922} 923 924/* 925 * Standard operations 926 */ 927 928/* 929 * sd = sd + (sn * sm) 930 */ 931static u32 vfp_single_fmac(int sd, int sn, s32 m, u32 fpscr) 932{ 933 return vfp_single_multiply_accumulate(sd, sn, m, fpscr, 0, "fmac"); 934} 935 936/* 937 * sd = sd - (sn * sm) 938 */ 939static u32 vfp_single_fnmac(int sd, int sn, s32 m, u32 fpscr) 940{ 941 return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac"); 942} 943 944/* 945 * sd = -sd + (sn * sm) 946 */ 947static u32 vfp_single_fmsc(int sd, int sn, s32 m, u32 fpscr) 948{ 949 return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc"); 950} 951 952/* 953 * sd = -sd - (sn * sm) 954 */ 955static u32 vfp_single_fnmsc(int sd, int sn, s32 m, u32 fpscr) 956{ 957 return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc"); 958} 959 960/* 961 * sd = sn * sm 962 */ 963static u32 vfp_single_fmul(int sd, int sn, s32 m, u32 fpscr) 964{ 965 struct vfp_single vsd, vsn, vsm; 966 u32 exceptions; 967 s32 n = vfp_get_float(sn); 968 969 pr_debug("VFP: s%u = %08x\n", sn, n); 970 971 vfp_single_unpack(&vsn, n); 972 if (vsn.exponent == 0 && vsn.significand) 973 vfp_single_normalise_denormal(&vsn); 974 975 vfp_single_unpack(&vsm, m); 976 if (vsm.exponent == 0 && vsm.significand) 977 vfp_single_normalise_denormal(&vsm); 978 979 exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr); 980 return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fmul"); 981} 982 983/* 984 * sd = -(sn * sm) 985 */ 986static u32 vfp_single_fnmul(int sd, int sn, s32 m, u32 fpscr) 987{ 988 struct vfp_single vsd, vsn, vsm; 989 u32 exceptions; 990 s32 n = vfp_get_float(sn); 991 992 pr_debug("VFP: s%u = %08x\n", sn, n); 993 994 vfp_single_unpack(&vsn, n); 995 if (vsn.exponent == 0 && vsn.significand) 996 vfp_single_normalise_denormal(&vsn); 997 998 vfp_single_unpack(&vsm, m); 999 if (vsm.exponent == 0 && vsm.significand) 1000 vfp_single_normalise_denormal(&vsm); 1001 1002 exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr); 1003 vsd.sign = vfp_sign_negate(vsd.sign); 1004 return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fnmul"); 1005} 1006 1007/* 1008 * sd = sn + sm 1009 */ 1010static u32 vfp_single_fadd(int sd, int sn, s32 m, u32 fpscr) 1011{ 1012 struct vfp_single vsd, vsn, vsm; 1013 u32 exceptions; 1014 s32 n = vfp_get_float(sn); 1015 1016 pr_debug("VFP: s%u = %08x\n", sn, n); 1017 1018 /* 1019 * Unpack and normalise denormals. 1020 */ 1021 vfp_single_unpack(&vsn, n); 1022 if (vsn.exponent == 0 && vsn.significand) 1023 vfp_single_normalise_denormal(&vsn); 1024 1025 vfp_single_unpack(&vsm, m); 1026 if (vsm.exponent == 0 && vsm.significand) 1027 vfp_single_normalise_denormal(&vsm); 1028 1029 exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr); 1030 1031 return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fadd"); 1032} 1033 1034/* 1035 * sd = sn - sm 1036 */ 1037static u32 vfp_single_fsub(int sd, int sn, s32 m, u32 fpscr) 1038{ 1039 /* 1040 * Subtraction is addition with one sign inverted. 1041 */ 1042 return vfp_single_fadd(sd, sn, vfp_single_packed_negate(m), fpscr); 1043} 1044 1045/* 1046 * sd = sn / sm 1047 */ 1048static u32 vfp_single_fdiv(int sd, int sn, s32 m, u32 fpscr) 1049{ 1050 struct vfp_single vsd, vsn, vsm; 1051 u32 exceptions = 0; 1052 s32 n = vfp_get_float(sn); 1053 int tm, tn; 1054 1055 pr_debug("VFP: s%u = %08x\n", sn, n); 1056 1057 vfp_single_unpack(&vsn, n); 1058 vfp_single_unpack(&vsm, m); 1059 1060 vsd.sign = vsn.sign ^ vsm.sign; 1061 1062 tn = vfp_single_type(&vsn); 1063 tm = vfp_single_type(&vsm); 1064 1065 /* 1066 * Is n a NAN? 1067 */ 1068 if (tn & VFP_NAN) 1069 goto vsn_nan; 1070 1071 /* 1072 * Is m a NAN? 1073 */ 1074 if (tm & VFP_NAN) 1075 goto vsm_nan; 1076 1077 /* 1078 * If n and m are infinity, the result is invalid 1079 * If n and m are zero, the result is invalid 1080 */ 1081 if (tm & tn & (VFP_INFINITY|VFP_ZERO)) 1082 goto invalid; 1083 1084 /* 1085 * If n is infinity, the result is infinity 1086 */ 1087 if (tn & VFP_INFINITY) 1088 goto infinity; 1089 1090 /* 1091 * If m is zero, raise div0 exception 1092 */ 1093 if (tm & VFP_ZERO) 1094 goto divzero; 1095 1096 /* 1097 * If m is infinity, or n is zero, the result is zero 1098 */ 1099 if (tm & VFP_INFINITY || tn & VFP_ZERO) 1100 goto zero; 1101 1102 if (tn & VFP_DENORMAL) 1103 vfp_single_normalise_denormal(&vsn); 1104 if (tm & VFP_DENORMAL) 1105 vfp_single_normalise_denormal(&vsm); 1106 1107 /* 1108 * Ok, we have two numbers, we can perform division. 1109 */ 1110 vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1; 1111 vsm.significand <<= 1; 1112 if (vsm.significand <= (2 * vsn.significand)) { 1113 vsn.significand >>= 1; 1114 vsd.exponent++; 1115 } 1116 { 1117 u64 significand = (u64)vsn.significand << 32; 1118 do_div(significand, vsm.significand); 1119 vsd.significand = significand; 1120 } 1121 if ((vsd.significand & 0x3f) == 0) 1122 vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32); 1123 1124 return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fdiv"); 1125 1126 vsn_nan: 1127 exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr); 1128 pack: 1129 vfp_put_float(sd, vfp_single_pack(&vsd)); 1130 return exceptions; 1131 1132 vsm_nan: 1133 exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr); 1134 goto pack; 1135 1136 zero: 1137 vsd.exponent = 0; 1138 vsd.significand = 0; 1139 goto pack; 1140 1141 divzero: 1142 exceptions = FPSCR_DZC; 1143 infinity: 1144 vsd.exponent = 255; 1145 vsd.significand = 0; 1146 goto pack; 1147 1148 invalid: 1149 vfp_put_float(sd, vfp_single_pack(&vfp_single_default_qnan)); 1150 return FPSCR_IOC; 1151} 1152 1153static u32 (* const fop_fns[16])(int sd, int sn, s32 m, u32 fpscr) = { 1154 [FOP_TO_IDX(FOP_FMAC)] = vfp_single_fmac, 1155 [FOP_TO_IDX(FOP_FNMAC)] = vfp_single_fnmac, 1156 [FOP_TO_IDX(FOP_FMSC)] = vfp_single_fmsc, 1157 [FOP_TO_IDX(FOP_FNMSC)] = vfp_single_fnmsc, 1158 [FOP_TO_IDX(FOP_FMUL)] = vfp_single_fmul, 1159 [FOP_TO_IDX(FOP_FNMUL)] = vfp_single_fnmul, 1160 [FOP_TO_IDX(FOP_FADD)] = vfp_single_fadd, 1161 [FOP_TO_IDX(FOP_FSUB)] = vfp_single_fsub, 1162 [FOP_TO_IDX(FOP_FDIV)] = vfp_single_fdiv, 1163}; 1164 1165#define FREG_BANK(x) ((x) & 0x18) 1166#define FREG_IDX(x) ((x) & 7) 1167 1168u32 vfp_single_cpdo(u32 inst, u32 fpscr) 1169{ 1170 u32 op = inst & FOP_MASK; 1171 u32 exceptions = 0; 1172 unsigned int sd = vfp_get_sd(inst); 1173 unsigned int sn = vfp_get_sn(inst); 1174 unsigned int sm = vfp_get_sm(inst); 1175 unsigned int vecitr, veclen, vecstride; 1176 u32 (*fop)(int, int, s32, u32); 1177 1178 veclen = fpscr & FPSCR_LENGTH_MASK; 1179 vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK); 1180 1181 /* 1182 * If destination bank is zero, vector length is always '1'. 1183 * ARM DDI0100F C5.1.3, C5.3.2. 1184 */ 1185 if (FREG_BANK(sd) == 0) 1186 veclen = 0; 1187 1188 pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride, 1189 (veclen >> FPSCR_LENGTH_BIT) + 1); 1190 1191 fop = (op == FOP_EXT) ? fop_extfns[sn] : fop_fns[FOP_TO_IDX(op)]; 1192 if (!fop) 1193 goto invalid; 1194 1195 for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) { 1196 s32 m = vfp_get_float(sm); 1197 u32 except; 1198 1199 if (op == FOP_EXT) 1200 pr_debug("VFP: itr%d (s%u) = op[%u] (s%u=%08x)\n", 1201 vecitr >> FPSCR_LENGTH_BIT, sd, sn, sm, m); 1202 else 1203 pr_debug("VFP: itr%d (s%u) = (s%u) op[%u] (s%u=%08x)\n", 1204 vecitr >> FPSCR_LENGTH_BIT, sd, sn, 1205 FOP_TO_IDX(op), sm, m); 1206 1207 except = fop(sd, sn, m, fpscr); 1208 pr_debug("VFP: itr%d: exceptions=%08x\n", 1209 vecitr >> FPSCR_LENGTH_BIT, except); 1210 1211 exceptions |= except; 1212 1213 /* 1214 * This ensures that comparisons only operate on scalars; 1215 * comparisons always return with one FPSCR status bit set. 1216 */ 1217 if (except & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V)) 1218 break; 1219 1220 /* 1221 * CHECK: It appears to be undefined whether we stop when 1222 * we encounter an exception. We continue. 1223 */ 1224 1225 sd = FREG_BANK(sd) + ((FREG_IDX(sd) + vecstride) & 7); 1226 sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7); 1227 if (FREG_BANK(sm) != 0) 1228 sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7); 1229 } 1230 return exceptions; 1231 1232 invalid: 1233 return (u32)-1; 1234} 1235