/* vfpsingle.c revision 42d3fb5a8771b840e0bd6dbcd0c734883dd90b6f */
1/* 2 * linux/arch/arm/vfp/vfpsingle.c 3 * 4 * This code is derived in part from John R. Housers softfloat library, which 5 * carries the following notice: 6 * 7 * =========================================================================== 8 * This C source file is part of the SoftFloat IEC/IEEE Floating-point 9 * Arithmetic Package, Release 2. 10 * 11 * Written by John R. Hauser. This work was made possible in part by the 12 * International Computer Science Institute, located at Suite 600, 1947 Center 13 * Street, Berkeley, California 94704. Funding was partially provided by the 14 * National Science Foundation under grant MIP-9311980. The original version 15 * of this code was written as part of a project to build a fixed-point vector 16 * processor in collaboration with the University of California at Berkeley, 17 * overseen by Profs. Nelson Morgan and John Wawrzynek. More information 18 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ 19 * arithmetic/softfloat.html'. 20 * 21 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort 22 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT 23 * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO 24 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY 25 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 26 * 27 * Derivative works are acceptable, even for commercial purposes, so long as 28 * (1) they include prominent notice that the work is derivative, and (2) they 29 * include prominent notice akin to these three paragraphs for those parts of 30 * this code that are retained. 
31 * =========================================================================== 32 */ 33#include <linux/kernel.h> 34#include <linux/bitops.h> 35 36#include <asm/div64.h> 37#include <asm/ptrace.h> 38#include <asm/vfp.h> 39 40#include "vfpinstr.h" 41#include "vfp.h" 42 43static struct vfp_single vfp_single_default_qnan = { 44 .exponent = 255, 45 .sign = 0, 46 .significand = VFP_SINGLE_SIGNIFICAND_QNAN, 47}; 48 49static void vfp_single_dump(const char *str, struct vfp_single *s) 50{ 51 pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n", 52 str, s->sign != 0, s->exponent, s->significand); 53} 54 55static void vfp_single_normalise_denormal(struct vfp_single *vs) 56{ 57 int bits = 31 - fls(vs->significand); 58 59 vfp_single_dump("normalise_denormal: in", vs); 60 61 if (bits) { 62 vs->exponent -= bits - 1; 63 vs->significand <<= bits; 64 } 65 66 vfp_single_dump("normalise_denormal: out", vs); 67} 68 69#ifndef DEBUG 70#define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except) 71u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions) 72#else 73u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func) 74#endif 75{ 76 u32 significand, incr, rmode; 77 int exponent, shift, underflow; 78 79 vfp_single_dump("pack: in", vs); 80 81 /* 82 * Infinities and NaNs are a special case. 83 */ 84 if (vs->exponent == 255 && (vs->significand == 0 || exceptions)) 85 goto pack; 86 87 /* 88 * Special-case zero. 89 */ 90 if (vs->significand == 0) { 91 vs->exponent = 0; 92 goto pack; 93 } 94 95 exponent = vs->exponent; 96 significand = vs->significand; 97 98 /* 99 * Normalise first. Note that we shift the significand up to 100 * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least 101 * significant bit. 
102 */ 103 shift = 32 - fls(significand); 104 if (shift < 32 && shift) { 105 exponent -= shift; 106 significand <<= shift; 107 } 108 109#ifdef DEBUG 110 vs->exponent = exponent; 111 vs->significand = significand; 112 vfp_single_dump("pack: normalised", vs); 113#endif 114 115 /* 116 * Tiny number? 117 */ 118 underflow = exponent < 0; 119 if (underflow) { 120 significand = vfp_shiftright32jamming(significand, -exponent); 121 exponent = 0; 122#ifdef DEBUG 123 vs->exponent = exponent; 124 vs->significand = significand; 125 vfp_single_dump("pack: tiny number", vs); 126#endif 127 if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))) 128 underflow = 0; 129 } 130 131 /* 132 * Select rounding increment. 133 */ 134 incr = 0; 135 rmode = fpscr & FPSCR_RMODE_MASK; 136 137 if (rmode == FPSCR_ROUND_NEAREST) { 138 incr = 1 << VFP_SINGLE_LOW_BITS; 139 if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0) 140 incr -= 1; 141 } else if (rmode == FPSCR_ROUND_TOZERO) { 142 incr = 0; 143 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0)) 144 incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1; 145 146 pr_debug("VFP: rounding increment = 0x%08x\n", incr); 147 148 /* 149 * Is our rounding going to overflow? 150 */ 151 if ((significand + incr) < significand) { 152 exponent += 1; 153 significand = (significand >> 1) | (significand & 1); 154 incr >>= 1; 155#ifdef DEBUG 156 vs->exponent = exponent; 157 vs->significand = significand; 158 vfp_single_dump("pack: overflow", vs); 159#endif 160 } 161 162 /* 163 * If any of the low bits (which will be shifted out of the 164 * number) are non-zero, the result is inexact. 165 */ 166 if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)) 167 exceptions |= FPSCR_IXC; 168 169 /* 170 * Do our rounding. 171 */ 172 significand += incr; 173 174 /* 175 * Infinity? 
176 */ 177 if (exponent >= 254) { 178 exceptions |= FPSCR_OFC | FPSCR_IXC; 179 if (incr == 0) { 180 vs->exponent = 253; 181 vs->significand = 0x7fffffff; 182 } else { 183 vs->exponent = 255; /* infinity */ 184 vs->significand = 0; 185 } 186 } else { 187 if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0) 188 exponent = 0; 189 if (exponent || significand > 0x80000000) 190 underflow = 0; 191 if (underflow) 192 exceptions |= FPSCR_UFC; 193 vs->exponent = exponent; 194 vs->significand = significand >> 1; 195 } 196 197 pack: 198 vfp_single_dump("pack: final", vs); 199 { 200 s32 d = vfp_single_pack(vs); 201#ifdef DEBUG 202 pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func, 203 sd, d, exceptions); 204#endif 205 vfp_put_float(d, sd); 206 } 207 208 return exceptions; 209} 210 211/* 212 * Propagate the NaN, setting exceptions if it is signalling. 213 * 'n' is always a NaN. 'm' may be a number, NaN or infinity. 214 */ 215static u32 216vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn, 217 struct vfp_single *vsm, u32 fpscr) 218{ 219 struct vfp_single *nan; 220 int tn, tm = 0; 221 222 tn = vfp_single_type(vsn); 223 224 if (vsm) 225 tm = vfp_single_type(vsm); 226 227 if (fpscr & FPSCR_DEFAULT_NAN) 228 /* 229 * Default NaN mode - always returns a quiet NaN 230 */ 231 nan = &vfp_single_default_qnan; 232 else { 233 /* 234 * Contemporary mode - select the first signalling 235 * NAN, or if neither are signalling, the first 236 * quiet NAN. 237 */ 238 if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN)) 239 nan = vsn; 240 else 241 nan = vsm; 242 /* 243 * Make the NaN quiet. 244 */ 245 nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN; 246 } 247 248 *vsd = *nan; 249 250 /* 251 * If one was a signalling NAN, raise invalid operation. 252 */ 253 return tn == VFP_SNAN || tm == VFP_SNAN ? 
FPSCR_IOC : VFP_NAN_FLAG; 254} 255 256 257/* 258 * Extended operations 259 */ 260static u32 vfp_single_fabs(int sd, int unused, s32 m, u32 fpscr) 261{ 262 vfp_put_float(vfp_single_packed_abs(m), sd); 263 return 0; 264} 265 266static u32 vfp_single_fcpy(int sd, int unused, s32 m, u32 fpscr) 267{ 268 vfp_put_float(m, sd); 269 return 0; 270} 271 272static u32 vfp_single_fneg(int sd, int unused, s32 m, u32 fpscr) 273{ 274 vfp_put_float(vfp_single_packed_negate(m), sd); 275 return 0; 276} 277 278static const u16 sqrt_oddadjust[] = { 279 0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0, 280 0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67 281}; 282 283static const u16 sqrt_evenadjust[] = { 284 0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e, 285 0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002 286}; 287 288u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand) 289{ 290 int index; 291 u32 z, a; 292 293 if ((significand & 0xc0000000) != 0x40000000) { 294 printk(KERN_WARNING "VFP: estimate_sqrt: invalid significand\n"); 295 } 296 297 a = significand << 1; 298 index = (a >> 27) & 15; 299 if (exponent & 1) { 300 z = 0x4000 + (a >> 17) - sqrt_oddadjust[index]; 301 z = ((a / z) << 14) + (z << 15); 302 a >>= 1; 303 } else { 304 z = 0x8000 + (a >> 17) - sqrt_evenadjust[index]; 305 z = a / z + z; 306 z = (z >= 0x20000) ? 
0xffff8000 : (z << 15); 307 if (z <= a) 308 return (s32)a >> 1; 309 } 310 { 311 u64 v = (u64)a << 31; 312 do_div(v, z); 313 return v + (z >> 1); 314 } 315} 316 317static u32 vfp_single_fsqrt(int sd, int unused, s32 m, u32 fpscr) 318{ 319 struct vfp_single vsm, vsd; 320 int ret, tm; 321 322 vfp_single_unpack(&vsm, m); 323 tm = vfp_single_type(&vsm); 324 if (tm & (VFP_NAN|VFP_INFINITY)) { 325 struct vfp_single *vsp = &vsd; 326 327 if (tm & VFP_NAN) 328 ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr); 329 else if (vsm.sign == 0) { 330 sqrt_copy: 331 vsp = &vsm; 332 ret = 0; 333 } else { 334 sqrt_invalid: 335 vsp = &vfp_single_default_qnan; 336 ret = FPSCR_IOC; 337 } 338 vfp_put_float(vfp_single_pack(vsp), sd); 339 return ret; 340 } 341 342 /* 343 * sqrt(+/- 0) == +/- 0 344 */ 345 if (tm & VFP_ZERO) 346 goto sqrt_copy; 347 348 /* 349 * Normalise a denormalised number 350 */ 351 if (tm & VFP_DENORMAL) 352 vfp_single_normalise_denormal(&vsm); 353 354 /* 355 * sqrt(<0) = invalid 356 */ 357 if (vsm.sign) 358 goto sqrt_invalid; 359 360 vfp_single_dump("sqrt", &vsm); 361 362 /* 363 * Estimate the square root. 364 */ 365 vsd.sign = 0; 366 vsd.exponent = ((vsm.exponent - 127) >> 1) + 127; 367 vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2; 368 369 vfp_single_dump("sqrt estimate", &vsd); 370 371 /* 372 * And now adjust. 
373 */ 374 if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) { 375 if (vsd.significand < 2) { 376 vsd.significand = 0xffffffff; 377 } else { 378 u64 term; 379 s64 rem; 380 vsm.significand <<= !(vsm.exponent & 1); 381 term = (u64)vsd.significand * vsd.significand; 382 rem = ((u64)vsm.significand << 32) - term; 383 384 pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem); 385 386 while (rem < 0) { 387 vsd.significand -= 1; 388 rem += ((u64)vsd.significand << 1) | 1; 389 } 390 vsd.significand |= rem != 0; 391 } 392 } 393 vsd.significand = vfp_shiftright32jamming(vsd.significand, 1); 394 395 return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fsqrt"); 396} 397 398/* 399 * Equal := ZC 400 * Less than := N 401 * Greater than := C 402 * Unordered := CV 403 */ 404static u32 vfp_compare(int sd, int signal_on_qnan, s32 m, u32 fpscr) 405{ 406 s32 d; 407 u32 ret = 0; 408 409 d = vfp_get_float(sd); 410 if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) { 411 ret |= FPSCR_C | FPSCR_V; 412 if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) 413 /* 414 * Signalling NaN, or signalling on quiet NaN 415 */ 416 ret |= FPSCR_IOC; 417 } 418 419 if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) { 420 ret |= FPSCR_C | FPSCR_V; 421 if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) 422 /* 423 * Signalling NaN, or signalling on quiet NaN 424 */ 425 ret |= FPSCR_IOC; 426 } 427 428 if (ret == 0) { 429 if (d == m || vfp_single_packed_abs(d | m) == 0) { 430 /* 431 * equal 432 */ 433 ret |= FPSCR_Z | FPSCR_C; 434 } else if (vfp_single_packed_sign(d ^ m)) { 435 /* 436 * different signs 437 */ 438 if (vfp_single_packed_sign(d)) 439 /* 440 * d is negative, so d < m 441 */ 442 ret |= FPSCR_N; 443 else 444 /* 445 * d is positive, so d > m 446 */ 447 ret |= FPSCR_C; 448 } else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) { 449 /* 450 * d < m 451 */ 452 
ret |= FPSCR_N; 453 } else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) { 454 /* 455 * d > m 456 */ 457 ret |= FPSCR_C; 458 } 459 } 460 return ret; 461} 462 463static u32 vfp_single_fcmp(int sd, int unused, s32 m, u32 fpscr) 464{ 465 return vfp_compare(sd, 0, m, fpscr); 466} 467 468static u32 vfp_single_fcmpe(int sd, int unused, s32 m, u32 fpscr) 469{ 470 return vfp_compare(sd, 1, m, fpscr); 471} 472 473static u32 vfp_single_fcmpz(int sd, int unused, s32 m, u32 fpscr) 474{ 475 return vfp_compare(sd, 0, 0, fpscr); 476} 477 478static u32 vfp_single_fcmpez(int sd, int unused, s32 m, u32 fpscr) 479{ 480 return vfp_compare(sd, 1, 0, fpscr); 481} 482 483static u32 vfp_single_fcvtd(int dd, int unused, s32 m, u32 fpscr) 484{ 485 struct vfp_single vsm; 486 struct vfp_double vdd; 487 int tm; 488 u32 exceptions = 0; 489 490 vfp_single_unpack(&vsm, m); 491 492 tm = vfp_single_type(&vsm); 493 494 /* 495 * If we have a signalling NaN, signal invalid operation. 496 */ 497 if (tm == VFP_SNAN) 498 exceptions = FPSCR_IOC; 499 500 if (tm & VFP_DENORMAL) 501 vfp_single_normalise_denormal(&vsm); 502 503 vdd.sign = vsm.sign; 504 vdd.significand = (u64)vsm.significand << 32; 505 506 /* 507 * If we have an infinity or NaN, the exponent must be 2047. 
508 */ 509 if (tm & (VFP_INFINITY|VFP_NAN)) { 510 vdd.exponent = 2047; 511 if (tm == VFP_QNAN) 512 vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN; 513 goto pack_nan; 514 } else if (tm & VFP_ZERO) 515 vdd.exponent = 0; 516 else 517 vdd.exponent = vsm.exponent + (1023 - 127); 518 519 return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fcvtd"); 520 521 pack_nan: 522 vfp_put_double(vfp_double_pack(&vdd), dd); 523 return exceptions; 524} 525 526static u32 vfp_single_fuito(int sd, int unused, s32 m, u32 fpscr) 527{ 528 struct vfp_single vs; 529 530 vs.sign = 0; 531 vs.exponent = 127 + 31 - 1; 532 vs.significand = (u32)m; 533 534 return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fuito"); 535} 536 537static u32 vfp_single_fsito(int sd, int unused, s32 m, u32 fpscr) 538{ 539 struct vfp_single vs; 540 541 vs.sign = (m & 0x80000000) >> 16; 542 vs.exponent = 127 + 31 - 1; 543 vs.significand = vs.sign ? -m : m; 544 545 return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fsito"); 546} 547 548static u32 vfp_single_ftoui(int sd, int unused, s32 m, u32 fpscr) 549{ 550 struct vfp_single vsm; 551 u32 d, exceptions = 0; 552 int rmode = fpscr & FPSCR_RMODE_MASK; 553 int tm; 554 555 vfp_single_unpack(&vsm, m); 556 vfp_single_dump("VSM", &vsm); 557 558 /* 559 * Do we have a denormalised number? 560 */ 561 tm = vfp_single_type(&vsm); 562 if (tm & VFP_DENORMAL) 563 exceptions |= FPSCR_IDC; 564 565 if (tm & VFP_NAN) 566 vsm.sign = 0; 567 568 if (vsm.exponent >= 127 + 32) { 569 d = vsm.sign ? 
0 : 0xffffffff; 570 exceptions = FPSCR_IOC; 571 } else if (vsm.exponent >= 127 - 1) { 572 int shift = 127 + 31 - vsm.exponent; 573 u32 rem, incr = 0; 574 575 /* 576 * 2^0 <= m < 2^32-2^8 577 */ 578 d = (vsm.significand << 1) >> shift; 579 rem = vsm.significand << (33 - shift); 580 581 if (rmode == FPSCR_ROUND_NEAREST) { 582 incr = 0x80000000; 583 if ((d & 1) == 0) 584 incr -= 1; 585 } else if (rmode == FPSCR_ROUND_TOZERO) { 586 incr = 0; 587 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) { 588 incr = ~0; 589 } 590 591 if ((rem + incr) < rem) { 592 if (d < 0xffffffff) 593 d += 1; 594 else 595 exceptions |= FPSCR_IOC; 596 } 597 598 if (d && vsm.sign) { 599 d = 0; 600 exceptions |= FPSCR_IOC; 601 } else if (rem) 602 exceptions |= FPSCR_IXC; 603 } else { 604 d = 0; 605 if (vsm.exponent | vsm.significand) { 606 exceptions |= FPSCR_IXC; 607 if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0) 608 d = 1; 609 else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) { 610 d = 0; 611 exceptions |= FPSCR_IOC; 612 } 613 } 614 } 615 616 pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); 617 618 vfp_put_float(d, sd); 619 620 return exceptions; 621} 622 623static u32 vfp_single_ftouiz(int sd, int unused, s32 m, u32 fpscr) 624{ 625 return vfp_single_ftoui(sd, unused, m, FPSCR_ROUND_TOZERO); 626} 627 628static u32 vfp_single_ftosi(int sd, int unused, s32 m, u32 fpscr) 629{ 630 struct vfp_single vsm; 631 u32 d, exceptions = 0; 632 int rmode = fpscr & FPSCR_RMODE_MASK; 633 int tm; 634 635 vfp_single_unpack(&vsm, m); 636 vfp_single_dump("VSM", &vsm); 637 638 /* 639 * Do we have a denormalised number? 
640 */ 641 tm = vfp_single_type(&vsm); 642 if (vfp_single_type(&vsm) & VFP_DENORMAL) 643 exceptions |= FPSCR_IDC; 644 645 if (tm & VFP_NAN) { 646 d = 0; 647 exceptions |= FPSCR_IOC; 648 } else if (vsm.exponent >= 127 + 32) { 649 /* 650 * m >= 2^31-2^7: invalid 651 */ 652 d = 0x7fffffff; 653 if (vsm.sign) 654 d = ~d; 655 exceptions |= FPSCR_IOC; 656 } else if (vsm.exponent >= 127 - 1) { 657 int shift = 127 + 31 - vsm.exponent; 658 u32 rem, incr = 0; 659 660 /* 2^0 <= m <= 2^31-2^7 */ 661 d = (vsm.significand << 1) >> shift; 662 rem = vsm.significand << (33 - shift); 663 664 if (rmode == FPSCR_ROUND_NEAREST) { 665 incr = 0x80000000; 666 if ((d & 1) == 0) 667 incr -= 1; 668 } else if (rmode == FPSCR_ROUND_TOZERO) { 669 incr = 0; 670 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) { 671 incr = ~0; 672 } 673 674 if ((rem + incr) < rem && d < 0xffffffff) 675 d += 1; 676 if (d > 0x7fffffff + (vsm.sign != 0)) { 677 d = 0x7fffffff + (vsm.sign != 0); 678 exceptions |= FPSCR_IOC; 679 } else if (rem) 680 exceptions |= FPSCR_IXC; 681 682 if (vsm.sign) 683 d = -d; 684 } else { 685 d = 0; 686 if (vsm.exponent | vsm.significand) { 687 exceptions |= FPSCR_IXC; 688 if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0) 689 d = 1; 690 else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) 691 d = -1; 692 } 693 } 694 695 pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions); 696 697 vfp_put_float((s32)d, sd); 698 699 return exceptions; 700} 701 702static u32 vfp_single_ftosiz(int sd, int unused, s32 m, u32 fpscr) 703{ 704 return vfp_single_ftosi(sd, unused, m, FPSCR_ROUND_TOZERO); 705} 706 707static struct op fops_ext[32] = { 708 [FEXT_TO_IDX(FEXT_FCPY)] = { vfp_single_fcpy, 0 }, 709 [FEXT_TO_IDX(FEXT_FABS)] = { vfp_single_fabs, 0 }, 710 [FEXT_TO_IDX(FEXT_FNEG)] = { vfp_single_fneg, 0 }, 711 [FEXT_TO_IDX(FEXT_FSQRT)] = { vfp_single_fsqrt, 0 }, 712 [FEXT_TO_IDX(FEXT_FCMP)] = { vfp_single_fcmp, OP_SCALAR }, 713 [FEXT_TO_IDX(FEXT_FCMPE)] = { vfp_single_fcmpe, 
OP_SCALAR }, 714 [FEXT_TO_IDX(FEXT_FCMPZ)] = { vfp_single_fcmpz, OP_SCALAR }, 715 [FEXT_TO_IDX(FEXT_FCMPEZ)] = { vfp_single_fcmpez, OP_SCALAR }, 716 [FEXT_TO_IDX(FEXT_FCVT)] = { vfp_single_fcvtd, OP_SCALAR|OP_DD }, 717 [FEXT_TO_IDX(FEXT_FUITO)] = { vfp_single_fuito, OP_SCALAR }, 718 [FEXT_TO_IDX(FEXT_FSITO)] = { vfp_single_fsito, OP_SCALAR }, 719 [FEXT_TO_IDX(FEXT_FTOUI)] = { vfp_single_ftoui, OP_SCALAR }, 720 [FEXT_TO_IDX(FEXT_FTOUIZ)] = { vfp_single_ftouiz, OP_SCALAR }, 721 [FEXT_TO_IDX(FEXT_FTOSI)] = { vfp_single_ftosi, OP_SCALAR }, 722 [FEXT_TO_IDX(FEXT_FTOSIZ)] = { vfp_single_ftosiz, OP_SCALAR }, 723}; 724 725 726 727 728 729static u32 730vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn, 731 struct vfp_single *vsm, u32 fpscr) 732{ 733 struct vfp_single *vsp; 734 u32 exceptions = 0; 735 int tn, tm; 736 737 tn = vfp_single_type(vsn); 738 tm = vfp_single_type(vsm); 739 740 if (tn & tm & VFP_INFINITY) { 741 /* 742 * Two infinities. Are they different signs? 743 */ 744 if (vsn->sign ^ vsm->sign) { 745 /* 746 * different signs -> invalid 747 */ 748 exceptions = FPSCR_IOC; 749 vsp = &vfp_single_default_qnan; 750 } else { 751 /* 752 * same signs -> valid 753 */ 754 vsp = vsn; 755 } 756 } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) { 757 /* 758 * One infinity and one number -> infinity 759 */ 760 vsp = vsn; 761 } else { 762 /* 763 * 'n' is a NaN of some type 764 */ 765 return vfp_propagate_nan(vsd, vsn, vsm, fpscr); 766 } 767 *vsd = *vsp; 768 return exceptions; 769} 770 771static u32 772vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn, 773 struct vfp_single *vsm, u32 fpscr) 774{ 775 u32 exp_diff, m_sig; 776 777 if (vsn->significand & 0x80000000 || 778 vsm->significand & 0x80000000) { 779 pr_info("VFP: bad FP values in %s\n", __func__); 780 vfp_single_dump("VSN", vsn); 781 vfp_single_dump("VSM", vsm); 782 } 783 784 /* 785 * Ensure that 'n' is the largest magnitude number. 
Note that 786 * if 'n' and 'm' have equal exponents, we do not swap them. 787 * This ensures that NaN propagation works correctly. 788 */ 789 if (vsn->exponent < vsm->exponent) { 790 struct vfp_single *t = vsn; 791 vsn = vsm; 792 vsm = t; 793 } 794 795 /* 796 * Is 'n' an infinity or a NaN? Note that 'm' may be a number, 797 * infinity or a NaN here. 798 */ 799 if (vsn->exponent == 255) 800 return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr); 801 802 /* 803 * We have two proper numbers, where 'vsn' is the larger magnitude. 804 * 805 * Copy 'n' to 'd' before doing the arithmetic. 806 */ 807 *vsd = *vsn; 808 809 /* 810 * Align both numbers. 811 */ 812 exp_diff = vsn->exponent - vsm->exponent; 813 m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff); 814 815 /* 816 * If the signs are different, we are really subtracting. 817 */ 818 if (vsn->sign ^ vsm->sign) { 819 m_sig = vsn->significand - m_sig; 820 if ((s32)m_sig < 0) { 821 vsd->sign = vfp_sign_negate(vsd->sign); 822 m_sig = -m_sig; 823 } else if (m_sig == 0) { 824 vsd->sign = (fpscr & FPSCR_RMODE_MASK) == 825 FPSCR_ROUND_MINUSINF ? 0x8000 : 0; 826 } 827 } else { 828 m_sig = vsn->significand + m_sig; 829 } 830 vsd->significand = m_sig; 831 832 return 0; 833} 834 835static u32 836vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr) 837{ 838 vfp_single_dump("VSN", vsn); 839 vfp_single_dump("VSM", vsm); 840 841 /* 842 * Ensure that 'n' is the largest magnitude number. Note that 843 * if 'n' and 'm' have equal exponents, we do not swap them. 844 * This ensures that NaN propagation works correctly. 845 */ 846 if (vsn->exponent < vsm->exponent) { 847 struct vfp_single *t = vsn; 848 vsn = vsm; 849 vsm = t; 850 pr_debug("VFP: swapping M <-> N\n"); 851 } 852 853 vsd->sign = vsn->sign ^ vsm->sign; 854 855 /* 856 * If 'n' is an infinity or NaN, handle it. 'm' may be anything. 
857 */ 858 if (vsn->exponent == 255) { 859 if (vsn->significand || (vsm->exponent == 255 && vsm->significand)) 860 return vfp_propagate_nan(vsd, vsn, vsm, fpscr); 861 if ((vsm->exponent | vsm->significand) == 0) { 862 *vsd = vfp_single_default_qnan; 863 return FPSCR_IOC; 864 } 865 vsd->exponent = vsn->exponent; 866 vsd->significand = 0; 867 return 0; 868 } 869 870 /* 871 * If 'm' is zero, the result is always zero. In this case, 872 * 'n' may be zero or a number, but it doesn't matter which. 873 */ 874 if ((vsm->exponent | vsm->significand) == 0) { 875 vsd->exponent = 0; 876 vsd->significand = 0; 877 return 0; 878 } 879 880 /* 881 * We add 2 to the destination exponent for the same reason as 882 * the addition case - though this time we have +1 from each 883 * input operand. 884 */ 885 vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2; 886 vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand); 887 888 vfp_single_dump("VSD", vsd); 889 return 0; 890} 891 892#define NEG_MULTIPLY (1 << 0) 893#define NEG_SUBTRACT (1 << 1) 894 895static u32 896vfp_single_multiply_accumulate(int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func) 897{ 898 struct vfp_single vsd, vsp, vsn, vsm; 899 u32 exceptions; 900 s32 v; 901 902 v = vfp_get_float(sn); 903 pr_debug("VFP: s%u = %08x\n", sn, v); 904 vfp_single_unpack(&vsn, v); 905 if (vsn.exponent == 0 && vsn.significand) 906 vfp_single_normalise_denormal(&vsn); 907 908 vfp_single_unpack(&vsm, m); 909 if (vsm.exponent == 0 && vsm.significand) 910 vfp_single_normalise_denormal(&vsm); 911 912 exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr); 913 if (negate & NEG_MULTIPLY) 914 vsp.sign = vfp_sign_negate(vsp.sign); 915 916 v = vfp_get_float(sd); 917 pr_debug("VFP: s%u = %08x\n", sd, v); 918 vfp_single_unpack(&vsn, v); 919 if (negate & NEG_SUBTRACT) 920 vsn.sign = vfp_sign_negate(vsn.sign); 921 922 exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr); 923 924 return vfp_single_normaliseround(sd, 
&vsd, fpscr, exceptions, func); 925} 926 927/* 928 * Standard operations 929 */ 930 931/* 932 * sd = sd + (sn * sm) 933 */ 934static u32 vfp_single_fmac(int sd, int sn, s32 m, u32 fpscr) 935{ 936 return vfp_single_multiply_accumulate(sd, sn, m, fpscr, 0, "fmac"); 937} 938 939/* 940 * sd = sd - (sn * sm) 941 */ 942static u32 vfp_single_fnmac(int sd, int sn, s32 m, u32 fpscr) 943{ 944 return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac"); 945} 946 947/* 948 * sd = -sd + (sn * sm) 949 */ 950static u32 vfp_single_fmsc(int sd, int sn, s32 m, u32 fpscr) 951{ 952 return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc"); 953} 954 955/* 956 * sd = -sd - (sn * sm) 957 */ 958static u32 vfp_single_fnmsc(int sd, int sn, s32 m, u32 fpscr) 959{ 960 return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc"); 961} 962 963/* 964 * sd = sn * sm 965 */ 966static u32 vfp_single_fmul(int sd, int sn, s32 m, u32 fpscr) 967{ 968 struct vfp_single vsd, vsn, vsm; 969 u32 exceptions; 970 s32 n = vfp_get_float(sn); 971 972 pr_debug("VFP: s%u = %08x\n", sn, n); 973 974 vfp_single_unpack(&vsn, n); 975 if (vsn.exponent == 0 && vsn.significand) 976 vfp_single_normalise_denormal(&vsn); 977 978 vfp_single_unpack(&vsm, m); 979 if (vsm.exponent == 0 && vsm.significand) 980 vfp_single_normalise_denormal(&vsm); 981 982 exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr); 983 return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fmul"); 984} 985 986/* 987 * sd = -(sn * sm) 988 */ 989static u32 vfp_single_fnmul(int sd, int sn, s32 m, u32 fpscr) 990{ 991 struct vfp_single vsd, vsn, vsm; 992 u32 exceptions; 993 s32 n = vfp_get_float(sn); 994 995 pr_debug("VFP: s%u = %08x\n", sn, n); 996 997 vfp_single_unpack(&vsn, n); 998 if (vsn.exponent == 0 && vsn.significand) 999 vfp_single_normalise_denormal(&vsn); 1000 1001 vfp_single_unpack(&vsm, m); 1002 if (vsm.exponent == 0 && vsm.significand) 1003 
vfp_single_normalise_denormal(&vsm); 1004 1005 exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr); 1006 vsd.sign = vfp_sign_negate(vsd.sign); 1007 return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fnmul"); 1008} 1009 1010/* 1011 * sd = sn + sm 1012 */ 1013static u32 vfp_single_fadd(int sd, int sn, s32 m, u32 fpscr) 1014{ 1015 struct vfp_single vsd, vsn, vsm; 1016 u32 exceptions; 1017 s32 n = vfp_get_float(sn); 1018 1019 pr_debug("VFP: s%u = %08x\n", sn, n); 1020 1021 /* 1022 * Unpack and normalise denormals. 1023 */ 1024 vfp_single_unpack(&vsn, n); 1025 if (vsn.exponent == 0 && vsn.significand) 1026 vfp_single_normalise_denormal(&vsn); 1027 1028 vfp_single_unpack(&vsm, m); 1029 if (vsm.exponent == 0 && vsm.significand) 1030 vfp_single_normalise_denormal(&vsm); 1031 1032 exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr); 1033 1034 return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fadd"); 1035} 1036 1037/* 1038 * sd = sn - sm 1039 */ 1040static u32 vfp_single_fsub(int sd, int sn, s32 m, u32 fpscr) 1041{ 1042 /* 1043 * Subtraction is addition with one sign inverted. 1044 */ 1045 return vfp_single_fadd(sd, sn, vfp_single_packed_negate(m), fpscr); 1046} 1047 1048/* 1049 * sd = sn / sm 1050 */ 1051static u32 vfp_single_fdiv(int sd, int sn, s32 m, u32 fpscr) 1052{ 1053 struct vfp_single vsd, vsn, vsm; 1054 u32 exceptions = 0; 1055 s32 n = vfp_get_float(sn); 1056 int tm, tn; 1057 1058 pr_debug("VFP: s%u = %08x\n", sn, n); 1059 1060 vfp_single_unpack(&vsn, n); 1061 vfp_single_unpack(&vsm, m); 1062 1063 vsd.sign = vsn.sign ^ vsm.sign; 1064 1065 tn = vfp_single_type(&vsn); 1066 tm = vfp_single_type(&vsm); 1067 1068 /* 1069 * Is n a NAN? 1070 */ 1071 if (tn & VFP_NAN) 1072 goto vsn_nan; 1073 1074 /* 1075 * Is m a NAN? 
1076 */ 1077 if (tm & VFP_NAN) 1078 goto vsm_nan; 1079 1080 /* 1081 * If n and m are infinity, the result is invalid 1082 * If n and m are zero, the result is invalid 1083 */ 1084 if (tm & tn & (VFP_INFINITY|VFP_ZERO)) 1085 goto invalid; 1086 1087 /* 1088 * If n is infinity, the result is infinity 1089 */ 1090 if (tn & VFP_INFINITY) 1091 goto infinity; 1092 1093 /* 1094 * If m is zero, raise div0 exception 1095 */ 1096 if (tm & VFP_ZERO) 1097 goto divzero; 1098 1099 /* 1100 * If m is infinity, or n is zero, the result is zero 1101 */ 1102 if (tm & VFP_INFINITY || tn & VFP_ZERO) 1103 goto zero; 1104 1105 if (tn & VFP_DENORMAL) 1106 vfp_single_normalise_denormal(&vsn); 1107 if (tm & VFP_DENORMAL) 1108 vfp_single_normalise_denormal(&vsm); 1109 1110 /* 1111 * Ok, we have two numbers, we can perform division. 1112 */ 1113 vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1; 1114 vsm.significand <<= 1; 1115 if (vsm.significand <= (2 * vsn.significand)) { 1116 vsn.significand >>= 1; 1117 vsd.exponent++; 1118 } 1119 { 1120 u64 significand = (u64)vsn.significand << 32; 1121 do_div(significand, vsm.significand); 1122 vsd.significand = significand; 1123 } 1124 if ((vsd.significand & 0x3f) == 0) 1125 vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32); 1126 1127 return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fdiv"); 1128 1129 vsn_nan: 1130 exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr); 1131 pack: 1132 vfp_put_float(vfp_single_pack(&vsd), sd); 1133 return exceptions; 1134 1135 vsm_nan: 1136 exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr); 1137 goto pack; 1138 1139 zero: 1140 vsd.exponent = 0; 1141 vsd.significand = 0; 1142 goto pack; 1143 1144 divzero: 1145 exceptions = FPSCR_DZC; 1146 infinity: 1147 vsd.exponent = 255; 1148 vsd.significand = 0; 1149 goto pack; 1150 1151 invalid: 1152 vfp_put_float(vfp_single_pack(&vfp_single_default_qnan), sd); 1153 return FPSCR_IOC; 1154} 1155 1156static struct op fops[16] = { 
1157 [FOP_TO_IDX(FOP_FMAC)] = { vfp_single_fmac, 0 }, 1158 [FOP_TO_IDX(FOP_FNMAC)] = { vfp_single_fnmac, 0 }, 1159 [FOP_TO_IDX(FOP_FMSC)] = { vfp_single_fmsc, 0 }, 1160 [FOP_TO_IDX(FOP_FNMSC)] = { vfp_single_fnmsc, 0 }, 1161 [FOP_TO_IDX(FOP_FMUL)] = { vfp_single_fmul, 0 }, 1162 [FOP_TO_IDX(FOP_FNMUL)] = { vfp_single_fnmul, 0 }, 1163 [FOP_TO_IDX(FOP_FADD)] = { vfp_single_fadd, 0 }, 1164 [FOP_TO_IDX(FOP_FSUB)] = { vfp_single_fsub, 0 }, 1165 [FOP_TO_IDX(FOP_FDIV)] = { vfp_single_fdiv, 0 }, 1166}; 1167 1168#define FREG_BANK(x) ((x) & 0x18) 1169#define FREG_IDX(x) ((x) & 7) 1170 1171u32 vfp_single_cpdo(u32 inst, u32 fpscr) 1172{ 1173 u32 op = inst & FOP_MASK; 1174 u32 exceptions = 0; 1175 unsigned int dest; 1176 unsigned int sn = vfp_get_sn(inst); 1177 unsigned int sm = vfp_get_sm(inst); 1178 unsigned int vecitr, veclen, vecstride; 1179 struct op *fop; 1180 1181 vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK); 1182 1183 fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)]; 1184 1185 /* 1186 * fcvtsd takes a dN register number as destination, not sN. 1187 * Technically, if bit 0 of dd is set, this is an invalid 1188 * instruction. However, we ignore this for efficiency. 1189 * It also only operates on scalars. 1190 */ 1191 if (fop->flags & OP_DD) 1192 dest = vfp_get_dd(inst); 1193 else 1194 dest = vfp_get_sd(inst); 1195 1196 /* 1197 * If destination bank is zero, vector length is always '1'. 1198 * ARM DDI0100F C5.1.3, C5.3.2. 1199 */ 1200 if ((fop->flags & OP_SCALAR) || FREG_BANK(dest) == 0) 1201 veclen = 0; 1202 else 1203 veclen = fpscr & FPSCR_LENGTH_MASK; 1204 1205 pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride, 1206 (veclen >> FPSCR_LENGTH_BIT) + 1); 1207 1208 if (!fop->fn) 1209 goto invalid; 1210 1211 for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) { 1212 s32 m = vfp_get_float(sm); 1213 u32 except; 1214 char type; 1215 1216 type = fop->flags & OP_DD ? 
'd' : 's'; 1217 if (op == FOP_EXT) 1218 pr_debug("VFP: itr%d (%c%u) = op[%u] (s%u=%08x)\n", 1219 vecitr >> FPSCR_LENGTH_BIT, type, dest, sn, 1220 sm, m); 1221 else 1222 pr_debug("VFP: itr%d (%c%u) = (s%u) op[%u] (s%u=%08x)\n", 1223 vecitr >> FPSCR_LENGTH_BIT, type, dest, sn, 1224 FOP_TO_IDX(op), sm, m); 1225 1226 except = fop->fn(dest, sn, m, fpscr); 1227 pr_debug("VFP: itr%d: exceptions=%08x\n", 1228 vecitr >> FPSCR_LENGTH_BIT, except); 1229 1230 exceptions |= except; 1231 1232 /* 1233 * CHECK: It appears to be undefined whether we stop when 1234 * we encounter an exception. We continue. 1235 */ 1236 dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 7); 1237 sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7); 1238 if (FREG_BANK(sm) != 0) 1239 sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7); 1240 } 1241 return exceptions; 1242 1243 invalid: 1244 return (u32)-1; 1245} 1246