1436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 2436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov/*---------------------------------------------------------------*/ 3436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov/*--- begin host_generic_maddf.c ---*/ 4436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov/*---------------------------------------------------------------*/ 5436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 6436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov/* 7436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov Compute x * y + z as ternary operation. 8436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov Copyright (C) 2010-2013 Free Software Foundation, Inc. 9436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov This file is part of the GNU C Library. 10436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov Contributed by Jakub Jelinek <jakub@redhat.com>, 2010. 11436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 12436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov The GNU C Library is free software; you can redistribute it and/or 13436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov modify it under the terms of the GNU Lesser General Public 14436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov License as published by the Free Software Foundation; either 15436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov version 2.1 of the License, or (at your option) any later version. 16436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 17436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov The GNU C Library is distributed in the hope that it will be useful, 18436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov but WITHOUT ANY WARRANTY; without even the implied warranty of 19436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov Lesser General Public License for more details. 21436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 22436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov You should have received a copy of the GNU Lesser General Public 23436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov License along with the GNU C Library; if not, see 24436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov <http://www.gnu.org/licenses/>. 25436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov*/ 26436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 27436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov/* Generic helper functions for doing FMA, i.e. compute x * y + z 28436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov as ternary operation. 29436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov These are purely back-end entities and cannot be seen/referenced 30436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov from IR. */ 31436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 32436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#include "libvex_basictypes.h" 33436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#include "host_generic_maddf.h" 34436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#include "main_util.h" 35436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 36436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov/* This implementation relies on Double being more than twice as 37436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov precise as Float and uses rounding to odd in order to avoid problems 38436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov with double rounding. 39436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov See a paper by Boldo and Melquiond: 40436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov http://www.lri.fr/~melquion/doc/08-tc.pdf */ 41436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 42436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#define FORCE_EVAL(X) __asm __volatile__ ("" : : "m" (X)) 43436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 44436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#if defined(__x86_64__) && defined(__SSE2_MATH__) 45436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov# define ENV_TYPE unsigned int 46436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov/* Save current rounding mode into ENV, hold exceptions, set rounding 47436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov mode to rounding toward zero. */ 48436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov# define ROUNDTOZERO(env) \ 49436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov do { \ 50436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov unsigned int mxcsr; \ 51436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("stmxcsr %0" : "=m" (mxcsr)); \ 52436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov (env) = mxcsr; \ 53436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov mxcsr = (mxcsr | 0x7f80) & ~0x3f; \ 54436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("ldmxcsr %0" : : "m" (mxcsr));\ 55436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } while (0) 56436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov/* Restore exceptions from ENV, return if inexact exception has been raised 57436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov since ROUNDTOZERO. */ 58436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov# define RESET_TESTINEXACT(env) \ 59436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov ({ \ 60436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov unsigned int mxcsr, ret; \ 61436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("stmxcsr %0" : "=m" (mxcsr)); \ 62436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov ret = (mxcsr >> 5) & 1; \ 63436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov mxcsr = (mxcsr & 0x3d) | (env); \ 64436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("ldmxcsr %0" : : "m" (mxcsr));\ 65436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov ret; \ 66436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov }) 67436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov/* Return if inexact exception has been raised since ROUNDTOZERO. */ 68436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov# define TESTINEXACT() \ 69436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov ({ \ 70436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov unsigned int mxcsr; \ 71436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov __asm __volatile__ ("stmxcsr %0" : "=m" (mxcsr)); \ 72436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov (mxcsr >> 5) & 1; \ 73436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov }) 74436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#endif 75436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 76436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#define DBL_MANT_DIG 53 77436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#define IEEE754_DOUBLE_BIAS 0x3ff 78436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 79436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanovunion vg_ieee754_double { 80436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov Double d; 81436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 82436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* This is the IEEE 754 double-precision format. */ 83436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov struct { 84436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#ifdef VKI_BIG_ENDIAN 85436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov unsigned int negative:1; 86436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov unsigned int exponent:11; 87436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov unsigned int mantissa0:20; 88436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov unsigned int mantissa1:32; 89436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#else 90436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov unsigned int mantissa1:32; 91436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov unsigned int mantissa0:20; 92436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov unsigned int exponent:11; 93436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov unsigned int negative:1; 94436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#endif 95436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } ieee; 96436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov}; 97436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 98436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanovvoid VEX_REGPARM(3) 99436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov h_generic_calc_MAddF32 ( /*OUT*/Float* res, 100436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov Float* argX, Float* argY, Float* argZ ) 101436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov{ 102436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#ifndef ENV_TYPE 103436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* Lame fallback implementation. */ 104436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov *res = *argX * *argY + *argZ; 105436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#else 106436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov ENV_TYPE env; 107436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* Multiplication is always exact. */ 108436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov Double temp = (Double) *argX * (Double) *argY; 109436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov union vg_ieee754_double u; 110436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 111436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov ROUNDTOZERO (env); 112436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 113436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* Perform addition with round to odd. */ 114436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov u.d = temp + (Double) *argZ; 115436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* Ensure the addition is not scheduled after fetestexcept call. */ 116436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov FORCE_EVAL (u.d); 117436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 118436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* Reset rounding mode and test for inexact simultaneously. */ 119436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int j = RESET_TESTINEXACT (env); 120436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 121436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff) 122436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov u.ieee.mantissa1 |= j; 123436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 124436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* And finally truncation with round to nearest. */ 125436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov *res = (Float) u.d; 126436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#endif 127436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov} 128436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 129436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 130436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanovvoid VEX_REGPARM(3) 131436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov h_generic_calc_MAddF64 ( /*OUT*/Double* res, 132436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov Double* argX, Double* argY, Double* argZ ) 133436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov{ 134436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#ifndef ENV_TYPE 135436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* Lame fallback implementation. */ 136436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov *res = *argX * *argY + *argZ; 137436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#else 138436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov Double x = *argX, y = *argY, z = *argZ; 139436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov union vg_ieee754_double u, v, w; 140436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int adjust = 0; 141436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov u.d = x; 142436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov v.d = y; 143436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov w.d = z; 144436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (UNLIKELY (u.ieee.exponent + v.ieee.exponent 145436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov >= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG) 146436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov || UNLIKELY (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG) 147436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov || UNLIKELY (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG) 148436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov || UNLIKELY (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG) 149436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov || UNLIKELY (u.ieee.exponent + v.ieee.exponent 150436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG)) { 151436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* If z is Inf, but x and y are finite, the result should be 152436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov z rather than NaN. */ 153436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (w.ieee.exponent == 0x7ff 154436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov && u.ieee.exponent != 0x7ff 155436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov && v.ieee.exponent != 0x7ff) { 156436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov *res = (z + x) + y; 157436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov return; 158436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 159436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* If x or y or z is Inf/NaN, or if fma will certainly overflow, 160436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov or if x * y is less than half of DBL_DENORM_MIN, 161436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov compute as x * y + z. */ 162436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (u.ieee.exponent == 0x7ff 163436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov || v.ieee.exponent == 0x7ff 164436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov || w.ieee.exponent == 0x7ff 165436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov || u.ieee.exponent + v.ieee.exponent > 0x7ff + IEEE754_DOUBLE_BIAS 166436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov || u.ieee.exponent + v.ieee.exponent 167436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov < IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - 2) { 168436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov *res = x * y + z; 169436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov return; 170436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 171436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (u.ieee.exponent + v.ieee.exponent 172436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov >= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG) { 173436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* Compute 1p-53 times smaller result and multiply 174436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov at the end. */ 175436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (u.ieee.exponent > v.ieee.exponent) 176436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov u.ieee.exponent -= DBL_MANT_DIG; 177436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov else 178436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov v.ieee.exponent -= DBL_MANT_DIG; 179436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* If x + y exponent is very large and z exponent is very small, 180436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov it doesn't matter if we don't adjust it. */ 181436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (w.ieee.exponent > DBL_MANT_DIG) 182436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov w.ieee.exponent -= DBL_MANT_DIG; 183436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov adjust = 1; 184436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } else if (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG) { 185436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* Similarly. 186436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov If z exponent is very large and x and y exponents are 187436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov very small, it doesn't matter if we don't adjust it. */ 188436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (u.ieee.exponent > v.ieee.exponent) { 189436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (u.ieee.exponent > DBL_MANT_DIG) 190436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov u.ieee.exponent -= DBL_MANT_DIG; 191436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } else if (v.ieee.exponent > DBL_MANT_DIG) 192436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov v.ieee.exponent -= DBL_MANT_DIG; 193436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov w.ieee.exponent -= DBL_MANT_DIG; 194436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov adjust = 1; 195436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } else if (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG) { 196436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov u.ieee.exponent -= DBL_MANT_DIG; 197436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (v.ieee.exponent) 198436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov v.ieee.exponent += DBL_MANT_DIG; 199436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov else 200436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov v.d *= 0x1p53; 201436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } else if (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG) { 202436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov v.ieee.exponent -= DBL_MANT_DIG; 203436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (u.ieee.exponent) 204436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov u.ieee.exponent += DBL_MANT_DIG; 205436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov else 206436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov u.d *= 0x1p53; 207436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } else /* if (u.ieee.exponent + v.ieee.exponent 208436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) */ { 209436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (u.ieee.exponent > v.ieee.exponent) 210436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov u.ieee.exponent += 2 * DBL_MANT_DIG; 211436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov else 212436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov v.ieee.exponent += 2 * DBL_MANT_DIG; 213436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 4) { 214436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (w.ieee.exponent) 215436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov w.ieee.exponent += 2 * DBL_MANT_DIG; 216436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov else 217436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov w.d *= 0x1p106; 218436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov adjust = -1; 219436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 220436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* Otherwise x * y should just affect inexact 221436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov and nothing else. */ 222436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 223436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov x = u.d; 224436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov y = v.d; 225436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov z = w.d; 226436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 227436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ 228436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov# define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1) 229436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov Double x1 = x * C; 230436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov Double y1 = y * C; 231436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov Double m1 = x * y; 232436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov x1 = (x - x1) + x1; 233436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov y1 = (y - y1) + y1; 234436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov Double x2 = x - x1; 235436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov Double y2 = y - y1; 236436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov Double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2; 237436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov# undef C 238436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 239436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* Addition a1 + a2 = z + m1 using Knuth's algorithm. */ 240436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov Double a1 = z + m1; 241436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov Double t1 = a1 - z; 242436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov Double t2 = a1 - t1; 243436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov t1 = m1 - t1; 244436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov t2 = z - t2; 245436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov Double a2 = t1 + t2; 246436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 247436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov ENV_TYPE env; 248436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov ROUNDTOZERO (env); 249436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 250436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* Perform m2 + a2 addition with round to odd. */ 251436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov u.d = a2 + m2; 252436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 253436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (UNLIKELY (adjust < 0)) { 254436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if ((u.ieee.mantissa1 & 1) == 0) 255436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov u.ieee.mantissa1 |= TESTINEXACT (); 256436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov v.d = a1 + u.d; 257436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* Ensure the addition is not scheduled after fetestexcept call. */ 258436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov FORCE_EVAL (v.d); 259436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 260436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 261436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* Reset rounding mode and test for inexact simultaneously. */ 262436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov int j = RESET_TESTINEXACT (env) != 0; 263436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 264436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (LIKELY (adjust == 0)) { 265436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff) 266436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov u.ieee.mantissa1 |= j; 267436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* Result is a1 + u.d. */ 268436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov *res = a1 + u.d; 269436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } else if (LIKELY (adjust > 0)) { 270436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff) 271436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov u.ieee.mantissa1 |= j; 272436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* Result is a1 + u.d, scaled up. */ 273436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov *res = (a1 + u.d) * 0x1p53; 274436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } else { 275436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* If a1 + u.d is exact, the only rounding happens during 276436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov scaling down. */ 277436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (j == 0) { 278436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov *res = v.d * 0x1p-106; 279436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov return; 280436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 281436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* If result rounded to zero is not subnormal, no double 282436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov rounding will occur. */ 283436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (v.ieee.exponent > 106) { 284436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov *res = (a1 + u.d) * 0x1p-106; 285436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov return; 286436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 287436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* If v.d * 0x1p-106 with round to zero is a subnormal above 288436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov or equal to DBL_MIN / 2, then v.d * 0x1p-106 shifts mantissa 289436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov down just by 1 bit, which means v.ieee.mantissa1 |= j would 290436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov change the round bit, not sticky or guard bit. 291436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov v.d * 0x1p-106 never normalizes by shifting up, 292436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov so round bit plus sticky bit should be already enough 293436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov for proper rounding. */ 294436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (v.ieee.exponent == 106) { 295436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding, 296436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov v.ieee.mantissa1 & 1 is the round bit and j is our sticky 297436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov bit. In round-to-nearest 001 rounds down like 00, 298436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 011 rounds up, even though 01 rounds down (thus we need 299436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov to adjust), 101 rounds down like 10 and 111 rounds up 300436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov like 11. */ 301436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if ((v.ieee.mantissa1 & 3) == 1) { 302436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov v.d *= 0x1p-106; 303436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov if (v.ieee.negative) 304436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov *res = v.d - 0x1p-1074; 305436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov else 306436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov *res = v.d + 0x1p-1074; 307436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } else 308436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov *res = v.d * 0x1p-106; 309436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov return; 310436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 311436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov v.ieee.mantissa1 |= j; 312436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov *res = v.d * 0x1p-106; 313436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov return; 314436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov } 315436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#endif 316436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov} 317436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov 318436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov/*---------------------------------------------------------------*/ 319436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov/*--- end host_generic_maddf.c --*/ 320436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov/*---------------------------------------------------------------*/ 321