1/*-
2 * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: src/lib/msun/i387/fenv.c,v 1.2 2005/03/17 22:21:46 das Exp $
27 */
28
29#include <sys/cdefs.h>
30#include <sys/types.h>
31#include "npx.h"
32#include "fenv.h"
33
/* Union of the four rounding-mode constants; isolates the rounding bits. */
#define ROUND_MASK   (FE_TOWARDZERO | FE_UPWARD | FE_DOWNWARD | FE_TONEAREST)

/*
 * Relative to the x87 control word, the SSE unit's control word keeps
 * its rounding control bits 3 positions higher and its exception mask
 * bits 7 positions higher.
 */
#define _SSE_ROUND_SHIFT 3
#define _SSE_EMASK_SHIFT 7
43
44const fenv_t __fe_dfl_env = {
45  __INITIAL_NPXCW__, /*__control*/
46  0x0000,            /*__mxcsr_hi*/
47  0x0000,            /*__status*/
48  0x1f80,            /*__mxcsr_lo*/
49  0xffffffff,        /*__tag*/
50  { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
51    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff } /*__other*/
52};
53
/*
 * One-instruction wrappers around the x87 and SSE control/status
 * instructions.  The "load" wrappers (fld*, ldmxcsr) take the object to
 * load from; the "store" wrappers (fnst*, stmxcsr) take a pointer to the
 * object to store into.
 */

/* Loads. */
#define __fldcw(cw_)      __asm __volatile("fldcw %0" : : "m" (cw_))
#define __fldenv(env_)    __asm __volatile("fldenv %0" : : "m" (env_))
/* Same instruction as __fldenv(), but declares st..st(7) as clobbered. */
#define __fldenvx(env_)   __asm __volatile("fldenv %0" : : "m" (env_)        \
                          : "st", "st(1)", "st(2)", "st(3)",                 \
                            "st(4)", "st(5)", "st(6)", "st(7)")
#define __ldmxcsr(csr_)   __asm __volatile("ldmxcsr %0" : : "m" (csr_))

/* Stores. */
#define __fnstenv(envp_)  __asm __volatile("fnstenv %0" : "=m" (*(envp_)))
#define __fnstcw(cwp_)    __asm __volatile("fnstcw %0" : "=m" (*(cwp_)))
#define __fnstsw(swp_)    __asm __volatile("fnstsw %0" : "=am" (*(swp_)))
#define __stmxcsr(csrp_)  __asm __volatile("stmxcsr %0" : "=m" (*(csrp_)))

/* No operands. */
#define __fnclex()        __asm __volatile("fnclex")
#define __fwait()         __asm __volatile("fwait")
66
/*
 * SSE availability.  When the compiler already targets SSE (__SSE__ is
 * defined) the answer is a compile-time constant.  Otherwise the first
 * __HAS_SSE() call probes the CPU through __test_sse(), which records
 * the outcome in __has_sse so that later calls only read the cache.
 */
enum __sse_support { __SSE_YES, __SSE_NO, __SSE_UNK };

#ifdef __SSE__
#define __HAS_SSE()     1
enum __sse_support __has_sse = __SSE_YES;
#else
#define __HAS_SSE()     (__has_sse == __SSE_YES ||                      \
                        (__has_sse == __SSE_UNK && __test_sse()))
enum __sse_support __has_sse = __SSE_UNK;
#endif
82
#ifndef __SSE__
/* Read the CPU flags register into *(fp_). */
#define getfl(fp_)    __asm __volatile("pushfl\n\tpopl %0" : "=mr" (*(fp_)))
/* Load the CPU flags register from f_. */
#define setfl(f_)     __asm __volatile("pushl %0\n\tpopfl" : : "g" (f_))
/* Run CPUID with %eax = 1 and store the resulting %edx in *(dp_). */
#define cpuid_dx(dp_) __asm __volatile("pushl %%ebx\n\tmovl $1, %%eax\n\t"  \
                      "cpuid\n\tpopl %%ebx"          \
                      : "=d" (*(dp_)) : : "eax", "ecx")

/*
 * Probe the running processor for SSE and cache the verdict in
 * __has_sse.  The probe is required because ldmxcsr/stmxcsr must be
 * used to get correct results whenever any part of the program was
 * built to use SSE floating-point, yet those instructions cannot be
 * used on older processors.
 *
 * Returns 1 when SSE is available and 0 otherwise.
 */
int
__test_sse(void)
{
  int original, toggled, readback;
  int edx;

  /*
   * Try to flip bit 0x200000 of the flags register.  If the bit does
   * not change, the CPU is treated as a 486 and CPUID is not attempted.
   */
  getfl(&original);
  toggled = original ^ 0x200000;
  setfl(toggled);
  getfl(&readback);
  if (original == readback) {
    __has_sse = __SSE_NO;
    return (0);
  }

  /* Not a 486, so CPUID should work; bit 0x2000000 of %edx means SSE. */
  cpuid_dx(&edx);
  if ((edx & 0x2000000) == 0) {
    __has_sse = __SSE_NO;
    return (0);
  }

  __has_sse = __SSE_YES;
  return (1);
}
#endif /* !__SSE__ */
119
120int
121fesetexceptflag(const fexcept_t *flagp, int excepts)
122{
123  fenv_t env;
124  __uint32_t mxcsr;
125
126  excepts &= FE_ALL_EXCEPT;
127  if (excepts) { /* Do nothing if excepts is 0 */
128    __fnstenv(&env);
129    env.__status &= ~excepts;
130    env.__status |= *flagp & excepts;
131    __fnclex();
132    __fldenv(env);
133    if (__HAS_SSE()) {
134      __stmxcsr(&mxcsr);
135      mxcsr &= ~excepts;
136      mxcsr |= *flagp & excepts;
137      __ldmxcsr(mxcsr);
138    }
139  }
140
141  return (0);
142}
143
144int
145feraiseexcept(int excepts)
146{
147  fexcept_t ex = excepts;
148
149  fesetexceptflag(&ex, excepts);
150  __fwait();
151  return (0);
152}
153
154int
155fegetenv(fenv_t *envp)
156{
157  __uint32_t mxcsr;
158
159  __fnstenv(envp);
160  /*
161   * fnstenv masks all exceptions, so we need to restore
162   * the old control word to avoid this side effect.
163   */
164  __fldcw(envp->__control);
165  if (__HAS_SSE()) {
166    __stmxcsr(&mxcsr);
167    envp->__mxcsr_hi = mxcsr >> 16;
168    envp->__mxcsr_lo = mxcsr & 0xffff;
169  }
170  return (0);
171}
172
173int
174feholdexcept(fenv_t *envp)
175{
176  __uint32_t mxcsr;
177  fenv_t env;
178
179  __fnstenv(&env);
180  *envp = env;
181  env.__status &= ~FE_ALL_EXCEPT;
182  env.__control |= FE_ALL_EXCEPT;
183  __fnclex();
184  __fldenv(env);
185  if (__HAS_SSE()) {
186    __stmxcsr(&mxcsr);
187    envp->__mxcsr_hi = mxcsr >> 16;
188    envp->__mxcsr_lo = mxcsr & 0xffff;
189    mxcsr &= ~FE_ALL_EXCEPT;
190    mxcsr |= FE_ALL_EXCEPT << _SSE_EMASK_SHIFT;
191    __ldmxcsr(mxcsr);
192  }
193  return (0);
194}
195
196int
197feupdateenv(const fenv_t *envp)
198{
199  __uint32_t mxcsr;
200  __uint16_t status;
201
202  __fnstsw(&status);
203  if (__HAS_SSE()) {
204    __stmxcsr(&mxcsr);
205  } else {
206    mxcsr = 0;
207  }
208  fesetenv(envp);
209  feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT);
210  return (0);
211}
212
213int
214feenableexcept(int mask)
215{
216  __uint32_t mxcsr;
217  __uint16_t control, omask;
218
219  mask &= FE_ALL_EXCEPT;
220  __fnstcw(&control);
221  if (__HAS_SSE()) {
222    __stmxcsr(&mxcsr);
223  } else {
224    mxcsr = 0;
225  }
226  omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
227  if (mask) {
228    control &= ~mask;
229    __fldcw(control);
230    if (__HAS_SSE()) {
231      mxcsr &= ~(mask << _SSE_EMASK_SHIFT);
232      __ldmxcsr(mxcsr);
233    }
234  }
235  return (omask);
236}
237
238int
239fedisableexcept(int mask)
240{
241  __uint32_t mxcsr;
242  __uint16_t control, omask;
243
244  mask &= FE_ALL_EXCEPT;
245  __fnstcw(&control);
246  if (__HAS_SSE()) {
247    __stmxcsr(&mxcsr);
248  } else {
249    mxcsr = 0;
250  }
251  omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
252  if (mask) {
253    control |= mask;
254    __fldcw(control);
255    if (__HAS_SSE()) {
256      mxcsr |= mask << _SSE_EMASK_SHIFT;
257      __ldmxcsr(mxcsr);
258    }
259  }
260  return (omask);
261}
262
263int
264feclearexcept(int excepts)
265{
266  fenv_t env;
267  __uint32_t mxcsr;
268
269  excepts &= FE_ALL_EXCEPT;
270  if (excepts) { /* Do nothing if excepts is 0 */
271    __fnstenv(&env);
272    env.__status &= ~excepts;
273    __fnclex();
274    __fldenv(env);
275    if (__HAS_SSE()) {
276      __stmxcsr(&mxcsr);
277      mxcsr &= ~excepts;
278      __ldmxcsr(mxcsr);
279    }
280  }
281  return (0);
282}
283
284int
285fegetexceptflag(fexcept_t *flagp, int excepts)
286{
287  __uint32_t mxcsr;
288  __uint16_t status;
289
290  excepts &= FE_ALL_EXCEPT;
291  __fnstsw(&status);
292  if (__HAS_SSE()) {
293    __stmxcsr(&mxcsr);
294  } else {
295    mxcsr = 0;
296  }
297  *flagp = (status | mxcsr) & excepts;
298  return (0);
299}
300
301int
302fetestexcept(int excepts)
303{
304  __uint32_t mxcsr;
305  __uint16_t status;
306
307  excepts &= FE_ALL_EXCEPT;
308  if (excepts) { /* Do nothing if excepts is 0 */
309    __fnstsw(&status);
310    if (__HAS_SSE()) {
311      __stmxcsr(&mxcsr);
312    } else {
313      mxcsr = 0;
314    }
315    return ((status | mxcsr) & excepts);
316  }
317  return (0);
318}
319
320int
321fegetround(void)
322{
323  __uint16_t control;
324
325  /*
326   * We assume that the x87 and the SSE unit agree on the
327   * rounding mode.  Reading the control word on the x87 turns
328   * out to be about 5 times faster than reading it on the SSE
329   * unit on an Opteron 244.
330   */
331  __fnstcw(&control);
332  return (control & ROUND_MASK);
333}
334
335int
336fesetround(int round)
337{
338  __uint32_t mxcsr;
339  __uint16_t control;
340
341  if (round & ~ROUND_MASK) {
342    return (-1);
343  } else {
344    __fnstcw(&control);
345    control &= ~ROUND_MASK;
346    control |= round;
347    __fldcw(control);
348    if (__HAS_SSE()) {
349      __stmxcsr(&mxcsr);
350      mxcsr &= ~(ROUND_MASK << _SSE_ROUND_SHIFT);
351      mxcsr |= round << _SSE_ROUND_SHIFT;
352      __ldmxcsr(mxcsr);
353    }
354    return (0);
355  }
356}
357
358int
359fesetenv(const fenv_t *envp)
360{
361  fenv_t env = *envp;
362  __uint32_t mxcsr;
363
364  mxcsr = (env.__mxcsr_hi << 16) | (env.__mxcsr_lo);
365  env.__mxcsr_hi = 0xffff;
366  env.__mxcsr_lo = 0xffff;
367  /*
368   * XXX Using fldenvx() instead of fldenv() tells the compiler that this
369   * instruction clobbers the i387 register stack.  This happens because
370   * we restore the tag word from the saved environment.  Normally, this
371   * would happen anyway and we wouldn't care, because the ABI allows
372   * function calls to clobber the i387 regs.  However, fesetenv() is
373   * inlined, so we need to be more careful.
374   */
375  __fldenvx(env);
376  if (__HAS_SSE()) {
377    __ldmxcsr(mxcsr);
378  }
379  return (0);
380}
381
382int
383fegetexcept(void)
384{
385  __uint16_t control;
386
387  /*
388   * We assume that the masks for the x87 and the SSE unit are
389   * the same.
390   */
391  __fnstcw(&control);
392  return (~control & FE_ALL_EXCEPT);
393}
394