1/*  $OpenBSD: fenv.c,v 1.3 2012/12/05 23:20:02 deraadt Exp $  */
2/*  $NetBSD: fenv.c,v 1.1 2010/07/31 21:47:53 joerg Exp $ */
3
4/*-
5 * Copyright (c) 2004-2005 David Schultz <das (at) FreeBSD.ORG>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <fenv.h>
31#include <machine/fpu.h>
32
/*
 * Distance, in bits, between the exception flags in MXCSR and the
 * corresponding exception mask bits: FE_ALL_EXCEPT << SSE_MASK_SHIFT selects
 * the MXCSR mask field, and mxcsr >> SSE_MASK_SHIFT lines the SSE masks up
 * with the mask bits of the x87 control word.
 */
#define SSE_MASK_SHIFT 7

/*
 * The following symbol is simply the bitwise-inclusive OR of all floating-point
 * rounding direction constants provided by <fenv.h>.  It selects the rounding
 * control bits of the x87 control word.
 */
#define X87_ROUND_MASK  (FE_TONEAREST | FE_DOWNWARD | FE_UPWARD | FE_TOWARDZERO)
/*
 * Left shift that moves an x87 rounding direction value (or X87_ROUND_MASK)
 * to the position of the rounding control field in MXCSR.
 */
#define SSE_ROUND_SHIFT 3
41
/*
 * The following object holds the default floating-point environment
 * (that is, the one installed at program startup).
 *
 * NOTE(review): FE_DFL_ENV in <fenv.h> presumably expands to the address of
 * this object, giving a pointer to const-qualified fenv_t -- confirm against
 * the header.
 *
 * Such a pointer can be used as an argument to the functions within the
 * <fenv.h> header that manage the floating-point environment, namely
 * fesetenv() and feupdateenv().
 *
 * x87 fpu registers are 16 bits wide. The upper bits, 31-16, of each saved
 * word are marked as RESERVED, which is why they are filled with ones below.
 */
const fenv_t __fe_dfl_env = {
  {
    0xffff0000 | __INITIAL_NPXCW__, /* Control word register */
    0xffff0000,                     /* Status word register */
    0xffffffff,                     /* Tag word register */
    {
      /*
       * Remaining words of the x87 environment image.
       * NOTE(review): presumably the instruction/operand pointer
       * fields -- confirm against the fenv_t definition.
       */
      0x00000000,
      0x00000000,
      0x00000000,
      0xffff0000
    }
  },
  __INITIAL_MXCSR__                 /* MXCSR register */
};
68
69
70/*
71 * The feclearexcept() function clears the supported floating-point exceptions
72 * represented by `excepts'.
73 */
74int
75feclearexcept(int excepts)
76{
77  fenv_t fenv;
78  unsigned int mxcsr;
79
80  excepts &= FE_ALL_EXCEPT;
81
82  /* Store the current x87 floating-point environment */
83  __asm__ __volatile__ ("fnstenv %0" : "=m" (fenv));
84
85  /* Clear the requested floating-point exceptions */
86  fenv.__x87.__status &= ~excepts;
87
88  /* Load the x87 floating-point environent */
89  __asm__ __volatile__ ("fldenv %0" : : "m" (fenv));
90
91  /* Same for SSE environment */
92  __asm__ __volatile__ ("stmxcsr %0" : "=m" (mxcsr));
93  mxcsr &= ~excepts;
94  __asm__ __volatile__ ("ldmxcsr %0" : : "m" (mxcsr));
95
96  return (0);
97}
98
/*
 * fegetexceptflag() writes to `*flagp' the current state of the
 * floating-point status flags selected by `excepts'.
 *
 * A flag is reported as set when it is set in either the x87 status word
 * or the SSE MXCSR register.  Bits of `excepts' outside FE_ALL_EXCEPT are
 * ignored.  Always returns 0.
 */
int
fegetexceptflag(fexcept_t *flagp, int excepts)
{
  unsigned short x87sw;
  unsigned int csr;

  /* Snapshot the status of both units */
  __asm__ __volatile__ ("fnstsw %[sw]" : [sw] "=am" (x87sw));
  __asm__ __volatile__ ("stmxcsr %[csr]" : [csr] "=m" (csr));

  /* Merge them and keep only the supported flags the caller asked for */
  *flagp = (x87sw | csr) & (excepts & FE_ALL_EXCEPT);

  return (0);
}
123
/*
 * The feraiseexcept() function raises the supported floating-point exceptions
 * represented by the argument `excepts'.
 *
 * The standard explicitly allows us to execute an instruction that has the
 * exception as a side effect, but we choose to manipulate the status register
 * directly, by way of fesetexceptflag().
 *
 * Bits of `excepts' outside FE_ALL_EXCEPT are ignored.  Always returns 0.
 */
int
feraiseexcept(int excepts)
{
  fexcept_t flags;

  excepts &= FE_ALL_EXCEPT;

  /*
   * Hand fesetexceptflag() a real fexcept_t object instead of a casted
   * pointer to the int argument: that stays correct whatever the width of
   * fexcept_t is and does not rely on type punning through a pointer cast.
   * The masked value only contains exception flag bits.
   */
  flags = (fexcept_t)excepts;
  fesetexceptflag(&flags, excepts);

  /*
   * NOTE(review): fwait is presumably here so that the x87 unit notices the
   * freshly set flags and delivers any exception that is currently
   * unmasked -- confirm against the Intel SDM.
   */
  __asm__ __volatile__ ("fwait");

  return (0);
}
144
145/*
146 * This function sets the floating-point status flags indicated by the argument
147 * `excepts' to the states stored in the object pointed to by `flagp'. It does
148 * NOT raise any floating-point exceptions, but only sets the state of the flags.
149 */
150int
151fesetexceptflag(const fexcept_t *flagp, int excepts)
152{
153  fenv_t fenv;
154  unsigned int mxcsr;
155
156  excepts &= FE_ALL_EXCEPT;
157
158  /* Store the current x87 floating-point environment */
159  __asm__ __volatile__ ("fnstenv %0" : "=m" (fenv));
160
161  /* Set the requested status flags */
162  fenv.__x87.__status &= ~excepts;
163  fenv.__x87.__status |= *flagp & excepts;
164
165  /* Load the x87 floating-point environent */
166  __asm__ __volatile__ ("fldenv %0" : : "m" (fenv));
167
168  /* Same for SSE environment */
169  __asm__ __volatile__ ("stmxcsr %0" : "=m" (mxcsr));
170  mxcsr &= ~excepts;
171  mxcsr |= *flagp & excepts;
172  __asm__ __volatile__ ("ldmxcsr %0" : : "m" (mxcsr));
173
174  return (0);
175}
176
/*
 * fetestexcept() reports which of the floating-point exception flags
 * selected by `excepts' are currently set.
 *
 * A flag counts as set when it is set in either the x87 status word or the
 * SSE MXCSR register.  The return value is the bitwise OR of the selected
 * flags that are set; bits outside FE_ALL_EXCEPT are never reported.
 */
int
fetestexcept(int excepts)
{
  unsigned short x87sw;
  unsigned int csr;

  /* Snapshot the status of both units */
  __asm__ __volatile__ ("fnstsw %[sw]" : [sw] "=am" (x87sw));
  __asm__ __volatile__ ("stmxcsr %[csr]" : [csr] "=m" (csr));

  return ((x87sw | csr) & (excepts & FE_ALL_EXCEPT));
}
198
199/*
200 * The fegetround() function gets the current rounding direction.
201 */
202int
203fegetround(void)
204{
205  unsigned short control;
206
207  /*
208   * We assume that the x87 and the SSE unit agree on the
209   * rounding mode.  Reading the control word on the x87 turns
210   * out to be about 5 times faster than reading it on the SSE
211   * unit on an Opteron 244.
212   */
213  __asm__ __volatile__ ("fnstcw %0" : "=m" (control));
214
215  return (control & X87_ROUND_MASK);
216}
217
218/*
219 * The fesetround() function establishes the rounding direction represented by
220 * its argument `round'. If the argument is not equal to the value of a rounding
221 * direction macro, the rounding direction is not changed.
222 */
223int
224fesetround(int round)
225{
226  unsigned short control;
227  unsigned int mxcsr;
228
229  /* Check whether requested rounding direction is supported */
230  if (round & ~X87_ROUND_MASK)
231    return (-1);
232
233  /* Store the current x87 control word register */
234  __asm__ __volatile__ ("fnstcw %0" : "=m" (control));
235
236  /* Set the rounding direction */
237  control &= ~X87_ROUND_MASK;
238  control |= round;
239
240  /* Load the x87 control word register */
241  __asm__ __volatile__ ("fldcw %0" : : "m" (control));
242
243  /* Same for the SSE environment */
244  __asm__ __volatile__ ("stmxcsr %0" : "=m" (mxcsr));
245  mxcsr &= ~(X87_ROUND_MASK << SSE_ROUND_SHIFT);
246  mxcsr |= round << SSE_ROUND_SHIFT;
247  __asm__ __volatile__ ("ldmxcsr %0" : : "m" (mxcsr));
248
249  return (0);
250}
251
252/*
253 * The fegetenv() function attempts to store the current floating-point
254 * environment in the object pointed to by envp.
255 */
256int
257fegetenv(fenv_t *envp)
258{
259  /* Store the current x87 floating-point environment */
260  __asm__ __volatile__ ("fnstenv %0" : "=m" (*envp));
261
262  /* Store the MXCSR register state */
263  __asm__ __volatile__ ("stmxcsr %0" : "=m" (envp->__mxcsr));
264
265  /*
266   * When an FNSTENV instruction is executed, all pending exceptions are
267   * essentially lost (either the x87 FPU status register is cleared or
268   * all exceptions are masked).
269   *
270   * 8.6 X87 FPU EXCEPTION SYNCHRONIZATION -
271   * Intel(R) 64 and IA-32 Architectures Softare Developer's Manual - Vol1
272   */
273  __asm__ __volatile__ ("fldcw %0" : : "m" (envp->__x87.__control));
274
275  return (0);
276}
277
278/*
279 * The feholdexcept() function saves the current floating-point environment
280 * in the object pointed to by envp, clears the floating-point status flags, and
281 * then installs a non-stop (continue on floating-point exceptions) mode, if
282 * available, for all floating-point exceptions.
283 */
284int
285feholdexcept(fenv_t *envp)
286{
287  unsigned int mxcsr;
288
289  /* Store the current x87 floating-point environment */
290  __asm__ __volatile__ ("fnstenv %0" : "=m" (*envp));
291
292  /* Clear all exception flags in FPU */
293  __asm__ __volatile__ ("fnclex");
294
295  /* Store the MXCSR register state */
296  __asm__ __volatile__ ("stmxcsr %0" : "=m" (envp->__mxcsr));
297
298  /* Clear exception flags in MXCSR */
299  mxcsr = envp->__mxcsr;
300  mxcsr &= ~FE_ALL_EXCEPT;
301
302  /* Mask all exceptions */
303  mxcsr |= FE_ALL_EXCEPT << SSE_MASK_SHIFT;
304
305  /* Store the MXCSR register */
306  __asm__ __volatile__ ("ldmxcsr %0" : : "m" (mxcsr));
307
308  return (0);
309}
310
/*
 * fesetenv() installs the floating-point environment held in the object
 * pointed to by `envp'.  `envp' must point to an object filled in by
 * fegetenv() or feholdexcept(), or be equal to a floating-point environment
 * macro.
 *
 * The function is meant to only install the state of the status flags found
 * in `*envp', not to raise the corresponding exceptions.  Always returns 0.
 */
int
fesetenv(const fenv_t *envp)
{
  /* x87 side: load the saved environment */
  __asm__ __volatile__ ("fldenv %[env]" : : [env] "m" (*envp));

  /* SSE side: load the saved MXCSR register */
  __asm__ __volatile__ ("ldmxcsr %[csr]" : : [csr] "m" (envp->__mxcsr));

  return (0);
}
330
/*
 * feupdateenv() remembers the currently raised floating-point exceptions,
 * installs the floating-point environment pointed to by `envp' and then
 * raises the remembered exceptions again.  `envp' must point to an object
 * filled in by feholdexcept() or fegetenv(), or be equal to a floating-point
 * environment macro.  Always returns 0.
 */
int
feupdateenv(const fenv_t *envp)
{
  unsigned short x87sw;
  unsigned int csr;

  /* Remember what is raised right now in the x87 and SSE units */
  __asm__ __volatile__ ("fnstsw %[sw]" : [sw] "=am" (x87sw));
  __asm__ __volatile__ ("stmxcsr %[csr]" : [csr] "=m" (csr));

  /* Switch to the caller's environment ... */
  fesetenv(envp);

  /*
   * ... and re-raise what had accumulated before the switch.
   * feraiseexcept() discards the bits that are not exception flags.
   */
  feraiseexcept(x87sw | csr);

  return (0);
}
359
/*
 * The following functions are extensions to the standard
 */
363int
364feenableexcept(int mask)
365{
366  unsigned int mxcsr, omask;
367  unsigned short control;
368
369  mask &= FE_ALL_EXCEPT;
370
371  __asm__ __volatile__ ("fnstcw %0" : "=m" (control));
372  __asm__ __volatile__ ("stmxcsr %0" : "=m" (mxcsr));
373
374  omask = ~(control | (mxcsr >> SSE_MASK_SHIFT)) & FE_ALL_EXCEPT;
375  control &= ~mask;
376  __asm__ __volatile__ ("fldcw %0" : : "m" (control));
377
378  mxcsr &= ~(mask << SSE_MASK_SHIFT);
379  __asm__ __volatile__ ("ldmxcsr %0" : : "m" (mxcsr));
380
381  return (omask);
382}
383
384int
385fedisableexcept(int mask)
386{
387  unsigned int mxcsr, omask;
388  unsigned short control;
389
390  mask &= FE_ALL_EXCEPT;
391
392  __asm__ __volatile__ ("fnstcw %0" : "=m" (control));
393  __asm__ __volatile__ ("stmxcsr %0" : "=m" (mxcsr));
394
395  omask = ~(control | (mxcsr >> SSE_MASK_SHIFT)) & FE_ALL_EXCEPT;
396  control |= mask;
397  __asm__ __volatile__ ("fldcw %0" : : "m" (control));
398
399  mxcsr |= mask << SSE_MASK_SHIFT;
400  __asm__ __volatile__ ("ldmxcsr %0" : : "m" (mxcsr));
401
402  return (omask);
403}
404
/*
 * fegetexcept() returns the set of floating-point exceptions that are
 * currently unmasked (enabled), as a bitwise OR of exception flags.
 */
int
fegetexcept(void)
{
  unsigned short cw;

  /*
   * Only the x87 control word is consulted; the SSE unit is assumed to
   * carry the same masks.
   */
  __asm__ __volatile__ ("fnstcw %[cw]" : [cw] "=m" (cw));

  /* A cleared mask bit means enabled, so flip the mask bits */
  return ((cw & FE_ALL_EXCEPT) ^ FE_ALL_EXCEPT);
}
418