1/*
2 * Copyright (C) 2011, Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1.  Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2.  Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16 * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
17 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
19 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
20 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
22 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 */
24
25#ifndef DenormalDisabler_h
26#define DenormalDisabler_h
27
28#include "wtf/CPU.h"
29#include "wtf/MathExtras.h"
30#include <float.h>
31
32namespace blink {
33
34// Deal with denormals. They can very seriously impact performance on x86.
35
36// Define HAVE_DENORMAL if we support flushing denormals to zero.
37
38#if OS(WIN) && COMPILER(MSVC)
39// Windows compiled using MSVC with SSE2
40#define HAVE_DENORMAL 1
41#endif
42
43#if COMPILER(GCC) && (CPU(X86) || CPU(X86_64))
44// X86 chips can flush denormals
45#define HAVE_DENORMAL 1
46#endif
47
48#if CPU(ARM) || CPU(ARM64)
49#define HAVE_DENORMAL 1
50#endif
51
52#if HAVE(DENORMAL)
53class DenormalDisabler {
54public:
55    DenormalDisabler()
56            : m_savedCSR(0)
57    {
58        disableDenormals();
59    }
60
61    ~DenormalDisabler()
62    {
63        restoreState();
64    }
65
66    // This is a nop if we can flush denormals to zero in hardware.
67    static inline float flushDenormalFloatToZero(float f)
68    {
69        return f;
70    }
71private:
72    unsigned m_savedCSR;
73
74#if COMPILER(GCC) && (CPU(X86) || CPU(X86_64))
75    inline void disableDenormals()
76    {
77        m_savedCSR = getCSR();
78        setCSR(m_savedCSR | 0x8040);
79    }
80
81    inline void restoreState()
82    {
83        setCSR(m_savedCSR);
84    }
85
86    inline int getCSR()
87    {
88        int result;
89        asm volatile("stmxcsr %0" : "=m" (result));
90        return result;
91    }
92
93    inline void setCSR(int a)
94    {
95        int temp = a;
96        asm volatile("ldmxcsr %0" : : "m" (temp));
97    }
98
99#elif OS(WIN) && COMPILER(MSVC)
100    inline void disableDenormals()
101    {
102        // Save the current state, and set mode to flush denormals.
103        //
104        // http://stackoverflow.com/questions/637175/possible-bug-in-controlfp-s-may-not-restore-control-word-correctly
105        _controlfp_s(&m_savedCSR, 0, 0);
106        unsigned unused;
107        _controlfp_s(&unused, _DN_FLUSH, _MCW_DN);
108    }
109
110    inline void restoreState()
111    {
112        unsigned unused;
113        _controlfp_s(&unused, m_savedCSR, _MCW_DN);
114    }
115#elif CPU(ARM) || CPU(ARM64)
116    inline void disableDenormals()
117    {
118        m_savedCSR = getStatusWord();
119        // Bit 24 is the flush-to-zero mode control bit. Setting it to 1 flushes denormals to 0.
120        setStatusWord(m_savedCSR | (1 << 24));
121    }
122
123    inline void restoreState()
124    {
125        setStatusWord(m_savedCSR);
126    }
127
128    inline int getStatusWord()
129    {
130        int result;
131#if CPU(ARM64)
132        asm volatile("mrs %[result], FPCR" : [result] "=r" (result));
133#else
134        asm volatile("vmrs %[result], FPSCR" : [result] "=r" (result));
135#endif
136        return result;
137    }
138
139    inline void setStatusWord(int a)
140    {
141#if CPU(ARM64)
142        asm volatile("msr FPCR, %[src]" : : [src] "r" (a));
143#else
144        asm volatile("vmsr FPSCR, %[src]" : : [src] "r" (a));
145#endif
146    }
147
148#endif
149
150};
151
152#else
153// FIXME: add implementations for other architectures and compilers
// Software fallback used when HAVE_DENORMAL is not set: constructing it
// changes no floating-point state, so callers flush values one at a time
// through flushDenormalFloatToZero().
class DenormalDisabler {
public:
    DenormalDisabler() { }

    // Returns 0.0f when the magnitude of |f| is below FLT_MIN (the smallest
    // normalized float); otherwise returns |f| unchanged. This is the
    // worst-case path for architectures and compilers with no hardware
    // flush-to-zero support implemented in this header.
    static inline float flushDenormalFloatToZero(float f)
    {
        const bool belowSmallestNormal = fabs(f) < FLT_MIN;
        if (!belowSmallestNormal)
            return f;
        return 0.0f;
    }
};
165
166#endif
167
168} // namespace blink
169
170#undef HAVE_DENORMAL
171#endif // DenormalDisabler_h
172