DenormalDisabler.h (5008B)
/*
 * Copyright (C) 2011, Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef DenormalDisabler_h
#define DenormalDisabler_h

#include <float.h>

#include <cinttypes>
#include <cmath>
#include <cstring>

namespace WebCore {

// Deal with denormals. They can very seriously impact performance on x86.

// Define HAVE_DENORMAL if we support flushing denormals to zero.

#if defined(XP_WIN) && defined(_MSC_VER)
// Windows compiled using MSVC with SSE2
#  define HAVE_DENORMAL 1
#endif

#if defined(__GNUC__) && defined(__SSE__)
#  define HAVE_DENORMAL 1
#endif

#if defined(__arm__) || defined(__aarch64__)
#  define HAVE_DENORMAL 1
#endif

#ifdef HAVE_DENORMAL
// Scoped guard that switches the floating-point unit into flush-to-zero mode
// for as long as the object is alive.
//
// The constructor saves the current floating-point control state into
// m_savedCSR and enables flushing; the destructor writes the saved state back.
// Which control register is touched depends on the platform branch selected
// below (SSE MXCSR, the MSVC control word, or the ARM FPSCR/FPCR).
class DenormalDisabler {
 public:
  // Saves the current control state and turns on denormal flushing.
  DenormalDisabler() : m_savedCSR(0) { disableDenormals(); }

  // Restores the control state captured by the constructor.
  ~DenormalDisabler() { restoreState(); }

  // This is a nop if we can flush denormals to zero in hardware.
  static inline float flushDenormalFloatToZero(float f) { return f; }

 private:
  // Control/status register value captured by disableDenormals().
  unsigned m_savedCSR;

#  if defined(__GNUC__) && defined(__SSE__)
  // Reports whether the DAZ bit (0x0040) may be set in MXCSR.
  //
  // x86-64 always reports true. On other SSE targets the answer comes from
  // the MXCSR mask that FXSAVE stores at byte offset 28 of its save area; it
  // is computed on the first call and cached in function-local statics.
  // NOTE(review): the cache is a pair of plain statics with no
  // synchronization.
  static inline bool isDAZSupported() {
#    if defined(__x86_64__)
    return true;
#    else
    static bool s_isInited = false;
    static bool s_isSupported = false;
    if (s_isInited) {
      return s_isSupported;
    }

    // Layout of the 512-byte FXSAVE area, exposing only the MXCSR mask.
    struct fxsaveResult {
      uint8_t before[28];
      uint32_t CSRMask;
      uint8_t after[480];
    } __attribute__((aligned(16)));

    fxsaveResult registerData;
    // Zero the buffer first so CSRMask reads as 0 if FXSAVE leaves it unset.
    memset(&registerData, 0, sizeof(fxsaveResult));
    asm volatile("fxsave %0" : "=m"(registerData));
    s_isSupported = registerData.CSRMask & 0x0040;
    s_isInited = true;
    return s_isSupported;
#    endif
  }

  // Saves MXCSR, then sets the flush-to-zero bit (0x8000) and, when
  // isDAZSupported() allows it, the denormals-are-zero bit (0x0040) as well.
  inline void disableDenormals() {
    m_savedCSR = getCSR();
    setCSR(m_savedCSR | (isDAZSupported() ? 0x8040 : 0x8000));
  }

  // Writes the saved MXCSR value back.
  inline void restoreState() { setCSR(m_savedCSR); }

  // Reads MXCSR via STMXCSR.
  inline int getCSR() {
    int result;
    asm volatile("stmxcsr %0" : "=m"(result));
    return result;
  }

  // Loads MXCSR via LDMXCSR; the instruction needs a memory operand, hence
  // the local copy.
  inline void setCSR(int a) {
    int temp = a;
    asm volatile("ldmxcsr %0" : : "m"(temp));
  }

#  elif defined(XP_WIN) && defined(_MSC_VER)
  inline void disableDenormals() {
    // Save the current state, and set mode to flush denormals.
    //
    // http://stackoverflow.com/questions/637175/possible-bug-in-controlfp-s-may-not-restore-control-word-correctly
    _controlfp_s(&m_savedCSR, 0, 0);
    unsigned unused;
    _controlfp_s(&unused, _DN_FLUSH, _MCW_DN);
  }

  // Restores only the denormal-control field (_MCW_DN) from the saved word.
  inline void restoreState() {
    unsigned unused;
    _controlfp_s(&unused, m_savedCSR, _MCW_DN);
  }
#  elif defined(__arm__) || defined(__aarch64__)
  inline void disableDenormals() {
    m_savedCSR = getStatusWord();
    // Bit 24 is the flush-to-zero mode control bit. Setting it to 1 flushes
    // denormals to 0.
    setStatusWord(m_savedCSR | (1 << 24));
  }

  // Writes the saved status word back.
  inline void restoreState() { setStatusWord(m_savedCSR); }

  // Reads FPCR on AArch64, FPSCR on 32-bit ARM.
  inline int getStatusWord() {
    int result;
#    if defined(__aarch64__)
    asm volatile("mrs %x[result], FPCR" : [result] "=r"(result));
#    else
    asm volatile("vmrs %[result], FPSCR" : [result] "=r"(result));
#    endif
    return result;
  }

  // Writes FPCR on AArch64, FPSCR on 32-bit ARM.
  inline void setStatusWord(int a) {
#    if defined(__aarch64__)
    asm volatile("msr FPCR, %x[src]" : : [src] "r"(a));
#    else
    asm volatile("vmsr FPSCR, %[src]" : : [src] "r"(a));
#    endif
  }

#  endif
};

#else
// FIXME: add implementations for other architectures and compilers
//
// Fallback used when HAVE_DENORMAL is not defined: constructing the object
// does nothing, and callers flush values one at a time in software.
class DenormalDisabler {
 public:
  DenormalDisabler() {}

  // Assume the worst case that other architectures and compilers
  // need to flush denormals to zero manually.
  //
  // Returns 0.0f when |f| is below FLT_MIN, otherwise f unchanged. NaN fails
  // the comparison and is therefore passed through.
  static inline float flushDenormalFloatToZero(float f) {
    return (std::fabs(f) < FLT_MIN) ? 0.0f : f;
  }
};

#endif

}  // namespace WebCore
#endif  // DenormalDisabler_h