FIRFilter.cpp (9827B)
1 //////////////////////////////////////////////////////////////////////////////// 2 /// 3 /// General FIR digital filter routines with MMX optimization. 4 /// 5 /// Notes : MMX optimized functions reside in a separate, platform-specific file, 6 /// e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp' 7 /// 8 /// This source file contains OpenMP optimizations that allow speeding up the 9 /// corss-correlation algorithm by executing it in several threads / CPU cores 10 /// in parallel. See the following article link for more detailed discussion 11 /// about SoundTouch OpenMP optimizations: 12 /// http://www.softwarecoven.com/parallel-computing-in-embedded-mobile-devices 13 /// 14 /// Author : Copyright (c) Olli Parviainen 15 /// Author e-mail : oparviai 'at' iki.fi 16 /// SoundTouch WWW: http://www.surina.net/soundtouch 17 /// 18 //////////////////////////////////////////////////////////////////////////////// 19 // 20 // License : 21 // 22 // SoundTouch audio processing library 23 // Copyright (c) Olli Parviainen 24 // 25 // This library is free software; you can redistribute it and/or 26 // modify it under the terms of the GNU Lesser General Public 27 // License as published by the Free Software Foundation; either 28 // version 2.1 of the License, or (at your option) any later version. 29 // 30 // This library is distributed in the hope that it will be useful, 31 // but WITHOUT ANY WARRANTY; without even the implied warranty of 32 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 33 // Lesser General Public License for more details. 34 // 35 // You should have received a copy of the GNU Lesser General Public 36 // License along with this library; if not, write to the Free Software 37 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 38 // 39 //////////////////////////////////////////////////////////////////////////////// 40 41 #include <memory.h> 42 #include <assert.h> 43 #include <math.h> 44 #include <stdlib.h> 45 #include "FIRFilter.h" 46 #include "cpu_detect.h" 47 48 using namespace soundtouch; 49 50 /***************************************************************************** 51 * 52 * Implementation of the class 'FIRFilter' 53 * 54 *****************************************************************************/ 55 56 FIRFilter::FIRFilter() 57 { 58 resultDivFactor = 0; 59 resultDivider = 0; 60 length = 0; 61 lengthDiv8 = 0; 62 filterCoeffs = NULL; 63 filterCoeffsStereo = NULL; 64 } 65 66 67 FIRFilter::~FIRFilter() 68 { 69 delete[] filterCoeffs; 70 delete[] filterCoeffsStereo; 71 } 72 73 74 // Usual C-version of the filter routine for stereo sound 75 uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const 76 { 77 int j, end; 78 #ifdef SOUNDTOUCH_FLOAT_SAMPLES 79 // when using floating point samples, use a scaler instead of a divider 80 // because division is much slower operation than multiplying. 81 double dScaler = 1.0 / (double)resultDivider; 82 #endif 83 // hint compiler autovectorization that loop length is divisible by 8 84 int ilength = length & -8; 85 86 assert((length != 0) && (length == ilength) && (src != NULL) && (dest != NULL) && (filterCoeffs != NULL)); 87 88 end = 2 * (numSamples - ilength); 89 90 #pragma omp parallel for 91 for (j = 0; j < end; j += 2) 92 { 93 const SAMPLETYPE *ptr; 94 LONG_SAMPLETYPE suml, sumr; 95 96 suml = sumr = 0; 97 ptr = src + j; 98 99 for (int i = 0; i < ilength; i ++) 100 { 101 suml += ptr[2 * i] * filterCoeffsStereo[2 * i]; 102 sumr += ptr[2 * i + 1] * filterCoeffsStereo[2 * i + 1]; 103 } 104 105 #ifdef SOUNDTOUCH_INTEGER_SAMPLES 106 suml >>= resultDivFactor; 107 sumr >>= resultDivFactor; 108 // saturate to 16 bit integer limits 109 suml = (suml < -32768) ? -32768 : (suml > 32767) ? 32767 : suml; 110 // saturate to 16 bit integer limits 111 sumr = (sumr < -32768) ? -32768 : (sumr > 32767) ? 32767 : sumr; 112 #endif // SOUNDTOUCH_INTEGER_SAMPLES 113 dest[j] = (SAMPLETYPE)suml; 114 dest[j + 1] = (SAMPLETYPE)sumr; 115 } 116 return numSamples - ilength; 117 } 118 119 120 // Usual C-version of the filter routine for mono sound 121 uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const 122 { 123 int j, end; 124 #ifdef SOUNDTOUCH_FLOAT_SAMPLES 125 // when using floating point samples, use a scaler instead of a divider 126 // because division is much slower operation than multiplying. 127 double dScaler = 1.0 / (double)resultDivider; 128 #endif 129 130 // hint compiler autovectorization that loop length is divisible by 8 131 int ilength = length & -8; 132 133 assert(ilength != 0); 134 135 end = numSamples - ilength; 136 #pragma omp parallel for 137 for (j = 0; j < end; j ++) 138 { 139 const SAMPLETYPE *pSrc = src + j; 140 LONG_SAMPLETYPE sum; 141 int i; 142 143 sum = 0; 144 for (i = 0; i < ilength; i ++) 145 { 146 sum += pSrc[i] * filterCoeffs[i]; 147 } 148 #ifdef SOUNDTOUCH_INTEGER_SAMPLES 149 sum >>= resultDivFactor; 150 // saturate to 16 bit integer limits 151 sum = (sum < -32768) ? -32768 : (sum > 32767) ? 32767 : sum; 152 #endif // SOUNDTOUCH_INTEGER_SAMPLES 153 dest[j] = (SAMPLETYPE)sum; 154 } 155 return end; 156 } 157 158 159 uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples, uint numChannels) 160 { 161 int j, end; 162 163 #ifdef SOUNDTOUCH_FLOAT_SAMPLES 164 // when using floating point samples, use a scaler instead of a divider 165 // because division is much slower operation than multiplying. 166 double dScaler = 1.0 / (double)resultDivider; 167 #endif 168 169 assert(length != 0); 170 assert(src != NULL); 171 assert(dest != NULL); 172 assert(filterCoeffs != NULL); 173 assert(numChannels < 16); 174 175 // hint compiler autovectorization that loop length is divisible by 8 176 int ilength = length & -8; 177 178 end = numChannels * (numSamples - ilength); 179 180 #pragma omp parallel for 181 for (j = 0; j < end; j += numChannels) 182 { 183 const SAMPLETYPE *ptr; 184 LONG_SAMPLETYPE sums[16]; 185 uint c; 186 int i; 187 188 for (c = 0; c < numChannels; c ++) 189 { 190 sums[c] = 0; 191 } 192 193 ptr = src + j; 194 195 for (i = 0; i < ilength; i ++) 196 { 197 SAMPLETYPE coef=filterCoeffs[i]; 198 for (c = 0; c < numChannels; c ++) 199 { 200 sums[c] += ptr[0] * coef; 201 ptr ++; 202 } 203 } 204 205 for (c = 0; c < numChannels; c ++) 206 { 207 #ifdef SOUNDTOUCH_INTEGER_SAMPLES 208 sums[c] >>= resultDivFactor; 209 #endif // SOUNDTOUCH_INTEGER_SAMPLES 210 dest[j+c] = (SAMPLETYPE)sums[c]; 211 } 212 } 213 return numSamples - ilength; 214 } 215 216 217 // Set filter coeffiecients and length. 218 // 219 // Throws an exception if filter length isn't divisible by 8 220 void FIRFilter::setCoefficients(const SAMPLETYPE *coeffs, uint newLength, uint uResultDivFactor) 221 { 222 assert(newLength > 0); 223 if (newLength % 8) ST_THROW_RT_ERROR("FIR filter length not divisible by 8"); 224 225 #ifdef SOUNDTOUCH_FLOAT_SAMPLES 226 // scale coefficients already here if using floating samples 227 double scale = 1.0 / resultDivider; 228 #else 229 short scale = 1; 230 #endif 231 232 lengthDiv8 = newLength / 8; 233 length = lengthDiv8 * 8; 234 assert(length == newLength); 235 236 resultDivFactor = uResultDivFactor; 237 resultDivider = (SAMPLETYPE)::pow(2.0, (int)resultDivFactor); 238 239 delete[] filterCoeffs; 240 filterCoeffs = new SAMPLETYPE[length]; 241 delete[] filterCoeffsStereo; 242 filterCoeffsStereo = new SAMPLETYPE[length*2]; 243 for (uint i = 0; i < length; i ++) 244 { 245 filterCoeffs[i] = (SAMPLETYPE)(coeffs[i] * scale); 246 // create also stereo set of filter coefficients: this allows compiler 247 // to autovectorize filter evaluation much more efficiently 248 filterCoeffsStereo[2 * i] = (SAMPLETYPE)(coeffs[i] * scale); 249 filterCoeffsStereo[2 * i + 1] = (SAMPLETYPE)(coeffs[i] * scale); 250 } 251 } 252 253 254 uint FIRFilter::getLength() const 255 { 256 return length; 257 } 258 259 260 // Applies the filter to the given sequence of samples. 261 // 262 // Note : The amount of outputted samples is by value of 'filter_length' 263 // smaller than the amount of input samples. 264 uint FIRFilter::evaluate(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples, uint numChannels) 265 { 266 assert(length > 0); 267 assert(lengthDiv8 * 8 == length); 268 269 if (numSamples < length) return 0; 270 271 #ifndef USE_MULTICH_ALWAYS 272 if (numChannels == 1) 273 { 274 return evaluateFilterMono(dest, src, numSamples); 275 } 276 else if (numChannels == 2) 277 { 278 return evaluateFilterStereo(dest, src, numSamples); 279 } 280 else 281 #endif // USE_MULTICH_ALWAYS 282 { 283 assert(numChannels > 0); 284 return evaluateFilterMulti(dest, src, numSamples, numChannels); 285 } 286 } 287 288 289 // Operator 'new' is overloaded so that it automatically creates a suitable instance 290 // depending on if we've a MMX-capable CPU available or not. 291 void * FIRFilter::operator new(size_t s) 292 { 293 // Notice! don't use "new FIRFilter" directly, use "newInstance" to create a new instance instead! 294 ST_THROW_RT_ERROR("Error in FIRFilter::new: Don't use 'new FIRFilter', use 'newInstance' member instead!"); 295 return newInstance(); 296 } 297 298 299 FIRFilter * FIRFilter::newInstance() 300 { 301 #if defined(SOUNDTOUCH_ALLOW_MMX) || defined(SOUNDTOUCH_ALLOW_SSE) 302 uint uExtensions; 303 304 uExtensions = detectCPUextensions(); 305 #endif 306 307 // Check if MMX/SSE instruction set extensions supported by CPU 308 309 #ifdef SOUNDTOUCH_ALLOW_MMX 310 // MMX routines available only with integer sample types 311 if (uExtensions & SUPPORT_MMX) 312 { 313 return ::new FIRFilterMMX; 314 } 315 else 316 #endif // SOUNDTOUCH_ALLOW_MMX 317 318 #ifdef SOUNDTOUCH_ALLOW_SSE 319 if (uExtensions & SUPPORT_SSE) 320 { 321 // SSE support 322 return ::new FIRFilterSSE; 323 } 324 else 325 #endif // SOUNDTOUCH_ALLOW_SSE 326 327 { 328 // ISA optimizations not supported, use plain C version 329 return ::new FIRFilter; 330 } 331 }