tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

FIRFilter.cpp (9827B)


      1 ////////////////////////////////////////////////////////////////////////////////
      2 ///
      3 /// General FIR digital filter routines with MMX optimization. 
      4 ///
      5 /// Notes : MMX optimized functions reside in a separate, platform-specific file, 
      6 /// e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'
      7 ///
      8 /// This source file contains OpenMP optimizations that allow speeding up the
      9 /// corss-correlation algorithm by executing it in several threads / CPU cores 
     10 /// in parallel. See the following article link for more detailed discussion 
     11 /// about SoundTouch OpenMP optimizations:
     12 /// http://www.softwarecoven.com/parallel-computing-in-embedded-mobile-devices
     13 ///
     14 /// Author        : Copyright (c) Olli Parviainen
     15 /// Author e-mail : oparviai 'at' iki.fi
     16 /// SoundTouch WWW: http://www.surina.net/soundtouch
     17 ///
     18 ////////////////////////////////////////////////////////////////////////////////
     19 //
     20 // License :
     21 //
     22 //  SoundTouch audio processing library
     23 //  Copyright (c) Olli Parviainen
     24 //
     25 //  This library is free software; you can redistribute it and/or
     26 //  modify it under the terms of the GNU Lesser General Public
     27 //  License as published by the Free Software Foundation; either
     28 //  version 2.1 of the License, or (at your option) any later version.
     29 //
     30 //  This library is distributed in the hope that it will be useful,
     31 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
     32 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     33 //  Lesser General Public License for more details.
     34 //
     35 //  You should have received a copy of the GNU Lesser General Public
     36 //  License along with this library; if not, write to the Free Software
     37 //  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
     38 //
     39 ////////////////////////////////////////////////////////////////////////////////
     40 
     41 #include <memory.h>
     42 #include <assert.h>
     43 #include <math.h>
     44 #include <stdlib.h>
     45 #include "FIRFilter.h"
     46 #include "cpu_detect.h"
     47 
     48 using namespace soundtouch;
     49 
     50 /*****************************************************************************
     51 *
     52 * Implementation of the class 'FIRFilter'
     53 *
     54 *****************************************************************************/
     55 
     56 FIRFilter::FIRFilter()
     57 {
     58    resultDivFactor = 0;
     59    resultDivider = 0;
     60    length = 0;
     61    lengthDiv8 = 0;
     62    filterCoeffs = NULL;
     63    filterCoeffsStereo = NULL;
     64 }
     65 
     66 
     67 FIRFilter::~FIRFilter()
     68 {
     69    delete[] filterCoeffs;
     70    delete[] filterCoeffsStereo;
     71 }
     72 
     73 
     74 // Usual C-version of the filter routine for stereo sound
     75 uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const
     76 {
     77    int j, end;
     78 #ifdef SOUNDTOUCH_FLOAT_SAMPLES
     79    // when using floating point samples, use a scaler instead of a divider
     80    // because division is much slower operation than multiplying.
     81    double dScaler = 1.0 / (double)resultDivider;
     82 #endif
     83    // hint compiler autovectorization that loop length is divisible by 8
     84    int ilength = length & -8;
     85 
     86    assert((length != 0) && (length == ilength) && (src != NULL) && (dest != NULL) && (filterCoeffs != NULL));
     87 
     88    end = 2 * (numSamples - ilength);
     89 
     90    #pragma omp parallel for
     91    for (j = 0; j < end; j += 2) 
     92    {
     93        const SAMPLETYPE *ptr;
     94        LONG_SAMPLETYPE suml, sumr;
     95 
     96        suml = sumr = 0;
     97        ptr = src + j;
     98 
     99        for (int i = 0; i < ilength; i ++)
    100        {
    101            suml += ptr[2 * i] * filterCoeffsStereo[2 * i];
    102            sumr += ptr[2 * i + 1] * filterCoeffsStereo[2 * i + 1];
    103        }
    104 
    105 #ifdef SOUNDTOUCH_INTEGER_SAMPLES
    106        suml >>= resultDivFactor;
    107        sumr >>= resultDivFactor;
    108        // saturate to 16 bit integer limits
    109        suml = (suml < -32768) ? -32768 : (suml > 32767) ? 32767 : suml;
    110        // saturate to 16 bit integer limits
    111        sumr = (sumr < -32768) ? -32768 : (sumr > 32767) ? 32767 : sumr;
    112 #endif // SOUNDTOUCH_INTEGER_SAMPLES
    113        dest[j] = (SAMPLETYPE)suml;
    114        dest[j + 1] = (SAMPLETYPE)sumr;
    115    }
    116    return numSamples - ilength;
    117 }
    118 
    119 
    120 // Usual C-version of the filter routine for mono sound
    121 uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const
    122 {
    123    int j, end;
    124 #ifdef SOUNDTOUCH_FLOAT_SAMPLES
    125    // when using floating point samples, use a scaler instead of a divider
    126    // because division is much slower operation than multiplying.
    127    double dScaler = 1.0 / (double)resultDivider;
    128 #endif
    129 
    130    // hint compiler autovectorization that loop length is divisible by 8
    131    int ilength = length & -8;
    132 
    133    assert(ilength != 0);
    134 
    135    end = numSamples - ilength;
    136    #pragma omp parallel for
    137    for (j = 0; j < end; j ++)
    138    {
    139        const SAMPLETYPE *pSrc = src + j;
    140        LONG_SAMPLETYPE sum;
    141        int i;
    142 
    143        sum = 0;
    144        for (i = 0; i < ilength; i ++)
    145        {
    146            sum += pSrc[i] * filterCoeffs[i];
    147        }
    148 #ifdef SOUNDTOUCH_INTEGER_SAMPLES
    149        sum >>= resultDivFactor;
    150        // saturate to 16 bit integer limits
    151        sum = (sum < -32768) ? -32768 : (sum > 32767) ? 32767 : sum;
    152 #endif // SOUNDTOUCH_INTEGER_SAMPLES
    153        dest[j] = (SAMPLETYPE)sum;
    154    }
    155    return end;
    156 }
    157 
    158 
    159 uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples, uint numChannels)
    160 {
    161    int j, end;
    162 
    163 #ifdef SOUNDTOUCH_FLOAT_SAMPLES
    164    // when using floating point samples, use a scaler instead of a divider
    165    // because division is much slower operation than multiplying.
    166    double dScaler = 1.0 / (double)resultDivider;
    167 #endif
    168 
    169    assert(length != 0);
    170    assert(src != NULL);
    171    assert(dest != NULL);
    172    assert(filterCoeffs != NULL);
    173    assert(numChannels < 16);
    174 
    175    // hint compiler autovectorization that loop length is divisible by 8
    176    int ilength = length & -8;
    177 
    178    end = numChannels * (numSamples - ilength);
    179 
    180    #pragma omp parallel for
    181    for (j = 0; j < end; j += numChannels)
    182    {
    183        const SAMPLETYPE *ptr;
    184        LONG_SAMPLETYPE sums[16];
    185        uint c;
    186        int i;
    187 
    188        for (c = 0; c < numChannels; c ++)
    189        {
    190            sums[c] = 0;
    191        }
    192 
    193        ptr = src + j;
    194 
    195        for (i = 0; i < ilength; i ++)
    196        {
    197            SAMPLETYPE coef=filterCoeffs[i];
    198            for (c = 0; c < numChannels; c ++)
    199            {
    200                sums[c] += ptr[0] * coef;
    201                ptr ++;
    202            }
    203        }
    204        
    205        for (c = 0; c < numChannels; c ++)
    206        {
    207 #ifdef SOUNDTOUCH_INTEGER_SAMPLES
    208            sums[c] >>= resultDivFactor;
    209 #endif // SOUNDTOUCH_INTEGER_SAMPLES
    210            dest[j+c] = (SAMPLETYPE)sums[c];
    211        }
    212    }
    213    return numSamples - ilength;
    214 }
    215 
    216 
    217 // Set filter coeffiecients and length.
    218 //
    219 // Throws an exception if filter length isn't divisible by 8
    220 void FIRFilter::setCoefficients(const SAMPLETYPE *coeffs, uint newLength, uint uResultDivFactor)
    221 {
    222    assert(newLength > 0);
    223    if (newLength % 8) ST_THROW_RT_ERROR("FIR filter length not divisible by 8");
    224 
    225    #ifdef SOUNDTOUCH_FLOAT_SAMPLES
    226        // scale coefficients already here if using floating samples
    227        double scale = 1.0 / resultDivider;
    228    #else
    229        short scale = 1;
    230    #endif
    231 
    232    lengthDiv8 = newLength / 8;
    233    length = lengthDiv8 * 8;
    234    assert(length == newLength);
    235 
    236    resultDivFactor = uResultDivFactor;
    237    resultDivider = (SAMPLETYPE)::pow(2.0, (int)resultDivFactor);
    238 
    239    delete[] filterCoeffs;
    240    filterCoeffs = new SAMPLETYPE[length];
    241    delete[] filterCoeffsStereo;
    242    filterCoeffsStereo = new SAMPLETYPE[length*2];
    243    for (uint i = 0; i < length; i ++)
    244    {
    245        filterCoeffs[i] = (SAMPLETYPE)(coeffs[i] * scale);
    246        // create also stereo set of filter coefficients: this allows compiler
    247        // to autovectorize filter evaluation much more efficiently
    248        filterCoeffsStereo[2 * i] = (SAMPLETYPE)(coeffs[i] * scale);
    249        filterCoeffsStereo[2 * i + 1] = (SAMPLETYPE)(coeffs[i] * scale);
    250    }
    251 }
    252 
    253 
    254 uint FIRFilter::getLength() const
    255 {
    256    return length;
    257 }
    258 
    259 
    260 // Applies the filter to the given sequence of samples. 
    261 //
    262 // Note : The amount of outputted samples is by value of 'filter_length' 
    263 // smaller than the amount of input samples.
    264 uint FIRFilter::evaluate(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples, uint numChannels) 
    265 {
    266    assert(length > 0);
    267    assert(lengthDiv8 * 8 == length);
    268 
    269    if (numSamples < length) return 0;
    270 
    271 #ifndef USE_MULTICH_ALWAYS
    272    if (numChannels == 1)
    273    {
    274        return evaluateFilterMono(dest, src, numSamples);
    275    } 
    276    else if (numChannels == 2)
    277    {
    278        return evaluateFilterStereo(dest, src, numSamples);
    279    }
    280    else
    281 #endif // USE_MULTICH_ALWAYS
    282    {
    283        assert(numChannels > 0);
    284        return evaluateFilterMulti(dest, src, numSamples, numChannels);
    285    }
    286 }
    287 
    288 
    289 // Operator 'new' is overloaded so that it automatically creates a suitable instance 
    290 // depending on if we've a MMX-capable CPU available or not.
    291 void * FIRFilter::operator new(size_t s)
    292 {
    293    // Notice! don't use "new FIRFilter" directly, use "newInstance" to create a new instance instead!
    294    ST_THROW_RT_ERROR("Error in FIRFilter::new: Don't use 'new FIRFilter', use 'newInstance' member instead!");
    295    return newInstance();
    296 }
    297 
    298 
    299 FIRFilter * FIRFilter::newInstance()
    300 {
    301 #if defined(SOUNDTOUCH_ALLOW_MMX) || defined(SOUNDTOUCH_ALLOW_SSE)
    302    uint uExtensions;
    303 
    304    uExtensions = detectCPUextensions();
    305 #endif
    306 
    307    // Check if MMX/SSE instruction set extensions supported by CPU
    308 
    309 #ifdef SOUNDTOUCH_ALLOW_MMX
    310    // MMX routines available only with integer sample types
    311    if (uExtensions & SUPPORT_MMX)
    312    {
    313        return ::new FIRFilterMMX;
    314    }
    315    else
    316 #endif // SOUNDTOUCH_ALLOW_MMX
    317 
    318 #ifdef SOUNDTOUCH_ALLOW_SSE
    319    if (uExtensions & SUPPORT_SSE)
    320    {
    321        // SSE support
    322        return ::new FIRFilterSSE;
    323    }
    324    else
    325 #endif // SOUNDTOUCH_ALLOW_SSE
    326 
    327    {
    328        // ISA optimizations not supported, use plain C version
    329        return ::new FIRFilter;
    330    }
    331 }