tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

kiss_fft_mipsr1.h (8542B)


      1 /*Copyright (c) 2013, Xiph.Org Foundation and contributors.
      2 
      3  All rights reserved.
      4 
      5  Redistribution and use in source and binary forms, with or without
      6   modification, are permitted provided that the following conditions are met:
      7 
      8    * Redistributions of source code must retain the above copyright notice,
      9       this list of conditions and the following disclaimer.
     10    * Redistributions in binary form must reproduce the above copyright notice,
     11       this list of conditions and the following disclaimer in the
     12       documentation and/or other materials provided with the distribution.
     13 
     14  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     15  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     17  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     18  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     19  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     20  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     21  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     22  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     23  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     24  POSSIBILITY OF SUCH DAMAGE.*/
     25 
     26 #ifndef KISS_FFT_MIPSR1_H
     27 #define KISS_FFT_MIPSR1_H
     28 
     29 #if !defined(KISS_FFT_GUTS_H)
     30 #error "This file should only be included from _kiss_fft_guts.h"
     31 #endif
     32 
     33 #ifdef FIXED_POINT
     34 
     35 #if __mips == 32 && defined (__mips_dsp)
     36 
     37 static inline int S_MUL_ADD(int a, int b, int c, int d) {
     38    long long acc = __builtin_mips_mult(a, b);
     39    acc = __builtin_mips_madd(acc, c, d);
     40    return __builtin_mips_extr_w(acc, 15);
     41 }
     42 
     43 static inline int S_MUL_SUB(int a, int b, int c, int d) {
     44    long long acc = __builtin_mips_mult(a, b);
     45    acc = __builtin_mips_msub(acc, c, d);
     46    return __builtin_mips_extr_w(acc, 15);
     47 }
     48 
     49 #undef C_MUL
     50 #   define C_MUL(m,a,b) (m=C_MUL_fun(a,b))
     51 static inline kiss_fft_cpx C_MUL_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
     52    kiss_fft_cpx m;
     53 
     54    long long acc1 = __builtin_mips_mult((int)a.r, (int)b.r);
     55    long long acc2 = __builtin_mips_mult((int)a.r, (int)b.i);
     56    acc1 = __builtin_mips_msub(acc1, (int)a.i, (int)b.i);
     57    acc2 = __builtin_mips_madd(acc2, (int)a.i, (int)b.r);
     58    m.r = __builtin_mips_extr_w(acc1, 15);
     59    m.i = __builtin_mips_extr_w(acc2, 15);
     60    return m;
     61 }
     62 #undef C_MULC
     63 #   define C_MULC(m,a,b) (m=C_MULC_fun(a,b))
     64 static inline kiss_fft_cpx C_MULC_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
     65    kiss_fft_cpx m;
     66 
     67    long long acc1 = __builtin_mips_mult((int)a.r, (int)b.r);
     68    long long acc2 = __builtin_mips_mult((int)a.i, (int)b.r);
     69    acc1 = __builtin_mips_madd(acc1, (int)a.i, (int)b.i);
     70    acc2 = __builtin_mips_msub(acc2, (int)a.r, (int)b.i);
     71    m.r = __builtin_mips_extr_w(acc1, 15);
     72    m.i = __builtin_mips_extr_w(acc2, 15);
     73    return m;
     74 }
     75 
     76 #define OVERRIDE_kf_bfly5
     77 
     78 #elif __mips == 32 && defined(__mips_isa_rev) && __mips_isa_rev < 6
     79 
     80 static inline int S_MUL_ADD(int a, int b, int c, int d) {
     81    long long acc;
     82 
     83    asm volatile (
     84            "mult %[a], %[b]  \n"
     85            "madd %[c], %[d]  \n"
     86        : [acc] "=x"(acc)
     87        : [a] "r"(a), [b] "r"(b), [c] "r"(c), [d] "r"(d)
     88        :
     89    );
     90    return (int)(acc >> 15);
     91 }
     92 
     93 static inline int S_MUL_SUB(int a, int b, int c, int d) {
     94    long long acc;
     95 
     96    asm volatile (
     97            "mult %[a], %[b]  \n"
     98            "msub %[c], %[d]  \n"
     99        : [acc] "=x"(acc)
    100        : [a] "r"(a), [b] "r"(b), [c] "r"(c), [d] "r"(d)
    101        :
    102    );
    103    return (int)(acc >> 15);
    104 }
    105 
    106 #undef C_MUL
    107 #   define C_MUL(m,a,b) (m=C_MUL_fun(a,b))
    108 static inline kiss_fft_cpx C_MUL_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
    109    kiss_fft_cpx m;
    110 
    111    m.r = S_MUL_SUB(a.r, b.r, a.i, b.i);
    112    m.i = S_MUL_ADD(a.r, b.i, a.i, b.r);
    113 
    114    return m;
    115 }
    116 
    117 #undef C_MULC
    118 #   define C_MULC(m,a,b) (m=C_MULC_fun(a,b))
    119 static inline kiss_fft_cpx C_MULC_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
    120    kiss_fft_cpx m;
    121 
    122    m.r = S_MUL_ADD(a.r, b.r, a.i, b.i);
    123    m.i = S_MUL_SUB(a.i, b.r, a.r, b.i);
    124 
    125    return m;
    126 }
    127 
    128 #define OVERRIDE_kf_bfly5
    129 
    130 #endif
    131 
    132 #endif /* FIXED_POINT */
    133 
    134 #if defined(OVERRIDE_kf_bfly5)
    135 
    136 static void kf_bfly5(
    137                     kiss_fft_cpx * Fout,
    138                     const size_t fstride,
    139                     const kiss_fft_state *st,
    140                     int m,
    141                     int N,
    142                     int mm
    143                    )
    144 {
    145   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
    146   int i, u;
    147   kiss_fft_cpx scratch[13];
    148 
    149   const kiss_twiddle_cpx *tw;
    150   kiss_twiddle_cpx ya,yb;
    151   kiss_fft_cpx * Fout_beg = Fout;
    152 
    153 #ifdef FIXED_POINT
    154   ya.r = 10126;
    155   ya.i = -31164;
    156   yb.r = -26510;
    157   yb.i = -19261;
    158 #else
    159   ya = st->twiddles[fstride*m];
    160   yb = st->twiddles[fstride*2*m];
    161 #endif
    162 
    163   tw=st->twiddles;
    164 
    165   for (i=0;i<N;i++)
    166   {
    167      Fout = Fout_beg + i*mm;
    168      Fout0=Fout;
    169      Fout1=Fout0+m;
    170      Fout2=Fout0+2*m;
    171      Fout3=Fout0+3*m;
    172      Fout4=Fout0+4*m;
    173 
    174      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
    175      for ( u=0; u<m; ++u ) {
    176         scratch[0] = *Fout0;
    177 
    178 
    179         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
    180         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
    181         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
    182         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
    183 
    184         C_ADD( scratch[7],scratch[1],scratch[4]);
    185         C_SUB( scratch[10],scratch[1],scratch[4]);
    186         C_ADD( scratch[8],scratch[2],scratch[3]);
    187         C_SUB( scratch[9],scratch[2],scratch[3]);
    188 
    189         Fout0->r += scratch[7].r + scratch[8].r;
    190         Fout0->i += scratch[7].i + scratch[8].i;
    191         scratch[5].r = scratch[0].r + S_MUL_ADD(scratch[7].r,ya.r,scratch[8].r,yb.r);
    192         scratch[5].i = scratch[0].i + S_MUL_ADD(scratch[7].i,ya.r,scratch[8].i,yb.r);
    193 
    194         scratch[6].r =  S_MUL_ADD(scratch[10].i,ya.i,scratch[9].i,yb.i);
    195         scratch[6].i =  -S_MUL_ADD(scratch[10].r,ya.i,scratch[9].r,yb.i);
    196 
    197         C_SUB(*Fout1,scratch[5],scratch[6]);
    198         C_ADD(*Fout4,scratch[5],scratch[6]);
    199 
    200         scratch[11].r = scratch[0].r + S_MUL_ADD(scratch[7].r,yb.r,scratch[8].r,ya.r);
    201         scratch[11].i = scratch[0].i + S_MUL_ADD(scratch[7].i,yb.r,scratch[8].i,ya.r);
    202 
    203         scratch[12].r =  S_MUL_SUB(scratch[9].i,ya.i,scratch[10].i,yb.i);
    204         scratch[12].i =  S_MUL_SUB(scratch[10].r,yb.i,scratch[9].r,ya.i);
    205 
    206         C_ADD(*Fout2,scratch[11],scratch[12]);
    207         C_SUB(*Fout3,scratch[11],scratch[12]);
    208 
    209         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
    210      }
    211   }
    212 }
    213 
    214 #endif /* defined(OVERRIDE_kf_bfly5) */
    215 
    216 #define OVERRIDE_fft_downshift
    217 /* Just unroll tight loop, should be ok for any mips */
    218 static void fft_downshift(kiss_fft_cpx *x, int N, int *total, int step) {
    219    int shift;
    220    shift = IMIN(step, *total);
    221    *total -= shift;
    222    if (shift == 1) {
    223        int i;
    224        for (i = 0; i < N - 1; i += 2) {
    225            x[i].r   = SHR32(x[i].r,   1);
    226            x[i].i   = SHR32(x[i].i,   1);
    227            x[i+1].r = SHR32(x[i+1].r, 1);
    228            x[i+1].i = SHR32(x[i+1].i, 1);
    229        }
    230        if (N & 1) {
    231            x[i].r = SHR32(x[i].r, 1);
    232            x[i].i = SHR32(x[i].i, 1);
    233        }
    234    } else if (shift > 0) {
    235        int i;
    236        for (i = 0; i < N - 3; i += 4) {
    237            x[i].r   = PSHR32(x[i].r,   shift);
    238            x[i].i   = PSHR32(x[i].i,   shift);
    239            x[i+1].r = PSHR32(x[i+1].r, shift);
    240            x[i+1].i = PSHR32(x[i+1].i, shift);
    241            x[i+2].r = PSHR32(x[i+2].r, shift);
    242            x[i+2].i = PSHR32(x[i+2].i, shift);
    243            x[i+3].r = PSHR32(x[i+3].r, shift);
    244            x[i+3].i = PSHR32(x[i+3].i, shift);
    245        }
    246        switch (N & 3) {
    247        case 3:
    248            x[i].r   = PSHR32(x[i].r,   shift);
    249            x[i].i   = PSHR32(x[i].i,   shift);
    250            x[i+1].r = PSHR32(x[i+1].r, shift);
    251            x[i+1].i = PSHR32(x[i+1].i, shift);
    252            x[i+2].r = PSHR32(x[i+2].r, shift);
    253            x[i+2].i = PSHR32(x[i+2].i, shift);
    254            break;
    255        case 2:
    256            x[i].r   = PSHR32(x[i].r,   shift);
    257            x[i].i   = PSHR32(x[i].i,   shift);
    258            x[i+1].r = PSHR32(x[i+1].r, shift);
    259            x[i+1].i = PSHR32(x[i+1].i, shift);
    260            break;
    261        case 1:
    262            x[i].r   = PSHR32(x[i].r,   shift);
    263            x[i].i   = PSHR32(x[i].i,   shift);
    264            break;
    265        case 0:
    266            break;
    267        }
    268    }
    269 }
    270 
    271 #endif /* KISS_FFT_MIPSR1_H */