/* kiss_fft_mipsr1.h (8542B) */
1 /*Copyright (c) 2013, Xiph.Org Foundation and contributors. 2 3 All rights reserved. 4 5 Redistribution and use in source and binary forms, with or without 6 modification, are permitted provided that the following conditions are met: 7 8 * Redistributions of source code must retain the above copyright notice, 9 this list of conditions and the following disclaimer. 10 * Redistributions in binary form must reproduce the above copyright notice, 11 this list of conditions and the following disclaimer in the 12 documentation and/or other materials provided with the distribution. 13 14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 18 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 POSSIBILITY OF SUCH DAMAGE.*/ 25 26 #ifndef KISS_FFT_MIPSR1_H 27 #define KISS_FFT_MIPSR1_H 28 29 #if !defined(KISS_FFT_GUTS_H) 30 #error "This file should only be included from _kiss_fft_guts.h" 31 #endif 32 33 #ifdef FIXED_POINT 34 35 #if __mips == 32 && defined (__mips_dsp) 36 37 static inline int S_MUL_ADD(int a, int b, int c, int d) { 38 long long acc = __builtin_mips_mult(a, b); 39 acc = __builtin_mips_madd(acc, c, d); 40 return __builtin_mips_extr_w(acc, 15); 41 } 42 43 static inline int S_MUL_SUB(int a, int b, int c, int d) { 44 long long acc = __builtin_mips_mult(a, b); 45 acc = __builtin_mips_msub(acc, c, d); 46 return 
__builtin_mips_extr_w(acc, 15); 47 } 48 49 #undef C_MUL 50 # define C_MUL(m,a,b) (m=C_MUL_fun(a,b)) 51 static inline kiss_fft_cpx C_MUL_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) { 52 kiss_fft_cpx m; 53 54 long long acc1 = __builtin_mips_mult((int)a.r, (int)b.r); 55 long long acc2 = __builtin_mips_mult((int)a.r, (int)b.i); 56 acc1 = __builtin_mips_msub(acc1, (int)a.i, (int)b.i); 57 acc2 = __builtin_mips_madd(acc2, (int)a.i, (int)b.r); 58 m.r = __builtin_mips_extr_w(acc1, 15); 59 m.i = __builtin_mips_extr_w(acc2, 15); 60 return m; 61 } 62 #undef C_MULC 63 # define C_MULC(m,a,b) (m=C_MULC_fun(a,b)) 64 static inline kiss_fft_cpx C_MULC_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) { 65 kiss_fft_cpx m; 66 67 long long acc1 = __builtin_mips_mult((int)a.r, (int)b.r); 68 long long acc2 = __builtin_mips_mult((int)a.i, (int)b.r); 69 acc1 = __builtin_mips_madd(acc1, (int)a.i, (int)b.i); 70 acc2 = __builtin_mips_msub(acc2, (int)a.r, (int)b.i); 71 m.r = __builtin_mips_extr_w(acc1, 15); 72 m.i = __builtin_mips_extr_w(acc2, 15); 73 return m; 74 } 75 76 #define OVERRIDE_kf_bfly5 77 78 #elif __mips == 32 && defined(__mips_isa_rev) && __mips_isa_rev < 6 79 80 static inline int S_MUL_ADD(int a, int b, int c, int d) { 81 long long acc; 82 83 asm volatile ( 84 "mult %[a], %[b] \n" 85 "madd %[c], %[d] \n" 86 : [acc] "=x"(acc) 87 : [a] "r"(a), [b] "r"(b), [c] "r"(c), [d] "r"(d) 88 : 89 ); 90 return (int)(acc >> 15); 91 } 92 93 static inline int S_MUL_SUB(int a, int b, int c, int d) { 94 long long acc; 95 96 asm volatile ( 97 "mult %[a], %[b] \n" 98 "msub %[c], %[d] \n" 99 : [acc] "=x"(acc) 100 : [a] "r"(a), [b] "r"(b), [c] "r"(c), [d] "r"(d) 101 : 102 ); 103 return (int)(acc >> 15); 104 } 105 106 #undef C_MUL 107 # define C_MUL(m,a,b) (m=C_MUL_fun(a,b)) 108 static inline kiss_fft_cpx C_MUL_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) { 109 kiss_fft_cpx m; 110 111 m.r = S_MUL_SUB(a.r, b.r, a.i, b.i); 112 m.i = S_MUL_ADD(a.r, b.i, a.i, b.r); 113 114 return m; 115 } 116 117 #undef C_MULC 118 # define 
C_MULC(m,a,b) (m=C_MULC_fun(a,b)) 119 static inline kiss_fft_cpx C_MULC_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) { 120 kiss_fft_cpx m; 121 122 m.r = S_MUL_ADD(a.r, b.r, a.i, b.i); 123 m.i = S_MUL_SUB(a.i, b.r, a.r, b.i); 124 125 return m; 126 } 127 128 #define OVERRIDE_kf_bfly5 129 130 #endif 131 132 #endif /* FIXED_POINT */ 133 134 #if defined(OVERRIDE_kf_bfly5) 135 136 static void kf_bfly5( 137 kiss_fft_cpx * Fout, 138 const size_t fstride, 139 const kiss_fft_state *st, 140 int m, 141 int N, 142 int mm 143 ) 144 { 145 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; 146 int i, u; 147 kiss_fft_cpx scratch[13]; 148 149 const kiss_twiddle_cpx *tw; 150 kiss_twiddle_cpx ya,yb; 151 kiss_fft_cpx * Fout_beg = Fout; 152 153 #ifdef FIXED_POINT 154 ya.r = 10126; 155 ya.i = -31164; 156 yb.r = -26510; 157 yb.i = -19261; 158 #else 159 ya = st->twiddles[fstride*m]; 160 yb = st->twiddles[fstride*2*m]; 161 #endif 162 163 tw=st->twiddles; 164 165 for (i=0;i<N;i++) 166 { 167 Fout = Fout_beg + i*mm; 168 Fout0=Fout; 169 Fout1=Fout0+m; 170 Fout2=Fout0+2*m; 171 Fout3=Fout0+3*m; 172 Fout4=Fout0+4*m; 173 174 /* For non-custom modes, m is guaranteed to be a multiple of 4. 
*/ 175 for ( u=0; u<m; ++u ) { 176 scratch[0] = *Fout0; 177 178 179 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); 180 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); 181 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); 182 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); 183 184 C_ADD( scratch[7],scratch[1],scratch[4]); 185 C_SUB( scratch[10],scratch[1],scratch[4]); 186 C_ADD( scratch[8],scratch[2],scratch[3]); 187 C_SUB( scratch[9],scratch[2],scratch[3]); 188 189 Fout0->r += scratch[7].r + scratch[8].r; 190 Fout0->i += scratch[7].i + scratch[8].i; 191 scratch[5].r = scratch[0].r + S_MUL_ADD(scratch[7].r,ya.r,scratch[8].r,yb.r); 192 scratch[5].i = scratch[0].i + S_MUL_ADD(scratch[7].i,ya.r,scratch[8].i,yb.r); 193 194 scratch[6].r = S_MUL_ADD(scratch[10].i,ya.i,scratch[9].i,yb.i); 195 scratch[6].i = -S_MUL_ADD(scratch[10].r,ya.i,scratch[9].r,yb.i); 196 197 C_SUB(*Fout1,scratch[5],scratch[6]); 198 C_ADD(*Fout4,scratch[5],scratch[6]); 199 200 scratch[11].r = scratch[0].r + S_MUL_ADD(scratch[7].r,yb.r,scratch[8].r,ya.r); 201 scratch[11].i = scratch[0].i + S_MUL_ADD(scratch[7].i,yb.r,scratch[8].i,ya.r); 202 203 scratch[12].r = S_MUL_SUB(scratch[9].i,ya.i,scratch[10].i,yb.i); 204 scratch[12].i = S_MUL_SUB(scratch[10].r,yb.i,scratch[9].r,ya.i); 205 206 C_ADD(*Fout2,scratch[11],scratch[12]); 207 C_SUB(*Fout3,scratch[11],scratch[12]); 208 209 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; 210 } 211 } 212 } 213 214 #endif /* defined(OVERRIDE_kf_bfly5) */ 215 216 #define OVERRIDE_fft_downshift 217 /* Just unroll tight loop, should be ok for any mips */ 218 static void fft_downshift(kiss_fft_cpx *x, int N, int *total, int step) { 219 int shift; 220 shift = IMIN(step, *total); 221 *total -= shift; 222 if (shift == 1) { 223 int i; 224 for (i = 0; i < N - 1; i += 2) { 225 x[i].r = SHR32(x[i].r, 1); 226 x[i].i = SHR32(x[i].i, 1); 227 x[i+1].r = SHR32(x[i+1].r, 1); 228 x[i+1].i = SHR32(x[i+1].i, 1); 229 } 230 if (N & 1) { 231 x[i].r = SHR32(x[i].r, 1); 232 x[i].i = SHR32(x[i].i, 1); 233 } 234 } else if 
(shift > 0) { 235 int i; 236 for (i = 0; i < N - 3; i += 4) { 237 x[i].r = PSHR32(x[i].r, shift); 238 x[i].i = PSHR32(x[i].i, shift); 239 x[i+1].r = PSHR32(x[i+1].r, shift); 240 x[i+1].i = PSHR32(x[i+1].i, shift); 241 x[i+2].r = PSHR32(x[i+2].r, shift); 242 x[i+2].i = PSHR32(x[i+2].i, shift); 243 x[i+3].r = PSHR32(x[i+3].r, shift); 244 x[i+3].i = PSHR32(x[i+3].i, shift); 245 } 246 switch (N & 3) { 247 case 3: 248 x[i].r = PSHR32(x[i].r, shift); 249 x[i].i = PSHR32(x[i].i, shift); 250 x[i+1].r = PSHR32(x[i+1].r, shift); 251 x[i+1].i = PSHR32(x[i+1].i, shift); 252 x[i+2].r = PSHR32(x[i+2].r, shift); 253 x[i+2].i = PSHR32(x[i+2].i, shift); 254 break; 255 case 2: 256 x[i].r = PSHR32(x[i].r, shift); 257 x[i].i = PSHR32(x[i].i, shift); 258 x[i+1].r = PSHR32(x[i+1].r, shift); 259 x[i+1].i = PSHR32(x[i+1].i, shift); 260 break; 261 case 1: 262 x[i].r = PSHR32(x[i].r, shift); 263 x[i].i = PSHR32(x[i].i, shift); 264 break; 265 case 0: 266 break; 267 } 268 } 269 } 270 271 #endif /* KISS_FFT_MIPSR1_H */