pitch_mipsr1.h (10276B)
1 /* Copyright (c) 2007-2008 CSIRO 2 Copyright (c) 2007-2009 Xiph.Org Foundation 3 Written by Jean-Marc Valin */ 4 /** 5 @file pitch.h 6 @brief Pitch analysis 7 */ 8 9 /* 10 Redistribution and use in source and binary forms, with or without 11 modification, are permitted provided that the following conditions 12 are met: 13 14 - Redistributions of source code must retain the above copyright 15 notice, this list of conditions and the following disclaimer. 16 17 - Redistributions in binary form must reproduce the above copyright 18 notice, this list of conditions and the following disclaimer in the 19 documentation and/or other materials provided with the distribution. 20 21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 25 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #ifndef PITCH_MIPSR1_H 35 #define PITCH_MIPSR1_H 36 37 #include "fixed_generic_mipsr1.h" 38 39 #if defined (__mips_dsp) && __mips == 32 40 41 #define accumulator_t opus_int64 42 #define MIPS_MAC(acc,a,b) \ 43 __builtin_mips_madd((acc), (int)(a), (int)(b)) 44 45 #define MIPS_MAC16x16_2X(acc,a2x,b2x) \ 46 __builtin_mips_dpaq_s_w_ph((acc), (a2x), (b2x)) 47 48 #define OVERRIDE_CELT_INNER_PROD 49 #define OVERRIDE_DUAL_INNER_PROD 50 #define OVERRIDE_XCORR_KERNEL 51 52 #else /* any other MIPS */ 53 54 /* using madd is slower due to single accumulator */ 55 #define accumulator_t opus_int32 56 #define MIPS_MAC MAC16_16 57 58 #define OVERRIDE_CELT_INNER_PROD 59 #define OVERRIDE_DUAL_INNER_PROD 60 #define OVERRIDE_XCORR_KERNEL 61 62 #endif /* any other MIPS */ 63 64 65 #if defined(OVERRIDE_CELT_INNER_PROD) 66 67 static OPUS_INLINE opus_val32 celt_inner_prod(const opus_val16 *x, 68 const opus_val16 *y, int N, int arch) 69 { 70 int j; 71 accumulator_t acc = 0; 72 73 #if defined (MIPS_MAC16x16_2X) 74 const v2i16 *x2x; 75 const v2i16 *y2x; 76 int loops; 77 78 /* misaligned */ 79 if (((long)x | (long)y) & 3) 80 goto fallback; 81 82 x2x = __builtin_assume_aligned(x, 4); 83 y2x = __builtin_assume_aligned(y, 4); 84 loops = N / 8; 85 for (j = 0; j < loops; j++) 86 { 87 acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]); 88 acc = MIPS_MAC16x16_2X(acc, x2x[1], y2x[1]); 89 acc = MIPS_MAC16x16_2X(acc, x2x[2], y2x[2]); 90 acc = MIPS_MAC16x16_2X(acc, x2x[3], y2x[3]); 91 x2x += 4; y2x += 4; 92 } 93 94 switch (N & 7) { 95 case 7: 96 acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]); 97 acc = MIPS_MAC16x16_2X(acc, x2x[1], y2x[1]); 98 acc = MIPS_MAC16x16_2X(acc, x2x[2], y2x[2]); 99 acc = MIPS_MAC(acc, x[N-1], y[N-1]); 100 break; 101 case 6: 102 acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]); 103 acc = MIPS_MAC16x16_2X(acc, x2x[1], y2x[1]); 104 acc = MIPS_MAC16x16_2X(acc, x2x[2], y2x[2]); 105 break; 106 case 5: 107 acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]); 108 acc = MIPS_MAC16x16_2X(acc, x2x[1], y2x[1]); 109 acc = MIPS_MAC(acc, x[N-1], y[N-1]); 110 break; 111 case 4: 112 acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]); 113 acc = MIPS_MAC16x16_2X(acc, x2x[1], y2x[1]); 114 break; 115 case 3: 116 acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]); 117 acc = MIPS_MAC(acc, x[N-1], y[N-1]); 118 break; 119 case 2: 120 acc = MIPS_MAC16x16_2X(acc, x2x[0], y2x[0]); 121 break; 122 case 1: 123 acc = MIPS_MAC(acc, x[N-1], y[N-1]); 124 break; 125 case 0: 126 break; 127 } 128 return __builtin_mips_extr_w(acc, 1); 129 130 fallback: 131 #endif 132 for (j = 0; j < N - 3; j += 4) 133 { 134 acc = MIPS_MAC(acc, x[j], y[j]); 135 acc = MIPS_MAC(acc, x[j+1], y[j+1]); 136 acc = MIPS_MAC(acc, x[j+2], y[j+2]); 137 acc = MIPS_MAC(acc, x[j+3], y[j+3]); 138 } 139 140 switch (N & 3) { 141 case 3: 142 acc = MIPS_MAC(acc, x[j], y[j]); 143 acc = MIPS_MAC(acc, x[j+1], y[j+1]); 144 acc = MIPS_MAC(acc, x[j+2], y[j+2]); 145 break; 146 case 2: 147 acc = MIPS_MAC(acc, x[j], y[j]); 148 acc = MIPS_MAC(acc, x[j+1], y[j+1]); 149 break; 150 case 1: 151 acc = MIPS_MAC(acc, x[j], y[j]); 152 break; 153 case 0: 154 break; 155 } 156 157 (void)arch; 158 159 return (opus_val32)acc; 160 } 161 #endif /* OVERRIDE_CELT_INNER_PROD */ 162 163 #if defined(OVERRIDE_DUAL_INNER_PROD) 164 static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, 165 int N, opus_val32 *xy1, opus_val32 *xy2, int arch) 166 { 167 int j; 168 accumulator_t acc1 = 0; 169 accumulator_t acc2 = 0; 170 171 #if defined (MIPS_MAC16x16_2X) 172 const v2i16 *x2x; 173 const v2i16 *y01_2x; 174 const v2i16 *y02_2x; 175 176 /* misaligned */ 177 if (((long)x | (long)y01 | (long)y02) & 3) 178 goto fallback; 179 180 x2x = __builtin_assume_aligned(x, 4); 181 y01_2x = __builtin_assume_aligned(y01, 4); 182 y02_2x = __builtin_assume_aligned(y02, 4); 183 N /= 2; 184 185 for (j = 0; j < N - 3; j += 4) 186 { 187 acc1 = MIPS_MAC16x16_2X(acc1, x2x[j], y01_2x[j]); 188 acc2 = MIPS_MAC16x16_2X(acc2, x2x[j], y02_2x[j]); 189 acc1 = MIPS_MAC16x16_2X(acc1, x2x[j+1], y01_2x[j+1]); 190 acc2 = MIPS_MAC16x16_2X(acc2, x2x[j+1], y02_2x[j+1]); 191 acc1 = MIPS_MAC16x16_2X(acc1, x2x[j+2], y01_2x[j+2]); 192 acc2 = MIPS_MAC16x16_2X(acc2, x2x[j+2], y02_2x[j+2]); 193 acc1 = MIPS_MAC16x16_2X(acc1, x2x[j+3], y01_2x[j+3]); 194 acc2 = MIPS_MAC16x16_2X(acc2, x2x[j+3], y02_2x[j+3]); 195 } 196 197 switch (N & 3) { 198 case 3: 199 acc1 = MIPS_MAC16x16_2X(acc1, x2x[j], y01_2x[j]); 200 acc2 = MIPS_MAC16x16_2X(acc2, x2x[j], y02_2x[j]); 201 acc1 = MIPS_MAC16x16_2X(acc1, x2x[j+1], y01_2x[j+1]); 202 acc2 = MIPS_MAC16x16_2X(acc2, x2x[j+1], y02_2x[j+1]); 203 acc1 = MIPS_MAC16x16_2X(acc1, x2x[j+2], y01_2x[j+2]); 204 acc2 = MIPS_MAC16x16_2X(acc2, x2x[j+2], y02_2x[j+2]); 205 break; 206 case 2: 207 acc1 = MIPS_MAC16x16_2X(acc1, x2x[j], y01_2x[j]); 208 acc2 = MIPS_MAC16x16_2X(acc2, x2x[j], y02_2x[j]); 209 acc1 = MIPS_MAC16x16_2X(acc1, x2x[j+1], y01_2x[j+1]); 210 acc2 = MIPS_MAC16x16_2X(acc2, x2x[j+1], y02_2x[j+1]); 211 break; 212 case 1: 213 acc1 = MIPS_MAC16x16_2X(acc1, x2x[j], y01_2x[j]); 214 acc2 = MIPS_MAC16x16_2X(acc2, x2x[j], y02_2x[j]); 215 break; 216 case 0: 217 break; 218 } 219 220 *xy1 = __builtin_mips_extr_w(acc1, 1); 221 *xy2 = __builtin_mips_extr_w(acc2, 1); 222 return; 223 224 fallback: 225 #endif 226 /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */ 227 for (j = 0; j < N - 3; j += 4) 228 { 229 acc1 = MIPS_MAC(acc1, x[j], y01[j]); 230 acc2 = MIPS_MAC(acc2, x[j], y02[j]); 231 acc1 = MIPS_MAC(acc1, x[j+1], y01[j+1]); 232 acc2 = MIPS_MAC(acc2, x[j+1], y02[j+1]); 233 acc1 = MIPS_MAC(acc1, x[j+2], y01[j+2]); 234 acc2 = MIPS_MAC(acc2, x[j+2], y02[j+2]); 235 acc1 = MIPS_MAC(acc1, x[j+3], y01[j+3]); 236 acc2 = MIPS_MAC(acc2, x[j+3], y02[j+3]); 237 } 238 239 if (j < N) { 240 acc1 = MIPS_MAC(acc1, x[j], y01[j]); 241 acc2 = MIPS_MAC(acc2, x[j], y02[j]); 242 acc1 = MIPS_MAC(acc1, x[j+1], y01[j+1]); 243 acc2 = MIPS_MAC(acc2, x[j+1], y02[j+1]); 244 } 245 246 (void)arch; 247 248 *xy1 = (opus_val32)acc1; 249 *xy2 = (opus_val32)acc2; 250 } 251 #endif /* OVERRIDE_DUAL_INNER_PROD */ 252 253 #if defined(OVERRIDE_XCORR_KERNEL) 254 255 static inline void xcorr_kernel_mips(const opus_val16 * x, 256 const opus_val16 * y, opus_val32 sum[4], int len) 257 { 258 int j; 259 opus_val16 y_0, y_1, y_2, y_3; 260 261 accumulator_t sum_0, sum_1, sum_2, sum_3; 262 sum_0 = (accumulator_t)sum[0]; 263 sum_1 = (accumulator_t)sum[1]; 264 sum_2 = (accumulator_t)sum[2]; 265 sum_3 = (accumulator_t)sum[3]; 266 267 y_0=*y++; 268 y_1=*y++; 269 y_2=*y++; 270 for (j=0;j<len-3;j+=4) 271 { 272 opus_val16 tmp; 273 tmp = *x++; 274 y_3=*y++; 275 276 sum_0 = MIPS_MAC(sum_0, tmp, y_0); 277 sum_1 = MIPS_MAC(sum_1, tmp, y_1); 278 sum_2 = MIPS_MAC(sum_2, tmp, y_2); 279 sum_3 = MIPS_MAC(sum_3, tmp, y_3); 280 281 tmp=*x++; 282 y_0=*y++; 283 284 sum_0 = MIPS_MAC(sum_0, tmp, y_1); 285 sum_1 = MIPS_MAC(sum_1, tmp, y_2); 286 sum_2 = MIPS_MAC(sum_2, tmp, y_3); 287 sum_3 = MIPS_MAC(sum_3, tmp, y_0); 288 289 tmp=*x++; 290 y_1=*y++; 291 292 sum_0 = MIPS_MAC(sum_0, tmp, y_2); 293 sum_1 = MIPS_MAC(sum_1, tmp, y_3); 294 sum_2 = MIPS_MAC(sum_2, tmp, y_0); 295 sum_3 = MIPS_MAC(sum_3, tmp, y_1); 296 297 298 tmp=*x++; 299 y_2=*y++; 300 301 sum_0 = MIPS_MAC(sum_0, tmp, y_3); 302 sum_1 = MIPS_MAC(sum_1, tmp, y_0); 303 sum_2 = MIPS_MAC(sum_2, tmp, y_1); 304 sum_3 = MIPS_MAC(sum_3, tmp, y_2); 305 } 306 307 switch (len & 3) { 308 case 3: 309 sum_0 = MIPS_MAC(sum_0, x[2], y_2); 310 sum_1 = MIPS_MAC(sum_1, x[2], y[0]); 311 sum_2 = MIPS_MAC(sum_2, x[2], y[1]); 312 sum_3 = MIPS_MAC(sum_3, x[2], y[2]); 313 314 sum_0 = MIPS_MAC(sum_0, x[1], y_1); 315 sum_1 = MIPS_MAC(sum_1, x[1], y_2); 316 sum_2 = MIPS_MAC(sum_2, x[1], y[0]); 317 sum_3 = MIPS_MAC(sum_3, x[1], y[1]); 318 319 sum_0 = MIPS_MAC(sum_0, x[0], y_0); 320 sum_1 = MIPS_MAC(sum_1, x[0], y_1); 321 sum_2 = MIPS_MAC(sum_2, x[0], y_2); 322 sum_3 = MIPS_MAC(sum_3, x[0], y[0]); 323 break; 324 case 2: 325 sum_0 = MIPS_MAC(sum_0, x[1], y_1); 326 sum_1 = MIPS_MAC(sum_1, x[1], y_2); 327 sum_2 = MIPS_MAC(sum_2, x[1], y[0]); 328 sum_3 = MIPS_MAC(sum_3, x[1], y[1]); 329 330 sum_0 = MIPS_MAC(sum_0, x[0], y_0); 331 sum_1 = MIPS_MAC(sum_1, x[0], y_1); 332 sum_2 = MIPS_MAC(sum_2, x[0], y_2); 333 sum_3 = MIPS_MAC(sum_3, x[0], y[0]); 334 break; 335 case 1: 336 sum_0 = MIPS_MAC(sum_0, x[0], y_0); 337 sum_1 = MIPS_MAC(sum_1, x[0], y_1); 338 sum_2 = MIPS_MAC(sum_2, x[0], y_2); 339 sum_3 = MIPS_MAC(sum_3, x[0], y[0]); 340 break; 341 case 0: 342 break; 343 } 344 345 sum[0] = (opus_val32)sum_0; 346 sum[1] = (opus_val32)sum_1; 347 sum[2] = (opus_val32)sum_2; 348 sum[3] = (opus_val32)sum_3; 349 } 350 351 #define xcorr_kernel(x, y, sum, len, arch) \ 352 ((void)(arch), xcorr_kernel_mips(x, y, sum, len)) 353 354 #undef accumulator_t 355 #undef MIPS_MAC 356 357 #endif /* OVERRIDE_XCORR_KERNEL */ 358 359 #endif /* PITCH_MIPSR1_H */