celt_lpc.c (10753B)
1 /* Copyright (c) 2009-2010 Xiph.Org Foundation 2 Written by Jean-Marc Valin */ 3 /* 4 Redistribution and use in source and binary forms, with or without 5 modification, are permitted provided that the following conditions 6 are met: 7 8 - Redistributions of source code must retain the above copyright 9 notice, this list of conditions and the following disclaimer. 10 11 - Redistributions in binary form must reproduce the above copyright 12 notice, this list of conditions and the following disclaimer in the 13 documentation and/or other materials provided with the distribution. 14 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #ifdef HAVE_CONFIG_H 29 #include "config.h" 30 #endif 31 32 #include "celt_lpc.h" 33 #include "stack_alloc.h" 34 #include "mathops.h" 35 #include "pitch.h" 36 37 void _celt_lpc( 38 opus_val16 *_lpc, /* out: [0...p-1] LPC coefficients */ 39 const opus_val32 *ac, /* in: [0...p] autocorrelation values */ 40 int p 41 ) 42 { 43 int i, j; 44 opus_val32 r; 45 opus_val32 error = ac[0]; 46 #ifdef FIXED_POINT 47 opus_val32 lpc[CELT_LPC_ORDER]; 48 #else 49 float *lpc = _lpc; 50 #endif 51 52 OPUS_CLEAR(lpc, p); 53 #ifdef FIXED_POINT 54 if (ac[0] != 0) 55 #else 56 if (ac[0] > 1e-10f) 57 #endif 58 { 59 for (i = 0; i < p; i++) { 60 /* Sum up this iteration's reflection coefficient */ 61 opus_val32 rr = 0; 62 #if defined (FIXED_POINT) && OPUS_FAST_INT64 63 opus_int64 acc = 0; 64 for (j = 0; j < i; j++) 65 acc += (opus_int64)(lpc[j]) * (opus_int64)(ac[i - j]); 66 rr = (opus_val32)SHR64(acc, 31); 67 #else 68 for (j = 0; j < i; j++) 69 rr += MULT32_32_Q31(lpc[j],ac[i - j]); 70 #endif 71 rr += SHR32(ac[i + 1],6); 72 r = -frac_div32(SHL32(rr,6), error); 73 /* Update LPC coefficients and total error */ 74 lpc[i] = SHR32(r,6); 75 for (j = 0; j < (i+1)>>1; j++) 76 { 77 opus_val32 tmp1, tmp2; 78 tmp1 = lpc[j]; 79 tmp2 = lpc[i-1-j]; 80 lpc[j] = tmp1 + MULT32_32_Q31(r,tmp2); 81 lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1); 82 } 83 84 error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error); 85 /* Bail out once we get 30 dB gain */ 86 #ifdef FIXED_POINT 87 if (error<=SHR32(ac[0],10)) 88 break; 89 #else 90 if (error<=.001f*ac[0]) 91 break; 92 #endif 93 } 94 } 95 #ifdef FIXED_POINT 96 { 97 /* Convert the int32 lpcs to int16 and ensure there are no wrap-arounds. 98 This reuses the logic in silk_LPC_fit() and silk_bwexpander_32(). Any bug 99 fixes should also be applied there. */ 100 int iter, idx = 0; 101 opus_val32 maxabs, absval, chirp_Q16, chirp_minus_one_Q16; 102 103 for (iter = 0; iter < 10; iter++) { 104 maxabs = 0; 105 for (i = 0; i < p; i++) { 106 absval = ABS32(lpc[i]); 107 if (absval > maxabs) { 108 maxabs = absval; 109 idx = i; 110 } 111 } 112 maxabs = PSHR32(maxabs, 13); /* Q25->Q12 */ 113 114 if (maxabs > 32767) { 115 maxabs = MIN32(maxabs, 163838); 116 chirp_Q16 = QCONST32(0.999, 16) - DIV32(SHL32(maxabs - 32767, 14), 117 SHR32(MULT32_32_32(maxabs, idx + 1), 2)); 118 chirp_minus_one_Q16 = chirp_Q16 - 65536; 119 120 /* Apply bandwidth expansion. */ 121 for (i = 0; i < p - 1; i++) { 122 lpc[i] = MULT32_32_Q16(chirp_Q16, lpc[i]); 123 chirp_Q16 += PSHR32(MULT32_32_32(chirp_Q16, chirp_minus_one_Q16), 16); 124 } 125 lpc[p - 1] = MULT32_32_Q16(chirp_Q16, lpc[p - 1]); 126 } else { 127 break; 128 } 129 } 130 131 if (iter == 10) { 132 /* If the coeffs still do not fit into the 16 bit range after 10 iterations, 133 fall back to the A(z)=1 filter. */ 134 OPUS_CLEAR(lpc, p); 135 _lpc[0] = 4096; /* Q12 */ 136 } else { 137 for (i = 0; i < p; i++) { 138 _lpc[i] = EXTRACT16(PSHR32(lpc[i], 13)); /* Q25->Q12 */ 139 } 140 } 141 } 142 #endif 143 } 144 145 146 void celt_fir_c( 147 const opus_val16 *x, 148 const opus_val16 *num, 149 opus_val16 *y, 150 int N, 151 int ord, 152 int arch) 153 { 154 int i,j; 155 VARDECL(opus_val16, rnum); 156 SAVE_STACK; 157 celt_assert(x != y); 158 ALLOC(rnum, ord, opus_val16); 159 for(i=0;i<ord;i++) 160 rnum[i] = num[ord-i-1]; 161 for (i=0;i<N-3;i+=4) 162 { 163 opus_val32 sum[4]; 164 sum[0] = SHL32(EXTEND32(x[i ]), SIG_SHIFT); 165 sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT); 166 sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT); 167 sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT); 168 #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT) 169 { 170 opus_val32 sum_c[4]; 171 memcpy(sum_c, sum, sizeof(sum_c)); 172 xcorr_kernel_c(rnum, x+i-ord, sum_c, ord); 173 #endif 174 xcorr_kernel(rnum, x+i-ord, sum, ord, arch); 175 #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT) 176 celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0); 177 } 178 #endif 179 y[i ] = SROUND16(sum[0], SIG_SHIFT); 180 y[i+1] = SROUND16(sum[1], SIG_SHIFT); 181 y[i+2] = SROUND16(sum[2], SIG_SHIFT); 182 y[i+3] = SROUND16(sum[3], SIG_SHIFT); 183 } 184 for (;i<N;i++) 185 { 186 opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT); 187 for (j=0;j<ord;j++) 188 sum = MAC16_16(sum,rnum[j],x[i+j-ord]); 189 y[i] = SROUND16(sum, SIG_SHIFT); 190 } 191 RESTORE_STACK; 192 } 193 194 void celt_iir(const opus_val32 *_x, 195 const opus_val16 *den, 196 opus_val32 *_y, 197 int N, 198 int ord, 199 opus_val16 *mem, 200 int arch) 201 { 202 #ifdef SMALL_FOOTPRINT 203 int i,j; 204 (void)arch; 205 for (i=0;i<N;i++) 206 { 207 opus_val32 sum = _x[i]; 208 for (j=0;j<ord;j++) 209 { 210 sum -= MULT16_16(den[j],mem[j]); 211 } 212 for (j=ord-1;j>=1;j--) 213 { 214 mem[j]=mem[j-1]; 215 } 216 mem[0] = SROUND16(sum, SIG_SHIFT); 217 _y[i] = sum; 218 } 219 #else 220 int i,j; 221 VARDECL(opus_val16, rden); 222 VARDECL(opus_val16, y); 223 SAVE_STACK; 224 225 celt_assert((ord&3)==0); 226 ALLOC(rden, ord, opus_val16); 227 ALLOC(y, N+ord, opus_val16); 228 for(i=0;i<ord;i++) 229 rden[i] = den[ord-i-1]; 230 for(i=0;i<ord;i++) 231 y[i] = -mem[ord-i-1]; 232 for(;i<N+ord;i++) 233 y[i]=0; 234 for (i=0;i<N-3;i+=4) 235 { 236 /* Unroll by 4 as if it were an FIR filter */ 237 opus_val32 sum[4]; 238 sum[0]=_x[i]; 239 sum[1]=_x[i+1]; 240 sum[2]=_x[i+2]; 241 sum[3]=_x[i+3]; 242 #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT) 243 { 244 opus_val32 sum_c[4]; 245 memcpy(sum_c, sum, sizeof(sum_c)); 246 xcorr_kernel_c(rden, y+i, sum_c, ord); 247 #endif 248 xcorr_kernel(rden, y+i, sum, ord, arch); 249 #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT) 250 celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0); 251 } 252 #endif 253 /* Patch up the result to compensate for the fact that this is an IIR */ 254 y[i+ord ] = -SROUND16(sum[0],SIG_SHIFT); 255 _y[i ] = sum[0]; 256 sum[1] = MAC16_16(sum[1], y[i+ord ], den[0]); 257 y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT); 258 _y[i+1] = sum[1]; 259 sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]); 260 sum[2] = MAC16_16(sum[2], y[i+ord ], den[1]); 261 y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT); 262 _y[i+2] = sum[2]; 263 264 sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]); 265 sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]); 266 sum[3] = MAC16_16(sum[3], y[i+ord ], den[2]); 267 y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT); 268 _y[i+3] = sum[3]; 269 } 270 for (;i<N;i++) 271 { 272 opus_val32 sum = _x[i]; 273 for (j=0;j<ord;j++) 274 sum -= MULT16_16(rden[j],y[i+j]); 275 y[i+ord] = SROUND16(sum,SIG_SHIFT); 276 _y[i] = sum; 277 } 278 for(i=0;i<ord;i++) 279 mem[i] = _y[N-i-1]; 280 RESTORE_STACK; 281 #endif 282 } 283 284 int _celt_autocorr( 285 const opus_val16 *x, /* in: [0...n-1] samples x */ 286 opus_val32 *ac, /* out: [0...lag-1] ac values */ 287 const celt_coef *window, 288 int overlap, 289 int lag, 290 int n, 291 int arch 292 ) 293 { 294 opus_val32 d; 295 int i, k; 296 int fastN=n-lag; 297 int shift; 298 const opus_val16 *xptr; 299 VARDECL(opus_val16, xx); 300 SAVE_STACK; 301 ALLOC(xx, n, opus_val16); 302 celt_assert(n>0); 303 celt_assert(overlap>=0); 304 if (overlap == 0) 305 { 306 xptr = x; 307 } else { 308 for (i=0;i<n;i++) 309 xx[i] = x[i]; 310 for (i=0;i<overlap;i++) 311 { 312 opus_val16 w = COEF2VAL16(window[i]); 313 xx[i] = MULT16_16_Q15(x[i],w); 314 xx[n-i-1] = MULT16_16_Q15(x[n-i-1],w); 315 } 316 xptr = xx; 317 } 318 shift=0; 319 #ifdef FIXED_POINT 320 { 321 opus_val32 ac0; 322 int ac0_shift = celt_ilog2(n + (n>>4)); 323 ac0 = 1+(n<<7); 324 if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),ac0_shift); 325 for(i=(n&1);i<n;i+=2) 326 { 327 ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),ac0_shift); 328 ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),ac0_shift); 329 } 330 /* Consider the effect of rounding-to-nearest when scaling the signal. */ 331 ac0 += SHR32(ac0,7); 332 333 shift = celt_ilog2(ac0)-30+ac0_shift+1; 334 shift = (shift)/2; 335 if (shift>0) 336 { 337 for(i=0;i<n;i++) 338 xx[i] = PSHR32(xptr[i], shift); 339 xptr = xx; 340 } else 341 shift = 0; 342 } 343 #endif 344 celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch); 345 for (k=0;k<=lag;k++) 346 { 347 for (i = k+fastN, d = 0; i < n; i++) 348 d = MAC16_16(d, xptr[i], xptr[i-k]); 349 ac[k] += d; 350 } 351 #ifdef FIXED_POINT 352 shift = 2*shift; 353 if (shift<=0) 354 ac[0] += SHL32((opus_int32)1, -shift); 355 if (ac[0] < 268435456) 356 { 357 int shift2 = 29 - EC_ILOG(ac[0]); 358 for (i=0;i<=lag;i++) 359 ac[i] = SHL32(ac[i], shift2); 360 shift -= shift2; 361 } else if (ac[0] >= 536870912) 362 { 363 int shift2=1; 364 if (ac[0] >= 1073741824) 365 shift2++; 366 for (i=0;i<=lag;i++) 367 ac[i] = SHR32(ac[i], shift2); 368 shift += shift2; 369 } 370 #endif 371 372 RESTORE_STACK; 373 return shift; 374 }