[ tor-browser ].git.dasho

celt_lpc.c (10753B)
      1 /* Copyright (c) 2009-2010 Xiph.Org Foundation
      2   Written by Jean-Marc Valin */
      3 /*
      4   Redistribution and use in source and binary forms, with or without
      5   modification, are permitted provided that the following conditions
      6   are met:
      7 
      8   - Redistributions of source code must retain the above copyright
      9   notice, this list of conditions and the following disclaimer.
     10 
     11   - Redistributions in binary form must reproduce the above copyright
     12   notice, this list of conditions and the following disclaimer in the
     13   documentation and/or other materials provided with the distribution.
     14 
     15   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     16   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     17   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     18   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
     19   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     20   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     21   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     22   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     23   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     24   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     25   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 */
     27 
     28 #ifdef HAVE_CONFIG_H
     29 #include "config.h"
     30 #endif
     31 
     32 #include "celt_lpc.h"
     33 #include "stack_alloc.h"
     34 #include "mathops.h"
     35 #include "pitch.h"
     36 
     37 void _celt_lpc(
     38      opus_val16       *_lpc, /* out: [0...p-1] LPC coefficients      */
     39 const opus_val32 *ac,  /* in:  [0...p] autocorrelation values  */
     40 int          p
     41 )
     42 {
     43   int i, j;
     44   opus_val32 r;
     45   opus_val32 error = ac[0];
     46 #ifdef FIXED_POINT
     47   opus_val32 lpc[CELT_LPC_ORDER];
     48 #else
     49   float *lpc = _lpc;
     50 #endif
     51 
     52   OPUS_CLEAR(lpc, p);
     53 #ifdef FIXED_POINT
     54   if (ac[0] != 0)
     55 #else
     56   if (ac[0] > 1e-10f)
     57 #endif
     58   {
     59      for (i = 0; i < p; i++) {
     60         /* Sum up this iteration's reflection coefficient */
     61         opus_val32 rr = 0;
     62 #if defined (FIXED_POINT) && OPUS_FAST_INT64
     63         opus_int64 acc = 0;
     64         for (j = 0; j < i; j++)
     65            acc += (opus_int64)(lpc[j]) * (opus_int64)(ac[i - j]);
     66         rr = (opus_val32)SHR64(acc, 31);
     67 #else
     68         for (j = 0; j < i; j++)
     69            rr += MULT32_32_Q31(lpc[j],ac[i - j]);
     70 #endif
     71         rr += SHR32(ac[i + 1],6);
     72         r = -frac_div32(SHL32(rr,6), error);
     73         /*  Update LPC coefficients and total error */
     74         lpc[i] = SHR32(r,6);
     75         for (j = 0; j < (i+1)>>1; j++)
     76         {
     77            opus_val32 tmp1, tmp2;
     78            tmp1 = lpc[j];
     79            tmp2 = lpc[i-1-j];
     80            lpc[j]     = tmp1 + MULT32_32_Q31(r,tmp2);
     81            lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
     82         }
     83 
     84         error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
     85         /* Bail out once we get 30 dB gain */
     86 #ifdef FIXED_POINT
     87         if (error<=SHR32(ac[0],10))
     88            break;
     89 #else
     90         if (error<=.001f*ac[0])
     91            break;
     92 #endif
     93      }
     94   }
     95 #ifdef FIXED_POINT
     96   {
     97      /* Convert the int32 lpcs to int16 and ensure there are no wrap-arounds.
     98         This reuses the logic in silk_LPC_fit() and silk_bwexpander_32(). Any bug
     99         fixes should also be applied there. */
    100      int iter, idx = 0;
    101      opus_val32 maxabs, absval, chirp_Q16, chirp_minus_one_Q16;
    102 
    103      for (iter = 0; iter < 10; iter++) {
    104         maxabs = 0;
    105         for (i = 0; i < p; i++) {
    106            absval = ABS32(lpc[i]);
    107            if (absval > maxabs) {
    108               maxabs = absval;
    109               idx = i;
    110            }
    111         }
    112         maxabs = PSHR32(maxabs, 13);  /* Q25->Q12 */
    113 
    114         if (maxabs > 32767) {
    115            maxabs = MIN32(maxabs, 163838);
    116            chirp_Q16 = QCONST32(0.999, 16) - DIV32(SHL32(maxabs - 32767, 14),
    117                                                    SHR32(MULT32_32_32(maxabs, idx + 1), 2));
    118            chirp_minus_one_Q16 = chirp_Q16 - 65536;
    119 
    120            /* Apply bandwidth expansion. */
    121            for (i = 0; i < p - 1; i++) {
    122               lpc[i] = MULT32_32_Q16(chirp_Q16, lpc[i]);
    123               chirp_Q16 += PSHR32(MULT32_32_32(chirp_Q16, chirp_minus_one_Q16), 16);
    124            }
    125            lpc[p - 1] = MULT32_32_Q16(chirp_Q16, lpc[p - 1]);
    126         } else {
    127            break;
    128         }
    129      }
    130 
    131      if (iter == 10) {
    132         /* If the coeffs still do not fit into the 16 bit range after 10 iterations,
    133            fall back to the A(z)=1 filter. */
    134         OPUS_CLEAR(lpc, p);
    135         _lpc[0] = 4096;  /* Q12 */
    136      } else {
    137         for (i = 0; i < p; i++) {
    138            _lpc[i] = EXTRACT16(PSHR32(lpc[i], 13));  /* Q25->Q12 */
    139         }
    140      }
    141   }
    142 #endif
    143 }
    144 
    145 
    146 void celt_fir_c(
    147         const opus_val16 *x,
    148         const opus_val16 *num,
    149         opus_val16 *y,
    150         int N,
    151         int ord,
    152         int arch)
    153 {
    154   int i,j;
    155   VARDECL(opus_val16, rnum);
    156   SAVE_STACK;
    157   celt_assert(x != y);
    158   ALLOC(rnum, ord, opus_val16);
    159   for(i=0;i<ord;i++)
    160      rnum[i] = num[ord-i-1];
    161   for (i=0;i<N-3;i+=4)
    162   {
    163      opus_val32 sum[4];
    164      sum[0] = SHL32(EXTEND32(x[i  ]), SIG_SHIFT);
    165      sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT);
    166      sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT);
    167      sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT);
    168 #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
    169      {
    170         opus_val32 sum_c[4];
    171         memcpy(sum_c, sum, sizeof(sum_c));
    172         xcorr_kernel_c(rnum, x+i-ord, sum_c, ord);
    173 #endif
    174         xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
    175 #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
    176         celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
    177      }
    178 #endif
    179      y[i  ] = SROUND16(sum[0], SIG_SHIFT);
    180      y[i+1] = SROUND16(sum[1], SIG_SHIFT);
    181      y[i+2] = SROUND16(sum[2], SIG_SHIFT);
    182      y[i+3] = SROUND16(sum[3], SIG_SHIFT);
    183   }
    184   for (;i<N;i++)
    185   {
    186      opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
    187      for (j=0;j<ord;j++)
    188         sum = MAC16_16(sum,rnum[j],x[i+j-ord]);
    189      y[i] = SROUND16(sum, SIG_SHIFT);
    190   }
    191   RESTORE_STACK;
    192 }
    193 
    194 void celt_iir(const opus_val32 *_x,
    195         const opus_val16 *den,
    196         opus_val32 *_y,
    197         int N,
    198         int ord,
    199         opus_val16 *mem,
    200         int arch)
    201 {
    202 #ifdef SMALL_FOOTPRINT
    203   int i,j;
    204   (void)arch;
    205   for (i=0;i<N;i++)
    206   {
    207      opus_val32 sum = _x[i];
    208      for (j=0;j<ord;j++)
    209      {
    210         sum -= MULT16_16(den[j],mem[j]);
    211      }
    212      for (j=ord-1;j>=1;j--)
    213      {
    214         mem[j]=mem[j-1];
    215      }
    216      mem[0] = SROUND16(sum, SIG_SHIFT);
    217      _y[i] = sum;
    218   }
    219 #else
    220   int i,j;
    221   VARDECL(opus_val16, rden);
    222   VARDECL(opus_val16, y);
    223   SAVE_STACK;
    224 
    225   celt_assert((ord&3)==0);
    226   ALLOC(rden, ord, opus_val16);
    227   ALLOC(y, N+ord, opus_val16);
    228   for(i=0;i<ord;i++)
    229      rden[i] = den[ord-i-1];
    230   for(i=0;i<ord;i++)
    231      y[i] = -mem[ord-i-1];
    232   for(;i<N+ord;i++)
    233      y[i]=0;
    234   for (i=0;i<N-3;i+=4)
    235   {
    236      /* Unroll by 4 as if it were an FIR filter */
    237      opus_val32 sum[4];
    238      sum[0]=_x[i];
    239      sum[1]=_x[i+1];
    240      sum[2]=_x[i+2];
    241      sum[3]=_x[i+3];
    242 #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
    243      {
    244         opus_val32 sum_c[4];
    245         memcpy(sum_c, sum, sizeof(sum_c));
    246         xcorr_kernel_c(rden, y+i, sum_c, ord);
    247 #endif
    248         xcorr_kernel(rden, y+i, sum, ord, arch);
    249 #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
    250         celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
    251      }
    252 #endif
    253      /* Patch up the result to compensate for the fact that this is an IIR */
    254      y[i+ord  ] = -SROUND16(sum[0],SIG_SHIFT);
    255      _y[i  ] = sum[0];
    256      sum[1] = MAC16_16(sum[1], y[i+ord  ], den[0]);
    257      y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT);
    258      _y[i+1] = sum[1];
    259      sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
    260      sum[2] = MAC16_16(sum[2], y[i+ord  ], den[1]);
    261      y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT);
    262      _y[i+2] = sum[2];
    263 
    264      sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
    265      sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
    266      sum[3] = MAC16_16(sum[3], y[i+ord  ], den[2]);
    267      y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT);
    268      _y[i+3] = sum[3];
    269   }
    270   for (;i<N;i++)
    271   {
    272      opus_val32 sum = _x[i];
    273      for (j=0;j<ord;j++)
    274         sum -= MULT16_16(rden[j],y[i+j]);
    275      y[i+ord] = SROUND16(sum,SIG_SHIFT);
    276      _y[i] = sum;
    277   }
    278   for(i=0;i<ord;i++)
    279      mem[i] = _y[N-i-1];
    280   RESTORE_STACK;
    281 #endif
    282 }
    283 
    284 int _celt_autocorr(
    285                   const opus_val16 *x,   /*  in: [0...n-1] samples x   */
    286                   opus_val32       *ac,  /* out: [0...lag-1] ac values */
    287                   const celt_coef  *window,
    288                   int          overlap,
    289                   int          lag,
    290                   int          n,
    291                   int          arch
    292                  )
    293 {
    294   opus_val32 d;
    295   int i, k;
    296   int fastN=n-lag;
    297   int shift;
    298   const opus_val16 *xptr;
    299   VARDECL(opus_val16, xx);
    300   SAVE_STACK;
    301   ALLOC(xx, n, opus_val16);
    302   celt_assert(n>0);
    303   celt_assert(overlap>=0);
    304   if (overlap == 0)
    305   {
    306      xptr = x;
    307   } else {
    308      for (i=0;i<n;i++)
    309         xx[i] = x[i];
    310      for (i=0;i<overlap;i++)
    311      {
    312         opus_val16 w = COEF2VAL16(window[i]);
    313         xx[i] = MULT16_16_Q15(x[i],w);
    314         xx[n-i-1] = MULT16_16_Q15(x[n-i-1],w);
    315      }
    316      xptr = xx;
    317   }
    318   shift=0;
    319 #ifdef FIXED_POINT
    320   {
    321      opus_val32 ac0;
    322      int ac0_shift = celt_ilog2(n + (n>>4));
    323      ac0 = 1+(n<<7);
    324      if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),ac0_shift);
    325      for(i=(n&1);i<n;i+=2)
    326      {
    327         ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),ac0_shift);
    328         ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),ac0_shift);
    329      }
    330      /* Consider the effect of rounding-to-nearest when scaling the signal. */
    331      ac0 += SHR32(ac0,7);
    332 
    333      shift = celt_ilog2(ac0)-30+ac0_shift+1;
    334      shift = (shift)/2;
    335      if (shift>0)
    336      {
    337         for(i=0;i<n;i++)
    338            xx[i] = PSHR32(xptr[i], shift);
    339         xptr = xx;
    340      } else
    341         shift = 0;
    342   }
    343 #endif
    344   celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch);
    345   for (k=0;k<=lag;k++)
    346   {
    347      for (i = k+fastN, d = 0; i < n; i++)
    348         d = MAC16_16(d, xptr[i], xptr[i-k]);
    349      ac[k] += d;
    350   }
    351 #ifdef FIXED_POINT
    352   shift = 2*shift;
    353   if (shift<=0)
    354      ac[0] += SHL32((opus_int32)1, -shift);
    355   if (ac[0] < 268435456)
    356   {
    357      int shift2 = 29 - EC_ILOG(ac[0]);
    358      for (i=0;i<=lag;i++)
    359         ac[i] = SHL32(ac[i], shift2);
    360      shift -= shift2;
    361   } else if (ac[0] >= 536870912)
    362   {
    363      int shift2=1;
    364      if (ac[0] >= 1073741824)
    365         shift2++;
    366      for (i=0;i<=lag;i++)
    367         ac[i] = SHR32(ac[i], shift2);
    368      shift += shift2;
    369   }
    370 #endif
    371 
    372   RESTORE_STACK;
    373   return shift;
    374 }
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE