tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

analysis.c (36284B)


      1 /* Copyright (c) 2011 Xiph.Org Foundation
      2   Written by Jean-Marc Valin */
      3 /*
      4   Redistribution and use in source and binary forms, with or without
      5   modification, are permitted provided that the following conditions
      6   are met:
      7 
      8   - Redistributions of source code must retain the above copyright
      9   notice, this list of conditions and the following disclaimer.
     10 
     11   - Redistributions in binary form must reproduce the above copyright
     12   notice, this list of conditions and the following disclaimer in the
     13   documentation and/or other materials provided with the distribution.
     14 
     15   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     16   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     17   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     18   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
     19   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     20   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     21   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     22   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     23   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     24   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     25   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 */
     27 
     28 #ifdef HAVE_CONFIG_H
     29 #include "config.h"
     30 #endif
     31 
     32 #define ANALYSIS_C
     33 
     34 #ifdef MLP_TRAINING
     35 #include <stdio.h>
     36 #endif
     37 
     38 #include "mathops.h"
     39 #include "kiss_fft.h"
     40 #include "celt.h"
     41 #include "modes.h"
     42 #include "arch.h"
     43 #include "quant_bands.h"
     44 #include "analysis.h"
     45 #include "mlp.h"
     46 #include "stack_alloc.h"
     47 #include "float_cast.h"
     48 
     49 #ifndef M_PI
     50 #define M_PI 3.141592653
     51 #endif
     52 
     53 #ifndef DISABLE_FLOAT_API
     54 
     55 #define TRANSITION_PENALTY 10
     56 
     57 static const float dct_table[128] = {
     58        0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
     59        0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
     60        0.351851f, 0.338330f, 0.311806f, 0.273300f, 0.224292f, 0.166664f, 0.102631f, 0.034654f,
     61       -0.034654f,-0.102631f,-0.166664f,-0.224292f,-0.273300f,-0.311806f,-0.338330f,-0.351851f,
     62        0.346760f, 0.293969f, 0.196424f, 0.068975f,-0.068975f,-0.196424f,-0.293969f,-0.346760f,
     63       -0.346760f,-0.293969f,-0.196424f,-0.068975f, 0.068975f, 0.196424f, 0.293969f, 0.346760f,
     64        0.338330f, 0.224292f, 0.034654f,-0.166664f,-0.311806f,-0.351851f,-0.273300f,-0.102631f,
     65        0.102631f, 0.273300f, 0.351851f, 0.311806f, 0.166664f,-0.034654f,-0.224292f,-0.338330f,
     66        0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f,
     67        0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f,
     68        0.311806f, 0.034654f,-0.273300f,-0.338330f,-0.102631f, 0.224292f, 0.351851f, 0.166664f,
     69       -0.166664f,-0.351851f,-0.224292f, 0.102631f, 0.338330f, 0.273300f,-0.034654f,-0.311806f,
     70        0.293969f,-0.068975f,-0.346760f,-0.196424f, 0.196424f, 0.346760f, 0.068975f,-0.293969f,
     71       -0.293969f, 0.068975f, 0.346760f, 0.196424f,-0.196424f,-0.346760f,-0.068975f, 0.293969f,
     72        0.273300f,-0.166664f,-0.338330f, 0.034654f, 0.351851f, 0.102631f,-0.311806f,-0.224292f,
     73        0.224292f, 0.311806f,-0.102631f,-0.351851f,-0.034654f, 0.338330f, 0.166664f,-0.273300f,
     74 };
     75 
     76 static const float analysis_window[240] = {
     77      0.000043f, 0.000171f, 0.000385f, 0.000685f, 0.001071f, 0.001541f, 0.002098f, 0.002739f,
     78      0.003466f, 0.004278f, 0.005174f, 0.006156f, 0.007222f, 0.008373f, 0.009607f, 0.010926f,
     79      0.012329f, 0.013815f, 0.015385f, 0.017037f, 0.018772f, 0.020590f, 0.022490f, 0.024472f,
     80      0.026535f, 0.028679f, 0.030904f, 0.033210f, 0.035595f, 0.038060f, 0.040604f, 0.043227f,
     81      0.045928f, 0.048707f, 0.051564f, 0.054497f, 0.057506f, 0.060591f, 0.063752f, 0.066987f,
     82      0.070297f, 0.073680f, 0.077136f, 0.080665f, 0.084265f, 0.087937f, 0.091679f, 0.095492f,
     83      0.099373f, 0.103323f, 0.107342f, 0.111427f, 0.115579f, 0.119797f, 0.124080f, 0.128428f,
     84      0.132839f, 0.137313f, 0.141849f, 0.146447f, 0.151105f, 0.155823f, 0.160600f, 0.165435f,
     85      0.170327f, 0.175276f, 0.180280f, 0.185340f, 0.190453f, 0.195619f, 0.200838f, 0.206107f,
     86      0.211427f, 0.216797f, 0.222215f, 0.227680f, 0.233193f, 0.238751f, 0.244353f, 0.250000f,
     87      0.255689f, 0.261421f, 0.267193f, 0.273005f, 0.278856f, 0.284744f, 0.290670f, 0.296632f,
     88      0.302628f, 0.308658f, 0.314721f, 0.320816f, 0.326941f, 0.333097f, 0.339280f, 0.345492f,
     89      0.351729f, 0.357992f, 0.364280f, 0.370590f, 0.376923f, 0.383277f, 0.389651f, 0.396044f,
     90      0.402455f, 0.408882f, 0.415325f, 0.421783f, 0.428254f, 0.434737f, 0.441231f, 0.447736f,
     91      0.454249f, 0.460770f, 0.467298f, 0.473832f, 0.480370f, 0.486912f, 0.493455f, 0.500000f,
     92      0.506545f, 0.513088f, 0.519630f, 0.526168f, 0.532702f, 0.539230f, 0.545751f, 0.552264f,
     93      0.558769f, 0.565263f, 0.571746f, 0.578217f, 0.584675f, 0.591118f, 0.597545f, 0.603956f,
     94      0.610349f, 0.616723f, 0.623077f, 0.629410f, 0.635720f, 0.642008f, 0.648271f, 0.654508f,
     95      0.660720f, 0.666903f, 0.673059f, 0.679184f, 0.685279f, 0.691342f, 0.697372f, 0.703368f,
     96      0.709330f, 0.715256f, 0.721144f, 0.726995f, 0.732807f, 0.738579f, 0.744311f, 0.750000f,
     97      0.755647f, 0.761249f, 0.766807f, 0.772320f, 0.777785f, 0.783203f, 0.788573f, 0.793893f,
     98      0.799162f, 0.804381f, 0.809547f, 0.814660f, 0.819720f, 0.824724f, 0.829673f, 0.834565f,
     99      0.839400f, 0.844177f, 0.848895f, 0.853553f, 0.858151f, 0.862687f, 0.867161f, 0.871572f,
    100      0.875920f, 0.880203f, 0.884421f, 0.888573f, 0.892658f, 0.896677f, 0.900627f, 0.904508f,
    101      0.908321f, 0.912063f, 0.915735f, 0.919335f, 0.922864f, 0.926320f, 0.929703f, 0.933013f,
    102      0.936248f, 0.939409f, 0.942494f, 0.945503f, 0.948436f, 0.951293f, 0.954072f, 0.956773f,
    103      0.959396f, 0.961940f, 0.964405f, 0.966790f, 0.969096f, 0.971321f, 0.973465f, 0.975528f,
    104      0.977510f, 0.979410f, 0.981228f, 0.982963f, 0.984615f, 0.986185f, 0.987671f, 0.989074f,
    105      0.990393f, 0.991627f, 0.992778f, 0.993844f, 0.994826f, 0.995722f, 0.996534f, 0.997261f,
    106      0.997902f, 0.998459f, 0.998929f, 0.999315f, 0.999615f, 0.999829f, 0.999957f, 1.000000f,
    107 };
    108 
    109 static const int tbands[NB_TBANDS+1] = {
    110      4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 136, 160, 192, 240
    111 };
    112 
    113 #define NB_TONAL_SKIP_BANDS 9
    114 
    115 static opus_val32 silk_resampler_down2_hp(
    116    opus_val32                  *S,                 /* I/O  State vector [ 2 ]                                          */
    117    opus_val32                  *out,               /* O    Output signal [ floor(len/2) ]                              */
    118    const opus_val32            *in,                /* I    Input signal [ len ]                                        */
    119    int                         inLen               /* I    Number of input samples                                     */
    120 )
    121 {
    122    int k, len2 = inLen/2;
    123    opus_val32 in32, out32, out32_hp, Y, X;
    124    opus_val64 hp_ener = 0;
    125    /* Internal variables and state are in Q10 format */
    126    for( k = 0; k < len2; k++ ) {
    127        /* Convert to Q10 */
    128        in32 = in[ 2 * k ];
    129 
    130        /* All-pass section for even input sample */
    131        Y      = SUB32( in32, S[ 0 ] );
    132        X      = MULT16_32_Q15(QCONST16(0.6074371f, 15), Y);
    133        out32  = ADD32( S[ 0 ], X );
    134        S[ 0 ] = ADD32( in32, X );
    135        out32_hp = out32;
    136        /* Convert to Q10 */
    137        in32 = in[ 2 * k + 1 ];
    138 
    139        /* All-pass section for odd input sample, and add to output of previous section */
    140        Y      = SUB32( in32, S[ 1 ] );
    141        X      = MULT16_32_Q15(QCONST16(0.15063f, 15), Y);
    142        out32  = ADD32( out32, S[ 1 ] );
    143        out32  = ADD32( out32, X );
    144        S[ 1 ] = ADD32( in32, X );
    145 
    146        Y      = SUB32( -in32, S[ 2 ] );
    147        X      = MULT16_32_Q15(QCONST16(0.15063f, 15), Y);
    148        out32_hp  = ADD32( out32_hp, S[ 2 ] );
    149        out32_hp  = ADD32( out32_hp, X );
    150        S[ 2 ] = ADD32( -in32, X );
    151 
    152        /* len2 can be up to 480, so we shift by 8 to make it fit. */
    153        hp_ener += SHR64(out32_hp*(opus_val64)out32_hp, 8);
    154        /* Add, convert back to int16 and store to output */
    155        out[ k ] = HALF32(out32);
    156    }
    157 #ifdef FIXED_POINT
    158    /* Fitting in 32 bits. */
    159    hp_ener = hp_ener >> (2*SIG_SHIFT);
    160    if (hp_ener > 2147483647) hp_ener = 2147483647;
    161 #endif
    162    return (opus_val32)hp_ener;
    163 }
    164 
    165 static opus_val32 downmix_and_resample(downmix_func downmix, const void *_x, opus_val32 *y, opus_val32 S[3], int subframe, int offset, int c1, int c2, int C, int Fs)
    166 {
    167   VARDECL(opus_val32, tmp);
    168   int j;
    169   opus_val32 ret = 0;
    170   SAVE_STACK;
    171 
    172   if (subframe==0) return 0;
    173   if (Fs == 48000)
    174   {
    175      subframe *= 2;
    176      offset *= 2;
    177   } else if (Fs == 16000) {
    178      subframe = subframe*2/3;
    179      offset = offset*2/3;
    180   }
    181   else if (Fs != 24000) celt_assert(0);
    182   ALLOC(tmp, subframe, opus_val32);
    183 
    184   downmix(_x, tmp, subframe, offset, c1, c2, C);
    185   if ((c2==-2 && C==2) || c2>-1) {
    186      for (j=0;j<subframe;j++) {
    187         tmp[j] = HALF32(tmp[j]);
    188      }
    189   }
    190   if (Fs == 48000)
    191   {
    192      ret = silk_resampler_down2_hp(S, y, tmp, subframe);
    193   } else if (Fs == 24000) {
    194      OPUS_COPY(y, tmp, subframe);
    195   } else if (Fs == 16000) {
    196      VARDECL(opus_val32, tmp3x);
    197      ALLOC(tmp3x, 3*subframe, opus_val32);
    198      /* Don't do this at home! This resampler is horrible and it's only (barely)
    199         usable for the purpose of the analysis because we don't care about all
    200         the aliasing between 8 kHz and 12 kHz. */
    201      for (j=0;j<subframe;j++)
    202      {
    203         tmp3x[3*j] = tmp[j];
    204         tmp3x[3*j+1] = tmp[j];
    205         tmp3x[3*j+2] = tmp[j];
    206      }
    207      silk_resampler_down2_hp(S, y, tmp3x, 3*subframe);
    208   }
    209   RESTORE_STACK;
    210 #ifndef FIXED_POINT
    211   ret *= 1.f/32768/32768;
    212 #endif
    213   return ret;
    214 }
    215 
    216 void tonality_analysis_init(TonalityAnalysisState *tonal, opus_int32 Fs)
    217 {
    218  /* Initialize reusable fields. */
    219  tonal->arch = opus_select_arch();
    220  tonal->Fs = Fs;
    221  /* Clear remaining fields. */
    222  tonality_analysis_reset(tonal);
    223 }
    224 
    225 void tonality_analysis_reset(TonalityAnalysisState *tonal)
    226 {
    227  /* Clear non-reusable fields. */
    228  char *start = (char*)&tonal->TONALITY_ANALYSIS_RESET_START;
    229  OPUS_CLEAR(start, sizeof(TonalityAnalysisState) - (start - (char*)tonal));
    230 }
    231 
    232 void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len)
    233 {
    234   int pos;
    235   int curr_lookahead;
    236   float tonality_max;
    237   float tonality_avg;
    238   int tonality_count;
    239   int i;
    240   int pos0;
    241   float prob_avg;
    242   float prob_count;
    243   float prob_min, prob_max;
    244   float vad_prob;
    245   int mpos, vpos;
    246   int bandwidth_span;
    247 
    248   pos = tonal->read_pos;
    249   curr_lookahead = tonal->write_pos-tonal->read_pos;
    250   if (curr_lookahead<0)
    251      curr_lookahead += DETECT_SIZE;
    252 
    253   tonal->read_subframe += len/(tonal->Fs/400);
    254   while (tonal->read_subframe>=8)
    255   {
    256      tonal->read_subframe -= 8;
    257      tonal->read_pos++;
    258   }
    259   if (tonal->read_pos>=DETECT_SIZE)
    260      tonal->read_pos-=DETECT_SIZE;
    261 
    262   /* On long frames, look at the second analysis window rather than the first. */
    263   if (len > tonal->Fs/50 && pos != tonal->write_pos)
    264   {
    265      pos++;
    266      if (pos==DETECT_SIZE)
    267         pos=0;
    268   }
    269   if (pos == tonal->write_pos)
    270      pos--;
    271   if (pos<0)
    272      pos = DETECT_SIZE-1;
    273   pos0 = pos;
    274   OPUS_COPY(info_out, &tonal->info[pos], 1);
    275   if (!info_out->valid)
    276      return;
    277   tonality_max = tonality_avg = info_out->tonality;
    278   tonality_count = 1;
    279   /* Look at the neighbouring frames and pick largest bandwidth found (to be safe). */
    280   bandwidth_span = 6;
    281   /* If possible, look ahead for a tone to compensate for the delay in the tone detector. */
    282   for (i=0;i<3;i++)
    283   {
    284      pos++;
    285      if (pos==DETECT_SIZE)
    286         pos = 0;
    287      if (pos == tonal->write_pos)
    288         break;
    289      tonality_max = MAX32(tonality_max, tonal->info[pos].tonality);
    290      tonality_avg += tonal->info[pos].tonality;
    291      tonality_count++;
    292      info_out->bandwidth = IMAX(info_out->bandwidth, tonal->info[pos].bandwidth);
    293      bandwidth_span--;
    294   }
    295   pos = pos0;
    296   /* Look back in time to see if any has a wider bandwidth than the current frame. */
    297   for (i=0;i<bandwidth_span;i++)
    298   {
    299      pos--;
    300      if (pos < 0)
    301         pos = DETECT_SIZE-1;
    302      if (pos == tonal->write_pos)
    303         break;
    304      info_out->bandwidth = IMAX(info_out->bandwidth, tonal->info[pos].bandwidth);
    305   }
    306   info_out->tonality = MAX32(tonality_avg/tonality_count, tonality_max-.2f);
    307 
    308   mpos = vpos = pos0;
    309   /* If we have enough look-ahead, compensate for the ~5-frame delay in the music prob and
    310      ~1 frame delay in the VAD prob. */
    311   if (curr_lookahead > 15)
    312   {
    313      mpos += 5;
    314      if (mpos>=DETECT_SIZE)
    315         mpos -= DETECT_SIZE;
    316      vpos += 1;
    317      if (vpos>=DETECT_SIZE)
    318         vpos -= DETECT_SIZE;
    319   }
    320 
    321   /* The following calculations attempt to minimize a "badness function"
    322      for the transition. When switching from speech to music, the badness
    323      of switching at frame k is
    324      b_k = S*v_k + \sum_{i=0}^{k-1} v_i*(p_i - T)
    325      where
    326      v_i is the activity probability (VAD) at frame i,
    327      p_i is the music probability at frame i
    328      T is the probability threshold for switching
    329      S is the penalty for switching during active audio rather than silence
    330      the current frame has index i=0
    331 
    332      Rather than apply badness to directly decide when to switch, what we compute
    333      instead is the threshold for which the optimal switching point is now. When
    334      considering whether to switch now (frame 0) or at frame k, we have:
    335      S*v_0 = S*v_k + \sum_{i=0}^{k-1} v_i*(p_i - T)
    336      which gives us:
    337      T = ( \sum_{i=0}^{k-1} v_i*p_i + S*(v_k-v_0) ) / ( \sum_{i=0}^{k-1} v_i )
    338      We take the min threshold across all positive values of k (up to the maximum
    339      amount of lookahead we have) to give us the threshold for which the current
    340      frame is the optimal switch point.
    341 
    342      The last step is that we need to consider whether we want to switch at all.
    343      For that we use the average of the music probability over the entire window.
    344      If the threshold is higher than that average we're not going to
    345      switch, so we compute a min with the average as well. The result of all these
    346      min operations is music_prob_min, which gives the threshold for switching to music
    347      if we're currently encoding for speech.
    348 
    349      We do the exact opposite to compute music_prob_max which is used for switching
    350      from music to speech.
    351    */
    352   prob_min = 1.f;
    353   prob_max = 0.f;
    354   vad_prob = tonal->info[vpos].activity_probability;
    355   prob_count = MAX16(.1f, vad_prob);
    356   prob_avg = MAX16(.1f, vad_prob)*tonal->info[mpos].music_prob;
    357   while (1)
    358   {
    359      float pos_vad;
    360      mpos++;
    361      if (mpos==DETECT_SIZE)
    362         mpos = 0;
    363      if (mpos == tonal->write_pos)
    364         break;
    365      vpos++;
    366      if (vpos==DETECT_SIZE)
    367         vpos = 0;
    368      if (vpos == tonal->write_pos)
    369         break;
    370      pos_vad = tonal->info[vpos].activity_probability;
    371      prob_min = MIN16((prob_avg - TRANSITION_PENALTY*(vad_prob - pos_vad))/prob_count, prob_min);
    372      prob_max = MAX16((prob_avg + TRANSITION_PENALTY*(vad_prob - pos_vad))/prob_count, prob_max);
    373      prob_count += MAX16(.1f, pos_vad);
    374      prob_avg += MAX16(.1f, pos_vad)*tonal->info[mpos].music_prob;
    375   }
    376   info_out->music_prob = prob_avg/prob_count;
    377   prob_min = MIN16(prob_avg/prob_count, prob_min);
    378   prob_max = MAX16(prob_avg/prob_count, prob_max);
    379   prob_min = MAX16(prob_min, 0.f);
    380   prob_max = MIN16(prob_max, 1.f);
    381 
    382   /* If we don't have enough look-ahead, do our best to make a decent decision. */
    383   if (curr_lookahead < 10)
    384   {
    385      float pmin, pmax;
    386      pmin = prob_min;
    387      pmax = prob_max;
    388      pos = pos0;
    389      /* Look for min/max in the past. */
    390      for (i=0;i<IMIN(tonal->count-1, 15);i++)
    391      {
    392         pos--;
    393         if (pos < 0)
    394            pos = DETECT_SIZE-1;
    395         pmin = MIN16(pmin, tonal->info[pos].music_prob);
    396         pmax = MAX16(pmax, tonal->info[pos].music_prob);
    397      }
    398      /* Bias against switching on active audio. */
    399      pmin = MAX16(0.f, pmin - .1f*vad_prob);
    400      pmax = MIN16(1.f, pmax + .1f*vad_prob);
    401      prob_min += (1.f-.1f*curr_lookahead)*(pmin - prob_min);
    402      prob_max += (1.f-.1f*curr_lookahead)*(pmax - prob_max);
    403   }
    404   info_out->music_prob_min = prob_min;
    405   info_out->music_prob_max = prob_max;
    406 
    407   /* printf("%f %f %f %f %f\n", prob_min, prob_max, prob_avg/prob_count, vad_prob, info_out->music_prob); */
    408 }
    409 
    410 static const float std_feature_bias[9] = {
    411      5.684947f, 3.475288f, 1.770634f, 1.599784f, 3.773215f,
    412      2.163313f, 1.260756f, 1.116868f, 1.918795f
    413 };
    414 
    415 #define LEAKAGE_OFFSET 2.5f
    416 #define LEAKAGE_SLOPE 2.f
    417 
    418 #ifdef FIXED_POINT
    419 /* For fixed-point, the input is +/-2^15 shifted up by SIG_SHIFT, so we need to
    420   compensate for that in the energy. */
    421 #define SCALE_COMPENS (1.f/((opus_int32)1<<(15+SIG_SHIFT)))
    422 #define SCALE_ENER(e) ((SCALE_COMPENS*SCALE_COMPENS)*(e))
    423 #else
    424 #define SCALE_ENER(e) ((1.f/32768/32768)*e)
    425 #endif
    426 
    427 #ifdef FIXED_POINT
    428 static int is_digital_silence32(const opus_val32* pcm, int frame_size, int channels, int lsb_depth)
    429 {
    430   int silence = 0;
    431   opus_val32 sample_max = 0;
    432 #ifdef MLP_TRAINING
    433   return 0;
    434 #endif
    435   sample_max = celt_maxabs32(pcm, frame_size*channels);
    436 
    437   silence = (sample_max == 0);
    438   (void)lsb_depth;
    439   return silence;
    440 }
    441 #else
    442 #define is_digital_silence32(pcm, frame_size, channels, lsb_depth) is_digital_silence(pcm, frame_size, channels, lsb_depth)
    443 #endif
    444 
    445 static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix)
    446 {
    447    int i, b;
    448    const kiss_fft_state *kfft;
    449    VARDECL(kiss_fft_cpx, in);
    450    VARDECL(kiss_fft_cpx, out);
    451    int N = 480, N2=240;
    452    float * OPUS_RESTRICT A = tonal->angle;
    453    float * OPUS_RESTRICT dA = tonal->d_angle;
    454    float * OPUS_RESTRICT d2A = tonal->d2_angle;
    455    VARDECL(float, tonality);
    456    VARDECL(float, noisiness);
    457    float band_tonality[NB_TBANDS];
    458    float logE[NB_TBANDS];
    459    float BFCC[8];
    460    float features[25];
    461    float frame_tonality;
    462    float max_frame_tonality;
    463    /*float tw_sum=0;*/
    464    float frame_noisiness;
    465    const float pi4 = (float)(M_PI*M_PI*M_PI*M_PI);
    466    float slope=0;
    467    float frame_stationarity;
    468    float relativeE;
    469    float frame_probs[2];
    470    float alpha, alphaE, alphaE2;
    471    float frame_loudness;
    472    float bandwidth_mask;
    473    int is_masked[NB_TBANDS+1];
    474    int bandwidth=0;
    475    float maxE = 0;
    476    float noise_floor;
    477    int remaining;
    478    AnalysisInfo *info;
    479    float hp_ener;
    480    float tonality2[240];
    481    float midE[8];
    482    float spec_variability=0;
    483    float band_log2[NB_TBANDS+1];
    484    float leakage_from[NB_TBANDS+1];
    485    float leakage_to[NB_TBANDS+1];
    486    float layer_out[MAX_NEURONS];
    487    float below_max_pitch;
    488    float above_max_pitch;
    489    int is_silence;
    490    SAVE_STACK;
    491 
    492    if (!tonal->initialized)
    493    {
    494       tonal->mem_fill = 240;
    495       tonal->initialized = 1;
    496    }
    497    alpha = 1.f/IMIN(10, 1+tonal->count);
    498    alphaE = 1.f/IMIN(25, 1+tonal->count);
    499    /* Noise floor related decay for bandwidth detection: -2.2 dB/second */
    500    alphaE2 = 1.f/IMIN(100, 1+tonal->count);
    501    if (tonal->count <= 1) alphaE2 = 1;
    502 
    503    if (tonal->Fs == 48000)
    504    {
    505       /* len and offset are now at 24 kHz. */
    506       len/= 2;
    507       offset /= 2;
    508    } else if (tonal->Fs == 16000) {
    509       len = 3*len/2;
    510       offset = 3*offset/2;
    511    }
    512 
    513    kfft = celt_mode->mdct.kfft[0];
    514    tonal->hp_ener_accum += (float)downmix_and_resample(downmix, x,
    515          &tonal->inmem[tonal->mem_fill], tonal->downmix_state,
    516          IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C, tonal->Fs);
    517    if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)
    518    {
    519       tonal->mem_fill += len;
    520       /* Don't have enough to update the analysis */
    521       RESTORE_STACK;
    522       return;
    523    }
    524    hp_ener = tonal->hp_ener_accum;
    525    info = &tonal->info[tonal->write_pos++];
    526    if (tonal->write_pos>=DETECT_SIZE)
    527       tonal->write_pos-=DETECT_SIZE;
    528 
    529    is_silence = is_digital_silence32(tonal->inmem, ANALYSIS_BUF_SIZE, 1, lsb_depth);
    530 
    531    ALLOC(in, 480, kiss_fft_cpx);
    532    ALLOC(out, 480, kiss_fft_cpx);
    533    ALLOC(tonality, 240, float);
    534    ALLOC(noisiness, 240, float);
    535    for (i=0;i<N2;i++)
    536    {
    537       float w = analysis_window[i];
    538       in[i].r = (kiss_fft_scalar)(w*tonal->inmem[i]);
    539       in[i].i = (kiss_fft_scalar)(w*tonal->inmem[N2+i]);
    540       in[N-i-1].r = (kiss_fft_scalar)(w*tonal->inmem[N-i-1]);
    541       in[N-i-1].i = (kiss_fft_scalar)(w*tonal->inmem[N+N2-i-1]);
    542    }
    543    OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);
    544    remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
    545    tonal->hp_ener_accum = (float)downmix_and_resample(downmix, x,
    546          &tonal->inmem[240], tonal->downmix_state, remaining,
    547          offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C, tonal->Fs);
    548    tonal->mem_fill = 240 + remaining;
    549    if (is_silence)
    550    {
    551       /* On silence, copy the previous analysis. */
    552       int prev_pos = tonal->write_pos-2;
    553       if (prev_pos < 0)
    554          prev_pos += DETECT_SIZE;
    555       OPUS_COPY(info, &tonal->info[prev_pos], 1);
    556       RESTORE_STACK;
    557       return;
    558    }
    559    opus_fft(kfft, in, out, tonal->arch);
    560 #ifndef FIXED_POINT
    561    /* If there's any NaN on the input, the entire output will be NaN, so we only need to check one value. */
    562    if (celt_isnan(out[0].r))
    563    {
    564       info->valid = 0;
    565       RESTORE_STACK;
    566       return;
    567    }
    568 #endif
    569 
    570    for (i=1;i<N2;i++)
    571    {
    572       float X1r, X2r, X1i, X2i;
    573       float angle, d_angle, d2_angle;
    574       float angle2, d_angle2, d2_angle2;
    575       float mod1, mod2, avg_mod;
    576       X1r = (float)out[i].r+out[N-i].r;
    577       X1i = (float)out[i].i-out[N-i].i;
    578       X2r = (float)out[i].i+out[N-i].i;
    579       X2i = (float)out[N-i].r-out[i].r;
    580 
    581       angle = (float)(.5f/M_PI)*fast_atan2f(X1i, X1r);
    582       d_angle = angle - A[i];
    583       d2_angle = d_angle - dA[i];
    584 
    585       angle2 = (float)(.5f/M_PI)*fast_atan2f(X2i, X2r);
    586       d_angle2 = angle2 - angle;
    587       d2_angle2 = d_angle2 - d_angle;
    588 
    589       mod1 = d2_angle - (float)float2int(d2_angle);
    590       noisiness[i] = ABS16(mod1);
    591       mod1 *= mod1;
    592       mod1 *= mod1;
    593 
    594       mod2 = d2_angle2 - (float)float2int(d2_angle2);
    595       noisiness[i] += ABS16(mod2);
    596       mod2 *= mod2;
    597       mod2 *= mod2;
    598 
    599       avg_mod = .25f*(d2A[i]+mod1+2*mod2);
    600       /* This introduces an extra delay of 2 frames in the detection. */
    601       tonality[i] = 1.f/(1.f+40.f*16.f*pi4*avg_mod)-.015f;
    602       /* No delay on this detection, but it's less reliable. */
    603       tonality2[i] = 1.f/(1.f+40.f*16.f*pi4*mod2)-.015f;
    604 
    605       A[i] = angle2;
    606       dA[i] = d_angle2;
    607       d2A[i] = mod2;
    608    }
    609    for (i=2;i<N2-1;i++)
    610    {
    611       float tt = MIN32(tonality2[i], MAX32(tonality2[i-1], tonality2[i+1]));
    612       tonality[i] = .9f*MAX32(tonality[i], tt-.1f);
    613    }
    614    frame_tonality = 0;
    615    max_frame_tonality = 0;
    616    /*tw_sum = 0;*/
    617    info->activity = 0;
    618    frame_noisiness = 0;
    619    frame_stationarity = 0;
    620    if (!tonal->count)
    621    {
    622       for (b=0;b<NB_TBANDS;b++)
    623       {
    624          tonal->lowE[b] = 1e10;
    625          tonal->highE[b] = -1e10;
    626       }
    627    }
    628    relativeE = 0;
    629    frame_loudness = 0;
    630    /* The energy of the very first band is special because of DC. */
    631    {
    632       float E = 0;
    633       float X1r, X2r;
    634       X1r = 2*(float)out[0].r;
    635       X2r = 2*(float)out[0].i;
    636       E = X1r*X1r + X2r*X2r;
    637       for (i=1;i<4;i++)
    638       {
    639          float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r
    640                     + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;
    641          E += binE;
    642       }
    643       E = SCALE_ENER(E);
    644       band_log2[0] = .5f*1.442695f*(float)log(E+1e-10f);
    645    }
    646    for (b=0;b<NB_TBANDS;b++)
    647    {
    648       float E=0, tE=0, nE=0;
    649       float L1, L2;
    650       float stationarity;
    651       for (i=tbands[b];i<tbands[b+1];i++)
    652       {
    653          float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r
    654                     + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;
    655          binE = SCALE_ENER(binE);
    656          E += binE;
    657          tE += binE*MAX32(0, tonality[i]);
    658          nE += binE*2.f*(.5f-noisiness[i]);
    659       }
    660 #ifndef FIXED_POINT
    661       /* Check for extreme band energies that could cause NaNs later. */
    662       if (!(E<1e9f) || celt_isnan(E))
    663       {
    664          info->valid = 0;
    665          RESTORE_STACK;
    666          return;
    667       }
    668 #endif
    669 
    670       tonal->E[tonal->E_count][b] = E;
    671       frame_noisiness += nE/(1e-15f+E);
    672 
    673       frame_loudness += (float)sqrt(E+1e-10f);
    674       logE[b] = (float)log(E+1e-10f);
    675       band_log2[b+1] = .5f*1.442695f*(float)log(E+1e-10f);
    676       tonal->logE[tonal->E_count][b] = logE[b];
    677       if (tonal->count==0)
    678          tonal->highE[b] = tonal->lowE[b] = logE[b];
    679       if (tonal->highE[b] > tonal->lowE[b] + 7.5)
    680       {
    681          if (tonal->highE[b] - logE[b] > logE[b] - tonal->lowE[b])
    682             tonal->highE[b] -= .01f;
    683          else
    684             tonal->lowE[b] += .01f;
    685       }
    686       if (logE[b] > tonal->highE[b])
    687       {
    688          tonal->highE[b] = logE[b];
    689          tonal->lowE[b] = MAX32(tonal->highE[b]-15, tonal->lowE[b]);
    690       } else if (logE[b] < tonal->lowE[b])
    691       {
    692          tonal->lowE[b] = logE[b];
    693          tonal->highE[b] = MIN32(tonal->lowE[b]+15, tonal->highE[b]);
    694       }
    695       relativeE += (logE[b]-tonal->lowE[b])/(1e-5f + (tonal->highE[b]-tonal->lowE[b]));
    696 
    697       L1=L2=0;
    698       for (i=0;i<NB_FRAMES;i++)
    699       {
    700          L1 += (float)sqrt(tonal->E[i][b]);
    701          L2 += tonal->E[i][b];
    702       }
    703 
    704       stationarity = MIN16(0.99f,L1/(float)sqrt(1e-15+NB_FRAMES*L2));
    705       stationarity *= stationarity;
    706       stationarity *= stationarity;
    707       frame_stationarity += stationarity;
    708       /*band_tonality[b] = tE/(1e-15+E)*/;
    709       band_tonality[b] = MAX16(tE/(1e-15f+E), stationarity*tonal->prev_band_tonality[b]);
    710 #if 0
    711       if (b>=NB_TONAL_SKIP_BANDS)
    712       {
    713          frame_tonality += tweight[b]*band_tonality[b];
    714          tw_sum += tweight[b];
    715       }
    716 #else
    717       frame_tonality += band_tonality[b];
    718       if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS)
    719          frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS];
    720 #endif
    721       max_frame_tonality = MAX16(max_frame_tonality, (1.f+.03f*(b-NB_TBANDS))*frame_tonality);
    722       slope += band_tonality[b]*(b-8);
    723       /*printf("%f %f ", band_tonality[b], stationarity);*/
    724       tonal->prev_band_tonality[b] = band_tonality[b];
    725    }
    726 
    727    leakage_from[0] = band_log2[0];
    728    leakage_to[0] = band_log2[0] - LEAKAGE_OFFSET;
    729    for (b=1;b<NB_TBANDS+1;b++)
    730    {
    731       float leak_slope = LEAKAGE_SLOPE*(tbands[b]-tbands[b-1])/4;
    732       leakage_from[b] = MIN16(leakage_from[b-1]+leak_slope, band_log2[b]);
    733       leakage_to[b] = MAX16(leakage_to[b-1]-leak_slope, band_log2[b]-LEAKAGE_OFFSET);
    734    }
    735    for (b=NB_TBANDS-2;b>=0;b--)
    736    {
    737       float leak_slope = LEAKAGE_SLOPE*(tbands[b+1]-tbands[b])/4;
    738       leakage_from[b] = MIN16(leakage_from[b+1]+leak_slope, leakage_from[b]);
    739       leakage_to[b] = MAX16(leakage_to[b+1]-leak_slope, leakage_to[b]);
    740    }
    741    celt_assert(NB_TBANDS+1 <= LEAK_BANDS);
    742    for (b=0;b<NB_TBANDS+1;b++)
    743    {
    744       /* leak_boost[] is made up of two terms. The first, based on leakage_to[],
    745          represents the boost needed to overcome the amount of analysis leakage
    746          cause in a weaker band b by louder neighbouring bands.
    747          The second, based on leakage_from[], applies to a loud band b for
    748          which the quantization noise causes synthesis leakage to the weaker
    749          neighbouring bands. */
    750       float boost = MAX16(0, leakage_to[b] - band_log2[b]) +
    751             MAX16(0, band_log2[b] - (leakage_from[b]+LEAKAGE_OFFSET));
    752       info->leak_boost[b] = IMIN(255, (int)floor(.5 + 64.f*boost));
    753    }
    754    for (;b<LEAK_BANDS;b++) info->leak_boost[b] = 0;
    755 
    756    for (i=0;i<NB_FRAMES;i++)
    757    {
    758       int j;
    759       float mindist = 1e15f;
    760       for (j=0;j<NB_FRAMES;j++)
    761       {
    762          int k;
    763          float dist=0;
    764          for (k=0;k<NB_TBANDS;k++)
    765          {
    766             float tmp;
    767             tmp = tonal->logE[i][k] - tonal->logE[j][k];
    768             dist += tmp*tmp;
    769          }
    770          if (j!=i)
    771             mindist = MIN32(mindist, dist);
    772       }
    773       spec_variability += mindist;
    774    }
    775    spec_variability = (float)sqrt(spec_variability/NB_FRAMES/NB_TBANDS);
    776    bandwidth_mask = 0;
    777    bandwidth = 0;
    778    maxE = 0;
    779    noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8)));
    780    noise_floor *= noise_floor;
    781    below_max_pitch=0;
    782    above_max_pitch=0;
    783    for (b=0;b<NB_TBANDS;b++)
    784    {
    785       float E=0;
    786       float Em;
    787       int band_start, band_end;
    788       /* Keep a margin of 300 Hz for aliasing */
    789       band_start = tbands[b];
    790       band_end = tbands[b+1];
    791       for (i=band_start;i<band_end;i++)
    792       {
    793          float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r
    794                     + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;
    795          E += binE;
    796       }
    797       E = SCALE_ENER(E);
    798       maxE = MAX32(maxE, E);
    799       if (band_start < 64)
    800       {
    801          below_max_pitch += E;
    802       } else {
    803          above_max_pitch += E;
    804       }
    805       tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E);
    806       Em = MAX32(E, tonal->meanE[b]);
    807       /* Consider the band "active" only if all these conditions are met:
    808          1) less than 90 dB below the peak band (maximal masking possible considering
    809             both the ATH and the loudness-dependent slope of the spreading function)
    810          2) above the PCM quantization noise floor
    811          We use b+1 because the first CELT band isn't included in tbands[]
    812       */
    813       if (E*1e9f > maxE && (Em > 3*noise_floor*(band_end-band_start) || E > noise_floor*(band_end-band_start)))
    814          bandwidth = b+1;
    815       /* Check if the band is masked (see below). */
    816       is_masked[b] = E < (tonal->prev_bandwidth >= b+1  ? .01f : .05f)*bandwidth_mask;
    817       /* Use a simple follower with 13 dB/Bark slope for spreading function. */
    818       bandwidth_mask = MAX32(.05f*bandwidth_mask, E);
    819    }
    820    /* Special case for the last two bands, for which we don't have spectrum but only
    821       the energy above 12 kHz. The difficulty here is that the high-pass we use
    822       leaks some LF energy, so we need to increase the threshold without accidentally cutting
    823       off the band. */
    824    if (tonal->Fs == 48000) {
    825       float noise_ratio;
    826       float Em;
    827       float E = hp_ener*(1.f/(60*60));
    828       noise_ratio = tonal->prev_bandwidth==20 ? 10.f : 30.f;
    829 
    830 #ifdef FIXED_POINT
    831       /* silk_resampler_down2_hp() shifted right by an extra 8 bits. */
    832       E *= 256.f*(1.f/Q15ONE)*(1.f/Q15ONE);
    833 #endif
    834       above_max_pitch += E;
    835       tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E);
    836       Em = MAX32(E, tonal->meanE[b]);
    837       if (Em > 3*noise_ratio*noise_floor*160 || E > noise_ratio*noise_floor*160)
    838          bandwidth = 20;
    839       /* Check if the band is masked (see below). */
    840       is_masked[b] = E < (tonal->prev_bandwidth == 20  ? .01f : .05f)*bandwidth_mask;
    841    }
    842    if (above_max_pitch > below_max_pitch)
    843       info->max_pitch_ratio = below_max_pitch/above_max_pitch;
    844    else
    845       info->max_pitch_ratio = 1;
    846    /* In some cases, resampling aliasing can create a small amount of energy in the first band
    847       being cut. So if the last band is masked, we don't include it.  */
    848    if (bandwidth == 20 && is_masked[NB_TBANDS])
    849       bandwidth-=2;
    850    else if (bandwidth > 0 && bandwidth <= NB_TBANDS && is_masked[bandwidth-1])
    851       bandwidth--;
    852    if (tonal->count<=2)
    853       bandwidth = 20;
    854    frame_loudness = 20*(float)log10(frame_loudness);
    855    tonal->Etracker = MAX32(tonal->Etracker-.003f, frame_loudness);
    856    tonal->lowECount *= (1-alphaE);
    857    if (frame_loudness < tonal->Etracker-30)
    858       tonal->lowECount += alphaE;
    859 
    860    for (i=0;i<8;i++)
    861    {
    862       float sum=0;
    863       for (b=0;b<16;b++)
    864          sum += dct_table[i*16+b]*logE[b];
    865       BFCC[i] = sum;
    866    }
    867    for (i=0;i<8;i++)
    868    {
    869       float sum=0;
    870       for (b=0;b<16;b++)
    871          sum += dct_table[i*16+b]*.5f*(tonal->highE[b]+tonal->lowE[b]);
    872       midE[i] = sum;
    873    }
    874 
    875    frame_stationarity /= NB_TBANDS;
    876    relativeE /= NB_TBANDS;
    877    if (tonal->count<10)
    878       relativeE = .5f;
    879    frame_noisiness /= NB_TBANDS;
    880 #if 1
    881    info->activity = frame_noisiness + (1-frame_noisiness)*relativeE;
    882 #else
    883    info->activity = .5*(1+frame_noisiness-frame_stationarity);
    884 #endif
    885    frame_tonality = (max_frame_tonality/(NB_TBANDS-NB_TONAL_SKIP_BANDS));
    886    frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8f);
    887    tonal->prev_tonality = frame_tonality;
    888 
    889    slope /= 8*8;
    890    info->tonality_slope = slope;
    891 
    892    tonal->E_count = (tonal->E_count+1)%NB_FRAMES;
    893    tonal->count = IMIN(tonal->count+1, ANALYSIS_COUNT_MAX);
    894    info->tonality = frame_tonality;
    895 
    896    for (i=0;i<4;i++)
    897       features[i] = -0.12299f*(BFCC[i]+tonal->mem[i+24]) + 0.49195f*(tonal->mem[i]+tonal->mem[i+16]) + 0.69693f*tonal->mem[i+8] - 1.4349f*tonal->cmean[i];
    898 
    899    for (i=0;i<4;i++)
    900       tonal->cmean[i] = (1-alpha)*tonal->cmean[i] + alpha*BFCC[i];
    901 
    902    for (i=0;i<4;i++)
    903        features[4+i] = 0.63246f*(BFCC[i]-tonal->mem[i+24]) + 0.31623f*(tonal->mem[i]-tonal->mem[i+16]);
    904    for (i=0;i<3;i++)
    905        features[8+i] = 0.53452f*(BFCC[i]+tonal->mem[i+24]) - 0.26726f*(tonal->mem[i]+tonal->mem[i+16]) -0.53452f*tonal->mem[i+8];
    906 
    907    if (tonal->count > 5)
    908    {
    909       for (i=0;i<9;i++)
    910          tonal->std[i] = (1-alpha)*tonal->std[i] + alpha*features[i]*features[i];
    911    }
    912    for (i=0;i<4;i++)
    913       features[i] = BFCC[i]-midE[i];
    914 
    915    for (i=0;i<8;i++)
    916    {
    917       tonal->mem[i+24] = tonal->mem[i+16];
    918       tonal->mem[i+16] = tonal->mem[i+8];
    919       tonal->mem[i+8] = tonal->mem[i];
    920       tonal->mem[i] = BFCC[i];
    921    }
    922    for (i=0;i<9;i++)
    923       features[11+i] = (float)sqrt(tonal->std[i]) - std_feature_bias[i];
    924    features[18] = spec_variability - 0.78f;
    925    features[20] = info->tonality - 0.154723f;
    926    features[21] = info->activity - 0.724643f;
    927    features[22] = frame_stationarity - 0.743717f;
    928    features[23] = info->tonality_slope + 0.069216f;
    929    features[24] = tonal->lowECount - 0.067930f;
    930 
    931    analysis_compute_dense(&layer0, layer_out, features);
    932    analysis_compute_gru(&layer1, tonal->rnn_state, layer_out);
    933    analysis_compute_dense(&layer2, frame_probs, tonal->rnn_state);
    934 
    935    /* Probability of speech or music vs noise */
    936    info->activity_probability = frame_probs[1];
    937    info->music_prob = frame_probs[0];
    938 
    939    /*printf("%f %f %f\n", frame_probs[0], frame_probs[1], info->music_prob);*/
    940 #ifdef MLP_TRAINING
    941    for (i=0;i<25;i++)
    942       printf("%f ", features[i]);
    943    printf("\n");
    944 #endif
    945 
    946    info->bandwidth = bandwidth;
    947    tonal->prev_bandwidth = bandwidth;
    948    /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/
    949    info->noisiness = frame_noisiness;
    950    info->valid = 1;
    951    RESTORE_STACK;
    952 }
    953 
    954 void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,
    955                 int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
    956                 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
    957 {
    958   int offset;
    959   int pcm_len;
    960 
    961   analysis_frame_size -= analysis_frame_size&1;
    962   if (analysis_pcm != NULL)
    963   {
    964      /* Avoid overflow/wrap-around of the analysis buffer */
    965      analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/50, analysis_frame_size);
    966 
    967      pcm_len = analysis_frame_size - analysis->analysis_offset;
    968      offset = analysis->analysis_offset;
    969      while (pcm_len>0) {
    970         tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(Fs/50, pcm_len), offset, c1, c2, C, lsb_depth, downmix);
    971         offset += Fs/50;
    972         pcm_len -= Fs/50;
    973      }
    974      analysis->analysis_offset = analysis_frame_size;
    975 
    976      analysis->analysis_offset -= frame_size;
    977   }
    978 
    979   tonality_get_info(analysis, analysis_info, frame_size);
    980 }
    981 
    982 #endif /* DISABLE_FLOAT_API */