tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

pitch_analysis_core_FIX.c (34345B)


      1 /***********************************************************************
      2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
      3 Redistribution and use in source and binary forms, with or without
      4 modification, are permitted provided that the following conditions
      5 are met:
      6 - Redistributions of source code must retain the above copyright notice,
      7 this list of conditions and the following disclaimer.
      8 - Redistributions in binary form must reproduce the above copyright
      9 notice, this list of conditions and the following disclaimer in the
     10 documentation and/or other materials provided with the distribution.
     11 - Neither the name of Internet Society, IETF or IETF Trust, nor the
     12 names of specific contributors, may be used to endorse or promote
     13 products derived from this software without specific prior written
     14 permission.
     15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     25 POSSIBILITY OF SUCH DAMAGE.
     26 ***********************************************************************/
     27 
     28 #ifdef HAVE_CONFIG_H
     29 #include "config.h"
     30 #endif
     31 
     32 /***********************************************************
     33 * Pitch analyser function
     34 ********************************************************** */
     35 #include "SigProc_FIX.h"
     36 #include "pitch_est_defines.h"
     37 #include "stack_alloc.h"
     38 #include "debug.h"
     39 #include "pitch.h"
     40 
     41 #define SCRATCH_SIZE    22
     42 #define SF_LENGTH_4KHZ  ( PE_SUBFR_LENGTH_MS * 4 )
     43 #define SF_LENGTH_8KHZ  ( PE_SUBFR_LENGTH_MS * 8 )
     44 #define MIN_LAG_4KHZ    ( PE_MIN_LAG_MS * 4 )
     45 #define MIN_LAG_8KHZ    ( PE_MIN_LAG_MS * 8 )
     46 #define MAX_LAG_4KHZ    ( PE_MAX_LAG_MS * 4 )
     47 #define MAX_LAG_8KHZ    ( PE_MAX_LAG_MS * 8 - 1 )
     48 #define CSTRIDE_4KHZ    ( MAX_LAG_4KHZ + 1 - MIN_LAG_4KHZ )
     49 #define CSTRIDE_8KHZ    ( MAX_LAG_8KHZ + 3 - ( MIN_LAG_8KHZ - 2 ) )
     50 #define D_COMP_MIN      ( MIN_LAG_8KHZ - 3 )
     51 #define D_COMP_MAX      ( MAX_LAG_8KHZ + 4 )
     52 #define D_COMP_STRIDE   ( D_COMP_MAX - D_COMP_MIN )
     53 
     54 typedef opus_int32 silk_pe_stage3_vals[ PE_NB_STAGE3_LAGS ];
     55 
     56 /************************************************************/
     57 /* Internally used functions                                */
     58 /************************************************************/
     59 static void silk_P_Ana_calc_corr_st3(
     60    silk_pe_stage3_vals cross_corr_st3[],              /* O 3 DIM correlation array */
     61    const opus_int16  frame[],                         /* I vector to correlate         */
     62    opus_int          start_lag,                       /* I lag offset to search around */
     63    opus_int          sf_length,                       /* I length of a 5 ms subframe   */
     64    opus_int          nb_subfr,                        /* I number of subframes         */
     65    opus_int          complexity,                      /* I Complexity setting          */
     66    int               arch                             /* I Run-time architecture       */
     67 );
     68 
     69 static void silk_P_Ana_calc_energy_st3(
     70    silk_pe_stage3_vals energies_st3[],                /* O 3 DIM energy array */
     71    const opus_int16  frame[],                         /* I vector to calc energy in    */
     72    opus_int          start_lag,                       /* I lag offset to search around */
     73    opus_int          sf_length,                       /* I length of one 5 ms subframe */
     74    opus_int          nb_subfr,                        /* I number of subframes         */
     75    opus_int          complexity,                      /* I Complexity setting          */
     76    int               arch                             /* I Run-time architecture       */
     77 );
     78 
     79 /*************************************************************/
     80 /*      FIXED POINT CORE PITCH ANALYSIS FUNCTION             */
     81 /*************************************************************/
     82 opus_int silk_pitch_analysis_core(                  /* O    Voicing estimate: 0 voiced, 1 unvoiced                      */
     83    const opus_int16            *frame_unscaled,    /* I    Signal of length PE_FRAME_LENGTH_MS*Fs_kHz                  */
     84    opus_int                    *pitch_out,         /* O    4 pitch lag values                                          */
     85    opus_int16                  *lagIndex,          /* O    Lag Index                                                   */
     86    opus_int8                   *contourIndex,      /* O    Pitch contour Index                                         */
     87    opus_int                    *LTPCorr_Q15,       /* I/O  Normalized correlation; input: value from previous frame    */
     88    opus_int                    prevLag,            /* I    Last lag of previous frame; set to zero is unvoiced         */
     89    const opus_int32            search_thres1_Q16,  /* I    First stage threshold for lag candidates 0 - 1              */
     90    const opus_int              search_thres2_Q13,  /* I    Final threshold for lag candidates 0 - 1                    */
     91    const opus_int              Fs_kHz,             /* I    Sample frequency (kHz)                                      */
     92    const opus_int              complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
     93    const opus_int              nb_subfr,           /* I    number of 5 ms subframes                                    */
     94    int                         arch                /* I    Run-time architecture                                       */
     95 )
     96 {
     97    VARDECL( opus_int16, frame_8kHz_buf );
     98    VARDECL( opus_int16, frame_4kHz );
     99    VARDECL( opus_int16, frame_scaled );
    100    opus_int32 filt_state[ 6 ];
    101    const opus_int16 *frame, *frame_8kHz;
    102    opus_int   i, k, d, j;
    103    VARDECL( opus_int16, C );
    104    VARDECL( opus_int32, xcorr32 );
    105    const opus_int16 *target_ptr, *basis_ptr;
    106    opus_int32 cross_corr, normalizer, energy, energy_basis, energy_target;
    107    opus_int   d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp, shift;
    108    VARDECL( opus_int16, d_comp );
    109    opus_int32 sum, threshold, lag_counter;
    110    opus_int   CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new;
    111    opus_int32 CC[ PE_NB_CBKS_STAGE2_EXT ], CCmax, CCmax_b, CCmax_new_b, CCmax_new;
    112    VARDECL( silk_pe_stage3_vals, energies_st3 );
    113    VARDECL( silk_pe_stage3_vals, cross_corr_st3 );
    114    opus_int   frame_length, frame_length_8kHz, frame_length_4kHz;
    115    opus_int   sf_length;
    116    opus_int   min_lag;
    117    opus_int   max_lag;
    118    opus_int32 contour_bias_Q15, diff;
    119    opus_int   nb_cbk_search, cbk_size;
    120    opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13;
    121    const opus_int8 *Lag_CB_ptr;
    122    SAVE_STACK;
    123 
    124    /* Check for valid sampling frequency */
    125    celt_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );
    126 
    127    /* Check for valid complexity setting */
    128    celt_assert( complexity >= SILK_PE_MIN_COMPLEX );
    129    celt_assert( complexity <= SILK_PE_MAX_COMPLEX );
    130 
    131    silk_assert( search_thres1_Q16 >= 0 && search_thres1_Q16 <= (1<<16) );
    132    silk_assert( search_thres2_Q13 >= 0 && search_thres2_Q13 <= (1<<13) );
    133 
    134    /* Set up frame lengths max / min lag for the sampling frequency */
    135    frame_length      = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz;
    136    frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4;
    137    frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8;
    138    sf_length         = PE_SUBFR_LENGTH_MS * Fs_kHz;
    139    min_lag           = PE_MIN_LAG_MS * Fs_kHz;
    140    max_lag           = PE_MAX_LAG_MS * Fs_kHz - 1;
    141 
    142    /* Downscale input if necessary */
    143    silk_sum_sqr_shift( &energy, &shift, frame_unscaled, frame_length );
    144    shift += 3 - silk_CLZ32( energy );        /* at least two bits headroom */
    145    ALLOC( frame_scaled, frame_length, opus_int16 );
    146    if( shift > 0 ) {
    147        shift = silk_RSHIFT( shift + 1, 1 );
    148        for( i = 0; i < frame_length; i++ ) {
    149            frame_scaled[ i ] = silk_RSHIFT( frame_unscaled[ i ], shift );
    150        }
    151        frame = frame_scaled;
    152    } else {
    153        frame = frame_unscaled;
    154    }
    155 
    156    ALLOC( frame_8kHz_buf, ( Fs_kHz == 8 ) ? 1 : frame_length_8kHz, opus_int16 );
    157    /* Resample from input sampled at Fs_kHz to 8 kHz */
    158    if( Fs_kHz == 16 ) {
    159        silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );
    160        silk_resampler_down2( filt_state, frame_8kHz_buf, frame, frame_length );
    161        frame_8kHz = frame_8kHz_buf;
    162    } else if( Fs_kHz == 12 ) {
    163        silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) );
    164        silk_resampler_down2_3( filt_state, frame_8kHz_buf, frame, frame_length );
    165        frame_8kHz = frame_8kHz_buf;
    166    } else {
    167        celt_assert( Fs_kHz == 8 );
    168        frame_8kHz = frame;
    169    }
    170 
    171    /* Decimate again to 4 kHz */
    172    silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );/* Set state to zero */
    173    ALLOC( frame_4kHz, frame_length_4kHz, opus_int16 );
    174    silk_resampler_down2( filt_state, frame_4kHz, frame_8kHz, frame_length_8kHz );
    175 
    176    /* Low-pass filter */
    177    for( i = frame_length_4kHz - 1; i > 0; i-- ) {
    178        frame_4kHz[ i ] = silk_ADD_SAT16( frame_4kHz[ i ], frame_4kHz[ i - 1 ] );
    179    }
    180 
    181 
    182    /******************************************************************************
    183    * FIRST STAGE, operating in 4 khz
    184    ******************************************************************************/
    185    ALLOC( C, nb_subfr * CSTRIDE_8KHZ, opus_int16 );
    186    ALLOC( xcorr32, MAX_LAG_4KHZ-MIN_LAG_4KHZ+1, opus_int32 );
    187    silk_memset( C, 0, (nb_subfr >> 1) * CSTRIDE_4KHZ * sizeof( opus_int16 ) );
    188    target_ptr = &frame_4kHz[ silk_LSHIFT( SF_LENGTH_4KHZ, 2 ) ];
    189    for( k = 0; k < nb_subfr >> 1; k++ ) {
    190        /* Check that we are within range of the array */
    191        celt_assert( target_ptr >= frame_4kHz );
    192        celt_assert( target_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
    193 
    194        basis_ptr = target_ptr - MIN_LAG_4KHZ;
    195 
    196        /* Check that we are within range of the array */
    197        celt_assert( basis_ptr >= frame_4kHz );
    198        celt_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
    199 
    200        celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1, arch );
    201 
    202        /* Calculate first vector products before loop */
    203        cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ];
    204        normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch );
    205        normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr,  basis_ptr, SF_LENGTH_8KHZ, arch ) );
    206        normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) );
    207 
    208        matrix_ptr( C, k, 0, CSTRIDE_4KHZ ) =
    209            (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 );                      /* Q13 */
    210 
    211        /* From now on normalizer is computed recursively */
    212        for( d = MIN_LAG_4KHZ + 1; d <= MAX_LAG_4KHZ; d++ ) {
    213            basis_ptr--;
    214 
    215            /* Check that we are within range of the array */
    216            silk_assert( basis_ptr >= frame_4kHz );
    217            silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
    218 
    219            cross_corr = xcorr32[ MAX_LAG_4KHZ - d ];
    220 
    221            /* Add contribution of new sample and remove contribution from oldest sample */
    222            normalizer = silk_ADD32( normalizer,
    223                silk_SMULBB( basis_ptr[ 0 ], basis_ptr[ 0 ] ) -
    224                silk_SMULBB( basis_ptr[ SF_LENGTH_8KHZ ], basis_ptr[ SF_LENGTH_8KHZ ] ) );
    225 
    226            matrix_ptr( C, k, d - MIN_LAG_4KHZ, CSTRIDE_4KHZ) =
    227                (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 );                  /* Q13 */
    228        }
    229        /* Update target pointer */
    230        target_ptr += SF_LENGTH_8KHZ;
    231    }
    232 
    233    /* Combine two subframes into single correlation measure and apply short-lag bias */
    234    if( nb_subfr == PE_MAX_NB_SUBFR ) {
    235        for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {
    236            sum = (opus_int32)matrix_ptr( C, 0, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ )
    237                + (opus_int32)matrix_ptr( C, 1, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ );               /* Q14 */
    238            sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) );                                /* Q14 */
    239            C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum;                                            /* Q14 */
    240        }
    241    } else {
    242        /* Only short-lag bias */
    243        for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {
    244            sum = silk_LSHIFT( (opus_int32)C[ i - MIN_LAG_4KHZ ], 1 );                          /* Q14 */
    245            sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) );                                /* Q14 */
    246            C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum;                                            /* Q14 */
    247        }
    248    }
    249 
    250    /* Sort */
    251    length_d_srch = silk_ADD_LSHIFT32( 4, complexity, 1 );
    252    celt_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH );
    253    silk_insertion_sort_decreasing_int16( C, d_srch, CSTRIDE_4KHZ,
    254                                          length_d_srch );
    255 
    256    /* Escape if correlation is very low already here */
    257    Cmax = (opus_int)C[ 0 ];                                                    /* Q14 */
    258    if( Cmax < SILK_FIX_CONST( 0.2, 14 ) ) {
    259        silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
    260        *LTPCorr_Q15  = 0;
    261        *lagIndex     = 0;
    262        *contourIndex = 0;
    263        RESTORE_STACK;
    264        return 1;
    265    }
    266 
    267    threshold = silk_SMULWB( search_thres1_Q16, Cmax );
    268    for( i = 0; i < length_d_srch; i++ ) {
    269        /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */
    270        if( C[ i ] > threshold ) {
    271            d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + MIN_LAG_4KHZ, 1 );
    272        } else {
    273            length_d_srch = i;
    274            break;
    275        }
    276    }
    277    celt_assert( length_d_srch > 0 );
    278 
    279    ALLOC( d_comp, D_COMP_STRIDE, opus_int16 );
    280    for( i = D_COMP_MIN; i < D_COMP_MAX; i++ ) {
    281        d_comp[ i - D_COMP_MIN ] = 0;
    282    }
    283    for( i = 0; i < length_d_srch; i++ ) {
    284        d_comp[ d_srch[ i ] - D_COMP_MIN ] = 1;
    285    }
    286 
    287    /* Convolution */
    288    for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {
    289        d_comp[ i - D_COMP_MIN ] +=
    290            d_comp[ i - 1 - D_COMP_MIN ] + d_comp[ i - 2 - D_COMP_MIN ];
    291    }
    292 
    293    length_d_srch = 0;
    294    for( i = MIN_LAG_8KHZ; i < MAX_LAG_8KHZ + 1; i++ ) {
    295        if( d_comp[ i + 1 - D_COMP_MIN ] > 0 ) {
    296            d_srch[ length_d_srch ] = i;
    297            length_d_srch++;
    298        }
    299    }
    300 
    301    /* Convolution */
    302    for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {
    303        d_comp[ i - D_COMP_MIN ] += d_comp[ i - 1 - D_COMP_MIN ]
    304            + d_comp[ i - 2 - D_COMP_MIN ] + d_comp[ i - 3 - D_COMP_MIN ];
    305    }
    306 
    307    length_d_comp = 0;
    308    for( i = MIN_LAG_8KHZ; i < D_COMP_MAX; i++ ) {
    309        if( d_comp[ i - D_COMP_MIN ] > 0 ) {
    310            d_comp[ length_d_comp ] = i - 2;
    311            length_d_comp++;
    312        }
    313    }
    314 
    315    /**********************************************************************************
    316    ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation
    317    *************************************************************************************/
    318 
    319    /*********************************************************************************
    320    * Find energy of each subframe projected onto its history, for a range of delays
    321    *********************************************************************************/
    322    silk_memset( C, 0, nb_subfr * CSTRIDE_8KHZ * sizeof( opus_int16 ) );
    323 
    324    target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ];
    325    for( k = 0; k < nb_subfr; k++ ) {
    326 
    327        /* Check that we are within range of the array */
    328        celt_assert( target_ptr >= frame_8kHz );
    329        celt_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
    330 
    331        energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch ), 1 );
    332        for( j = 0; j < length_d_comp; j++ ) {
    333            d = d_comp[ j ];
    334            basis_ptr = target_ptr - d;
    335 
    336            /* Check that we are within range of the array */
    337            silk_assert( basis_ptr >= frame_8kHz );
    338            silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
    339 
    340            cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ, arch );
    341            if( cross_corr > 0 ) {
    342                energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ, arch );
    343                matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) =
    344                    (opus_int16)silk_DIV32_varQ( cross_corr,
    345                                                 silk_ADD32( energy_target,
    346                                                             energy_basis ),
    347                                                 13 + 1 );                                      /* Q13 */
    348            } else {
    349                matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) = 0;
    350            }
    351        }
    352        target_ptr += SF_LENGTH_8KHZ;
    353    }
    354 
    355    /* search over lag range and lags codebook */
    356    /* scale factor for lag codebook, as a function of center lag */
    357 
    358    CCmax   = silk_int32_MIN;
    359    CCmax_b = silk_int32_MIN;
    360 
    361    CBimax = 0; /* To avoid returning undefined lag values */
    362    lag = -1;   /* To check if lag with strong enough correlation has been found */
    363 
    364    if( prevLag > 0 ) {
    365        if( Fs_kHz == 12 ) {
    366            prevLag = silk_DIV32_16( silk_LSHIFT( prevLag, 1 ), 3 );
    367        } else if( Fs_kHz == 16 ) {
    368            prevLag = silk_RSHIFT( prevLag, 1 );
    369        }
    370        prevLag_log2_Q7 = silk_lin2log( (opus_int32)prevLag );
    371    } else {
    372        prevLag_log2_Q7 = 0;
    373    }
    374    silk_assert( search_thres2_Q13 == silk_SAT16( search_thres2_Q13 ) );
    375    /* Set up stage 2 codebook based on number of subframes */
    376    if( nb_subfr == PE_MAX_NB_SUBFR ) {
    377        cbk_size   = PE_NB_CBKS_STAGE2_EXT;
    378        Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ];
    379        if( Fs_kHz == 8 && complexity > SILK_PE_MIN_COMPLEX ) {
    380            /* If input is 8 khz use a larger codebook here because it is last stage */
    381            nb_cbk_search = PE_NB_CBKS_STAGE2_EXT;
    382        } else {
    383            nb_cbk_search = PE_NB_CBKS_STAGE2;
    384        }
    385    } else {
    386        cbk_size       = PE_NB_CBKS_STAGE2_10MS;
    387        Lag_CB_ptr     = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ];
    388        nb_cbk_search  = PE_NB_CBKS_STAGE2_10MS;
    389    }
    390 
    391    for( k = 0; k < length_d_srch; k++ ) {
    392        d = d_srch[ k ];
    393        for( j = 0; j < nb_cbk_search; j++ ) {
    394            CC[ j ] = 0;
    395            for( i = 0; i < nb_subfr; i++ ) {
    396                opus_int d_subfr;
    397                /* Try all codebooks */
    398                d_subfr = d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size );
    399                CC[ j ] = CC[ j ]
    400                    + (opus_int32)matrix_ptr( C, i,
    401                                              d_subfr - ( MIN_LAG_8KHZ - 2 ),
    402                                              CSTRIDE_8KHZ );
    403            }
    404        }
    405        /* Find best codebook */
    406        CCmax_new = silk_int32_MIN;
    407        CBimax_new = 0;
    408        for( i = 0; i < nb_cbk_search; i++ ) {
    409            if( CC[ i ] > CCmax_new ) {
    410                CCmax_new = CC[ i ];
    411                CBimax_new = i;
    412            }
    413        }
    414 
    415        /* Bias towards shorter lags */
    416        lag_log2_Q7 = silk_lin2log( d ); /* Q7 */
    417        silk_assert( lag_log2_Q7 == silk_SAT16( lag_log2_Q7 ) );
    418        silk_assert( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) ) );
    419        CCmax_new_b = CCmax_new - silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ), lag_log2_Q7 ), 7 ); /* Q13 */
    420 
    421        /* Bias towards previous lag */
    422        silk_assert( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) ) );
    423        if( prevLag > 0 ) {
    424            delta_lag_log2_sqr_Q7 = lag_log2_Q7 - prevLag_log2_Q7;
    425            silk_assert( delta_lag_log2_sqr_Q7 == silk_SAT16( delta_lag_log2_sqr_Q7 ) );
    426            delta_lag_log2_sqr_Q7 = silk_RSHIFT( silk_SMULBB( delta_lag_log2_sqr_Q7, delta_lag_log2_sqr_Q7 ), 7 );
    427            prev_lag_bias_Q13 = silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ), *LTPCorr_Q15 ), 15 ); /* Q13 */
    428            prev_lag_bias_Q13 = silk_DIV32( silk_MUL( prev_lag_bias_Q13, delta_lag_log2_sqr_Q7 ), delta_lag_log2_sqr_Q7 + SILK_FIX_CONST( 0.5, 7 ) );
    429            CCmax_new_b -= prev_lag_bias_Q13; /* Q13 */
    430        }
    431 
    432        if( CCmax_new_b > CCmax_b                                   &&  /* Find maximum biased correlation                  */
    433            CCmax_new > silk_SMULBB( nb_subfr, search_thres2_Q13 )  &&  /* Correlation needs to be high enough to be voiced */
    434            silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= MIN_LAG_8KHZ      /* Lag must be in range                             */
    435         ) {
    436            CCmax_b = CCmax_new_b;
    437            CCmax   = CCmax_new;
    438            lag     = d;
    439            CBimax  = CBimax_new;
    440        }
    441    }
    442 
    443    if( lag == -1 ) {
    444        /* No suitable candidate found */
    445        silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
    446        *LTPCorr_Q15  = 0;
    447        *lagIndex     = 0;
    448        *contourIndex = 0;
    449        RESTORE_STACK;
    450        return 1;
    451    }
    452 
    453    /* Output normalized correlation */
    454    *LTPCorr_Q15 = (opus_int)silk_LSHIFT( silk_DIV32_16( CCmax, nb_subfr ), 2 );
    455    silk_assert( *LTPCorr_Q15 >= 0 );
    456 
    457    if( Fs_kHz > 8 ) {
    458        /* Search in original signal */
    459 
    460        CBimax_old = CBimax;
    461        /* Compensate for decimation */
    462        silk_assert( lag == silk_SAT16( lag ) );
    463        if( Fs_kHz == 12 ) {
    464            lag = silk_RSHIFT( silk_SMULBB( lag, 3 ), 1 );
    465        } else if( Fs_kHz == 16 ) {
    466            lag = silk_LSHIFT( lag, 1 );
    467        } else {
    468            lag = silk_SMULBB( lag, 3 );
    469        }
    470 
    471        lag = silk_LIMIT_int( lag, min_lag, max_lag );
    472        start_lag = silk_max_int( lag - 2, min_lag );
    473        end_lag   = silk_min_int( lag + 2, max_lag );
    474        lag_new   = lag;                                    /* to avoid undefined lag */
    475        CBimax    = 0;                                      /* to avoid undefined lag */
    476 
    477        CCmax = silk_int32_MIN;
    478        /* pitch lags according to second stage */
    479        for( k = 0; k < nb_subfr; k++ ) {
    480            pitch_out[ k ] = lag + 2 * silk_CB_lags_stage2[ k ][ CBimax_old ];
    481        }
    482 
    483        /* Set up codebook parameters according to complexity setting and frame length */
    484        if( nb_subfr == PE_MAX_NB_SUBFR ) {
    485            nb_cbk_search   = (opus_int)silk_nb_cbk_searchs_stage3[ complexity ];
    486            cbk_size        = PE_NB_CBKS_STAGE3_MAX;
    487            Lag_CB_ptr      = &silk_CB_lags_stage3[ 0 ][ 0 ];
    488        } else {
    489            nb_cbk_search   = PE_NB_CBKS_STAGE3_10MS;
    490            cbk_size        = PE_NB_CBKS_STAGE3_10MS;
    491            Lag_CB_ptr      = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
    492        }
    493 
    494        /* Calculate the correlations and energies needed in stage 3 */
    495        ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
    496        ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
    497        silk_P_Ana_calc_corr_st3(  cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch );
    498        silk_P_Ana_calc_energy_st3( energies_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch );
    499 
    500        lag_counter = 0;
    501        silk_assert( lag == silk_SAT16( lag ) );
    502        contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag );
    503 
    504        target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
    505        energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length, arch ), 1 );
    506        for( d = start_lag; d <= end_lag; d++ ) {
    507            for( j = 0; j < nb_cbk_search; j++ ) {
    508                cross_corr = 0;
    509                energy     = energy_target;
    510                for( k = 0; k < nb_subfr; k++ ) {
    511                    cross_corr = silk_ADD32( cross_corr,
    512                        matrix_ptr( cross_corr_st3, k, j,
    513                                    nb_cbk_search )[ lag_counter ] );
    514                    energy     = silk_ADD32( energy,
    515                        matrix_ptr( energies_st3, k, j,
    516                                    nb_cbk_search )[ lag_counter ] );
    517                    silk_assert( energy >= 0 );
    518                }
    519                if( cross_corr > 0 ) {
    520                    CCmax_new = silk_DIV32_varQ( cross_corr, energy, 13 + 1 );          /* Q13 */
    521                    /* Reduce depending on flatness of contour */
    522                    diff = silk_int16_MAX - silk_MUL( contour_bias_Q15, j );            /* Q15 */
    523                    silk_assert( diff == silk_SAT16( diff ) );
    524                    CCmax_new = silk_SMULWB( CCmax_new, diff );                         /* Q14 */
    525                } else {
    526                    CCmax_new = 0;
    527                }
    528 
    529                if( CCmax_new > CCmax && ( d + silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) {
    530                    CCmax   = CCmax_new;
    531                    lag_new = d;
    532                    CBimax  = j;
    533                }
    534            }
    535            lag_counter++;
    536        }
    537 
    538        for( k = 0; k < nb_subfr; k++ ) {
    539            pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
    540            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, PE_MAX_LAG_MS * Fs_kHz );
    541        }
    542        *lagIndex = (opus_int16)( lag_new - min_lag);
    543        *contourIndex = (opus_int8)CBimax;
    544    } else {        /* Fs_kHz == 8 */
    545        /* Save Lags */
    546        for( k = 0; k < nb_subfr; k++ ) {
    547            pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
    548            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], MIN_LAG_8KHZ, PE_MAX_LAG_MS * 8 );
    549        }
    550        *lagIndex = (opus_int16)( lag - MIN_LAG_8KHZ );
    551        *contourIndex = (opus_int8)CBimax;
    552    }
    553    celt_assert( *lagIndex >= 0 );
    554    /* return as voiced */
    555    RESTORE_STACK;
    556    return 0;
    557 }
    558 
    559 /***********************************************************************
    560 * Calculates the correlations used in stage 3 search. In order to cover
    561 * the whole lag codebook for all the searched offset lags (lag +- 2),
    562 * the following correlations are needed in each sub frame:
    563 *
    564 * sf1: lag range [-8,...,7] total 16 correlations
    565 * sf2: lag range [-4,...,4] total 9 correlations
    566 * sf3: lag range [-3,....4] total 8 correltions
    567 * sf4: lag range [-6,....8] total 15 correlations
    568 *
    569 * In total 48 correlations. The direct implementation computed in worst
    570 * case 4*12*5 = 240 correlations, but more likely around 120.
    571 ***********************************************************************/
    572 static void silk_P_Ana_calc_corr_st3(
    573    silk_pe_stage3_vals cross_corr_st3[],              /* O 3 DIM correlation array */
    574    const opus_int16  frame[],                         /* I vector to correlate         */
    575    opus_int          start_lag,                       /* I lag offset to search around */
    576    opus_int          sf_length,                       /* I length of a 5 ms subframe   */
    577    opus_int          nb_subfr,                        /* I number of subframes         */
    578    opus_int          complexity,                      /* I Complexity setting          */
    579    int               arch                             /* I Run-time architecture       */
    580 )
    581 {
    582    const opus_int16 *target_ptr;
    583    opus_int   i, j, k, lag_counter, lag_low, lag_high;
    584    opus_int   nb_cbk_search, delta, idx, cbk_size;
    585    VARDECL( opus_int32, scratch_mem );
    586    VARDECL( opus_int32, xcorr32 );
    587    const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
    588    SAVE_STACK;
    589 
    590    celt_assert( complexity >= SILK_PE_MIN_COMPLEX );
    591    celt_assert( complexity <= SILK_PE_MAX_COMPLEX );
    592 
    593    if( nb_subfr == PE_MAX_NB_SUBFR ) {
    594        Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
    595        Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
    596        nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
    597        cbk_size      = PE_NB_CBKS_STAGE3_MAX;
    598    } else {
    599        celt_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
    600        Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
    601        Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
    602        nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
    603        cbk_size      = PE_NB_CBKS_STAGE3_10MS;
    604    }
    605    ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
    606    ALLOC( xcorr32, SCRATCH_SIZE, opus_int32 );
    607 
    608    target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */
    609    for( k = 0; k < nb_subfr; k++ ) {
    610        lag_counter = 0;
    611 
    612        /* Calculate the correlations for each subframe */
    613        lag_low  = matrix_ptr( Lag_range_ptr, k, 0, 2 );
    614        lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );
    615        celt_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);
    616        celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1, arch );
    617        for( j = lag_low; j <= lag_high; j++ ) {
    618            silk_assert( lag_counter < SCRATCH_SIZE );
    619            scratch_mem[ lag_counter ] = xcorr32[ lag_high - j ];
    620            lag_counter++;
    621        }
    622 
    623        delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
    624        for( i = 0; i < nb_cbk_search; i++ ) {
    625            /* Fill out the 3 dim array that stores the correlations for */
    626            /* each code_book vector for each start lag */
    627            idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
    628            for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
    629                silk_assert( idx + j < SCRATCH_SIZE );
    630                silk_assert( idx + j < lag_counter );
    631                matrix_ptr( cross_corr_st3, k, i, nb_cbk_search )[ j ] =
    632                    scratch_mem[ idx + j ];
    633            }
    634        }
    635        target_ptr += sf_length;
    636    }
    637    RESTORE_STACK;
    638 }
    639 
    640 /********************************************************************/
    641 /* Calculate the energies for first two subframes. The energies are */
    642 /* calculated recursively.                                          */
    643 /********************************************************************/
    644 static void silk_P_Ana_calc_energy_st3(
    645    silk_pe_stage3_vals energies_st3[],                 /* O 3 DIM energy array */
    646    const opus_int16  frame[],                          /* I vector to calc energy in    */
    647    opus_int          start_lag,                        /* I lag offset to search around */
    648    opus_int          sf_length,                        /* I length of one 5 ms subframe */
    649    opus_int          nb_subfr,                         /* I number of subframes         */
    650    opus_int          complexity,                       /* I Complexity setting          */
    651    int               arch                              /* I Run-time architecture       */
    652 )
    653 {
    654    const opus_int16 *target_ptr, *basis_ptr;
    655    opus_int32 energy;
    656    opus_int   k, i, j, lag_counter;
    657    opus_int   nb_cbk_search, delta, idx, cbk_size, lag_diff;
    658    VARDECL( opus_int32, scratch_mem );
    659    const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
    660    SAVE_STACK;
    661 
    662    celt_assert( complexity >= SILK_PE_MIN_COMPLEX );
    663    celt_assert( complexity <= SILK_PE_MAX_COMPLEX );
    664 
    665    if( nb_subfr == PE_MAX_NB_SUBFR ) {
    666        Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
    667        Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
    668        nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
    669        cbk_size      = PE_NB_CBKS_STAGE3_MAX;
    670    } else {
    671        celt_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
    672        Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
    673        Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
    674        nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
    675        cbk_size      = PE_NB_CBKS_STAGE3_10MS;
    676    }
    677    ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
    678 
    679    target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ];
    680    for( k = 0; k < nb_subfr; k++ ) {
    681        lag_counter = 0;
    682 
    683        /* Calculate the energy for first lag */
    684        basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) );
    685        energy = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length, arch );
    686        silk_assert( energy >= 0 );
    687        scratch_mem[ lag_counter ] = energy;
    688        lag_counter++;
    689 
    690        lag_diff = ( matrix_ptr( Lag_range_ptr, k, 1, 2 ) -  matrix_ptr( Lag_range_ptr, k, 0, 2 ) + 1 );
    691        for( i = 1; i < lag_diff; i++ ) {
    692            /* remove part outside new window */
    693            energy -= silk_SMULBB( basis_ptr[ sf_length - i ], basis_ptr[ sf_length - i ] );
    694            silk_assert( energy >= 0 );
    695 
    696            /* add part that comes into window */
    697            energy = silk_ADD_SAT32( energy, silk_SMULBB( basis_ptr[ -i ], basis_ptr[ -i ] ) );
    698            silk_assert( energy >= 0 );
    699            silk_assert( lag_counter < SCRATCH_SIZE );
    700            scratch_mem[ lag_counter ] = energy;
    701            lag_counter++;
    702        }
    703 
    704        delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
    705        for( i = 0; i < nb_cbk_search; i++ ) {
    706            /* Fill out the 3 dim array that stores the correlations for    */
    707            /* each code_book vector for each start lag                     */
    708            idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
    709            for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
    710                silk_assert( idx + j < SCRATCH_SIZE );
    711                silk_assert( idx + j < lag_counter );
    712                matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] =
    713                    scratch_mem[ idx + j ];
    714                silk_assert(
    715                    matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] >= 0 );
    716            }
    717        }
    718        target_ptr += sf_length;
    719    }
    720    RESTORE_STACK;
    721 }