enc_API.c (32873B)
1 /*********************************************************************** 2 Copyright (c) 2006-2011, Skype Limited. All rights reserved. 3 Redistribution and use in source and binary forms, with or without 4 modification, are permitted provided that the following conditions 5 are met: 6 - Redistributions of source code must retain the above copyright notice, 7 this list of conditions and the following disclaimer. 8 - Redistributions in binary form must reproduce the above copyright 9 notice, this list of conditions and the following disclaimer in the 10 documentation and/or other materials provided with the distribution. 11 - Neither the name of Internet Society, IETF or IETF Trust, nor the 12 names of specific contributors, may be used to endorse or promote 13 products derived from this software without specific prior written 14 permission. 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 POSSIBILITY OF SUCH DAMAGE. 26 ***********************************************************************/ 27 28 #ifdef HAVE_CONFIG_H 29 #include "config.h" 30 #endif 31 #include "define.h" 32 #include "API.h" 33 #include "control.h" 34 #include "typedef.h" 35 #include "stack_alloc.h" 36 #include "structs.h" 37 #include "tuning_parameters.h" 38 #ifdef FIXED_POINT 39 #include "main_FIX.h" 40 #else 41 #include "main_FLP.h" 42 #endif 43 44 #ifdef ENABLE_DRED 45 #include "dred_encoder.h" 46 #endif 47 48 /***************************************/ 49 /* Read control structure from encoder */ 50 /***************************************/ 51 static opus_int silk_QueryEncoder( /* O Returns error code */ 52 const void *encState, /* I State */ 53 silk_EncControlStruct *encStatus /* O Encoder Status */ 54 ); 55 56 /****************************************/ 57 /* Encoder functions */ 58 /****************************************/ 59 60 opus_int silk_Get_Encoder_Size( /* O Returns error code */ 61 opus_int *encSizeBytes, /* O Number of bytes in SILK encoder state */ 62 opus_int channels /* I Number of channels */ 63 ) 64 { 65 opus_int ret = SILK_NO_ERROR; 66 67 *encSizeBytes = sizeof( silk_encoder ); 68 /* Skip second encoder state for mono. */ 69 if ( channels == 1 ) { 70 *encSizeBytes -= sizeof( silk_encoder_state_Fxx ); 71 } 72 73 return ret; 74 } 75 76 /*************************/ 77 /* Init or Reset encoder */ 78 /*************************/ 79 opus_int silk_InitEncoder( /* O Returns error code */ 80 void *encState, /* I/O State */ 81 int channels, /* I Number of channels */ 82 int arch, /* I Run-time architecture */ 83 silk_EncControlStruct *encStatus /* O Encoder Status */ 84 ) 85 { 86 silk_encoder *psEnc; 87 opus_int n, ret = SILK_NO_ERROR; 88 89 psEnc = (silk_encoder *)encState; 90 91 /* Reset encoder. Skip second encoder state for mono. */ 92 silk_memset( psEnc, 0, sizeof( silk_encoder ) - (channels==1)*sizeof( silk_encoder_state_Fxx ) ); 93 for( n = 0; n < channels; n++ ) { 94 if( ret += silk_init_encoder( &psEnc->state_Fxx[ n ], arch ) ) { 95 celt_assert( 0 ); 96 } 97 } 98 99 psEnc->nChannelsAPI = 1; 100 psEnc->nChannelsInternal = 1; 101 102 /* Read control structure */ 103 if( ret += silk_QueryEncoder( encState, encStatus ) ) { 104 celt_assert( 0 ); 105 } 106 107 return ret; 108 } 109 110 /***************************************/ 111 /* Read control structure from encoder */ 112 /***************************************/ 113 static opus_int silk_QueryEncoder( /* O Returns error code */ 114 const void *encState, /* I State */ 115 silk_EncControlStruct *encStatus /* O Encoder Status */ 116 ) 117 { 118 opus_int ret = SILK_NO_ERROR; 119 silk_encoder_state_Fxx *state_Fxx; 120 silk_encoder *psEnc = (silk_encoder *)encState; 121 122 state_Fxx = psEnc->state_Fxx; 123 124 encStatus->nChannelsAPI = psEnc->nChannelsAPI; 125 encStatus->nChannelsInternal = psEnc->nChannelsInternal; 126 encStatus->API_sampleRate = state_Fxx[ 0 ].sCmn.API_fs_Hz; 127 encStatus->maxInternalSampleRate = state_Fxx[ 0 ].sCmn.maxInternal_fs_Hz; 128 encStatus->minInternalSampleRate = state_Fxx[ 0 ].sCmn.minInternal_fs_Hz; 129 encStatus->desiredInternalSampleRate = state_Fxx[ 0 ].sCmn.desiredInternal_fs_Hz; 130 encStatus->payloadSize_ms = state_Fxx[ 0 ].sCmn.PacketSize_ms; 131 encStatus->bitRate = state_Fxx[ 0 ].sCmn.TargetRate_bps; 132 encStatus->packetLossPercentage = state_Fxx[ 0 ].sCmn.PacketLoss_perc; 133 encStatus->complexity = state_Fxx[ 0 ].sCmn.Complexity; 134 encStatus->useInBandFEC = state_Fxx[ 0 ].sCmn.useInBandFEC; 135 encStatus->useDTX = state_Fxx[ 0 ].sCmn.useDTX; 136 encStatus->useCBR = state_Fxx[ 0 ].sCmn.useCBR; 137 encStatus->internalSampleRate = silk_SMULBB( state_Fxx[ 0 ].sCmn.fs_kHz, 1000 ); 138 encStatus->allowBandwidthSwitch = state_Fxx[ 0 ].sCmn.allow_bandwidth_switch; 139 encStatus->inWBmodeWithoutVariableLP = state_Fxx[ 0 ].sCmn.fs_kHz == 16 && state_Fxx[ 0 ].sCmn.sLP.mode == 0; 140 141 return ret; 142 } 143 144 145 /**************************/ 146 /* Encode frame with Silk */ 147 /**************************/ 148 /* Note: if prefillFlag is set, the input must contain 10 ms of audio, irrespective of what */ 149 /* encControl->payloadSize_ms is set to */ 150 opus_int silk_Encode( /* O Returns error code */ 151 void *encState, /* I/O State */ 152 silk_EncControlStruct *encControl, /* I Control status */ 153 const opus_res *samplesIn, /* I Speech sample input vector */ 154 opus_int nSamplesIn, /* I Number of samples in input vector */ 155 ec_enc *psRangeEnc, /* I/O Compressor data structure */ 156 opus_int32 *nBytesOut, /* I/O Number of bytes in payload (input: Max bytes) */ 157 const opus_int prefillFlag, /* I Flag to indicate prefilling buffers no coding */ 158 opus_int activity /* I Decision of Opus voice activity detector */ 159 ) 160 { 161 opus_int n, i, nBits, flags, tmp_payloadSize_ms = 0, tmp_complexity = 0, ret = 0; 162 opus_int nSamplesToBuffer, nSamplesToBufferMax, nBlocksOf10ms; 163 opus_int nSamplesFromInput = 0, nSamplesFromInputMax; 164 opus_int speech_act_thr_for_switch_Q8; 165 opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol, sum; 166 silk_encoder *psEnc = ( silk_encoder * )encState; 167 VARDECL( opus_int16, buf ); 168 opus_int transition, curr_block, tot_blocks; 169 SAVE_STACK; 170 171 celt_assert( encControl->nChannelsAPI >= encControl->nChannelsInternal && encControl->nChannelsAPI >= psEnc->nChannelsInternal ); 172 if (encControl->reducedDependency) 173 { 174 for( n = 0; n < encControl->nChannelsAPI; n++ ) { 175 psEnc->state_Fxx[ n ].sCmn.first_frame_after_reset = 1; 176 } 177 } 178 for( n = 0; n < encControl->nChannelsAPI; n++ ) { 179 psEnc->state_Fxx[ n ].sCmn.nFramesEncoded = 0; 180 } 181 /* Check values in encoder control structure */ 182 if( ( ret = check_control_input( encControl ) ) != 0 ) { 183 celt_assert( 0 ); 184 RESTORE_STACK; 185 return ret; 186 } 187 188 encControl->switchReady = 0; 189 190 if( encControl->nChannelsInternal > psEnc->nChannelsInternal ) { 191 /* Mono -> Stereo transition: init state of second channel and stereo state */ 192 ret += silk_init_encoder( &psEnc->state_Fxx[ 1 ], psEnc->state_Fxx[ 0 ].sCmn.arch ); 193 silk_memset( psEnc->sStereo.pred_prev_Q13, 0, sizeof( psEnc->sStereo.pred_prev_Q13 ) ); 194 silk_memset( psEnc->sStereo.sSide, 0, sizeof( psEnc->sStereo.sSide ) ); 195 psEnc->sStereo.mid_side_amp_Q0[ 0 ] = 0; 196 psEnc->sStereo.mid_side_amp_Q0[ 1 ] = 1; 197 psEnc->sStereo.mid_side_amp_Q0[ 2 ] = 0; 198 psEnc->sStereo.mid_side_amp_Q0[ 3 ] = 1; 199 psEnc->sStereo.width_prev_Q14 = 0; 200 psEnc->sStereo.smth_width_Q14 = SILK_FIX_CONST( 1, 14 ); 201 if( psEnc->nChannelsAPI == 2 ) { 202 silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof( silk_resampler_state_struct ) ); 203 silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.In_HP_State, &psEnc->state_Fxx[ 0 ].sCmn.In_HP_State, sizeof( psEnc->state_Fxx[ 1 ].sCmn.In_HP_State ) ); 204 } 205 } 206 207 transition = (encControl->payloadSize_ms != psEnc->state_Fxx[ 0 ].sCmn.PacketSize_ms) || (psEnc->nChannelsInternal != encControl->nChannelsInternal); 208 209 psEnc->nChannelsAPI = encControl->nChannelsAPI; 210 psEnc->nChannelsInternal = encControl->nChannelsInternal; 211 212 nBlocksOf10ms = silk_DIV32( 100 * nSamplesIn, encControl->API_sampleRate ); 213 tot_blocks = ( nBlocksOf10ms > 1 ) ? nBlocksOf10ms >> 1 : 1; 214 curr_block = 0; 215 if( prefillFlag ) { 216 silk_LP_state save_LP; 217 /* Only accept input length of 10 ms */ 218 if( nBlocksOf10ms != 1 ) { 219 celt_assert( 0 ); 220 RESTORE_STACK; 221 return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES; 222 } 223 if ( prefillFlag == 2 ) { 224 save_LP = psEnc->state_Fxx[ 0 ].sCmn.sLP; 225 /* Save the sampling rate so the bandwidth switching code can keep handling transitions. */ 226 save_LP.saved_fs_kHz = psEnc->state_Fxx[ 0 ].sCmn.fs_kHz; 227 } 228 /* Reset Encoder */ 229 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 230 ret = silk_init_encoder( &psEnc->state_Fxx[ n ], psEnc->state_Fxx[ n ].sCmn.arch ); 231 /* Restore the variable LP state. */ 232 if ( prefillFlag == 2 ) { 233 psEnc->state_Fxx[ n ].sCmn.sLP = save_LP; 234 } 235 celt_assert( !ret ); 236 } 237 tmp_payloadSize_ms = encControl->payloadSize_ms; 238 encControl->payloadSize_ms = 10; 239 tmp_complexity = encControl->complexity; 240 encControl->complexity = 0; 241 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 242 psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0; 243 psEnc->state_Fxx[ n ].sCmn.prefillFlag = 1; 244 } 245 } else { 246 /* Only accept input lengths that are a multiple of 10 ms */ 247 if( nBlocksOf10ms * encControl->API_sampleRate != 100 * nSamplesIn || nSamplesIn < 0 ) { 248 celt_assert( 0 ); 249 RESTORE_STACK; 250 return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES; 251 } 252 /* Make sure no more than one packet can be produced */ 253 if( 1000 * (opus_int32)nSamplesIn > encControl->payloadSize_ms * encControl->API_sampleRate ) { 254 celt_assert( 0 ); 255 RESTORE_STACK; 256 return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES; 257 } 258 } 259 260 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 261 /* Force the side channel to the same rate as the mid */ 262 opus_int force_fs_kHz = (n==1) ? psEnc->state_Fxx[0].sCmn.fs_kHz : 0; 263 if( ( ret = silk_control_encoder( &psEnc->state_Fxx[ n ], encControl, psEnc->allowBandwidthSwitch, n, force_fs_kHz ) ) != 0 ) { 264 silk_assert( 0 ); 265 RESTORE_STACK; 266 return ret; 267 } 268 if( psEnc->state_Fxx[n].sCmn.first_frame_after_reset || transition ) { 269 for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) { 270 psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] = 0; 271 } 272 } 273 psEnc->state_Fxx[ n ].sCmn.inDTX = psEnc->state_Fxx[ n ].sCmn.useDTX; 274 } 275 celt_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz ); 276 277 /* Input buffering/resampling and encoding */ 278 nSamplesToBufferMax = 279 10 * nBlocksOf10ms * psEnc->state_Fxx[ 0 ].sCmn.fs_kHz; 280 nSamplesFromInputMax = 281 silk_DIV32_16( nSamplesToBufferMax * 282 psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, 283 psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 ); 284 ALLOC( buf, nSamplesFromInputMax, opus_int16 ); 285 while( 1 ) { 286 int curr_nBitsUsedLBRR = 0; 287 nSamplesToBuffer = psEnc->state_Fxx[ 0 ].sCmn.frame_length - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx; 288 nSamplesToBuffer = silk_min( nSamplesToBuffer, nSamplesToBufferMax ); 289 nSamplesFromInput = silk_DIV32_16( nSamplesToBuffer * psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 ); 290 /* Resample and write to buffer */ 291 if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) { 292 opus_int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded; 293 for( n = 0; n < nSamplesFromInput; n++ ) { 294 buf[ n ] = RES2INT16(samplesIn[ 2 * n ]); 295 } 296 /* Making sure to start both resamplers from the same state when switching from mono to stereo */ 297 if( psEnc->nPrevChannelsInternal == 1 && id==0 ) { 298 silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state)); 299 } 300 301 ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, 302 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); 303 psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer; 304 305 nSamplesToBuffer = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx; 306 nSamplesToBuffer = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz ); 307 for( n = 0; n < nSamplesFromInput; n++ ) { 308 buf[ n ] = RES2INT16(samplesIn[ 2 * n + 1 ]); 309 } 310 ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, 311 &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); 312 313 psEnc->state_Fxx[ 1 ].sCmn.inputBufIx += nSamplesToBuffer; 314 } else if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 1 ) { 315 /* Combine left and right channels before resampling */ 316 for( n = 0; n < nSamplesFromInput; n++ ) { 317 sum = RES2INT16(samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ]); 318 buf[ n ] = (opus_int16)silk_RSHIFT_ROUND( sum, 1 ); 319 } 320 ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, 321 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); 322 /* On the first mono frame, average the results for the two resampler states */ 323 if( psEnc->nPrevChannelsInternal == 2 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 ) { 324 ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, 325 &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); 326 for( n = 0; n < psEnc->state_Fxx[ 0 ].sCmn.frame_length; n++ ) { 327 psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ] = 328 silk_RSHIFT(psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ] 329 + psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx+n+2 ], 1); 330 } 331 } 332 psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer; 333 } else { 334 celt_assert( encControl->nChannelsAPI == 1 && encControl->nChannelsInternal == 1 ); 335 for( n = 0; n < nSamplesFromInput; n++ ) { 336 buf[n] = RES2INT16(samplesIn[n]); 337 } 338 ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, 339 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); 340 psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer; 341 } 342 343 samplesIn += nSamplesFromInput * encControl->nChannelsAPI; 344 nSamplesIn -= nSamplesFromInput; 345 346 /* Default */ 347 psEnc->allowBandwidthSwitch = 0; 348 349 /* Silk encoder */ 350 if( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx >= psEnc->state_Fxx[ 0 ].sCmn.frame_length ) { 351 /* Enough data in input buffer, so encode */ 352 celt_assert( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx == psEnc->state_Fxx[ 0 ].sCmn.frame_length ); 353 celt_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inputBufIx == psEnc->state_Fxx[ 1 ].sCmn.frame_length ); 354 355 /* Deal with LBRR data */ 356 if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 && !prefillFlag ) { 357 /* Create space at start of payload for VAD and FEC flags */ 358 opus_uint8 iCDF[ 2 ] = { 0, 0 }; 359 iCDF[ 0 ] = 256 - silk_RSHIFT( 256, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal ); 360 ec_enc_icdf( psRangeEnc, 0, iCDF, 8 ); 361 curr_nBitsUsedLBRR = ec_tell( psRangeEnc ); 362 363 /* Encode any LBRR data from previous packet */ 364 /* Encode LBRR flags */ 365 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 366 LBRR_symbol = 0; 367 for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) { 368 LBRR_symbol |= silk_LSHIFT( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ], i ); 369 } 370 psEnc->state_Fxx[ n ].sCmn.LBRR_flag = LBRR_symbol > 0 ? 1 : 0; 371 if( LBRR_symbol && psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket > 1 ) { 372 ec_enc_icdf( psRangeEnc, LBRR_symbol - 1, silk_LBRR_flags_iCDF_ptr[ psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket - 2 ], 8 ); 373 } 374 } 375 376 /* Code LBRR indices and excitation signals */ 377 for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) { 378 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 379 if( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] ) { 380 opus_int condCoding; 381 382 if( encControl->nChannelsInternal == 2 && n == 0 ) { 383 silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ i ] ); 384 /* For LBRR data there's no need to code the mid-only flag if the side-channel LBRR flag is set */ 385 if( psEnc->state_Fxx[ 1 ].sCmn.LBRR_flags[ i ] == 0 ) { 386 silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ i ] ); 387 } 388 } 389 /* Use conditional coding if previous frame available */ 390 if( i > 0 && psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i - 1 ] ) { 391 condCoding = CODE_CONDITIONALLY; 392 } else { 393 condCoding = CODE_INDEPENDENTLY; 394 } 395 silk_encode_indices( &psEnc->state_Fxx[ n ].sCmn, psRangeEnc, i, 1, condCoding ); 396 silk_encode_pulses( psRangeEnc, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].signalType, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].quantOffsetType, 397 psEnc->state_Fxx[ n ].sCmn.pulses_LBRR[ i ], psEnc->state_Fxx[ n ].sCmn.frame_length ); 398 } 399 } 400 } 401 402 /* Reset LBRR flags */ 403 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 404 silk_memset( psEnc->state_Fxx[ n ].sCmn.LBRR_flags, 0, sizeof( psEnc->state_Fxx[ n ].sCmn.LBRR_flags ) ); 405 } 406 curr_nBitsUsedLBRR = ec_tell( psRangeEnc ) - curr_nBitsUsedLBRR; 407 } 408 409 silk_HP_variable_cutoff( psEnc->state_Fxx ); 410 411 /* Total target bits for packet */ 412 nBits = silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 ); 413 /* Subtract bits used for LBRR */ 414 if( !prefillFlag ) { 415 /* psEnc->nBitsUsedLBRR is an exponential moving average of the LBRR usage, 416 except that for the first LBRR frame it does no averaging and for the first 417 frame after after LBRR, it goes back to zero immediately. */ 418 if ( curr_nBitsUsedLBRR < 10 ) { 419 psEnc->nBitsUsedLBRR = 0; 420 } else if ( psEnc->nBitsUsedLBRR < 10) { 421 psEnc->nBitsUsedLBRR = curr_nBitsUsedLBRR; 422 } else { 423 psEnc->nBitsUsedLBRR = ( psEnc->nBitsUsedLBRR + curr_nBitsUsedLBRR ) / 2; 424 } 425 nBits -= psEnc->nBitsUsedLBRR; 426 } 427 /* Divide by number of uncoded frames left in packet */ 428 nBits = silk_DIV32_16( nBits, psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket ); 429 /* Convert to bits/second */ 430 if( encControl->payloadSize_ms == 10 ) { 431 TargetRate_bps = silk_SMULBB( nBits, 100 ); 432 } else { 433 TargetRate_bps = silk_SMULBB( nBits, 50 ); 434 } 435 /* Subtract fraction of bits in excess of target in previous frames and packets */ 436 TargetRate_bps -= silk_DIV32_16( silk_MUL( psEnc->nBitsExceeded, 1000 ), BITRESERVOIR_DECAY_TIME_MS ); 437 if( !prefillFlag && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded > 0 ) { 438 /* Compare actual vs target bits so far in this packet */ 439 opus_int32 bitsBalance = ec_tell( psRangeEnc ) - psEnc->nBitsUsedLBRR - nBits * psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded; 440 TargetRate_bps -= silk_DIV32_16( silk_MUL( bitsBalance, 1000 ), BITRESERVOIR_DECAY_TIME_MS ); 441 } 442 /* Never exceed input bitrate */ 443 TargetRate_bps = silk_LIMIT( TargetRate_bps, encControl->bitRate, 5000 ); 444 445 /* Convert Left/Right to Mid/Side */ 446 if( encControl->nChannelsInternal == 2 ) { 447 silk_stereo_LR_to_MS( &psEnc->sStereo, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ 2 ], &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ 2 ], 448 psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], &psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], 449 MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8, encControl->toMono, 450 psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length ); 451 if( psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) { 452 /* Reset side channel encoder memory for first frame with side coding */ 453 if( psEnc->prev_decode_only_middle == 1 ) { 454 silk_memset( &psEnc->state_Fxx[ 1 ].sShape, 0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) ); 455 silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) ); 456 silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) ); 457 silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) ); 458 psEnc->state_Fxx[ 1 ].sCmn.prevLag = 100; 459 psEnc->state_Fxx[ 1 ].sCmn.sNSQ.lagPrev = 100; 460 psEnc->state_Fxx[ 1 ].sShape.LastGainIndex = 10; 461 psEnc->state_Fxx[ 1 ].sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY; 462 psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_gain_Q16 = 65536; 463 psEnc->state_Fxx[ 1 ].sCmn.first_frame_after_reset = 1; 464 } 465 silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ], activity ); 466 } else { 467 psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] = 0; 468 } 469 if( !prefillFlag ) { 470 silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] ); 471 if( psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) { 472 silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] ); 473 } 474 } 475 } else { 476 /* Buffering */ 477 silk_memcpy( psEnc->state_Fxx[ 0 ].sCmn.inputBuf, psEnc->sStereo.sMid, 2 * sizeof( opus_int16 ) ); 478 silk_memcpy( psEnc->sStereo.sMid, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.frame_length ], 2 * sizeof( opus_int16 ) ); 479 } 480 silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ], activity ); 481 482 /* Encode */ 483 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 484 opus_int maxBits, useCBR; 485 486 /* Handling rate constraints */ 487 maxBits = encControl->maxBits; 488 if( tot_blocks == 2 && curr_block == 0 ) { 489 maxBits = maxBits * 3 / 5; 490 } else if( tot_blocks == 3 ) { 491 if( curr_block == 0 ) { 492 maxBits = maxBits * 2 / 5; 493 } else if( curr_block == 1 ) { 494 maxBits = maxBits * 3 / 4; 495 } 496 } 497 useCBR = encControl->useCBR && curr_block == tot_blocks - 1; 498 499 if( encControl->nChannelsInternal == 1 ) { 500 channelRate_bps = TargetRate_bps; 501 } else { 502 channelRate_bps = MStargetRates_bps[ n ]; 503 if( n == 0 && MStargetRates_bps[ 1 ] > 0 ) { 504 useCBR = 0; 505 /* Give mid up to 1/2 of the max bits for that frame */ 506 maxBits -= encControl->maxBits / ( tot_blocks * 2 ); 507 } 508 } 509 510 if( channelRate_bps > 0 ) { 511 opus_int condCoding; 512 513 silk_control_SNR( &psEnc->state_Fxx[ n ].sCmn, channelRate_bps ); 514 515 /* Use independent coding if no previous frame available */ 516 if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - n <= 0 ) { 517 condCoding = CODE_INDEPENDENTLY; 518 } else if( n > 0 && psEnc->prev_decode_only_middle ) { 519 /* If we skipped a side frame in this packet, we don't 520 need LTP scaling; the LTP state is well-defined. */ 521 condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING; 522 } else { 523 condCoding = CODE_CONDITIONALLY; 524 } 525 if( ( ret = silk_encode_frame_Fxx( &psEnc->state_Fxx[ n ], nBytesOut, psRangeEnc, condCoding, maxBits, useCBR ) ) != 0 ) { 526 silk_assert( 0 ); 527 } 528 } 529 psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0; 530 psEnc->state_Fxx[ n ].sCmn.inputBufIx = 0; 531 psEnc->state_Fxx[ n ].sCmn.nFramesEncoded++; 532 } 533 psEnc->prev_decode_only_middle = psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - 1 ]; 534 535 /* Insert VAD and FEC flags at beginning of bitstream */ 536 if( *nBytesOut > 0 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket) { 537 flags = 0; 538 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 539 for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) { 540 flags = silk_LSHIFT( flags, 1 ); 541 flags |= psEnc->state_Fxx[ n ].sCmn.VAD_flags[ i ]; 542 } 543 flags = silk_LSHIFT( flags, 1 ); 544 flags |= psEnc->state_Fxx[ n ].sCmn.LBRR_flag; 545 } 546 if( !prefillFlag ) { 547 ec_enc_patch_initial_bits( psRangeEnc, flags, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal ); 548 } 549 550 /* Return zero bytes if all channels DTXed */ 551 if( psEnc->state_Fxx[ 0 ].sCmn.inDTX && ( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inDTX ) ) { 552 *nBytesOut = 0; 553 } 554 555 psEnc->nBitsExceeded += *nBytesOut * 8; 556 psEnc->nBitsExceeded -= silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 ); 557 psEnc->nBitsExceeded = silk_LIMIT( psEnc->nBitsExceeded, 0, 10000 ); 558 559 /* Update flag indicating if bandwidth switching is allowed */ 560 speech_act_thr_for_switch_Q8 = silk_SMLAWB( SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ), 561 SILK_FIX_CONST( ( 1 - SPEECH_ACTIVITY_DTX_THRES ) / MAX_BANDWIDTH_SWITCH_DELAY_MS, 16 + 8 ), psEnc->timeSinceSwitchAllowed_ms ); 562 if( psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8 < speech_act_thr_for_switch_Q8 ) { 563 psEnc->allowBandwidthSwitch = 1; 564 psEnc->timeSinceSwitchAllowed_ms = 0; 565 } else { 566 psEnc->allowBandwidthSwitch = 0; 567 psEnc->timeSinceSwitchAllowed_ms += encControl->payloadSize_ms; 568 } 569 } 570 571 if( nSamplesIn == 0 ) { 572 break; 573 } 574 } else { 575 break; 576 } 577 curr_block++; 578 } 579 580 psEnc->nPrevChannelsInternal = encControl->nChannelsInternal; 581 582 encControl->allowBandwidthSwitch = psEnc->allowBandwidthSwitch; 583 encControl->inWBmodeWithoutVariableLP = psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == 16 && psEnc->state_Fxx[ 0 ].sCmn.sLP.mode == 0; 584 encControl->internalSampleRate = silk_SMULBB( psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, 1000 ); 585 encControl->stereoWidth_Q14 = encControl->toMono ? 0 : psEnc->sStereo.smth_width_Q14; 586 if( prefillFlag ) { 587 encControl->payloadSize_ms = tmp_payloadSize_ms; 588 encControl->complexity = tmp_complexity; 589 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 590 psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0; 591 psEnc->state_Fxx[ n ].sCmn.prefillFlag = 0; 592 } 593 } 594 595 encControl->signalType = psEnc->state_Fxx[0].sCmn.indices.signalType; 596 encControl->offset = silk_Quantization_Offsets_Q10 597 [ psEnc->state_Fxx[0].sCmn.indices.signalType >> 1 ] 598 [ psEnc->state_Fxx[0].sCmn.indices.quantOffsetType ]; 599 RESTORE_STACK; 600 return ret; 601 }