digital_agc.cc (23958B)
1 /* 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/audio_processing/agc/legacy/digital_agc.h" 12 13 #include <cstdint> 14 #include <cstring> 15 16 #include "common_audio/signal_processing/include/signal_processing_library.h" 17 #include "common_audio/signal_processing/include/spl_inl.h" 18 #include "modules/audio_processing/agc/legacy/gain_control.h" 19 #include "rtc_base/checks.h" 20 21 namespace webrtc { 22 23 namespace { 24 25 // To generate the gaintable, copy&paste the following lines to a Matlab window: 26 // MaxGain = 6; MinGain = 0; CompRatio = 3; Knee = 1; 27 // zeros = 0:31; lvl = 2.^(1-zeros); 28 // A = -10*log10(lvl) * (CompRatio - 1) / CompRatio; 29 // B = MaxGain - MinGain; 30 // gains = round(2^16*10.^(0.05 * (MinGain + B * ( 31 // log(exp(-Knee*A)+exp(-Knee*B)) - log(1+exp(-Knee*B)) ) / 32 // log(1/(1+exp(Knee*B)))))); 33 // fprintf(1, '\t%i, %i, %i, %i,\n', gains); 34 // % Matlab code for plotting the gain and input/output level characteristic 35 // (copy/paste the following 3 lines): 36 // in = 10*log10(lvl); out = 20*log10(gains/65536); 37 // subplot(121); plot(in, out); axis([-30, 0, -5, 20]); grid on; xlabel('Input 38 // (dB)'); ylabel('Gain (dB)'); 39 // subplot(122); plot(in, in+out); axis([-30, 0, -30, 5]); grid on; 40 // xlabel('Input (dB)'); ylabel('Output (dB)'); 41 // zoom on; 42 43 // Generator table for y=log2(1+e^x) in Q8. 44 enum { kGenFuncTableSize = 128 }; 45 const uint16_t kGenFuncTable[kGenFuncTableSize] = { 46 256, 485, 786, 1126, 1484, 1849, 2217, 2586, 2955, 3324, 3693, 47 4063, 4432, 4801, 5171, 5540, 5909, 6279, 6648, 7017, 7387, 7756, 48 8125, 8495, 8864, 9233, 9603, 9972, 10341, 10711, 11080, 11449, 11819, 49 12188, 12557, 12927, 13296, 13665, 14035, 14404, 14773, 15143, 15512, 15881, 50 16251, 16620, 16989, 17359, 17728, 18097, 18466, 18836, 19205, 19574, 19944, 51 20313, 20682, 21052, 21421, 21790, 22160, 22529, 22898, 23268, 23637, 24006, 52 24376, 24745, 25114, 25484, 25853, 26222, 26592, 26961, 27330, 27700, 28069, 53 28438, 28808, 29177, 29546, 29916, 30285, 30654, 31024, 31393, 31762, 32132, 54 32501, 32870, 33240, 33609, 33978, 34348, 34717, 35086, 35456, 35825, 36194, 55 36564, 36933, 37302, 37672, 38041, 38410, 38780, 39149, 39518, 39888, 40257, 56 40626, 40996, 41365, 41734, 42104, 42473, 42842, 43212, 43581, 43950, 44320, 57 44689, 45058, 45428, 45797, 46166, 46536, 46905}; 58 59 const int16_t kAvgDecayTime = 250; // frames; < 3000 60 61 // the 32 most significant bits of A(19) * B(26) >> 13 62 #define AGC_MUL32(A, B) (((B) >> 13) * (A) + (((0x00001FFF & (B)) * (A)) >> 13)) 63 // C + the 32 most significant bits of A * B 64 #define AGC_SCALEDIFF32(A, B, C) \ 65 ((C) + ((B) >> 16) * (A) + (((0x0000FFFF & (B)) * (A)) >> 16)) 66 67 } // namespace 68 69 int32_t WebRtcAgc_CalculateGainTable(int32_t* gainTable, // Q16 70 int16_t digCompGaindB, // Q0 71 int16_t targetLevelDbfs, // Q0 72 uint8_t limiterEnable, 73 int16_t analogTarget) { // Q0 74 // This function generates the compressor gain table used in the fixed digital 75 // part. 76 uint32_t tmpU32no1, tmpU32no2, absInLevel, logApprox; 77 int32_t inLevel, limiterLvl; 78 int32_t tmp32, tmp32no1, tmp32no2, numFIX, den, y32; 79 const uint16_t kLog10 = 54426; // log2(10) in Q14 80 const uint16_t kLog10_2 = 49321; // 10*log10(2) in Q14 81 const uint16_t kLogE_1 = 23637; // log2(e) in Q14 82 uint16_t constMaxGain; 83 uint16_t tmpU16, intPart, fracPart; 84 const int16_t kCompRatio = 3; 85 int16_t limiterOffset = 0; // Limiter offset 86 int16_t limiterIdx, limiterLvlX; 87 int16_t constLinApprox, maxGain, diffGain; 88 int16_t i, tmp16, tmp16no1; 89 int zeros, zerosScale; 90 91 // Constants 92 // kLogE_1 = 23637; // log2(e) in Q14 93 // kLog10 = 54426; // log2(10) in Q14 94 // kLog10_2 = 49321; // 10*log10(2) in Q14 95 96 // Calculate maximum digital gain and zero gain level 97 tmp32no1 = (digCompGaindB - analogTarget) * (kCompRatio - 1); 98 tmp16no1 = analogTarget - targetLevelDbfs; 99 tmp16no1 += 100 WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio); 101 maxGain = WEBRTC_SPL_MAX(tmp16no1, (analogTarget - targetLevelDbfs)); 102 tmp32no1 = maxGain * kCompRatio; 103 if ((digCompGaindB <= analogTarget) && (limiterEnable)) { 104 limiterOffset = 0; 105 } 106 107 // Calculate the difference between maximum gain and gain at 0dB0v 108 tmp32no1 = digCompGaindB * (kCompRatio - 1); 109 diffGain = 110 WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio); 111 if (diffGain < 0 || diffGain >= kGenFuncTableSize) { 112 RTC_DCHECK(0); 113 return -1; 114 } 115 116 // Calculate the limiter level and index: 117 // limiterLvlX = analogTarget - limiterOffset 118 // limiterLvl = targetLevelDbfs + limiterOffset/compRatio 119 limiterLvlX = analogTarget - limiterOffset; 120 limiterIdx = 2 + WebRtcSpl_DivW32W16ResW16((int32_t)limiterLvlX * (1 << 13), 121 kLog10_2 / 2); 122 tmp16no1 = 123 WebRtcSpl_DivW32W16ResW16(limiterOffset + (kCompRatio >> 1), kCompRatio); 124 limiterLvl = targetLevelDbfs + tmp16no1; 125 126 // Calculate (through table lookup): 127 // constMaxGain = log2(1+2^(log2(e)*diffGain)); (in Q8) 128 constMaxGain = kGenFuncTable[diffGain]; // in Q8 129 130 // Calculate a parameter used to approximate the fractional part of 2^x with a 131 // piecewise linear function in Q14: 132 // constLinApprox = round(3/2*(4*(3-2*sqrt(2))/(log(2)^2)-0.5)*2^14); 133 constLinApprox = 22817; // in Q14 134 135 // Calculate a denominator used in the exponential part to convert from dB to 136 // linear scale: 137 // den = 20*constMaxGain (in Q8) 138 den = WEBRTC_SPL_MUL_16_U16(20, constMaxGain); // in Q8 139 140 for (i = 0; i < 32; i++) { 141 // Calculate scaled input level (compressor): 142 // inLevel = 143 // fix((-constLog10_2*(compRatio-1)*(1-i)+fix(compRatio/2))/compRatio) 144 tmp16 = (int16_t)((kCompRatio - 1) * (i - 1)); // Q0 145 tmp32 = WEBRTC_SPL_MUL_16_U16(tmp16, kLog10_2) + 1; // Q14 146 inLevel = WebRtcSpl_DivW32W16(tmp32, kCompRatio); // Q14 147 148 // Calculate diffGain-inLevel, to map using the genFuncTable 149 inLevel = (int32_t)diffGain * (1 << 14) - inLevel; // Q14 150 151 // Make calculations on abs(inLevel) and compensate for the sign afterwards. 152 absInLevel = (uint32_t)WEBRTC_SPL_ABS_W32(inLevel); // Q14 153 154 // LUT with interpolation 155 intPart = (uint16_t)(absInLevel >> 14); 156 fracPart = 157 (uint16_t)(absInLevel & 0x00003FFF); // extract the fractional part 158 tmpU16 = kGenFuncTable[intPart + 1] - kGenFuncTable[intPart]; // Q8 159 tmpU32no1 = tmpU16 * fracPart; // Q22 160 tmpU32no1 += (uint32_t)kGenFuncTable[intPart] << 14; // Q22 161 logApprox = tmpU32no1 >> 8; // Q14 162 // Compensate for negative exponent using the relation: 163 // log2(1 + 2^-x) = log2(1 + 2^x) - x 164 if (inLevel < 0) { 165 zeros = WebRtcSpl_NormU32(absInLevel); 166 zerosScale = 0; 167 if (zeros < 15) { 168 // Not enough space for multiplication 169 tmpU32no2 = absInLevel >> (15 - zeros); // Q(zeros-1) 170 tmpU32no2 = WEBRTC_SPL_UMUL_32_16(tmpU32no2, kLogE_1); // Q(zeros+13) 171 if (zeros < 9) { 172 zerosScale = 9 - zeros; 173 tmpU32no1 >>= zerosScale; // Q(zeros+13) 174 } else { 175 tmpU32no2 >>= zeros - 9; // Q22 176 } 177 } else { 178 tmpU32no2 = WEBRTC_SPL_UMUL_32_16(absInLevel, kLogE_1); // Q28 179 tmpU32no2 >>= 6; // Q22 180 } 181 logApprox = 0; 182 if (tmpU32no2 < tmpU32no1) { 183 logApprox = (tmpU32no1 - tmpU32no2) >> (8 - zerosScale); // Q14 184 } 185 } 186 numFIX = (maxGain * constMaxGain) * (1 << 6); // Q14 187 numFIX -= (int32_t)logApprox * diffGain; // Q14 188 189 // Calculate ratio 190 // Shift `numFIX` as much as possible. 191 // Ensure we avoid wrap-around in `den` as well. 192 if (numFIX > (den >> 8) || -numFIX > (den >> 8)) { // `den` is Q8. 193 zeros = WebRtcSpl_NormW32(numFIX); 194 } else { 195 zeros = WebRtcSpl_NormW32(den) + 8; 196 } 197 numFIX *= 1 << zeros; // Q(14+zeros) 198 199 // Shift den so we end up in Qy1 200 tmp32no1 = WEBRTC_SPL_SHIFT_W32(den, zeros - 9); // Q(zeros - 1) 201 y32 = numFIX / tmp32no1; // in Q15 202 // This is to do rounding in Q14. 203 y32 = y32 >= 0 ? (y32 + 1) >> 1 : -((-y32 + 1) >> 1); 204 205 if (limiterEnable && (i < limiterIdx)) { 206 tmp32 = WEBRTC_SPL_MUL_16_U16(i - 1, kLog10_2); // Q14 207 tmp32 -= limiterLvl * (1 << 14); // Q14 208 y32 = WebRtcSpl_DivW32W16(tmp32 + 10, 20); 209 } 210 if (y32 > 39000) { 211 tmp32 = (y32 >> 1) * kLog10 + 4096; // in Q27 212 tmp32 >>= 13; // In Q14. 213 } else { 214 tmp32 = y32 * kLog10 + 8192; // in Q28 215 tmp32 >>= 14; // In Q14. 216 } 217 tmp32 += 16 << 14; // in Q14 (Make sure final output is in Q16) 218 219 // Calculate power 220 if (tmp32 > 0) { 221 intPart = (int16_t)(tmp32 >> 14); 222 fracPart = (uint16_t)(tmp32 & 0x00003FFF); // in Q14 223 if ((fracPart >> 13) != 0) { 224 tmp16 = (2 << 14) - constLinApprox; 225 tmp32no2 = (1 << 14) - fracPart; 226 tmp32no2 *= tmp16; 227 tmp32no2 >>= 13; 228 tmp32no2 = (1 << 14) - tmp32no2; 229 } else { 230 tmp16 = constLinApprox - (1 << 14); 231 tmp32no2 = (fracPart * tmp16) >> 13; 232 } 233 fracPart = (uint16_t)tmp32no2; 234 gainTable[i] = 235 (1 << intPart) + WEBRTC_SPL_SHIFT_W32(fracPart, intPart - 14); 236 } else { 237 gainTable[i] = 0; 238 } 239 } 240 241 return 0; 242 } 243 244 int32_t WebRtcAgc_InitDigital(DigitalAgc* stt, int16_t agcMode) { 245 if (agcMode == kAgcModeFixedDigital) { 246 // start at minimum to find correct gain faster 247 stt->capacitorSlow = 0; 248 } else { 249 // start out with 0 dB gain 250 stt->capacitorSlow = 134217728; // (int32_t)(0.125f * 32768.0f * 32768.0f); 251 } 252 stt->capacitorFast = 0; 253 stt->gain = 65536; 254 stt->gatePrevious = 0; 255 stt->agcMode = agcMode; 256 257 // initialize VADs 258 WebRtcAgc_InitVad(&stt->vadNearend); 259 WebRtcAgc_InitVad(&stt->vadFarend); 260 261 return 0; 262 } 263 264 int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc* stt, 265 const int16_t* in_far, 266 size_t nrSamples) { 267 RTC_DCHECK(stt); 268 // VAD for far end 269 WebRtcAgc_ProcessVad(&stt->vadFarend, in_far, nrSamples); 270 271 return 0; 272 } 273 274 // Gains is an 11 element long array (one value per ms, incl start & end). 275 int32_t WebRtcAgc_ComputeDigitalGains(DigitalAgc* stt, 276 const int16_t* const* in_near, 277 size_t /* num_bands */, 278 uint32_t FS, 279 int16_t lowlevelSignal, 280 int32_t gains[11]) { 281 int32_t tmp32; 282 int32_t env[10]; 283 int32_t max_nrg; 284 int32_t cur_level; 285 int32_t gain32; 286 int16_t logratio; 287 int16_t lower_thr, upper_thr; 288 int16_t zeros = 0, zeros_fast, frac = 0; 289 int16_t decay; 290 int16_t gate, gain_adj; 291 int16_t k; 292 size_t n, L; 293 294 // determine number of samples per ms 295 if (FS == 8000) { 296 L = 8; 297 } else if (FS == 16000 || FS == 32000 || FS == 48000) { 298 L = 16; 299 } else { 300 return -1; 301 } 302 303 // VAD for near end 304 logratio = WebRtcAgc_ProcessVad(&stt->vadNearend, in_near[0], L * 10); 305 306 // Account for far end VAD 307 if (stt->vadFarend.counter > 10) { 308 tmp32 = 3 * logratio; 309 logratio = (int16_t)((tmp32 - stt->vadFarend.logRatio) >> 2); 310 } 311 312 // Determine decay factor depending on VAD 313 // upper_thr = 1.0f; 314 // lower_thr = 0.25f; 315 upper_thr = 1024; // Q10 316 lower_thr = 0; // Q10 317 if (logratio > upper_thr) { 318 // decay = -2^17 / DecayTime; -> -65 319 decay = -65; 320 } else if (logratio < lower_thr) { 321 decay = 0; 322 } else { 323 // decay = (int16_t)(((lower_thr - logratio) 324 // * (2^27/(DecayTime*(upper_thr-lower_thr)))) >> 10); 325 // SUBSTITUTED: 2^27/(DecayTime*(upper_thr-lower_thr)) -> 65 326 tmp32 = (lower_thr - logratio) * 65; 327 decay = (int16_t)(tmp32 >> 10); 328 } 329 330 // adjust decay factor for long silence (detected as low standard deviation) 331 // This is only done in the adaptive modes 332 if (stt->agcMode != kAgcModeFixedDigital) { 333 if (stt->vadNearend.stdLongTerm < 4000) { 334 decay = 0; 335 } else if (stt->vadNearend.stdLongTerm < 8096) { 336 // decay = (int16_t)(((stt->vadNearend.stdLongTerm - 4000) * decay) >> 337 // 12); 338 tmp32 = (stt->vadNearend.stdLongTerm - 4000) * decay; 339 decay = (int16_t)(tmp32 >> 12); 340 } 341 342 if (lowlevelSignal != 0) { 343 decay = 0; 344 } 345 } 346 // Find max amplitude per sub frame 347 // iterate over sub frames 348 for (k = 0; k < 10; k++) { 349 // iterate over samples 350 max_nrg = 0; 351 for (n = 0; n < L; n++) { 352 int32_t nrg = in_near[0][k * L + n] * in_near[0][k * L + n]; 353 if (nrg > max_nrg) { 354 max_nrg = nrg; 355 } 356 } 357 env[k] = max_nrg; 358 } 359 360 // Calculate gain per sub frame 361 gains[0] = stt->gain; 362 for (k = 0; k < 10; k++) { 363 // Fast envelope follower 364 // decay time = -131000 / -1000 = 131 (ms) 365 stt->capacitorFast = 366 AGC_SCALEDIFF32(-1000, stt->capacitorFast, stt->capacitorFast); 367 if (env[k] > stt->capacitorFast) { 368 stt->capacitorFast = env[k]; 369 } 370 // Slow envelope follower 371 if (env[k] > stt->capacitorSlow) { 372 // increase capacitorSlow 373 stt->capacitorSlow = AGC_SCALEDIFF32(500, (env[k] - stt->capacitorSlow), 374 stt->capacitorSlow); 375 } else { 376 // decrease capacitorSlow 377 stt->capacitorSlow = 378 AGC_SCALEDIFF32(decay, stt->capacitorSlow, stt->capacitorSlow); 379 } 380 381 // use maximum of both capacitors as current level 382 if (stt->capacitorFast > stt->capacitorSlow) { 383 cur_level = stt->capacitorFast; 384 } else { 385 cur_level = stt->capacitorSlow; 386 } 387 // Translate signal level into gain, using a piecewise linear approximation 388 // find number of leading zeros 389 zeros = WebRtcSpl_NormU32((uint32_t)cur_level); 390 if (cur_level == 0) { 391 zeros = 31; 392 } 393 tmp32 = ((uint32_t)cur_level << zeros) & 0x7FFFFFFF; 394 frac = (int16_t)(tmp32 >> 19); // Q12. 395 // Interpolate between gainTable[zeros] and gainTable[zeros-1]. 396 tmp32 = 397 ((stt->gainTable[zeros - 1] - stt->gainTable[zeros]) * (int64_t)frac) >> 398 12; 399 gains[k + 1] = stt->gainTable[zeros] + tmp32; 400 } 401 402 // Gate processing (lower gain during absence of speech) 403 zeros = (zeros << 9) - (frac >> 3); 404 // find number of leading zeros 405 zeros_fast = WebRtcSpl_NormU32((uint32_t)stt->capacitorFast); 406 if (stt->capacitorFast == 0) { 407 zeros_fast = 31; 408 } 409 tmp32 = ((uint32_t)stt->capacitorFast << zeros_fast) & 0x7FFFFFFF; 410 zeros_fast <<= 9; 411 zeros_fast -= (int16_t)(tmp32 >> 22); 412 413 gate = 1000 + zeros_fast - zeros - stt->vadNearend.stdShortTerm; 414 415 if (gate < 0) { 416 stt->gatePrevious = 0; 417 } else { 418 tmp32 = stt->gatePrevious * 7; 419 gate = (int16_t)((gate + tmp32) >> 3); 420 stt->gatePrevious = gate; 421 } 422 // gate < 0 -> no gate 423 // gate > 2500 -> max gate 424 if (gate > 0) { 425 if (gate < 2500) { 426 gain_adj = (2500 - gate) >> 5; 427 } else { 428 gain_adj = 0; 429 } 430 for (k = 0; k < 10; k++) { 431 if ((gains[k + 1] - stt->gainTable[0]) > 8388608) { 432 // To prevent wraparound 433 tmp32 = (gains[k + 1] - stt->gainTable[0]) >> 8; 434 tmp32 *= 178 + gain_adj; 435 } else { 436 tmp32 = (gains[k + 1] - stt->gainTable[0]) * (178 + gain_adj); 437 tmp32 >>= 8; 438 } 439 gains[k + 1] = stt->gainTable[0] + tmp32; 440 } 441 } 442 443 // Limit gain to avoid overload distortion 444 for (k = 0; k < 10; k++) { 445 // Find a shift of gains[k + 1] such that it can be squared without 446 // overflow, but at least by 10 bits. 447 zeros = 10; 448 if (gains[k + 1] > 47452159) { 449 zeros = 16 - WebRtcSpl_NormW32(gains[k + 1]); 450 } 451 gain32 = (gains[k + 1] >> zeros) + 1; 452 gain32 *= gain32; 453 // check for overflow 454 while (AGC_MUL32((env[k] >> 12) + 1, gain32) > 455 WEBRTC_SPL_SHIFT_W32((int32_t)32767, 2 * (1 - zeros + 10))) { 456 // multiply by 253/256 ==> -0.1 dB 457 if (gains[k + 1] > 8388607) { 458 // Prevent wrap around 459 gains[k + 1] = (gains[k + 1] / 256) * 253; 460 } else { 461 gains[k + 1] = (gains[k + 1] * 253) / 256; 462 } 463 gain32 = (gains[k + 1] >> zeros) + 1; 464 gain32 *= gain32; 465 } 466 } 467 // gain reductions should be done 1 ms earlier than gain increases 468 for (k = 1; k < 10; k++) { 469 if (gains[k] > gains[k + 1]) { 470 gains[k] = gains[k + 1]; 471 } 472 } 473 // save start gain for next frame 474 stt->gain = gains[10]; 475 476 return 0; 477 } 478 479 int32_t WebRtcAgc_ApplyDigitalGains(const int32_t gains[11], 480 size_t num_bands, 481 uint32_t FS, 482 const int16_t* const* in_near, 483 int16_t* const* out) { 484 // Apply gain 485 // handle first sub frame separately 486 size_t L; 487 int16_t L2; // samples/subframe 488 489 // determine number of samples per ms 490 if (FS == 8000) { 491 L = 8; 492 L2 = 3; 493 } else if (FS == 16000 || FS == 32000 || FS == 48000) { 494 L = 16; 495 L2 = 4; 496 } else { 497 return -1; 498 } 499 500 for (size_t i = 0; i < num_bands; ++i) { 501 if (in_near[i] != out[i]) { 502 // Only needed if they don't already point to the same place. 503 memcpy(out[i], in_near[i], 10 * L * sizeof(in_near[i][0])); 504 } 505 } 506 507 // iterate over samples 508 int32_t delta = (gains[1] - gains[0]) * (1 << (4 - L2)); 509 int32_t gain32 = gains[0] * (1 << 4); 510 for (size_t n = 0; n < L; n++) { 511 for (size_t i = 0; i < num_bands; ++i) { 512 int32_t out_tmp = (int64_t)out[i][n] * ((gain32 + 127) >> 7) >> 16; 513 if (out_tmp > 4095) { 514 out[i][n] = (int16_t)32767; 515 } else if (out_tmp < -4096) { 516 out[i][n] = (int16_t)-32768; 517 } else { 518 int32_t tmp32 = ((int64_t)out[i][n] * (gain32 >> 4)) >> 16; 519 out[i][n] = (int16_t)tmp32; 520 } 521 } 522 523 gain32 += delta; 524 } 525 // iterate over subframes 526 for (int k = 1; k < 10; k++) { 527 delta = (gains[k + 1] - gains[k]) * (1 << (4 - L2)); 528 gain32 = gains[k] * (1 << 4); 529 // iterate over samples 530 for (size_t n = 0; n < L; n++) { 531 for (size_t i = 0; i < num_bands; ++i) { 532 int64_t tmp64 = ((int64_t)(out[i][k * L + n])) * (gain32 >> 4); 533 tmp64 = tmp64 >> 16; 534 if (tmp64 > 32767) { 535 out[i][k * L + n] = 32767; 536 } else if (tmp64 < -32768) { 537 out[i][k * L + n] = -32768; 538 } else { 539 out[i][k * L + n] = (int16_t)(tmp64); 540 } 541 } 542 gain32 += delta; 543 } 544 } 545 return 0; 546 } 547 548 void WebRtcAgc_InitVad(AgcVad* state) { 549 int16_t k; 550 551 state->HPstate = 0; // state of high pass filter 552 state->logRatio = 0; // log( P(active) / P(inactive) ) 553 // average input level (Q10) 554 state->meanLongTerm = 15 << 10; 555 556 // variance of input level (Q8) 557 state->varianceLongTerm = 500 << 8; 558 559 state->stdLongTerm = 0; // standard deviation of input level in dB 560 // short-term average input level (Q10) 561 state->meanShortTerm = 15 << 10; 562 563 // short-term variance of input level (Q8) 564 state->varianceShortTerm = 500 << 8; 565 566 state->stdShortTerm = 567 0; // short-term standard deviation of input level in dB 568 state->counter = 3; // counts updates 569 for (k = 0; k < 8; k++) { 570 // downsampling filter 571 state->downState[k] = 0; 572 } 573 } 574 575 int16_t WebRtcAgc_ProcessVad(AgcVad* state, // (i) VAD state 576 const int16_t* in, // (i) Speech signal 577 size_t nrSamples) { // (i) number of samples 578 uint32_t nrg; 579 int32_t out, tmp32, tmp32b; 580 uint16_t tmpU16; 581 int16_t k, subfr, tmp16; 582 int16_t buf1[8]; 583 int16_t buf2[4]; 584 int16_t HPstate; 585 int16_t zeros, dB; 586 int64_t tmp64; 587 588 // process in 10 sub frames of 1 ms (to save on memory) 589 nrg = 0; 590 HPstate = state->HPstate; 591 for (subfr = 0; subfr < 10; subfr++) { 592 // downsample to 4 kHz 593 if (nrSamples == 160) { 594 for (k = 0; k < 8; k++) { 595 tmp32 = (int32_t)in[2 * k] + (int32_t)in[2 * k + 1]; 596 tmp32 >>= 1; 597 buf1[k] = (int16_t)tmp32; 598 } 599 in += 16; 600 601 WebRtcSpl_DownsampleBy2(buf1, 8, buf2, state->downState); 602 } else { 603 WebRtcSpl_DownsampleBy2(in, 8, buf2, state->downState); 604 in += 8; 605 } 606 607 // high pass filter and compute energy 608 for (k = 0; k < 4; k++) { 609 out = buf2[k] + HPstate; 610 tmp32 = 600 * out; 611 HPstate = (int16_t)((tmp32 >> 10) - buf2[k]); 612 613 // Add 'out * out / 2**6' to 'nrg' in a non-overflowing 614 // way. Guaranteed to work as long as 'out * out / 2**6' fits in 615 // an int32_t. 616 nrg += out * (out / (1 << 6)); 617 nrg += out * (out % (1 << 6)) / (1 << 6); 618 } 619 } 620 state->HPstate = HPstate; 621 622 // find number of leading zeros 623 if (!(0xFFFF0000 & nrg)) { 624 zeros = 16; 625 } else { 626 zeros = 0; 627 } 628 if (!(0xFF000000 & (nrg << zeros))) { 629 zeros += 8; 630 } 631 if (!(0xF0000000 & (nrg << zeros))) { 632 zeros += 4; 633 } 634 if (!(0xC0000000 & (nrg << zeros))) { 635 zeros += 2; 636 } 637 if (!(0x80000000 & (nrg << zeros))) { 638 zeros += 1; 639 } 640 641 // energy level (range {-32..30}) (Q10) 642 dB = (15 - zeros) * (1 << 11); 643 644 // Update statistics 645 646 if (state->counter < kAvgDecayTime) { 647 // decay time = AvgDecTime * 10 ms 648 state->counter++; 649 } 650 651 // update short-term estimate of mean energy level (Q10) 652 tmp32 = state->meanShortTerm * 15 + dB; 653 state->meanShortTerm = (int16_t)(tmp32 >> 4); 654 655 // update short-term estimate of variance in energy level (Q8) 656 tmp32 = (dB * dB) >> 12; 657 tmp32 += state->varianceShortTerm * 15; 658 state->varianceShortTerm = tmp32 / 16; 659 660 // update short-term estimate of standard deviation in energy level (Q10) 661 tmp32 = state->meanShortTerm * state->meanShortTerm; 662 tmp32 = (state->varianceShortTerm << 12) - tmp32; 663 state->stdShortTerm = (int16_t)WebRtcSpl_Sqrt(tmp32); 664 665 // update long-term estimate of mean energy level (Q10) 666 tmp32 = state->meanLongTerm * state->counter + dB; 667 state->meanLongTerm = 668 WebRtcSpl_DivW32W16ResW16(tmp32, WebRtcSpl_AddSatW16(state->counter, 1)); 669 670 // update long-term estimate of variance in energy level (Q8) 671 tmp32 = (dB * dB) >> 12; 672 tmp32 += state->varianceLongTerm * state->counter; 673 state->varianceLongTerm = 674 WebRtcSpl_DivW32W16(tmp32, WebRtcSpl_AddSatW16(state->counter, 1)); 675 676 // update long-term estimate of standard deviation in energy level (Q10) 677 tmp32 = state->meanLongTerm * state->meanLongTerm; 678 tmp32 = (state->varianceLongTerm << 12) - tmp32; 679 state->stdLongTerm = (int16_t)WebRtcSpl_Sqrt(tmp32); 680 681 // update voice activity measure (Q10) 682 tmp16 = 3 << 12; 683 // TODO(bjornv): (dB - state->meanLongTerm) can overflow, e.g., in 684 // ApmTest.Process unit test. Previously the macro WEBRTC_SPL_MUL_16_16() 685 // was used, which did an intermediate cast to (int16_t), hence losing 686 // significant bits. This cause logRatio to max out positive, rather than 687 // negative. This is a bug, but has very little significance. 688 tmp32 = tmp16 * (int16_t)(dB - state->meanLongTerm); 689 tmp32 = WebRtcSpl_DivW32W16(tmp32, state->stdLongTerm); 690 tmpU16 = (13 << 12); 691 tmp32b = WEBRTC_SPL_MUL_16_U16(state->logRatio, tmpU16); 692 tmp64 = tmp32; 693 tmp64 += tmp32b >> 10; 694 tmp64 >>= 6; 695 696 // limit 697 if (tmp64 > 2048) { 698 tmp64 = 2048; 699 } else if (tmp64 < -2048) { 700 tmp64 = -2048; 701 } 702 state->logRatio = (int16_t)tmp64; 703 704 return state->logRatio; // Q10 705 } 706 707 } // namespace webrtc