webrtc_cng.cc (13757B)
1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/audio_coding/codecs/cng/webrtc_cng.h" 12 13 #include <algorithm> 14 #include <cstddef> 15 #include <cstdint> 16 17 #include "api/array_view.h" 18 #include "common_audio/signal_processing/include/signal_processing_library.h" 19 #include "rtc_base/buffer.h" 20 #include "rtc_base/checks.h" 21 #include "rtc_base/numerics/safe_conversions.h" 22 23 namespace webrtc { 24 25 namespace { 26 27 const size_t kCngMaxOutsizeOrder = 640; 28 29 // TODO(ossu): Rename the left-over WebRtcCng according to style guide. 30 void WebRtcCng_K2a16(int16_t* k, int useOrder, int16_t* a); 31 32 const int32_t WebRtcCng_kDbov[94] = { 33 1081109975, 858756178, 682134279, 541838517, 430397633, 341876992, 34 271562548, 215709799, 171344384, 136103682, 108110997, 85875618, 35 68213428, 54183852, 43039763, 34187699, 27156255, 21570980, 36 17134438, 13610368, 10811100, 8587562, 6821343, 5418385, 37 4303976, 3418770, 2715625, 2157098, 1713444, 1361037, 38 1081110, 858756, 682134, 541839, 430398, 341877, 39 271563, 215710, 171344, 136104, 108111, 85876, 40 68213, 54184, 43040, 34188, 27156, 21571, 41 17134, 13610, 10811, 8588, 6821, 5418, 42 4304, 3419, 2716, 2157, 1713, 1361, 43 1081, 859, 682, 542, 430, 342, 44 272, 216, 171, 136, 108, 86, 45 68, 54, 43, 34, 27, 22, 46 17, 14, 11, 9, 7, 5, 47 4, 3, 3, 2, 2, 1, 48 1, 1, 1, 1}; 49 50 const int16_t WebRtcCng_kCorrWindow[WEBRTC_CNG_MAX_LPC_ORDER] = { 51 32702, 32636, 32570, 32505, 32439, 32374, 52 32309, 32244, 32179, 32114, 32049, 31985}; 53 54 } // namespace 55 56 ComfortNoiseDecoder::ComfortNoiseDecoder() { 57 /* Needed to get the right function pointers in SPLIB. */ 58 Reset(); 59 } 60 61 void ComfortNoiseDecoder::Reset() { 62 dec_seed_ = 7777; /* For debugging only. */ 63 dec_target_energy_ = 0; 64 dec_used_energy_ = 0; 65 for (auto& c : dec_target_reflCoefs_) 66 c = 0; 67 for (auto& c : dec_used_reflCoefs_) 68 c = 0; 69 for (auto& c : dec_filtstate_) 70 c = 0; 71 for (auto& c : dec_filtstateLow_) 72 c = 0; 73 dec_order_ = 5; 74 dec_target_scale_factor_ = 0; 75 dec_used_scale_factor_ = 0; 76 } 77 78 void ComfortNoiseDecoder::UpdateSid(ArrayView<const uint8_t> sid) { 79 int16_t refCs[WEBRTC_CNG_MAX_LPC_ORDER]; 80 int32_t targetEnergy; 81 size_t length = sid.size(); 82 /* Throw away reflection coefficients of higher order than we can handle. */ 83 if (length > (WEBRTC_CNG_MAX_LPC_ORDER + 1)) 84 length = WEBRTC_CNG_MAX_LPC_ORDER + 1; 85 86 dec_order_ = static_cast<uint16_t>(length - 1); 87 88 uint8_t sid0 = std::min<uint8_t>(sid[0], 93); 89 targetEnergy = WebRtcCng_kDbov[sid0]; 90 /* Take down target energy to 75%. */ 91 targetEnergy = targetEnergy >> 1; 92 targetEnergy += targetEnergy >> 2; 93 94 dec_target_energy_ = targetEnergy; 95 96 /* Reconstruct coeffs with tweak for WebRtc implementation of RFC3389. */ 97 if (dec_order_ == WEBRTC_CNG_MAX_LPC_ORDER) { 98 for (size_t i = 0; i < (dec_order_); i++) { 99 refCs[i] = sid[i + 1] << 8; /* Q7 to Q15*/ 100 dec_target_reflCoefs_[i] = refCs[i]; 101 } 102 } else { 103 for (size_t i = 0; i < (dec_order_); i++) { 104 refCs[i] = (sid[i + 1] - 127) * (1 << 8); /* Q7 to Q15. */ 105 dec_target_reflCoefs_[i] = refCs[i]; 106 } 107 } 108 109 for (size_t i = (dec_order_); i < WEBRTC_CNG_MAX_LPC_ORDER; i++) { 110 refCs[i] = 0; 111 dec_target_reflCoefs_[i] = refCs[i]; 112 } 113 } 114 115 bool ComfortNoiseDecoder::Generate(ArrayView<int16_t> out_data, 116 bool new_period) { 117 int16_t excitation[kCngMaxOutsizeOrder]; 118 int16_t low[kCngMaxOutsizeOrder]; 119 int16_t lpPoly[WEBRTC_CNG_MAX_LPC_ORDER + 1]; 120 int16_t ReflBetaStd = 26214; /* 0.8 in q15. */ 121 int16_t ReflBetaCompStd = 6553; /* 0.2 in q15. */ 122 int16_t ReflBetaNewP = 19661; /* 0.6 in q15. */ 123 int16_t ReflBetaCompNewP = 13107; /* 0.4 in q15. */ 124 int16_t Beta, BetaC; /* These are in Q15. */ 125 int32_t targetEnergy; 126 int16_t En; 127 int16_t temp16; 128 const size_t num_samples = out_data.size(); 129 130 if (num_samples > kCngMaxOutsizeOrder) { 131 return false; 132 } 133 134 if (new_period) { 135 dec_used_scale_factor_ = dec_target_scale_factor_; 136 Beta = ReflBetaNewP; 137 BetaC = ReflBetaCompNewP; 138 } else { 139 Beta = ReflBetaStd; 140 BetaC = ReflBetaCompStd; 141 } 142 143 /* Calculate new scale factor in Q13 */ 144 dec_used_scale_factor_ = checked_cast<int16_t>( 145 WEBRTC_SPL_MUL_16_16_RSFT(dec_used_scale_factor_, Beta >> 2, 13) + 146 WEBRTC_SPL_MUL_16_16_RSFT(dec_target_scale_factor_, BetaC >> 2, 13)); 147 148 dec_used_energy_ = dec_used_energy_ >> 1; 149 dec_used_energy_ += dec_target_energy_ >> 1; 150 151 /* Do the same for the reflection coeffs, albeit in Q15. */ 152 for (size_t i = 0; i < WEBRTC_CNG_MAX_LPC_ORDER; i++) { 153 dec_used_reflCoefs_[i] = 154 (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(dec_used_reflCoefs_[i], Beta, 15); 155 dec_used_reflCoefs_[i] += 156 (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(dec_target_reflCoefs_[i], BetaC, 15); 157 } 158 159 /* Compute the polynomial coefficients. */ 160 WebRtcCng_K2a16(dec_used_reflCoefs_, WEBRTC_CNG_MAX_LPC_ORDER, lpPoly); 161 162 targetEnergy = dec_used_energy_; 163 164 /* Calculate scaling factor based on filter energy. */ 165 En = 8192; /* 1.0 in Q13. */ 166 for (size_t i = 0; i < (WEBRTC_CNG_MAX_LPC_ORDER); i++) { 167 /* Floating point value for reference. 168 E *= 1.0 - (dec_used_reflCoefs_[i] / 32768.0) * 169 (dec_used_reflCoefs_[i] / 32768.0); 170 */ 171 172 /* Same in fixed point. */ 173 /* K(i).^2 in Q15. */ 174 temp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(dec_used_reflCoefs_[i], 175 dec_used_reflCoefs_[i], 15); 176 /* 1 - K(i).^2 in Q15. */ 177 temp16 = 0x7fff - temp16; 178 En = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(En, temp16, 15); 179 } 180 181 /* float scaling= sqrt(E * dec_target_energy_ / (1 << 24)); */ 182 183 /* Calculate sqrt(En * target_energy / excitation energy) */ 184 targetEnergy = WebRtcSpl_Sqrt(dec_used_energy_); 185 186 En = (int16_t)WebRtcSpl_Sqrt(En) << 6; 187 En = (En * 3) >> 1; /* 1.5 estimates sqrt(2). */ 188 dec_used_scale_factor_ = (int16_t)((En * targetEnergy) >> 12); 189 190 /* Generate excitation. */ 191 /* Excitation energy per sample is 2.^24 - Q13 N(0,1). */ 192 for (size_t i = 0; i < num_samples; i++) { 193 excitation[i] = WebRtcSpl_RandN(&dec_seed_) >> 1; 194 } 195 196 /* Scale to correct energy. */ 197 WebRtcSpl_ScaleVector(excitation, excitation, dec_used_scale_factor_, 198 num_samples, 13); 199 200 /* `lpPoly` - Coefficients in Q12. 201 * `excitation` - Speech samples. 202 * `nst->dec_filtstate` - State preservation. 203 * `out_data` - Filtered speech samples. */ 204 WebRtcSpl_FilterAR(lpPoly, WEBRTC_CNG_MAX_LPC_ORDER + 1, excitation, 205 num_samples, dec_filtstate_, WEBRTC_CNG_MAX_LPC_ORDER, 206 dec_filtstateLow_, out_data.data(), low); 207 208 return true; 209 } 210 211 ComfortNoiseEncoder::ComfortNoiseEncoder(int fs, int interval, int quality) 212 : enc_nrOfCoefs_(quality), 213 enc_sampfreq_(fs), 214 enc_interval_(interval), 215 enc_msSinceSid_(0), 216 enc_Energy_(0), 217 enc_reflCoefs_{0}, 218 enc_corrVector_{0}, 219 enc_seed_(7777) /* For debugging only. */ { 220 RTC_CHECK_GT(quality, 0); 221 RTC_CHECK_LE(quality, WEBRTC_CNG_MAX_LPC_ORDER); 222 } 223 224 void ComfortNoiseEncoder::Reset(int fs, int interval, int quality) { 225 RTC_CHECK_GT(quality, 0); 226 RTC_CHECK_LE(quality, WEBRTC_CNG_MAX_LPC_ORDER); 227 enc_nrOfCoefs_ = quality; 228 enc_sampfreq_ = fs; 229 enc_interval_ = interval; 230 enc_msSinceSid_ = 0; 231 enc_Energy_ = 0; 232 for (auto& c : enc_reflCoefs_) 233 c = 0; 234 for (auto& c : enc_corrVector_) 235 c = 0; 236 enc_seed_ = 7777; /* For debugging only. */ 237 } 238 239 size_t ComfortNoiseEncoder::Encode(ArrayView<const int16_t> speech, 240 bool force_sid, 241 Buffer* output) { 242 int16_t arCoefs[WEBRTC_CNG_MAX_LPC_ORDER + 1]; 243 int32_t corrVector[WEBRTC_CNG_MAX_LPC_ORDER + 1]; 244 int16_t refCs[WEBRTC_CNG_MAX_LPC_ORDER + 1]; 245 int16_t hanningW[kCngMaxOutsizeOrder]; 246 int16_t ReflBeta = 19661; /* 0.6 in q15. */ 247 int16_t ReflBetaComp = 13107; /* 0.4 in q15. */ 248 int32_t outEnergy; 249 int outShifts; 250 size_t i; 251 int stab; 252 int acorrScale; 253 size_t index; 254 size_t ind, factor; 255 int32_t* bptr; 256 int32_t blo, bhi; 257 int16_t negate; 258 const int16_t* aptr; 259 int16_t speechBuf[kCngMaxOutsizeOrder]; 260 261 const size_t num_samples = speech.size(); 262 RTC_CHECK_LE(num_samples, kCngMaxOutsizeOrder); 263 264 for (i = 0; i < num_samples; i++) { 265 speechBuf[i] = speech[i]; 266 } 267 268 factor = num_samples; 269 270 /* Calculate energy and a coefficients. */ 271 outEnergy = WebRtcSpl_Energy(speechBuf, num_samples, &outShifts); 272 while (outShifts > 0) { 273 /* We can only do 5 shifts without destroying accuracy in 274 * division factor. */ 275 if (outShifts > 5) { 276 outEnergy <<= (outShifts - 5); 277 outShifts = 5; 278 } else { 279 factor /= 2; 280 outShifts--; 281 } 282 } 283 outEnergy = WebRtcSpl_DivW32W16(outEnergy, (int16_t)factor); 284 285 if (outEnergy > 1) { 286 /* Create Hanning Window. */ 287 WebRtcSpl_GetHanningWindow(hanningW, num_samples / 2); 288 for (i = 0; i < (num_samples / 2); i++) 289 hanningW[num_samples - i - 1] = hanningW[i]; 290 291 WebRtcSpl_ElementwiseVectorMult(speechBuf, hanningW, speechBuf, num_samples, 292 14); 293 294 WebRtcSpl_AutoCorrelation(speechBuf, num_samples, enc_nrOfCoefs_, 295 corrVector, &acorrScale); 296 297 if (*corrVector == 0) 298 *corrVector = WEBRTC_SPL_WORD16_MAX; 299 300 /* Adds the bandwidth expansion. */ 301 aptr = WebRtcCng_kCorrWindow; 302 bptr = corrVector; 303 304 /* (zzz) lpc16_1 = 17+1+820+2+2 = 842 (ordo2=700). */ 305 for (ind = 0; ind < enc_nrOfCoefs_; ind++) { 306 /* The below code multiplies the 16 b corrWindow values (Q15) with 307 * the 32 b corrvector (Q0) and shifts the result down 15 steps. */ 308 negate = *bptr < 0; 309 if (negate) 310 *bptr = -*bptr; 311 312 blo = (int32_t)*aptr * (*bptr & 0xffff); 313 bhi = ((blo >> 16) & 0xffff) + 314 ((int32_t)(*aptr++) * ((*bptr >> 16) & 0xffff)); 315 blo = (blo & 0xffff) | ((bhi & 0xffff) << 16); 316 317 *bptr = (((bhi >> 16) & 0x7fff) << 17) | ((uint32_t)blo >> 15); 318 if (negate) 319 *bptr = -*bptr; 320 bptr++; 321 } 322 /* End of bandwidth expansion. */ 323 324 stab = WebRtcSpl_LevinsonDurbin(corrVector, arCoefs, refCs, enc_nrOfCoefs_); 325 326 if (!stab) { 327 /* Disregard from this frame */ 328 return 0; 329 } 330 331 } else { 332 for (i = 0; i < enc_nrOfCoefs_; i++) 333 refCs[i] = 0; 334 } 335 336 if (force_sid) { 337 /* Read instantaneous values instead of averaged. */ 338 for (i = 0; i < enc_nrOfCoefs_; i++) 339 enc_reflCoefs_[i] = refCs[i]; 340 enc_Energy_ = outEnergy; 341 } else { 342 /* Average history with new values. */ 343 for (i = 0; i < enc_nrOfCoefs_; i++) { 344 enc_reflCoefs_[i] = 345 (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(enc_reflCoefs_[i], ReflBeta, 15); 346 enc_reflCoefs_[i] += 347 (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(refCs[i], ReflBetaComp, 15); 348 } 349 enc_Energy_ = (outEnergy >> 2) + (enc_Energy_ >> 1) + (enc_Energy_ >> 2); 350 } 351 352 if (enc_Energy_ < 1) { 353 enc_Energy_ = 1; 354 } 355 356 if ((enc_msSinceSid_ > (enc_interval_ - 1)) || force_sid) { 357 /* Search for best dbov value. */ 358 index = 0; 359 for (i = 1; i < 93; i++) { 360 /* Always round downwards. */ 361 if ((enc_Energy_ - WebRtcCng_kDbov[i]) > 0) { 362 index = i; 363 break; 364 } 365 } 366 if ((i == 93) && (index == 0)) 367 index = 94; 368 369 const size_t output_coefs = enc_nrOfCoefs_ + 1; 370 output->AppendData(output_coefs, [&](ArrayView<uint8_t> output) { 371 output[0] = (uint8_t)index; 372 373 /* Quantize coefficients with tweak for WebRtc implementation of 374 * RFC3389. */ 375 if (enc_nrOfCoefs_ == WEBRTC_CNG_MAX_LPC_ORDER) { 376 for (i = 0; i < enc_nrOfCoefs_; i++) { 377 /* Q15 to Q7 with rounding. */ 378 output[i + 1] = ((enc_reflCoefs_[i] + 128) >> 8); 379 } 380 } else { 381 for (i = 0; i < enc_nrOfCoefs_; i++) { 382 /* Q15 to Q7 with rounding. */ 383 output[i + 1] = (127 + ((enc_reflCoefs_[i] + 128) >> 8)); 384 } 385 } 386 387 return output_coefs; 388 }); 389 390 enc_msSinceSid_ = 391 static_cast<int16_t>((1000 * num_samples) / enc_sampfreq_); 392 return output_coefs; 393 } else { 394 enc_msSinceSid_ += 395 static_cast<int16_t>((1000 * num_samples) / enc_sampfreq_); 396 return 0; 397 } 398 } 399 400 namespace { 401 /* Values in `k` are Q15, and `a` Q12. */ 402 void WebRtcCng_K2a16(int16_t* k, int useOrder, int16_t* a) { 403 int16_t any[WEBRTC_SPL_MAX_LPC_ORDER + 1]; 404 int16_t* aptr; 405 int16_t* aptr2; 406 int16_t* anyptr; 407 const int16_t* kptr; 408 int m, i; 409 410 kptr = k; 411 *a = 4096; /* i.e., (Word16_MAX >> 3) + 1 */ 412 *any = *a; 413 a[1] = (*k + 4) >> 3; 414 for (m = 1; m < useOrder; m++) { 415 kptr++; 416 aptr = a; 417 aptr++; 418 aptr2 = &a[m]; 419 anyptr = any; 420 anyptr++; 421 422 any[m + 1] = (*kptr + 4) >> 3; 423 for (i = 0; i < m; i++) { 424 *anyptr++ = 425 (*aptr++) + 426 (int16_t)((((int32_t)(*aptr2--) * (int32_t)*kptr) + 16384) >> 15); 427 } 428 429 aptr = a; 430 anyptr = any; 431 for (i = 0; i < (m + 2); i++) { 432 *aptr++ = *anyptr++; 433 } 434 } 435 } 436 437 } // namespace 438 439 } // namespace webrtc