tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

vad_gmm.c (3028B)


      1 /*
      2 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #include "common_audio/vad/vad_gmm.h"
     12 
     13 #include "common_audio/signal_processing/include/signal_processing_library.h"
     14 
     15 static const int32_t kCompVar = 22005;
     16 static const int16_t kLog2Exp = 5909;  // log2(exp(1)) in Q12.
     17 
     18 // For a normal distribution, the probability of `input` is calculated and
     19 // returned (in Q20). The formula for normal distributed probability is
     20 //
     21 // 1 / s * exp(-(x - m)^2 / (2 * s^2))
     22 //
     23 // where the parameters are given in the following Q domains:
     24 // m = `mean` (Q7)
     25 // s = `std` (Q7)
     26 // x = `input` (Q4)
     27 // in addition to the probability we output `delta` (in Q11) used when updating
     28 // the noise/speech model.
     29 int32_t WebRtcVad_GaussianProbability(int16_t input,
     30                                      int16_t mean,
     31                                      int16_t std,
     32                                      int16_t* delta) {
     33  int16_t tmp16, inv_std, inv_std2, exp_value = 0;
     34  int32_t tmp32;
     35 
     36  // Calculate `inv_std` = 1 / s, in Q10.
     37  // 131072 = 1 in Q17, and (`std` >> 1) is for rounding instead of truncation.
     38  // Q-domain: Q17 / Q7 = Q10.
     39  tmp32 = (int32_t)131072 + (int32_t)(std >> 1);
     40  inv_std = (int16_t)WebRtcSpl_DivW32W16(tmp32, std);
     41 
     42  // Calculate `inv_std2` = 1 / s^2, in Q14.
     43  tmp16 = (inv_std >> 2);  // Q10 -> Q8.
     44  // Q-domain: (Q8 * Q8) >> 2 = Q14.
     45  inv_std2 = (int16_t)((tmp16 * tmp16) >> 2);
     46  // TODO(bjornv): Investigate if changing to
     47  // inv_std2 = (int16_t)((inv_std * inv_std) >> 6);
     48  // gives better accuracy.
     49 
     50  tmp16 = (input << 3);  // Q4 -> Q7
     51  tmp16 = tmp16 - mean;  // Q7 - Q7 = Q7
     52 
     53  // To be used later, when updating noise/speech model.
     54  // `delta` = (x - m) / s^2, in Q11.
     55  // Q-domain: (Q14 * Q7) >> 10 = Q11.
     56  *delta = (int16_t)((inv_std2 * tmp16) >> 10);
     57 
     58  // Calculate the exponent `tmp32` = (x - m)^2 / (2 * s^2), in Q10. Replacing
     59  // division by two with one shift.
     60  // Q-domain: (Q11 * Q7) >> 8 = Q10.
     61  tmp32 = (*delta * tmp16) >> 9;
     62 
     63  // If the exponent is small enough to give a non-zero probability we calculate
     64  // `exp_value` ~= exp(-(x - m)^2 / (2 * s^2))
     65  //             ~= exp2(-log2(exp(1)) * `tmp32`).
     66  if (tmp32 < kCompVar) {
     67    // Calculate `tmp16` = log2(exp(1)) * `tmp32`, in Q10.
     68    // Q-domain: (Q12 * Q10) >> 12 = Q10.
     69    tmp16 = (int16_t)((kLog2Exp * tmp32) >> 12);
     70    tmp16 = -tmp16;
     71    exp_value = (0x0400 | (tmp16 & 0x03FF));
     72    tmp16 ^= 0xFFFF;
     73    tmp16 >>= 10;
     74    tmp16 += 1;
     75    // Get `exp_value` = exp(-`tmp32`) in Q10.
     76    exp_value >>= tmp16;
     77  }
     78 
     79  // Calculate and return (1 / s) * exp(-(x - m)^2 / (2 * s^2)), in Q20.
     80  // Q-domain: Q10 * Q10 = Q20.
     81  return inv_std * exp_value;
     82 }