[ tor-browser ].git.dasho

vad_core.h (4108B)
      1 /*
      2 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 /*
     12 * This header file includes the descriptions of the core VAD calls.
     13 */
     14 
     15 #ifndef COMMON_AUDIO_VAD_VAD_CORE_H_
     16 #define COMMON_AUDIO_VAD_VAD_CORE_H_
     17 
     18 #include <stddef.h>
     19 #include <stdint.h>
     20 
     21 #include "common_audio/signal_processing/include/signal_processing_library.h"
     22 
     23 // TODO(https://bugs.webrtc.org/14476): When converted to C++, remove the macro.
     24 #if defined(__cplusplus)
     25 #define CONSTEXPR_INT(x) constexpr int x
     26 #else  // Plain C: an anonymous enum yields an integer constant expression.
     27 #define CONSTEXPR_INT(x) enum { x }
     28 #endif  // defined(__cplusplus)
     29 
     30 CONSTEXPR_INT(kNumChannels = 6);  // Number of frequency bands (named channels).
     31 CONSTEXPR_INT(
     32    kNumGaussians = 2);  // Number of Gaussians per channel in the GMM.
     33 CONSTEXPR_INT(kTableSize = kNumChannels * kNumGaussians);  // One entry per (channel, Gaussian) pair.
     34 CONSTEXPR_INT(
     35    kMinEnergy = 10);  // Minimum energy required to trigger audio signal.
     36 
     37 typedef struct VadInstT_ {  // State of one core VAD instance, passed to every function declared in this header.
     38  int vad;
     39  int32_t downsampling_filter_states[4];
     40  WebRtcSpl_State48khzTo8khz state_48_to_8;
     41  int16_t noise_means[kTableSize];  // kNumGaussians entries for each of the kNumChannels channels.
     42  int16_t speech_means[kTableSize];  // Same sizing as `noise_means`.
     43  int16_t noise_stds[kTableSize];  // Same sizing as `noise_means`.
     44  int16_t speech_stds[kTableSize];  // Same sizing as `noise_means`.
     45  // TODO(bjornv): Change to `frame_count`.
     46  int32_t frame_counter;
     47  int16_t over_hang;  // Over Hang
     48  int16_t num_of_speech;
     49  // TODO(bjornv): Change to `age_vector`.
     50  int16_t index_vector[16 * kNumChannels];  // 16 entries per channel.
     51  int16_t low_value_vector[16 * kNumChannels];  // 16 entries per channel.
     52  // TODO(bjornv): Change to `median`.
     53  int16_t mean_value[kNumChannels];  // One entry per channel.
     54  int16_t upper_state[5];
     55  int16_t lower_state[5];
     56  int16_t hp_filter_state[4];
     57  int16_t over_hang_max_1[3];
     58  int16_t over_hang_max_2[3];
     59  int16_t individual[3];
     60  int16_t total[3];
     61 
     62  int init_flag;  // NOTE(review): presumably set by WebRtcVad_InitCore(); confirm in vad_core.c.
     63 } VadInstT;
     64 
     65 // Initializes the core VAD component. The default aggressiveness mode is
     66 // controlled by `kDefaultMode` in vad_core.c.
     67 //
     68 // - self [i/o] : Instance that should be initialized
     69 //
     70 // returns      : 0 (OK), -1 (`self` is a null pointer, or the default mode
     71 //                can't be set)
     72 int WebRtcVad_InitCore(VadInstT* self);
     73 
     74 /****************************************************************************
     75 * WebRtcVad_set_mode_core(...)
     76 *
     77 * This function changes the VAD settings.
     78 *
     79 * Input:
     80 *      - self      : VAD instance
     81 *      - mode      : Aggressiveness degree
     82 *                    0 (High quality) - 3 (Highly aggressive)
     83 *
     84 * Output:
     85 *      - self      : Changed instance
     86 *
     87 * Return value     :  0 - Ok
     88 *                    -1 - Error
     89 */
     90 
     91 int WebRtcVad_set_mode_core(VadInstT* self, int mode);
     92 
     93 /****************************************************************************
     94 * WebRtcVad_CalcVad48khz(...)
     95 * WebRtcVad_CalcVad32khz(...)
     96 * WebRtcVad_CalcVad16khz(...)
     97 * WebRtcVad_CalcVad8khz(...)
     98 *
     99 * Calculate probability for active speech and make VAD decision.
    100 *
    101 * Input:
    102 *      - inst          : VAD instance
    103 *      - speech_frame  : Input speech frame
    104 *      - frame_length  : Number of input samples
    105 *
    106 * Output:
    107 *      - inst          : Updated filter states etc.
    108 *
    109 * Return value         : VAD decision
    110 *                        0 - No active speech
    111 *                        1-6 - Active speech
    112 */
    113 int WebRtcVad_CalcVad48khz(VadInstT* inst,
    114                           const int16_t* speech_frame,
    115                           size_t frame_length);
    116 int WebRtcVad_CalcVad32khz(VadInstT* inst,
    117                           const int16_t* speech_frame,
    118                           size_t frame_length);
    119 int WebRtcVad_CalcVad16khz(VadInstT* inst,
    120                           const int16_t* speech_frame,
    121                           size_t frame_length);
    122 int WebRtcVad_CalcVad8khz(VadInstT* inst,
    123                          const int16_t* speech_frame,
    124                          size_t frame_length);
    125 
    126 #endif  // COMMON_AUDIO_VAD_VAD_CORE_H_
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE