rnn.cc (3241B)
1 /* 2 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/audio_processing/agc2/rnn_vad/rnn.h" 12 13 #include "api/array_view.h" 14 #include "modules/audio_processing/agc2/cpu_features.h" 15 #include "modules/audio_processing/agc2/rnn_vad/common.h" 16 #include "modules/audio_processing/agc2/rnn_vad/rnn_fc.h" 17 #include "modules/audio_processing/agc2/rnn_vad/rnn_gru.h" 18 #include "rtc_base/checks.h" 19 #include "third_party/rnnoise/src/rnn_vad_weights.h" 20 21 namespace webrtc { 22 namespace rnn_vad { 23 namespace { 24 25 using ::rnnoise::kInputLayerInputSize; 26 static_assert(kFeatureVectorSize == kInputLayerInputSize, ""); 27 using ::rnnoise::kInputDenseBias; 28 using ::rnnoise::kInputDenseWeights; 29 using ::rnnoise::kInputLayerOutputSize; 30 static_assert(kInputLayerOutputSize <= kFullyConnectedLayerMaxUnits, ""); 31 32 using ::rnnoise::kHiddenGruBias; 33 using ::rnnoise::kHiddenGruRecurrentWeights; 34 using ::rnnoise::kHiddenGruWeights; 35 using ::rnnoise::kHiddenLayerOutputSize; 36 static_assert(kHiddenLayerOutputSize <= kGruLayerMaxUnits, ""); 37 38 using ::rnnoise::kOutputDenseBias; 39 using ::rnnoise::kOutputDenseWeights; 40 using ::rnnoise::kOutputLayerOutputSize; 41 static_assert(kOutputLayerOutputSize <= kFullyConnectedLayerMaxUnits, ""); 42 43 } // namespace 44 45 RnnVad::RnnVad(const AvailableCpuFeatures& cpu_features) 46 : input_(kInputLayerInputSize, 47 kInputLayerOutputSize, 48 kInputDenseBias, 49 kInputDenseWeights, 50 ActivationFunction::kTansigApproximated, 51 cpu_features, 52 /*layer_name=*/"FC1"), 53 hidden_(kInputLayerOutputSize, 54 kHiddenLayerOutputSize, 55 kHiddenGruBias, 56 kHiddenGruWeights, 57 kHiddenGruRecurrentWeights, 58 cpu_features, 59 /*layer_name=*/"GRU1"), 60 output_(kHiddenLayerOutputSize, 61 kOutputLayerOutputSize, 62 kOutputDenseBias, 63 kOutputDenseWeights, 64 ActivationFunction::kSigmoidApproximated, 65 // The output layer is just 24x1. The unoptimized code is faster. 66 NoAvailableCpuFeatures(), 67 /*layer_name=*/"FC2") { 68 // Input-output chaining size checks. 69 RTC_DCHECK_EQ(input_.size(), hidden_.input_size()) 70 << "The input and the hidden layers sizes do not match."; 71 RTC_DCHECK_EQ(hidden_.size(), output_.input_size()) 72 << "The hidden and the output layers sizes do not match."; 73 } 74 75 RnnVad::~RnnVad() = default; 76 77 void RnnVad::Reset() { 78 hidden_.Reset(); 79 } 80 81 float RnnVad::ComputeVadProbability( 82 ArrayView<const float, kFeatureVectorSize> feature_vector, 83 bool is_silence) { 84 if (is_silence) { 85 Reset(); 86 return 0.f; 87 } 88 input_.ComputeOutput(feature_vector); 89 hidden_.ComputeOutput(input_); 90 output_.ComputeOutput(hidden_); 91 RTC_DCHECK_EQ(output_.size(), 1); 92 return output_.data()[0]; 93 } 94 95 } // namespace rnn_vad 96 } // namespace webrtc