commit 43fc2cd9eaaa347e082ea1be4c112a1288800a21 parent 18663a79f4c1c7233db909742f9cf75dc202f46f Author: Dan Baker <dbaker@mozilla.com> Date: Mon, 1 Dec 2025 23:25:32 -0700 Bug 2000941 - Vendor libwebrtc from ea14c99d67 Upstream commit: https://webrtc.googlesource.com/src/+/ea14c99d674905eba49ce4fd1801e712c269a66b Add basic ML residual echo estimator impl to AEC3 Implement a NeuralResidualEchoEstimator using ML, this class: - load a tflite model into C++ - rebuffer AEC3 signals to the model input format - run model - extract a basic residual echo estimate from the model output Co-authored-by: Sam Zackrisson <saza@google.com> Bug: webrtc:442444736 Change-Id: I46285bf16634d558a16451c3b1298b49ac9311e4 No-Iwyu: Not changing the tflite includes. Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/409563 Reviewed-by: Henrik Andreassson <henrika@webrtc.org> Reviewed-by: Per Åhgren <peah@webrtc.org> Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org> Commit-Queue: Jesus de Vicente Pena <devicentepena@webrtc.org> Cr-Commit-Position: refs/heads/main@{#45711} Diffstat:
34 files changed, 959 insertions(+), 43 deletions(-)
diff --git a/third_party/libwebrtc/BUILD.gn b/third_party/libwebrtc/BUILD.gn @@ -896,6 +896,9 @@ group("poison_environment_construction") { group("poison_software_video_codecs") { } +group("poison_default_neural_residual_echo_estimator") { +} + if (!build_with_chromium) { # Write debug logs to gn_logs.txt. # This is also required for Siso builds. diff --git a/third_party/libwebrtc/DEPS b/third_party/libwebrtc/DEPS @@ -771,6 +771,9 @@ deps = { 'dep_type': 'cipd', }, + 'src/third_party/tflite/src': + Var('chromium_git') + '/external/github.com/tensorflow/tensorflow.git' + '@' + '313f58ae85278ced9ccc7f90ee630bdf8735c52f', + 'src/third_party/turbine/cipd': { 'packages': [ { @@ -831,6 +834,36 @@ deps = { 'condition': 'checkout_android and non_git_source', 'dep_type': 'cipd', }, + 'src/third_party/pthreadpool/src': + Var('chromium_git') + '/external/github.com/google/pthreadpool.git' + '@' + 'f5a07eddbf4be8f23e29e60a2ccf66b78b71f119', + + 'src/third_party/xnnpack/src': + Var('chromium_git') + '/external/github.com/google/XNNPACK.git' + '@' + '4d098efeac50c44a7c03e6feb1794908db4c3158', + + 'src/third_party/farmhash/src': + Var('chromium_git') + '/external/github.com/google/farmhash.git' + '@' + '816a4ae622e964763ca0862d9dbd19324a1eaf45', + + 'src/third_party/ruy/src': + Var('chromium_git') + '/external/github.com/google/ruy.git' + '@' + '9940fbf1e0c0863907e77e0600b99bb3e2bc2b9f', + + 'src/third_party/cpuinfo/src': + Var('chromium_git') + '/external/github.com/pytorch/cpuinfo.git' + '@' + '877328f188a3c7d1fa855871a278eb48d530c4c0', + + 'src/third_party/eigen3/src': + Var('chromium_git') + '/external/gitlab.com/libeigen/eigen.git' + '@' + '430e35fbd15d3c946d2d2ba19ec41c16ba217cb3', + + 'src/third_party/fp16/src': + Var('chromium_git') + '/external/github.com/Maratyszcza/FP16.git' + '@' + '3d2de1816307bac63c16a297e8c4dc501b4076df', + + 'src/third_party/gemmlowp/src': + Var('chromium_git') + '/external/github.com/google/gemmlowp.git' + '@' + 
'16e8662c34917be0065110bfcd9cc27d30f52fdf', + + 'src/third_party/fxdiv/src': + Var('chromium_git') + '/external/github.com/Maratyszcza/FXdiv.git' + '@' + '63058eff77e11aa15bf531df5dd34395ec3017c8', + + 'src/third_party/neon_2_sse/src': + Var('chromium_git') + '/external/github.com/intel/ARM_NEON_2_x86_SSE.git' + '@' + 'eb8b80b28f956275e291ea04a7beb5ed8289e872', + # Everything coming after this is automatically updated by the auto-roller. # === ANDROID_DEPS Generated Code Start === diff --git a/third_party/libwebrtc/README.mozilla.last-vendor b/third_party/libwebrtc/README.mozilla.last-vendor @@ -1,4 +1,4 @@ # ./mach python dom/media/webrtc/third_party_build/vendor-libwebrtc.py --from-local /Users/danielbaker/elm/.moz-fast-forward/moz-libwebrtc --commit mozpatches libwebrtc -libwebrtc updated from /Users/danielbaker/elm/.moz-fast-forward/moz-libwebrtc commit mozpatches on 2025-12-02T06:15:02.689857+00:00. +libwebrtc updated from /Users/danielbaker/elm/.moz-fast-forward/moz-libwebrtc commit mozpatches on 2025-12-02T06:25:17.904692+00:00. # base of lastest vendoring -52cbf242e9 +ea14c99d67 diff --git a/third_party/libwebrtc/modules/audio_processing/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/BUILD.gn @@ -573,6 +573,7 @@ if (rtc_include_tests) { "../../rtc_base:task_queue_for_test", "../../rtc_base:timeutils", "../../rtc_base/system:file_wrapper", + "aec3:neural_residual_echo_estimator_impl", "aec_dump", "//third_party/abseil-cpp/absl/base:nullability", "//third_party/abseil-cpp/absl/flags:flag", diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/aec3/BUILD.gn @@ -7,6 +7,9 @@ # be found in the AUTHORS file in the root of the source tree. 
import("../../../webrtc.gni") +if (rtc_enable_protobuf) { + import("//third_party/protobuf/proto_library.gni") +} rtc_library("aec3") { visibility = [ "*" ] @@ -284,6 +287,47 @@ if (target_cpu == "x86" || target_cpu == "x64") { } } +if (rtc_enable_protobuf) { + proto_library("neural_residual_echo_estimator_proto") { + sources = [ "neural_residual_echo_estimator.proto" ] + proto_out_dir = "modules/audio_processing/aec3" + } + + rtc_library("neural_residual_echo_estimator_impl") { + visibility = [ "*" ] + poisonous = [ "default_neural_residual_echo_estimator" ] + configs += [ + "..:apm_debug_dump", + "//third_party/tflite:tflite_config_no_undef", + ] + sources = [ + "neural_residual_echo_estimator_impl.cc", + "neural_residual_echo_estimator_impl.h", + ] + deps = [ + ":aec3_common", + ":neural_residual_echo_estimator_proto", + "..:apm_logging", + "../../../api:array_view", + "../../../api/audio:neural_residual_echo_estimator_api", + "../../../rtc_base:checks", + "../../../rtc_base:logging", + "//third_party/abseil-cpp/absl/strings:string_view", + ] + if (build_with_chromium) { + deps += [ + "//third_party/tflite", + "//third_party/tflite:tflite_builtin_op_resolver", + ] + } else { + deps += [ + "//third_party/tflite:tflite_builtin_op_resolver_standalone", + "//third_party/tflite:tflite_standalone", + ] + } + } +} + if (rtc_include_tests) { rtc_library("aec3_unittests") { testonly = true @@ -323,12 +367,14 @@ if (rtc_include_tests) { "../../../api/environment:environment_factory", "../../../rtc_base:checks", "../../../rtc_base:cpu_info", + "../../../rtc_base:gunit_helpers", "../../../rtc_base:random", "../../../rtc_base:safe_minmax", "../../../rtc_base:stringutils", "../../../rtc_base/system:arch", "../../../system_wrappers:metrics", "../../../test:create_test_field_trials", + "../../../test:fileutils", "../../../test:test_support", "../utility:cascaded_biquad_filter", ] @@ -366,6 +412,7 @@ if (rtc_include_tests) { "matched_filter_unittest.cc", 
"moving_average_unittest.cc", "multi_channel_content_detector_unittest.cc", + "neural_residual_echo_estimator_impl_unittest.cc", "refined_filter_update_gain_unittest.cc", "render_buffer_unittest.cc", "render_delay_buffer_unittest.cc", @@ -380,6 +427,10 @@ if (rtc_include_tests) { "suppression_gain_unittest.cc", "vector_math_unittest.cc", ] + deps += [ + ":neural_residual_echo_estimator_impl", + ":neural_residual_echo_estimator_proto", + ] } if (!build_with_chromium) { diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/DEPS b/third_party/libwebrtc/modules/audio_processing/aec3/DEPS @@ -0,0 +1,3 @@ +include_rules = [ + "+third_party/tflite" +] diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/neural_residual_echo_estimator.proto b/third_party/libwebrtc/modules/audio_processing/aec3/neural_residual_echo_estimator.proto @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2025 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +syntax = "proto3"; +option optimize_for = LITE_RUNTIME; +package webrtc.audioproc; + +message ReeModelMetadata { + optional int32 version = 1; +} diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/neural_residual_echo_estimator_impl.cc b/third_party/libwebrtc/modules/audio_processing/aec3/neural_residual_echo_estimator_impl.cc @@ -0,0 +1,379 @@ +/* + * Copyright (c) 2025 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/neural_residual_echo_estimator_impl.h" + +#include <algorithm> +#include <array> +#include <cstdarg> +#include <cstdio> +#include <functional> +#include <map> +#include <memory> +#include <optional> +#include <string> +#include <utility> +#include <vector> + +#include "absl/strings/string_view.h" +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#ifdef WEBRTC_ANDROID_PLATFORM_BUILD +#include "external/webrtc/webrtc/modules/audio_processing/aec3/neural_residual_echo_estimator.pb.h" +#else +#include "modules/audio_processing/aec3/neural_residual_echo_estimator.pb.h" +#endif +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "third_party/tflite/src/tensorflow/lite/error_reporter.h" +#include "third_party/tflite/src/tensorflow/lite/interpreter.h" +#include "third_party/tflite/src/tensorflow/lite/kernels/kernel_util.h" +#include "third_party/tflite/src/tensorflow/lite/kernels/register.h" +#include "third_party/tflite/src/tensorflow/lite/model_builder.h" + +namespace webrtc { +namespace { +using ModelInputEnum = NeuralResidualEchoEstimatorImpl::ModelInputEnum; +using ModelOutputEnum = NeuralResidualEchoEstimatorImpl::ModelOutputEnum; + +// A TFLite ErrorReporter that writes its messages to RTC_LOG. +class LoggingErrorReporter : public tflite::ErrorReporter { + int Report(const char* format, va_list args) override { + char buffer[2048]; + const int result = vsnprintf(buffer, sizeof(buffer), format, args); + RTC_LOG(LS_ERROR) << buffer; + return result; + } +}; + +tflite::ErrorReporter* DefaultLoggingErrorReporter() { + static LoggingErrorReporter* instance = new LoggingErrorReporter(); + return instance; +} + +// Field under which the ML-REE metadata is stored in a TFLite model. 
+constexpr char kTfLiteMetadataKey[] = "REE_METADATA"; + +// Reads the model metadata from the TFLite model. If the metadata is not +// present, it returns a default metadata with version 1. If the metadata is +// present but cannot be parsed, it returns nullopt. +std::optional<audioproc::ReeModelMetadata> ReadModelMetadata( + const tflite::FlatBufferModel* model) { + audioproc::ReeModelMetadata default_metadata; + default_metadata.set_version(1); + const auto metadata_records = model->ReadAllMetadata(); + const auto metadata_field = metadata_records.find(kTfLiteMetadataKey); + if (metadata_field == metadata_records.end()) { + return default_metadata; + } + audioproc::ReeModelMetadata metadata; + if (metadata.ParseFromString(metadata_field->second)) { + return metadata; + } + return std::nullopt; +} + +// Encapsulates all the NeuralResidualEchoEstimatorImpl's interaction with +// TFLite. This allows the separation of rebuffering and similar AEC3-related +// bookkeeping from the TFLite-specific code, and makes it easier to test the +// former code by mocking. 
+class TfLiteModelRunner : public NeuralResidualEchoEstimatorImpl::ModelRunner { + public: + TfLiteModelRunner(std::string model_data, + std::unique_ptr<tflite::FlatBufferModel> tflite_model, + std::unique_ptr<tflite::Interpreter> tflite_interpreter, + audioproc::ReeModelMetadata metadata) + : model_data_(std::move(model_data)), + frame_size_(static_cast<int>( + tflite::NumElements(tflite_interpreter->input_tensor( + static_cast<int>(ModelInputEnum::kMic))))), + step_size_(frame_size_ / 2), + frame_size_by_2_plus_1_(frame_size_ / 2 + 1), + metadata_(metadata), + model_state_(tflite::NumElements(tflite_interpreter->input_tensor( + static_cast<int>(ModelInputEnum::kModelState))), + 0.0f), + tflite_model_(std::move(tflite_model)), + tflite_interpreter_(std::move(tflite_interpreter)) { + for (const auto input_enum : + {ModelInputEnum::kMic, ModelInputEnum::kLinearAecOutput, + ModelInputEnum::kAecRef}) { + webrtc::ArrayView<float> input_tensor( + tflite_interpreter_->typed_input_tensor<float>( + static_cast<int>(input_enum)), + frame_size_); + std::fill(input_tensor.begin(), input_tensor.end(), 0.0f); + } + + RTC_CHECK_EQ(frame_size_ % kBlockSize, 0); + RTC_CHECK_EQ(tflite::NumElements(tflite_interpreter_->input_tensor( + static_cast<int>(ModelInputEnum::kLinearAecOutput))), + frame_size_); + RTC_CHECK_EQ(tflite::NumElements(tflite_interpreter_->input_tensor( + static_cast<int>(ModelInputEnum::kAecRef))), + frame_size_); + RTC_CHECK_EQ(tflite::NumElements(tflite_interpreter_->input_tensor( + static_cast<int>(ModelInputEnum::kModelState))), + tflite::NumElements(tflite_interpreter_->output_tensor( + static_cast<int>(ModelOutputEnum::kModelState)))); + RTC_CHECK_EQ(tflite::NumElements(tflite_interpreter_->output_tensor( + static_cast<int>(ModelOutputEnum::kEchoMask))), + frame_size_by_2_plus_1_); + } + + ~TfLiteModelRunner() override {} + + int StepSize() const override { return step_size_; } + + webrtc::ArrayView<float> GetInput(ModelInputEnum input_enum) override { + 
int tensor_size = 0; + switch (input_enum) { + case ModelInputEnum::kMic: // fall-through + case ModelInputEnum::kLinearAecOutput: // fall-through + case ModelInputEnum::kAecRef: + tensor_size = frame_size_; + break; + case ModelInputEnum::kModelState: + tensor_size = static_cast<int>(model_state_.size()); + break; + case ModelInputEnum::kNumInputs: + RTC_CHECK(false); + } + return webrtc::ArrayView<float>( + tflite_interpreter_->typed_input_tensor<float>( + static_cast<int>(input_enum)), + tensor_size); + } + + webrtc::ArrayView<const float> GetOutputEchoMask() override { + return webrtc::ArrayView<const float>( + tflite_interpreter_->typed_output_tensor<const float>( + static_cast<int>(ModelOutputEnum::kEchoMask)), + frame_size_by_2_plus_1_); + } + + audioproc::ReeModelMetadata GetMetadata() const override { return metadata_; } + + bool Invoke() override { + auto input_state = GetInput(ModelInputEnum::kModelState); + std::copy(model_state_.begin(), model_state_.end(), input_state.begin()); + + const TfLiteStatus status = tflite_interpreter_->Invoke(); + if (status != kTfLiteOk && processing_error_log_counter_ <= 0) { + RTC_LOG(LS_ERROR) << "TfLiteModelRunner::Estimate() " + "invocation error, status=" + << status; + // Wait ~1 second before logging this error again. + processing_error_log_counter_ = 16000 / step_size_; + return false; + } else if (processing_error_log_counter_ > 0) { + --processing_error_log_counter_; + } + + auto output_state = webrtc::ArrayView<const float>( + tflite_interpreter_->typed_output_tensor<const float>( + static_cast<int>(ModelOutputEnum::kModelState)), + model_state_.size()); + std::copy(output_state.begin(), output_state.end(), model_state_.begin()); + + constexpr float kStateDecay = 0.999f; + for (float& state : model_state_) { + state *= kStateDecay; + } + + return true; + } + + private: + // Model data needs to be declared before `tflite_model_` to ensure that the + // data is destroyed after the tflite model. 
+ const std::string model_data_; + + // Frame size of the model. + const int frame_size_; + + // Step size. + const int step_size_; + + // Size of the spectrum mask that is returned by the model. + const int frame_size_by_2_plus_1_; + + // Metadata of the model. + const audioproc::ReeModelMetadata metadata_; + + // LSTM states that carry over to the next inference invocation. + std::vector<float> model_state_; + + // TFLite model for residual echo estimation. + // Must outlive `tflite_interpreter_` + std::unique_ptr<tflite::FlatBufferModel> tflite_model_; + + // Used to run inference with `tflite_model_`. + std::unique_ptr<tflite::Interpreter> tflite_interpreter_; + + // Counter to avoid logging processing errors too often. + int processing_error_log_counter_ = 0; +}; + +void PushFrameToModelInput(std::vector<float>& frame, + webrtc::ArrayView<float> input) { + // Shift down overlap from previous frames. + std::copy(input.begin() + frame.size(), input.end(), input.begin()); + + // The model expects [-1,1]-scaled signals while AEC3 and APM scale floating + // point signals up by 32768 to match 16-bit fixed-point formats, so we + // convert to [-1,1] scale here. 
+ constexpr float kScale = 1.0f / 32768; + std::transform(frame.begin(), frame.end(), input.end() - frame.size(), + [](float x) { return x * kScale; }); + frame.clear(); +} +} // namespace + +std::unique_ptr<NeuralResidualEchoEstimatorImpl::ModelRunner> +NeuralResidualEchoEstimatorImpl::LoadTfLiteModel( + absl::string_view ml_ree_model_path) { + std::string model_data; + auto model = tflite::FlatBufferModel::BuildFromFile( + std::string(ml_ree_model_path).c_str(), DefaultLoggingErrorReporter()); + if (!model) { + RTC_LOG(LS_ERROR) << "Error loading model from " << ml_ree_model_path; + return nullptr; + } + std::unique_ptr<tflite::Interpreter> interpreter; + tflite::ops::builtin::BuiltinOpResolver resolver; + if (tflite::InterpreterBuilder(*model, resolver)(&interpreter) != kTfLiteOk) { + RTC_LOG(LS_ERROR) << "Error creating interpreter"; + return nullptr; + } + if (interpreter->AllocateTensors() != kTfLiteOk) { + RTC_LOG(LS_ERROR) << "Error allocating tensors"; + return nullptr; + } + if (interpreter->inputs().size() != + static_cast<int>(ModelInputEnum::kNumInputs)) { + RTC_LOG(LS_ERROR) << "Model input number mismatch, got " + << interpreter->inputs().size() << " expected " + << static_cast<int>(ModelInputEnum::kNumInputs); + return nullptr; + } + if (interpreter->outputs().size() != + static_cast<int>(ModelOutputEnum::kNumOutputs)) { + RTC_LOG(LS_ERROR) << "Model output number mismatch, got " + << interpreter->outputs().size() << " expected " + << static_cast<int>(ModelOutputEnum::kNumOutputs); + return nullptr; + } + auto metadata = ReadModelMetadata(model.get()); + if (!metadata.has_value()) { + RTC_LOG(LS_ERROR) << "Error reading model metadata"; + return nullptr; + } + if (metadata->version() != 1) { + RTC_LOG(LS_ERROR) << "Model version mismatch, got " << metadata->version() + << " expected 1"; + return nullptr; + } + + return std::make_unique<TfLiteModelRunner>(std::move(model_data), + std::move(model), + std::move(interpreter), *metadata); +} + +int 
NeuralResidualEchoEstimatorImpl::instance_count_ = 0; + +NeuralResidualEchoEstimatorImpl::NeuralResidualEchoEstimatorImpl( + std::unique_ptr<ModelRunner> model_runner) + : model_runner_(std::move(model_runner)), + data_dumper_(new ApmDataDumper(++instance_count_)) { + input_mic_buffer_.reserve(model_runner_->StepSize()); + input_linear_aec_output_buffer_.reserve(model_runner_->StepSize()); + input_aec_ref_buffer_.reserve(model_runner_->StepSize()); + output_mask_.fill(0.0f); +} + +void NeuralResidualEchoEstimatorImpl::Estimate( + webrtc::ArrayView<const float> x, + webrtc::ArrayView<const std::array<float, kBlockSize>> y, + webrtc::ArrayView<const std::array<float, kBlockSize>> e, + webrtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2, + webrtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2, + webrtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2, + webrtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2, + webrtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2_unbounded) { + // The input is buffered for model inference; multi-channel data is handled by + // summing the content of all channels. 
+ input_mic_buffer_.insert(input_mic_buffer_.end(), y[0].begin(), y[0].end()); + input_linear_aec_output_buffer_.insert(input_linear_aec_output_buffer_.end(), + e[0].begin(), e[0].end()); + for (size_t ch = 1; ch < y.size(); ++ch) { + std::transform(y[ch].begin(), y[ch].end(), + input_mic_buffer_.end() - kBlockSize, + input_mic_buffer_.end() - kBlockSize, std::plus<float>()); + std::transform(e[ch].begin(), e[ch].end(), + input_linear_aec_output_buffer_.end() - kBlockSize, + input_linear_aec_output_buffer_.end() - kBlockSize, + std::plus<float>()); + } + input_aec_ref_buffer_.insert(input_aec_ref_buffer_.end(), x.begin(), x.end()); + + if (static_cast<int>(input_mic_buffer_.size()) == model_runner_->StepSize()) { + DumpInputs(); + PushFrameToModelInput(input_mic_buffer_, + model_runner_->GetInput(ModelInputEnum::kMic)); + PushFrameToModelInput( + input_linear_aec_output_buffer_, + model_runner_->GetInput(ModelInputEnum::kLinearAecOutput)); + PushFrameToModelInput(input_aec_ref_buffer_, + model_runner_->GetInput(ModelInputEnum::kAecRef)); + + if (model_runner_->Invoke()) { + // Downsample output mask to match the AEC3 frequency resolution. + webrtc::ArrayView<const float> output_mask = + model_runner_->GetOutputEchoMask(); + const int kDownsampleFactor = (output_mask.size() - 1) / kFftLengthBy2; + output_mask_[0] = output_mask[0]; + for (size_t i = 1; i < kFftLengthBy2Plus1; ++i) { + const auto* output_mask_ptr = + &output_mask[kDownsampleFactor * (i - 1) + 1]; + output_mask_[i] = *std::max_element( + output_mask_ptr, output_mask_ptr + kDownsampleFactor); + } + // The model is trained to predict the nearend magnitude spectrum but + // exposes 1 minus that mask. The next transformation computes the mask + // that estimates the echo power spectrum assuming that the sum of the + // power spectra of the nearend and the echo produces the power spectrum + // of the input microphone signal. 
+ for (float& m : output_mask_) { + m = 1.0f - (1.0f - m) * (1.0f - m); + } + data_dumper_->DumpRaw("ml_ree_model_mask", output_mask); + data_dumper_->DumpRaw("ml_ree_output_mask", output_mask_); + } + } + + // Use the latest output mask to produce output echo power estimates. + for (size_t ch = 0; ch < E2.size(); ++ch) { + std::transform(E2[ch].begin(), E2[ch].end(), output_mask_.begin(), + R2[ch].begin(), + [](float power, float mask) { return power * mask; }); + std::copy(R2[ch].begin(), R2[ch].end(), R2_unbounded[ch].begin()); + } +} + +void NeuralResidualEchoEstimatorImpl::DumpInputs() { + data_dumper_->DumpWav("ml_ree_mic_input", input_mic_buffer_, 16000, 1); + data_dumper_->DumpWav("ml_ree_linear_aec_output", + input_linear_aec_output_buffer_, 16000, 1); + data_dumper_->DumpWav("ml_ree_aec_ref", input_aec_ref_buffer_, 16000, 1); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/neural_residual_echo_estimator_impl.h b/third_party/libwebrtc/modules/audio_processing/aec3/neural_residual_echo_estimator_impl.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2025 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_NEURAL_RESIDUAL_ECHO_ESTIMATOR_IMPL_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_NEURAL_RESIDUAL_ECHO_ESTIMATOR_IMPL_H_ + +#include <array> +#include <memory> +#include <vector> + +#include "absl/strings/string_view.h" +#include "api/array_view.h" +#include "api/audio/neural_residual_echo_estimator.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#ifdef WEBRTC_ANDROID_PLATFORM_BUILD +#include "external/webrtc/webrtc/modules/audio_processing/aec3/neural_residual_echo_estimator.pb.h" +#else +#include "modules/audio_processing/aec3/neural_residual_echo_estimator.pb.h" +#endif + +namespace webrtc { + +// Implements the NeuralResidualEchoEstimator's virtual methods to estimate +// residual echo not fully removed by the linear AEC3 estimator. It uses a +// provided model to generate an echo residual mask from the linear AEC output +// and render signal. This mask is then used for estimating the echo residual +// that the AEC3 suppressor needs for computing the suppression gains. +class NeuralResidualEchoEstimatorImpl : public NeuralResidualEchoEstimator { + public: + enum class ModelInputEnum { + kModelState = 0, + kMic = 1, + kLinearAecOutput = 2, + kAecRef = 3, + kNumInputs = 4 + }; + enum class ModelOutputEnum { + kEchoMask = 0, + kModelState = 1, + kNumOutputs = 2 + }; + + // Executes a residual echo estimation model on given inputs. + class ModelRunner { + public: + virtual ~ModelRunner() = default; + + virtual int StepSize() const = 0; + + // Waveform inputs must be scaled to [-1.0, 1.0]. + virtual webrtc::ArrayView<float> GetInput(ModelInputEnum input_enum) = 0; + virtual webrtc::ArrayView<const float> GetOutputEchoMask() = 0; + virtual audioproc::ReeModelMetadata GetMetadata() const = 0; + virtual bool Invoke() = 0; + }; + + // Initializes an ML-based residual echo estimator from the tflite file path + // provided. 
Returns nullptr if any initialization step fails. + static std::unique_ptr<ModelRunner> LoadTfLiteModel( + absl::string_view ml_ree_model_path); + + explicit NeuralResidualEchoEstimatorImpl( + std::unique_ptr<ModelRunner> model_runner_); + + void Estimate( + webrtc::ArrayView<const float> x, + webrtc::ArrayView<const std::array<float, kBlockSize>> y, + webrtc::ArrayView<const std::array<float, kBlockSize>> e, + webrtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2, + webrtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2, + webrtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2, + webrtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2, + webrtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2_unbounded) + override; + + private: + void DumpInputs(); + + // Encapsulates all ML model invocation work. + const std::unique_ptr<ModelRunner> model_runner_; + + // Input buffers for translating from the 4 ms FloatS16 block format of AEC3 + // to the model scale and frame size. + std::vector<float> input_mic_buffer_; + std::vector<float> input_linear_aec_output_buffer_; + std::vector<float> input_aec_ref_buffer_; + + // Downsampled model output for what fraction of the power content in the + // linear AEC output is echo for each bin. + std::array<float, kFftLengthBy2Plus1> output_mask_; + + static int instance_count_; + // Pointer to a data dumper that is used for debugging purposes. + std::unique_ptr<ApmDataDumper> data_dumper_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_NEURAL_RESIDUAL_ECHO_ESTIMATOR_IMPL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/neural_residual_echo_estimator_impl_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/neural_residual_echo_estimator_impl_unittest.cc @@ -0,0 +1,297 @@ +/* + * Copyright (c) 2025 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/neural_residual_echo_estimator_impl.h" + +#include <algorithm> +#include <array> +#include <cstddef> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/test/echo_canceller_test_tools.h" +#include "rtc_base/checks.h" +#include "rtc_base/random.h" +#include "test/gmock.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" +#ifdef WEBRTC_ANDROID_PLATFORM_BUILD +#include "external/webrtc/webrtc/modules/audio_processing/aec3/neural_residual_echo_estimator.pb.h" +#else +#include "modules/audio_processing/aec3/neural_residual_echo_estimator.pb.h" +#endif + +namespace webrtc { +namespace { +using testing::FloatEq; +using testing::Not; + +using ModelInputEnum = NeuralResidualEchoEstimatorImpl::ModelInputEnum; +using ModelOutputEnum = NeuralResidualEchoEstimatorImpl::ModelOutputEnum; + +struct ModelConstants { + explicit ModelConstants(int frame_size) + : frame_size(frame_size), + step_size(frame_size / 2), + frame_size_by_2_plus_1(frame_size / 2 + 1) {} + + int frame_size; + int step_size; + int frame_size_by_2_plus_1; +}; + +// Mocks the TF Lite interaction to simplify testing the behavior of +// preprocessing, postprocessing, and AEC3-related rebuffering. 
+class MockModelRunner : public NeuralResidualEchoEstimatorImpl::ModelRunner { + public: + explicit MockModelRunner(const ModelConstants& model_constants) + : constants_(model_constants), + input_mic_(constants_.frame_size), + input_linear_aec_output_(constants_.frame_size), + input_aec_ref_(constants_.frame_size), + output_echo_mask_(constants_.frame_size_by_2_plus_1) {} + + ~MockModelRunner() override {} + + int StepSize() const override { return constants_.step_size; } + + webrtc::ArrayView<float> GetInput(ModelInputEnum input_enum) override { + switch (input_enum) { + case ModelInputEnum::kMic: + return webrtc::ArrayView<float>(input_mic_.data(), + constants_.frame_size); + case ModelInputEnum::kLinearAecOutput: + return webrtc::ArrayView<float>(input_linear_aec_output_.data(), + constants_.frame_size); + case ModelInputEnum::kAecRef: + return webrtc::ArrayView<float>(input_aec_ref_.data(), + constants_.frame_size); + case NeuralResidualEchoEstimatorImpl::ModelInputEnum::kModelState: + case NeuralResidualEchoEstimatorImpl::ModelInputEnum::kNumInputs: + RTC_CHECK(false); + return webrtc::ArrayView<float>(); + } + } + + webrtc::ArrayView<const float> GetOutputEchoMask() override { + return webrtc::ArrayView<const float>(output_echo_mask_.data(), + constants_.frame_size_by_2_plus_1); + } + + MOCK_METHOD(audioproc::ReeModelMetadata, GetMetadata, (), (const, override)); + MOCK_METHOD(bool, Invoke, (), (override)); + + const ModelConstants constants_; + + std::vector<float> input_mic_; + std::vector<float> input_linear_aec_output_; + std::vector<float> input_aec_ref_; + std::vector<float> output_echo_mask_; +}; + +class NeuralResidualEchoEstimatorImplTest + : public ::testing::TestWithParam<ModelConstants> {}; +INSTANTIATE_TEST_SUITE_P( + VariableModelFrameLength, + NeuralResidualEchoEstimatorImplTest, + ::testing::Values(ModelConstants(/*frame_size=*/2 * kBlockSize), + ModelConstants(/*frame_size=*/4 * kBlockSize), + ModelConstants(/*frame_size=*/8 * kBlockSize))); 
+
+// Feeds one model step worth of blocks through Estimate() and checks the
+// model runner's input buffers afterwards: the content that was in the
+// buffers beforehand must have moved `step_size` samples towards index 0, and
+// the tail must hold the newly fed samples scaled by 1/32768.
+TEST_P(NeuralResidualEchoEstimatorImplTest,
+       InputBlocksAreComposedIntoOverlappingFrames) {
+  const ModelConstants model_constants = GetParam();
+  SCOPED_TRACE(testing::Message()
+               << "model_constants.frame_size=" << model_constants.frame_size);
+
+  // Per-channel signal buffers; this test only touches channel 0.
+  constexpr int kNumCaptureChannels = 1;
+  std::array<float, kBlockSize> x;
+  std::vector<std::array<float, kBlockSize>> y{kNumCaptureChannels};
+  std::vector<std::array<float, kBlockSize>> e{kNumCaptureChannels};
+  std::vector<std::array<float, kFftLengthBy2Plus1>> E2{kNumCaptureChannels};
+  std::vector<std::array<float, kFftLengthBy2Plus1>> S2{kNumCaptureChannels};
+  std::vector<std::array<float, kFftLengthBy2Plus1>> Y2{kNumCaptureChannels};
+  std::vector<std::array<float, kFftLengthBy2Plus1>> R2{kNumCaptureChannels};
+  std::vector<std::array<float, kFftLengthBy2Plus1>> R2_unbounded{
+      kNumCaptureChannels};
+
+  // Pre-fill the model input buffers with recognizable ramps so that the
+  // shifted "old" content can be told apart from newly written samples.
+  auto mock_model_runner = std::make_unique<MockModelRunner>(model_constants);
+  for (int i = 0; i < model_constants.frame_size; ++i) {
+    // The odd numbers are different primes, to uniquely identify each buffer.
+    mock_model_runner->input_mic_[i] = i + 2311;
+    mock_model_runner->input_linear_aec_output_[i] = i + 2333;
+    mock_model_runner->input_aec_ref_[i] = i + 2339;
+  }
+  // Keep a raw pointer: ownership moves into the estimator on the next line,
+  // but the test still needs to set expectations and read the buffers.
+  auto* mock_model_runner_ptr = mock_model_runner.get();
+  NeuralResidualEchoEstimatorImpl estimator(std::move(mock_model_runner));
+
+  // The model must be invoked exactly once over the blocks fed below.
+  EXPECT_CALL(*mock_model_runner_ptr, Invoke())
+      .Times(1)
+      .WillOnce(testing::Return(true));
+
+  const int num_blocks_to_process = model_constants.step_size / kBlockSize;
+  for (int block_counter = 0; block_counter < num_blocks_to_process;
+       ++block_counter) {
+    // The odd numbers are different primes, to uniquely identify each buffer.
+    for (size_t j = 0; j < kBlockSize; ++j) {
+      x[j] = block_counter * kBlockSize + j + 11;
+      y[0][j] = block_counter * kBlockSize + j + 13;
+      e[0][j] = block_counter * kBlockSize + j + 17;
+    }
+    for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) {
+      E2[0][j] = block_counter * kFftLengthBy2Plus1 + j + 23;
+      S2[0][j] = block_counter * kFftLengthBy2Plus1 + j + 29;
+      Y2[0][j] = block_counter * kFftLengthBy2Plus1 + j + 31;
+    }
+    estimator.Estimate(x, y, e, S2, Y2, E2, R2, R2_unbounded);
+  }
+
+  // Check that old buffer content is shifted down properly.
+  for (int i = 0; i < model_constants.frame_size - model_constants.step_size;
+       ++i) {
+    SCOPED_TRACE(testing::Message() << "i=" << i);
+    EXPECT_FLOAT_EQ(mock_model_runner_ptr->input_mic_[i],
+                    model_constants.step_size + i + 2311);
+    EXPECT_FLOAT_EQ(mock_model_runner_ptr->input_linear_aec_output_[i],
+                    model_constants.step_size + i + 2333);
+    EXPECT_FLOAT_EQ(mock_model_runner_ptr->input_aec_ref_[i],
+                    model_constants.step_size + i + 2339);
+  }
+  // Check that new buffer content matches the input data.
+  // The offsets identify the source signal: 13 is y (mic), 17 is e (linear
+  // AEC output) and 11 is x (AEC reference).
+  for (int i = model_constants.frame_size - model_constants.step_size;
+       i < model_constants.frame_size; ++i) {
+    SCOPED_TRACE(testing::Message() << "i=" << i);
+    constexpr float kScaling = 1.0f / 32768;
+    int input_index =
+        i - (model_constants.frame_size - model_constants.step_size);
+    EXPECT_FLOAT_EQ(mock_model_runner_ptr->input_mic_[i],
+                    kScaling * (input_index + 13));
+    EXPECT_FLOAT_EQ(mock_model_runner_ptr->input_linear_aec_output_[i],
+                    kScaling * (input_index + 17));
+    EXPECT_FLOAT_EQ(mock_model_runner_ptr->input_aec_ref_[i],
+                    kScaling * (input_index + 11));
+  }
+}
+
+// Plants a known echo mask in the mock's output buffer, runs one model step
+// and checks that R2 and R2_unbounded equal the (constant) input power times
+// 1 - (1 - mask)^2.
+TEST_P(NeuralResidualEchoEstimatorImplTest, OutputMaskIsApplied) {
+  const ModelConstants model_constants = GetParam();
+  SCOPED_TRACE(testing::Message()
+               << "model_constants.frame_size=" << model_constants.frame_size);
+
+  constexpr int kNumCaptureChannels = 1;
+  std::array<float, kBlockSize> x;
+  std::vector<std::array<float, kBlockSize>> y{kNumCaptureChannels};
+  std::vector<std::array<float, kBlockSize>> e{kNumCaptureChannels};
+  std::vector<std::array<float, kFftLengthBy2Plus1>> E2{kNumCaptureChannels};
+  std::vector<std::array<float, kFftLengthBy2Plus1>> S2{kNumCaptureChannels};
+  std::vector<std::array<float, kFftLengthBy2Plus1>> Y2{kNumCaptureChannels};
+  std::vector<std::array<float, kFftLengthBy2Plus1>> R2{kNumCaptureChannels};
+  std::vector<std::array<float, kFftLengthBy2Plus1>> R2_unbounded{
+      kNumCaptureChannels};
+  // Every input is the same constant, so the expectations below do not depend
+  // on which of the spectra the estimator scales by the mask.
+  std::fill(x.begin(), x.end(), 10000);
+  std::fill(y[0].begin(), y[0].end(), 10000);
+  std::fill(e[0].begin(), e[0].end(), 10000);
+  std::fill(E2[0].begin(), E2[0].end(), 10000);
+  std::fill(S2[0].begin(), S2[0].end(), 10000);
+  std::fill(Y2[0].begin(), Y2[0].end(), 10000);
+
+  auto mock_model_runner = std::make_unique<MockModelRunner>(model_constants);
+
+  // Mock the output echo mask to be a ramp from 0.1 at DC to 1.0 at the highest
+  // frequency bin.
+  // NOTE(review): the ramp value is 0.1 + 0.9 * i / step_size with i at most
+  // kFftLengthBy2Plus1 - 1, so it only tops out at 1.0 when step_size equals
+  // that maximum; ModelConstants is defined outside this excerpt - confirm
+  // for the larger frame sizes.
+  // Each bin i >= 1 is written to `blocks_per_model_step` consecutive mask
+  // entries, all holding the same value.
+  const int blocks_per_model_step = model_constants.step_size / kBlockSize;
+  mock_model_runner->output_echo_mask_[0] = 0.1;
+  for (size_t i = 1; i < kFftLengthBy2Plus1; ++i) {
+    for (int j = 1; j <= blocks_per_model_step; ++j) {
+      mock_model_runner
+          ->output_echo_mask_[(i - 1) * blocks_per_model_step + j] =
+          0.1 + 0.9 * i / model_constants.step_size;
+    }
+  }
+  auto* mock_model_runner_ptr = mock_model_runner.get();
+  NeuralResidualEchoEstimatorImpl estimator(std::move(mock_model_runner));
+
+  // The model must be invoked exactly once over the blocks fed below.
+  EXPECT_CALL(*mock_model_runner_ptr, Invoke())
+      .Times(1)
+      .WillOnce(testing::Return(true));
+
+  for (int b = 0; b < blocks_per_model_step; ++b) {
+    estimator.Estimate(x, y, e, S2, Y2, E2, R2, R2_unbounded);
+  }
+
+  // Check that the mocked output mask is applied.
+  for (size_t i = 0; i < kFftLengthBy2Plus1; ++i) {
+    SCOPED_TRACE(testing::Message() << "i=" << i);
+    const float mask = (0.1 + 0.9 * i / model_constants.step_size);
+    const float power_adjusted_mask = 1 - (1 - mask) * (1 - mask);
+    EXPECT_FLOAT_EQ(R2[0][i], 10000 * power_adjusted_mask);
+    EXPECT_FLOAT_EQ(R2_unbounded[0][i], R2[0][i]);
+  }
+}
+
+// Loads the test .tflite resource through LoadTfLiteModel() and runs 1000
+// blocks of random two-channel data through the estimator, checking after
+// every block that all output bins were overwritten.
+TEST(NeuralResidualEchoEstimatorWithRealModelTest,
+     RunEstimationWithRealTfLiteModel) {
+  std::string model_path = test::ResourcePath(
+      "audio_processing/aec3/noop_ml_aec_model_for_testing", "tflite");
+  std::unique_ptr<NeuralResidualEchoEstimatorImpl::ModelRunner>
+      tflite_model_runner =
+          NeuralResidualEchoEstimatorImpl::LoadTfLiteModel(model_path);
+  ASSERT_TRUE(tflite_model_runner != nullptr);
+  const audioproc::ReeModelMetadata metadata =
+      tflite_model_runner->GetMetadata();
+  // Default version 1 is returned when model metadata is missing.
+  ASSERT_EQ(metadata.version(), 1);
+
+  NeuralResidualEchoEstimatorImpl estimator(std::move(tflite_model_runner));
+
+  constexpr int kNumCaptureChannels = 2;
+  std::array<float, kBlockSize> x;
+  std::vector<std::array<float, kBlockSize>> y{kNumCaptureChannels};
+  std::vector<std::array<float, kBlockSize>> e{kNumCaptureChannels};
+  std::vector<std::array<float, kFftLengthBy2Plus1>> E2{kNumCaptureChannels};
+  std::vector<std::array<float, kFftLengthBy2Plus1>> S2{kNumCaptureChannels};
+  std::vector<std::array<float, kFftLengthBy2Plus1>> Y2{kNumCaptureChannels};
+  std::vector<std::array<float, kFftLengthBy2Plus1>> R2{kNumCaptureChannels};
+  std::vector<std::array<float, kFftLengthBy2Plus1>> R2_unbounded{
+      kNumCaptureChannels};
+  // Fixed seed, so the generated input is the same on every run.
+  Random random_generator(4635U);
+  constexpr int kNumBlocksToProcess = 1000;
+  for (int b = 0; b < kNumBlocksToProcess; ++b) {
+    constexpr float kAmplitude = 0.1f;
+    RandomizeSampleVector(&random_generator, x, kAmplitude);
+    for (int ch = 0; ch < kNumCaptureChannels; ++ch) {
+      RandomizeSampleVector(&random_generator, y[ch], kAmplitude);
+      RandomizeSampleVector(&random_generator, e[ch], kAmplitude);
+      RandomizeSampleVector(&random_generator, E2[ch], kAmplitude);
+      RandomizeSampleVector(&random_generator, S2[ch], kAmplitude);
+      RandomizeSampleVector(&random_generator, Y2[ch], kAmplitude);
+      // 1234 is a sentinel: any bin still holding it after Estimate() was
+      // not written.
+      std::fill(R2[ch].begin(), R2[ch].end(), 1234.0f);
+      std::fill(R2_unbounded[ch].begin(), R2_unbounded[ch].end(), 1234.0f);
+    }
+    estimator.Estimate(x, y, e, S2, Y2, E2, R2, R2_unbounded);
+
+    // Check that the output is populated.
+    for (int ch = 0; ch < kNumCaptureChannels; ++ch) {
+      for (size_t i = 0; i < kFftLengthBy2Plus1; ++i) {
+        SCOPED_TRACE(testing::Message() << "block b=" << b << ", channel ch="
+                                        << ch << ", index i=" << i);
+        EXPECT_THAT(R2[ch][i], Not(FloatEq(1234.0)));
+        EXPECT_THAT(R2_unbounded[ch][i], Not(FloatEq(1234.0)));
+      }
+    }
+  }
+}
+
+}  // namespace
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/audio_processing_simulator.h b/third_party/libwebrtc/modules/audio_processing/test/audio_processing_simulator.h
@@ -158,6 +158,7 @@ struct SimulationSettings {
   std::optional<int> dump_start_frame;
   std::optional<int> dump_end_frame;
   std::optional<int> init_to_process;
+  std::optional<std::string> neural_echo_residual_estimator_model;
 };
 
 // Provides common functionality for performing audioprocessing simulations.
diff --git a/third_party/libwebrtc/modules/audio_processing/test/audioproc_float_impl.cc b/third_party/libwebrtc/modules/audio_processing/test/audioproc_float_impl.cc
@@ -35,6 +35,7 @@
 #include "api/field_trials.h"
 #include "api/scoped_refptr.h"
 #include "common_audio/wav_file.h"
+#include "modules/audio_processing/aec3/neural_residual_echo_estimator_impl.h"
 #include "modules/audio_processing/test/aec_dump_based_simulator.h"
 #include "modules/audio_processing/test/audio_processing_simulator.h"
 #include "modules/audio_processing/test/echo_canceller3_config_json.h"
@@ -337,6 +338,12 @@ ABSL_FLAG(std::string, "E.g.
running with --force_fieldtrials=WebRTC-FooFeature/Enable/" " will assign the group Enable to field trial WebRTC-FooFeature."); +ABSL_FLAG(std::string, + ree_model, + "", + "When running with a neural residual echo estimator, the path to the " + "model binary."); + namespace webrtc { namespace test { namespace { @@ -534,6 +541,8 @@ SimulationSettings CreateSettings() { SetSettingIfSpecified(absl::GetFlag(FLAGS_init_to_process), &settings.init_to_process); + SetSettingIfSpecified(absl::GetFlag(FLAGS_ree_model), + &settings.neural_echo_residual_estimator_model); return settings; } @@ -804,6 +813,15 @@ void SetDependencies(const SimulationSettings& settings, builder.SetEchoControlFactory(std::make_unique<EchoCanceller3Factory>(cfg)); } + if (settings.neural_echo_residual_estimator_model) { + auto model_runner = NeuralResidualEchoEstimatorImpl::LoadTfLiteModel( + *settings.neural_echo_residual_estimator_model); + RTC_CHECK(model_runner); + builder.SetNeuralResidualEchoEstimator( + std::make_unique<NeuralResidualEchoEstimatorImpl>( + std::move(model_runner))); + } + if (settings.use_ed && *settings.use_ed) { builder.SetEchoDetector(CreateEchoDetector()); } diff --git a/third_party/libwebrtc/moz-patch-stack/s0001.patch b/third_party/libwebrtc/moz-patch-stack/s0001.patch @@ -1510,7 +1510,7 @@ index b7933130bd..428fc9615d 100644 vcm_ = nullptr; } diff --git a/webrtc.gni b/webrtc.gni -index ac905cea7c..613d872b69 100644 +index b18c04d6bb..0a291ff95c 100644 --- a/webrtc.gni +++ b/webrtc.gni @@ -110,7 +110,7 @@ declare_args() { diff --git a/third_party/libwebrtc/moz-patch-stack/s0027.patch b/third_party/libwebrtc/moz-patch-stack/s0027.patch @@ -116,7 +116,7 @@ index e8484bfad5..ffafaf0a4a 100644 # TODO(https://bugs.webrtc.org/14437): Remove this section if general # Chromium fix resolves the problem. 
diff --git a/BUILD.gn b/BUILD.gn -index 3efce2dd19..cbfc05f243 100644 +index 8488cf0818..14d1caaa01 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -33,7 +33,7 @@ if (is_android) { @@ -816,10 +816,10 @@ index c8b15e4040..7ace1135c8 100644 testonly = true diff --git a/modules/audio_processing/aec3/BUILD.gn b/modules/audio_processing/aec3/BUILD.gn -index e864c4dfe2..c12c4b653f 100644 +index 6ab5f18f4d..2649cb2f35 100644 --- a/modules/audio_processing/aec3/BUILD.gn +++ b/modules/audio_processing/aec3/BUILD.gn -@@ -264,14 +264,11 @@ if (current_cpu == "x86" || current_cpu == "x64") { +@@ -267,14 +267,11 @@ if (current_cpu == "x86" || current_cpu == "x64") { "vector_math_avx2.cc", ] @@ -1280,7 +1280,7 @@ index 79b0e8e040..2e408d56c0 100644 "../api:sequence_checker", "../api/adaptation:resource_adaptation_api", diff --git a/webrtc.gni b/webrtc.gni -index 613d872b69..88e0441d9e 100644 +index 0a291ff95c..63f36cc466 100644 --- a/webrtc.gni +++ b/webrtc.gni @@ -35,6 +35,11 @@ if (is_mac) { @@ -1380,7 +1380,7 @@ index 613d872b69..88e0441d9e 100644 # Make it possible to provide custom locations for some libraries (move these # up into declare_args should we need to actually use them for the GN build). 
rtc_libvpx_dir = "//third_party/libvpx" -@@ -1209,7 +1219,7 @@ if (is_mac || is_ios) { +@@ -1212,7 +1222,7 @@ if (is_mac || is_ios) { } } diff --git a/third_party/libwebrtc/moz-patch-stack/s0034.patch b/third_party/libwebrtc/moz-patch-stack/s0034.patch @@ -20,7 +20,7 @@ Mercurial Revision: https://hg.mozilla.org/mozilla-central/rev/58f47eacaf10d12e2 11 files changed, 27 insertions(+), 27 deletions(-) diff --git a/BUILD.gn b/BUILD.gn -index cbfc05f243..ba0be681c7 100644 +index 14d1caaa01..7bce8fab51 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -444,12 +444,12 @@ config("common_config") { @@ -172,10 +172,10 @@ index ac862c65a8..e66ed2796e 100644 } else { sources += [ "spl_sqrt_floor.c" ] diff --git a/modules/audio_processing/aec3/BUILD.gn b/modules/audio_processing/aec3/BUILD.gn -index c12c4b653f..fd9bd4f298 100644 +index 2649cb2f35..c08e7ad5c9 100644 --- a/modules/audio_processing/aec3/BUILD.gn +++ b/modules/audio_processing/aec3/BUILD.gn -@@ -123,7 +123,7 @@ rtc_library("aec3") { +@@ -126,7 +126,7 @@ rtc_library("aec3") { ] defines = [] @@ -184,7 +184,7 @@ index c12c4b653f..fd9bd4f298 100644 suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ] cflags = [ "-mfpu=neon" ] } -@@ -162,7 +162,7 @@ rtc_library("aec3") { +@@ -165,7 +165,7 @@ rtc_library("aec3") { "//third_party/abseil-cpp/absl/strings:string_view", ] @@ -193,7 +193,7 @@ index c12c4b653f..fd9bd4f298 100644 deps += [ ":aec3_avx2" ] } } -@@ -253,7 +253,7 @@ rtc_source_set("fft_data") { +@@ -256,7 +256,7 @@ rtc_source_set("fft_data") { ] } @@ -276,7 +276,7 @@ index 693e45ff8b..faa88d3543 100644 config("x11_config") { if (rtc_use_x11_extensions) { diff --git a/webrtc.gni b/webrtc.gni -index 88e0441d9e..00032f4476 100644 +index 63f36cc466..f4ce4559aa 100644 --- a/webrtc.gni +++ b/webrtc.gni @@ -156,13 +156,13 @@ declare_args() { diff --git a/third_party/libwebrtc/moz-patch-stack/s0045.patch b/third_party/libwebrtc/moz-patch-stack/s0045.patch @@ -18,7 +18,7 @@ Mercurial Revision: 
https://hg.mozilla.org/mozilla-central/rev/0300b32b7de70fb89 5 files changed, 10 insertions(+), 3 deletions(-) diff --git a/BUILD.gn b/BUILD.gn -index ba0be681c7..bcf71680dc 100644 +index 7bce8fab51..65ac2f96bd 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -224,6 +224,9 @@ config("common_inherited_config") { @@ -74,7 +74,7 @@ index 20bf4afc44..697bedadb9 100644 #endif // defined(WEBRTC_POSIX) } diff --git a/webrtc.gni b/webrtc.gni -index 00032f4476..386c6ffc8f 100644 +index f4ce4559aa..e8355afb50 100644 --- a/webrtc.gni +++ b/webrtc.gni @@ -349,7 +349,7 @@ rtc_opus_dir = "//third_party/opus" diff --git a/third_party/libwebrtc/moz-patch-stack/s0055.patch b/third_party/libwebrtc/moz-patch-stack/s0055.patch @@ -11,7 +11,7 @@ Mercurial Revision: https://hg.mozilla.org/mozilla-central/rev/b0658888969395dca 2 files changed, 10 insertions(+) diff --git a/BUILD.gn b/BUILD.gn -index bcf71680dc..33ed200085 100644 +index 65ac2f96bd..2e83148707 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -628,6 +628,10 @@ if (!build_with_chromium) { diff --git a/third_party/libwebrtc/moz-patch-stack/s0069.patch b/third_party/libwebrtc/moz-patch-stack/s0069.patch @@ -10,7 +10,7 @@ Mercurial Revision: https://hg.mozilla.org/mozilla-central/rev/46fb51c90709be64c 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webrtc.gni b/webrtc.gni -index 386c6ffc8f..a8990f9c97 100644 +index e8355afb50..d7edd19b90 100644 --- a/webrtc.gni +++ b/webrtc.gni @@ -297,7 +297,7 @@ declare_args() { diff --git a/third_party/libwebrtc/moz-patch-stack/s0084.patch b/third_party/libwebrtc/moz-patch-stack/s0084.patch @@ -11,7 +11,7 @@ Mercurial Revision: https://hg.mozilla.org/mozilla-central/rev/2185cab977988fd4a 3 files changed, 16 insertions(+) diff --git a/BUILD.gn b/BUILD.gn -index 33ed200085..a1010e5e9a 100644 +index 2e83148707..1ad6866fac 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -613,6 +613,7 @@ if (!build_with_chromium) { diff --git a/third_party/libwebrtc/moz-patch-stack/s0093.patch 
b/third_party/libwebrtc/moz-patch-stack/s0093.patch @@ -10,7 +10,7 @@ Mercurial Revision: https://hg.mozilla.org/mozilla-central/rev/b050c455caa1d24a0 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/BUILD.gn b/BUILD.gn -index a1010e5e9a..771e0b196a 100644 +index 1ad6866fac..7d8d06c678 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -280,6 +280,7 @@ config("rtc_prod_config") { diff --git a/third_party/libwebrtc/moz-patch-stack/s0100.patch b/third_party/libwebrtc/moz-patch-stack/s0100.patch @@ -52,11 +52,11 @@ index 3bd0bfb79f..b7561e53b6 100644 rtc_library("encoded_frame") { diff --git a/webrtc.gni b/webrtc.gni -index a8990f9c97..70238c3891 100644 +index d7edd19b90..c84213867e 100644 --- a/webrtc.gni +++ b/webrtc.gni -@@ -486,8 +486,8 @@ all_poison_types = [ - "software_video_codecs", +@@ -489,8 +489,8 @@ all_poison_types = [ + "default_neural_residual_echo_estimator", ] -absl_include_config = "//third_party/abseil-cpp:absl_include_config" diff --git a/third_party/libwebrtc/moz-patch-stack/s0101.patch b/third_party/libwebrtc/moz-patch-stack/s0101.patch @@ -10,10 +10,10 @@ Mercurial Revision: https://hg.mozilla.org/mozilla-central/rev/45b99d1ba95b46896 1 file changed, 71 insertions(+) diff --git a/webrtc.gni b/webrtc.gni -index 70238c3891..87ce405e1c 100644 +index c84213867e..6d3d1853ad 100644 --- a/webrtc.gni +++ b/webrtc.gni -@@ -690,6 +690,36 @@ template("rtc_source_set") { +@@ -693,6 +693,36 @@ template("rtc_source_set") { deps += [ "//third_party/abseil-cpp:absl" ] } } @@ -50,7 +50,7 @@ index 70238c3891..87ce405e1c 100644 } } -@@ -924,6 +954,47 @@ template("rtc_library") { +@@ -927,6 +957,47 @@ template("rtc_library") { deps += [ "//third_party/abseil-cpp:absl" ] } } diff --git a/third_party/libwebrtc/moz-patch-stack/s0102.patch b/third_party/libwebrtc/moz-patch-stack/s0102.patch @@ -75,7 +75,7 @@ index 4df8681a9b..269ca2f5a7 100644 # The python interpreter to use by default. On Windows, this will look # for vpython3.exe and vpython3.bat. 
diff --git a/BUILD.gn b/BUILD.gn -index 771e0b196a..7e1e8353ab 100644 +index 7d8d06c678..a28f254f8d 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -21,15 +21,15 @@ @@ -116,7 +116,7 @@ index 771e0b196a..7e1e8353ab 100644 defines = [] deps = [ "net/dcsctp/public:factory", -@@ -892,7 +892,7 @@ group("poison_software_video_codecs") { +@@ -895,7 +895,7 @@ group("poison_default_neural_residual_echo_estimator") { if (!build_with_chromium) { # Write debug logs to gn_logs.txt. # This is also required for Siso builds. @@ -455,10 +455,10 @@ index 7ace1135c8..b63880a296 100644 config("audio_device_warnings_config") { diff --git a/modules/audio_processing/aec3/BUILD.gn b/modules/audio_processing/aec3/BUILD.gn -index fd9bd4f298..f2348fb71c 100644 +index c08e7ad5c9..bb0da3bcae 100644 --- a/modules/audio_processing/aec3/BUILD.gn +++ b/modules/audio_processing/aec3/BUILD.gn -@@ -124,7 +124,7 @@ rtc_library("aec3") { +@@ -127,7 +127,7 @@ rtc_library("aec3") { defines = [] if (rtc_build_with_neon && target_cpu != "arm64") { @@ -804,7 +804,7 @@ index e7a254c85d..eb2cb6c937 100644 } } diff --git a/webrtc.gni b/webrtc.gni -index 87ce405e1c..6d303397f3 100644 +index 6d3d1853ad..da08c490e7 100644 --- a/webrtc.gni +++ b/webrtc.gni @@ -5,12 +5,12 @@ diff --git a/third_party/libwebrtc/moz-patch-stack/s0103.patch b/third_party/libwebrtc/moz-patch-stack/s0103.patch @@ -10,10 +10,10 @@ Mercurial Revision: https://hg.mozilla.org/mozilla-central/rev/f18c05287ce831369 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/webrtc.gni b/webrtc.gni -index 6d303397f3..2866396704 100644 +index da08c490e7..18c5f9c080 100644 --- a/webrtc.gni +++ b/webrtc.gni -@@ -535,7 +535,7 @@ template("rtc_test") { +@@ -538,7 +538,7 @@ template("rtc_test") { min_sdk_version = 23 target_sdk_version = 24 deps += [ @@ -22,7 +22,7 @@ index 6d303397f3..2866396704 100644 webrtc_root + "sdk/android:native_test_jni_onload", webrtc_root + "sdk/android:base_java", webrtc_root + "test:native_test_java", -@@ -990,11 +990,21 @@ 
template("rtc_library") { +@@ -993,11 +993,21 @@ template("rtc_library") { modified_deps = [] foreach (dep, deps) { newdep = string_replace(dep, "//third_party/", "//libwebrtc/third_party/") @@ -44,7 +44,7 @@ index 6d303397f3..2866396704 100644 } } -@@ -1032,7 +1042,7 @@ template("rtc_executable") { +@@ -1035,7 +1045,7 @@ template("rtc_executable") { if (is_win) { deps += [ # Give executables the default manifest on Windows (a no-op elsewhere). diff --git a/third_party/libwebrtc/moz-patch-stack/s0105.patch b/third_party/libwebrtc/moz-patch-stack/s0105.patch @@ -9,7 +9,7 @@ Mercurial Revision: https://hg.mozilla.org/mozilla-central/rev/90a33b06e51017d25 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/BUILD.gn b/BUILD.gn -index 7e1e8353ab..c73a7287bf 100644 +index a28f254f8d..c01dd4ed48 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -569,6 +569,7 @@ if (!build_with_chromium) { diff --git a/third_party/libwebrtc/moz-patch-stack/s0107.patch b/third_party/libwebrtc/moz-patch-stack/s0107.patch @@ -16,7 +16,7 @@ Mercurial Revision: https://hg.mozilla.org/mozilla-central/rev/4527c41ef71d7683c 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/BUILD.gn b/BUILD.gn -index c73a7287bf..246f9f9a1c 100644 +index c01dd4ed48..4ce07a89d1 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -525,6 +525,9 @@ if (!rtc_build_ssl) { @@ -147,7 +147,7 @@ index 408573e6a7..2f064aaf3a 100644 rtc_library("ssl") { if (!build_with_mozilla) { diff --git a/webrtc.gni b/webrtc.gni -index 2866396704..a2c939e3ec 100644 +index 18c5f9c080..06acdc47bb 100644 --- a/webrtc.gni +++ b/webrtc.gni @@ -112,7 +112,7 @@ declare_args() { diff --git a/third_party/libwebrtc/moz-patch-stack/s0111.patch b/third_party/libwebrtc/moz-patch-stack/s0111.patch @@ -10,7 +10,7 @@ Mercurial Revision: https://hg.mozilla.org/mozilla-central/rev/27c29807e6f46562d 1 file changed, 1 insertion(+) diff --git a/BUILD.gn b/BUILD.gn -index 246f9f9a1c..8240fc84d6 100644 +index 4ce07a89d1..8afe2e1880 100644 --- a/BUILD.gn +++ 
b/BUILD.gn @@ -619,6 +619,7 @@ if (!build_with_chromium) { diff --git a/third_party/libwebrtc/moz-patch-stack/s0116.patch b/third_party/libwebrtc/moz-patch-stack/s0116.patch @@ -18,7 +18,7 @@ Mercurial Revision: https://hg.mozilla.org/mozilla-central/rev/c8cf67108972d5cae 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webrtc.gni b/webrtc.gni -index a2c939e3ec..73783286e7 100644 +index 06acdc47bb..0f8d25aeba 100644 --- a/webrtc.gni +++ b/webrtc.gni @@ -132,7 +132,7 @@ declare_args() { diff --git a/third_party/libwebrtc/moz-patch-stack/s0126.patch b/third_party/libwebrtc/moz-patch-stack/s0126.patch @@ -9,7 +9,7 @@ Mercurial Revision: https://hg.mozilla.org/mozilla-central/rev/43de54a6e52daf0a0 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/BUILD.gn b/BUILD.gn -index 8240fc84d6..a3b738a7e1 100644 +index 8afe2e1880..02eeffd8bc 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -315,6 +315,16 @@ config("common_config") { diff --git a/third_party/libwebrtc/moz-patch-stack/s0128.patch b/third_party/libwebrtc/moz-patch-stack/s0128.patch @@ -9,7 +9,7 @@ Mercurial Revision: https://hg.mozilla.org/mozilla-central/rev/8e7778c59894c284b 1 file changed, 1 insertion(+) diff --git a/BUILD.gn b/BUILD.gn -index a3b738a7e1..2b3e510f76 100644 +index 02eeffd8bc..6e52ad5755 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -629,6 +629,7 @@ if (!build_with_chromium) { diff --git a/third_party/libwebrtc/moz-patch-stack/s0130.patch b/third_party/libwebrtc/moz-patch-stack/s0130.patch @@ -11,7 +11,7 @@ Mercurial Revision: https://hg.mozilla.org/mozilla-central/rev/a076ddec60822b1d2 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/BUILD.gn b/BUILD.gn -index 2b3e510f76..b7b2edb51f 100644 +index 6e52ad5755..0f10f0fa14 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -315,16 +315,6 @@ config("common_config") { diff --git a/third_party/libwebrtc/resources/BUILD.gn b/third_party/libwebrtc/resources/BUILD.gn @@ -39,6 +39,7 @@ modules_unittests_resources = [ "audio_device/audio_short16.pcm", 
"audio_device/audio_short44.pcm", "audio_device/audio_short48.pcm", + "audio_processing/aec3/noop_ml_aec_model_for_testing.tflite", "audio_processing/agc/agc_audio.pcm", "audio_processing/agc/agc_no_circular_buffer.dat", "audio_processing/agc/agc_pitch_gain.dat", diff --git a/third_party/libwebrtc/resources/audio_processing/aec3/noop_ml_aec_model_for_testing.tflite.sha1 b/third_party/libwebrtc/resources/audio_processing/aec3/noop_ml_aec_model_for_testing.tflite.sha1 @@ -0,0 +1 @@ +0f612634e630a5f4b9807ed7da329fea73b21be3 +\ No newline at end of file diff --git a/third_party/libwebrtc/webrtc.gni b/third_party/libwebrtc/webrtc.gni @@ -484,6 +484,9 @@ all_poison_types = [ # Software video codecs (VP8 and VP9 through libvpx). "software_video_codecs", + + # Residual echo estimator that runs inference using the tflite library. + "default_neural_residual_echo_estimator", ] absl_include_config = "//libwebrtc/third_party/abseil-cpp:absl_include_config"