simulator.cc (9020B)
1 /* 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/audio_processing/test/conversational_speech/simulator.h" 12 13 #include <algorithm> 14 #include <cstddef> 15 #include <cstdint> 16 #include <map> 17 #include <memory> 18 #include <set> 19 #include <string> 20 #include <tuple> 21 #include <utility> 22 #include <vector> 23 24 #include "absl/strings/string_view.h" 25 #include "api/array_view.h" 26 #include "common_audio/include/audio_util.h" 27 #include "common_audio/wav_file.h" 28 #include "modules/audio_processing/test/conversational_speech/multiend_call.h" 29 #include "modules/audio_processing/test/conversational_speech/wavreader_interface.h" 30 #include "rtc_base/checks.h" 31 #include "rtc_base/logging.h" 32 #include "rtc_base/numerics/safe_conversions.h" 33 #include "test/testsupport/file_utils.h" 34 35 namespace webrtc { 36 namespace test { 37 namespace { 38 39 using conversational_speech::MultiEndCall; 40 using conversational_speech::SpeakerOutputFilePaths; 41 using conversational_speech::WavReaderInterface; 42 43 // Combines output path and speaker names to define the output file paths for 44 // the near-end and far=end audio tracks. 45 std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>> 46 InitSpeakerOutputFilePaths(const std::set<std::string>& speaker_names, 47 absl::string_view output_path) { 48 // Create map. 49 auto speaker_output_file_paths_map = 50 std::make_unique<std::map<std::string, SpeakerOutputFilePaths>>(); 51 52 // Add near-end and far-end output paths into the map. 53 for (const auto& speaker_name : speaker_names) { 54 const std::string near_end_path = 55 test::JoinFilename(output_path, "s_" + speaker_name + "-near_end.wav"); 56 RTC_LOG(LS_VERBOSE) << "The near-end audio track will be created in " 57 << near_end_path << "."; 58 59 const std::string far_end_path = 60 test::JoinFilename(output_path, "s_" + speaker_name + "-far_end.wav"); 61 RTC_LOG(LS_VERBOSE) << "The far-end audio track will be created in " 62 << far_end_path << "."; 63 64 // Add to map. 65 speaker_output_file_paths_map->emplace( 66 std::piecewise_construct, std::forward_as_tuple(speaker_name), 67 std::forward_as_tuple(near_end_path, far_end_path)); 68 } 69 70 return speaker_output_file_paths_map; 71 } 72 73 // Class that provides one WavWriter for the near-end and one for the far-end 74 // output track of a speaker. 75 class SpeakerWavWriters { 76 public: 77 SpeakerWavWriters(const SpeakerOutputFilePaths& output_file_paths, 78 int sample_rate) 79 : near_end_wav_writer_(output_file_paths.near_end, sample_rate, 1u), 80 far_end_wav_writer_(output_file_paths.far_end, sample_rate, 1u) {} 81 WavWriter* near_end_wav_writer() { return &near_end_wav_writer_; } 82 WavWriter* far_end_wav_writer() { return &far_end_wav_writer_; } 83 84 private: 85 WavWriter near_end_wav_writer_; 86 WavWriter far_end_wav_writer_; 87 }; 88 89 // Initializes one WavWriter instance for each speaker and both the near-end and 90 // far-end output tracks. 91 std::unique_ptr<std::map<std::string, SpeakerWavWriters>> 92 InitSpeakersWavWriters(const std::map<std::string, SpeakerOutputFilePaths>& 93 speaker_output_file_paths, 94 int sample_rate) { 95 // Create map. 96 auto speaker_wav_writers_map = 97 std::make_unique<std::map<std::string, SpeakerWavWriters>>(); 98 99 // Add SpeakerWavWriters instance into the map. 100 for (auto it = speaker_output_file_paths.begin(); 101 it != speaker_output_file_paths.end(); ++it) { 102 speaker_wav_writers_map->emplace( 103 std::piecewise_construct, std::forward_as_tuple(it->first), 104 std::forward_as_tuple(it->second, sample_rate)); 105 } 106 107 return speaker_wav_writers_map; 108 } 109 110 // Reads all the samples for each audio track. 111 std::unique_ptr<std::map<std::string, std::vector<int16_t>>> PreloadAudioTracks( 112 const std::map<std::string, std::unique_ptr<WavReaderInterface>>& 113 audiotrack_readers) { 114 // Create map. 115 auto audiotracks_map = 116 std::make_unique<std::map<std::string, std::vector<int16_t>>>(); 117 118 // Add audio track vectors. 119 for (auto it = audiotrack_readers.begin(); it != audiotrack_readers.end(); 120 ++it) { 121 // Add map entry. 122 audiotracks_map->emplace(std::piecewise_construct, 123 std::forward_as_tuple(it->first), 124 std::forward_as_tuple(it->second->NumSamples())); 125 126 // Read samples. 127 it->second->ReadInt16Samples(audiotracks_map->at(it->first)); 128 } 129 130 return audiotracks_map; 131 } 132 133 // Writes all the values in `source_samples` via `wav_writer`. If the number of 134 // previously written samples in `wav_writer` is less than `interval_begin`, it 135 // adds zeros as left padding. The padding corresponds to intervals during which 136 // a speaker is not active. 137 void PadLeftWriteChunk(ArrayView<const int16_t> source_samples, 138 size_t interval_begin, 139 WavWriter* wav_writer) { 140 // Add left padding. 141 RTC_CHECK(wav_writer); 142 RTC_CHECK_GE(interval_begin, wav_writer->num_samples()); 143 size_t padding_size = interval_begin - wav_writer->num_samples(); 144 if (padding_size != 0) { 145 const std::vector<int16_t> padding(padding_size, 0); 146 wav_writer->WriteSamples(padding.data(), padding_size); 147 } 148 149 // Write source samples. 150 wav_writer->WriteSamples(source_samples.data(), source_samples.size()); 151 } 152 153 // Appends zeros via `wav_writer`. The number of zeros is always non-negative 154 // and equal to the difference between the previously written samples and 155 // `pad_samples`. 156 void PadRightWrite(WavWriter* wav_writer, size_t pad_samples) { 157 RTC_CHECK(wav_writer); 158 RTC_CHECK_GE(pad_samples, wav_writer->num_samples()); 159 size_t padding_size = pad_samples - wav_writer->num_samples(); 160 if (padding_size != 0) { 161 const std::vector<int16_t> padding(padding_size, 0); 162 wav_writer->WriteSamples(padding.data(), padding_size); 163 } 164 } 165 166 void ScaleSignal(ArrayView<const int16_t> source_samples, 167 int gain, 168 ArrayView<int16_t> output_samples) { 169 const float gain_linear = DbToRatio(gain); 170 RTC_DCHECK_EQ(source_samples.size(), output_samples.size()); 171 std::transform(source_samples.begin(), source_samples.end(), 172 output_samples.begin(), [gain_linear](int16_t x) -> int16_t { 173 return saturated_cast<int16_t>(x * gain_linear); 174 }); 175 } 176 177 } // namespace 178 179 namespace conversational_speech { 180 181 std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>> Simulate( 182 const MultiEndCall& multiend_call, 183 absl::string_view output_path) { 184 // Set output file paths and initialize wav writers. 185 const auto& speaker_names = multiend_call.speaker_names(); 186 auto speaker_output_file_paths = 187 InitSpeakerOutputFilePaths(speaker_names, output_path); 188 auto speakers_wav_writers = InitSpeakersWavWriters( 189 *speaker_output_file_paths, multiend_call.sample_rate()); 190 191 // Preload all the input audio tracks. 192 const auto& audiotrack_readers = multiend_call.audiotrack_readers(); 193 auto audiotracks = PreloadAudioTracks(audiotrack_readers); 194 195 // Write near-end and far-end output tracks. 196 for (const auto& speaking_turn : multiend_call.speaking_turns()) { 197 const std::string& active_speaker_name = speaking_turn.speaker_name; 198 const auto source_audiotrack = 199 audiotracks->at(speaking_turn.audiotrack_file_name); 200 std::vector<int16_t> scaled_audiotrack(source_audiotrack.size()); 201 ScaleSignal(source_audiotrack, speaking_turn.gain, scaled_audiotrack); 202 203 // Write active speaker's chunk to active speaker's near-end. 204 PadLeftWriteChunk( 205 scaled_audiotrack, speaking_turn.begin, 206 speakers_wav_writers->at(active_speaker_name).near_end_wav_writer()); 207 208 // Write active speaker's chunk to other participants' far-ends. 209 for (const std::string& speaker_name : speaker_names) { 210 if (speaker_name == active_speaker_name) 211 continue; 212 PadLeftWriteChunk( 213 scaled_audiotrack, speaking_turn.begin, 214 speakers_wav_writers->at(speaker_name).far_end_wav_writer()); 215 } 216 } 217 218 // Finalize all the output tracks with right padding. 219 // This is required to make all the output tracks duration equal. 220 size_t duration_samples = multiend_call.total_duration_samples(); 221 for (const std::string& speaker_name : speaker_names) { 222 PadRightWrite(speakers_wav_writers->at(speaker_name).near_end_wav_writer(), 223 duration_samples); 224 PadRightWrite(speakers_wav_writers->at(speaker_name).far_end_wav_writer(), 225 duration_samples); 226 } 227 228 return speaker_output_file_paths; 229 } 230 231 } // namespace conversational_speech 232 } // namespace test 233 } // namespace webrtc