[ tor-browser ].git.dasho

simulator.cc (9020B)
      1 /*
      2 *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #include "modules/audio_processing/test/conversational_speech/simulator.h"
     12 
     13 #include <algorithm>
     14 #include <cstddef>
     15 #include <cstdint>
     16 #include <map>
     17 #include <memory>
     18 #include <set>
     19 #include <string>
     20 #include <tuple>
     21 #include <utility>
     22 #include <vector>
     23 
     24 #include "absl/strings/string_view.h"
     25 #include "api/array_view.h"
     26 #include "common_audio/include/audio_util.h"
     27 #include "common_audio/wav_file.h"
     28 #include "modules/audio_processing/test/conversational_speech/multiend_call.h"
     29 #include "modules/audio_processing/test/conversational_speech/wavreader_interface.h"
     30 #include "rtc_base/checks.h"
     31 #include "rtc_base/logging.h"
     32 #include "rtc_base/numerics/safe_conversions.h"
     33 #include "test/testsupport/file_utils.h"
     34 
     35 namespace webrtc {
     36 namespace test {
     37 namespace {
     38 
     39 using conversational_speech::MultiEndCall;
     40 using conversational_speech::SpeakerOutputFilePaths;
     41 using conversational_speech::WavReaderInterface;
     42 
     43 // Combines output path and speaker names to define the output file paths for
     44 // the near-end and far=end audio tracks.
     45 std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>>
     46 InitSpeakerOutputFilePaths(const std::set<std::string>& speaker_names,
     47                           absl::string_view output_path) {
     48  // Create map.
     49  auto speaker_output_file_paths_map =
     50      std::make_unique<std::map<std::string, SpeakerOutputFilePaths>>();
     51 
     52  // Add near-end and far-end output paths into the map.
     53  for (const auto& speaker_name : speaker_names) {
     54    const std::string near_end_path =
     55        test::JoinFilename(output_path, "s_" + speaker_name + "-near_end.wav");
     56    RTC_LOG(LS_VERBOSE) << "The near-end audio track will be created in "
     57                        << near_end_path << ".";
     58 
     59    const std::string far_end_path =
     60        test::JoinFilename(output_path, "s_" + speaker_name + "-far_end.wav");
     61    RTC_LOG(LS_VERBOSE) << "The far-end audio track will be created in "
     62                        << far_end_path << ".";
     63 
     64    // Add to map.
     65    speaker_output_file_paths_map->emplace(
     66        std::piecewise_construct, std::forward_as_tuple(speaker_name),
     67        std::forward_as_tuple(near_end_path, far_end_path));
     68  }
     69 
     70  return speaker_output_file_paths_map;
     71 }
     72 
     73 // Class that provides one WavWriter for the near-end and one for the far-end
     74 // output track of a speaker.
     75 class SpeakerWavWriters {
     76 public:
     77  SpeakerWavWriters(const SpeakerOutputFilePaths& output_file_paths,
     78                    int sample_rate)
     79      : near_end_wav_writer_(output_file_paths.near_end, sample_rate, 1u),
     80        far_end_wav_writer_(output_file_paths.far_end, sample_rate, 1u) {}
     81  WavWriter* near_end_wav_writer() { return &near_end_wav_writer_; }
     82  WavWriter* far_end_wav_writer() { return &far_end_wav_writer_; }
     83 
     84 private:
     85  WavWriter near_end_wav_writer_;
     86  WavWriter far_end_wav_writer_;
     87 };
     88 
     89 // Initializes one WavWriter instance for each speaker and both the near-end and
     90 // far-end output tracks.
     91 std::unique_ptr<std::map<std::string, SpeakerWavWriters>>
     92 InitSpeakersWavWriters(const std::map<std::string, SpeakerOutputFilePaths>&
     93                           speaker_output_file_paths,
     94                       int sample_rate) {
     95  // Create map.
     96  auto speaker_wav_writers_map =
     97      std::make_unique<std::map<std::string, SpeakerWavWriters>>();
     98 
     99  // Add SpeakerWavWriters instance into the map.
    100  for (auto it = speaker_output_file_paths.begin();
    101       it != speaker_output_file_paths.end(); ++it) {
    102    speaker_wav_writers_map->emplace(
    103        std::piecewise_construct, std::forward_as_tuple(it->first),
    104        std::forward_as_tuple(it->second, sample_rate));
    105  }
    106 
    107  return speaker_wav_writers_map;
    108 }
    109 
    110 // Reads all the samples for each audio track.
    111 std::unique_ptr<std::map<std::string, std::vector<int16_t>>> PreloadAudioTracks(
    112    const std::map<std::string, std::unique_ptr<WavReaderInterface>>&
    113        audiotrack_readers) {
    114  // Create map.
    115  auto audiotracks_map =
    116      std::make_unique<std::map<std::string, std::vector<int16_t>>>();
    117 
    118  // Add audio track vectors.
    119  for (auto it = audiotrack_readers.begin(); it != audiotrack_readers.end();
    120       ++it) {
    121    // Add map entry.
    122    audiotracks_map->emplace(std::piecewise_construct,
    123                             std::forward_as_tuple(it->first),
    124                             std::forward_as_tuple(it->second->NumSamples()));
    125 
    126    // Read samples.
    127    it->second->ReadInt16Samples(audiotracks_map->at(it->first));
    128  }
    129 
    130  return audiotracks_map;
    131 }
    132 
    133 // Writes all the values in `source_samples` via `wav_writer`. If the number of
    134 // previously written samples in `wav_writer` is less than `interval_begin`, it
    135 // adds zeros as left padding. The padding corresponds to intervals during which
    136 // a speaker is not active.
    137 void PadLeftWriteChunk(ArrayView<const int16_t> source_samples,
    138                       size_t interval_begin,
    139                       WavWriter* wav_writer) {
    140  // Add left padding.
    141  RTC_CHECK(wav_writer);
    142  RTC_CHECK_GE(interval_begin, wav_writer->num_samples());
    143  size_t padding_size = interval_begin - wav_writer->num_samples();
    144  if (padding_size != 0) {
    145    const std::vector<int16_t> padding(padding_size, 0);
    146    wav_writer->WriteSamples(padding.data(), padding_size);
    147  }
    148 
    149  // Write source samples.
    150  wav_writer->WriteSamples(source_samples.data(), source_samples.size());
    151 }
    152 
    153 // Appends zeros via `wav_writer`. The number of zeros is always non-negative
    154 // and equal to the difference between the previously written samples and
    155 // `pad_samples`.
    156 void PadRightWrite(WavWriter* wav_writer, size_t pad_samples) {
    157  RTC_CHECK(wav_writer);
    158  RTC_CHECK_GE(pad_samples, wav_writer->num_samples());
    159  size_t padding_size = pad_samples - wav_writer->num_samples();
    160  if (padding_size != 0) {
    161    const std::vector<int16_t> padding(padding_size, 0);
    162    wav_writer->WriteSamples(padding.data(), padding_size);
    163  }
    164 }
    165 
    166 void ScaleSignal(ArrayView<const int16_t> source_samples,
    167                 int gain,
    168                 ArrayView<int16_t> output_samples) {
    169  const float gain_linear = DbToRatio(gain);
    170  RTC_DCHECK_EQ(source_samples.size(), output_samples.size());
    171  std::transform(source_samples.begin(), source_samples.end(),
    172                 output_samples.begin(), [gain_linear](int16_t x) -> int16_t {
    173                   return saturated_cast<int16_t>(x * gain_linear);
    174                 });
    175 }
    176 
    177 }  // namespace
    178 
    179 namespace conversational_speech {
    180 
    181 std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>> Simulate(
    182    const MultiEndCall& multiend_call,
    183    absl::string_view output_path) {
    184  // Set output file paths and initialize wav writers.
    185  const auto& speaker_names = multiend_call.speaker_names();
    186  auto speaker_output_file_paths =
    187      InitSpeakerOutputFilePaths(speaker_names, output_path);
    188  auto speakers_wav_writers = InitSpeakersWavWriters(
    189      *speaker_output_file_paths, multiend_call.sample_rate());
    190 
    191  // Preload all the input audio tracks.
    192  const auto& audiotrack_readers = multiend_call.audiotrack_readers();
    193  auto audiotracks = PreloadAudioTracks(audiotrack_readers);
    194 
    195  // Write near-end and far-end output tracks.
    196  for (const auto& speaking_turn : multiend_call.speaking_turns()) {
    197    const std::string& active_speaker_name = speaking_turn.speaker_name;
    198    const auto source_audiotrack =
    199        audiotracks->at(speaking_turn.audiotrack_file_name);
    200    std::vector<int16_t> scaled_audiotrack(source_audiotrack.size());
    201    ScaleSignal(source_audiotrack, speaking_turn.gain, scaled_audiotrack);
    202 
    203    // Write active speaker's chunk to active speaker's near-end.
    204    PadLeftWriteChunk(
    205        scaled_audiotrack, speaking_turn.begin,
    206        speakers_wav_writers->at(active_speaker_name).near_end_wav_writer());
    207 
    208    // Write active speaker's chunk to other participants' far-ends.
    209    for (const std::string& speaker_name : speaker_names) {
    210      if (speaker_name == active_speaker_name)
    211        continue;
    212      PadLeftWriteChunk(
    213          scaled_audiotrack, speaking_turn.begin,
    214          speakers_wav_writers->at(speaker_name).far_end_wav_writer());
    215    }
    216  }
    217 
    218  // Finalize all the output tracks with right padding.
    219  // This is required to make all the output tracks duration equal.
    220  size_t duration_samples = multiend_call.total_duration_samples();
    221  for (const std::string& speaker_name : speaker_names) {
    222    PadRightWrite(speakers_wav_writers->at(speaker_name).near_end_wav_writer(),
    223                  duration_samples);
    224    PadRightWrite(speakers_wav_writers->at(speaker_name).far_end_wav_writer(),
    225                  duration_samples);
    226  }
    227 
    228  return speaker_output_file_paths;
    229 }
    230 
    231 }  // namespace conversational_speech
    232 }  // namespace test
    233 }  // namespace webrtc
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE