tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

multiend_call.cc (7382B)


      1 /*
      2 *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #include "modules/audio_processing/test/conversational_speech/multiend_call.h"
     12 
     13 #include <algorithm>
     14 #include <cstddef>
     15 #include <iterator>
     16 #include <map>
     17 #include <memory>
     18 #include <string>
     19 #include <tuple>
     20 #include <utility>
     21 #include <vector>
     22 
     23 #include "absl/strings/string_view.h"
     24 #include "api/array_view.h"
     25 #include "modules/audio_processing/test/conversational_speech/timing.h"
     26 #include "modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h"
     27 #include "modules/audio_processing/test/conversational_speech/wavreader_interface.h"
     28 #include "rtc_base/checks.h"
     29 #include "rtc_base/logging.h"
     30 #include "test/testsupport/file_utils.h"
     31 
     32 namespace webrtc {
     33 namespace test {
     34 namespace conversational_speech {
     35 
     36 MultiEndCall::MultiEndCall(
     37    ArrayView<const Turn> timing,
     38    absl::string_view audiotracks_path,
     39    std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory)
     40    : timing_(timing),
     41      audiotracks_path_(audiotracks_path),
     42      wavreader_abstract_factory_(std::move(wavreader_abstract_factory)),
     43      valid_(false) {
     44  FindSpeakerNames();
     45  if (CreateAudioTrackReaders())
     46    valid_ = CheckTiming();
     47 }
     48 
     49 MultiEndCall::~MultiEndCall() = default;
     50 
     51 void MultiEndCall::FindSpeakerNames() {
     52  RTC_DCHECK(speaker_names_.empty());
     53  for (const Turn& turn : timing_) {
     54    speaker_names_.emplace(turn.speaker_name);
     55  }
     56 }
     57 
     58 bool MultiEndCall::CreateAudioTrackReaders() {
     59  RTC_DCHECK(audiotrack_readers_.empty());
     60  sample_rate_hz_ = 0;  // Sample rate will be set when reading the first track.
     61  for (const Turn& turn : timing_) {
     62    auto it = audiotrack_readers_.find(turn.audiotrack_file_name);
     63    if (it != audiotrack_readers_.end())
     64      continue;
     65 
     66    const std::string audiotrack_file_path =
     67        test::JoinFilename(audiotracks_path_, turn.audiotrack_file_name);
     68 
     69    // Map the audiotrack file name to a new instance of WavReaderInterface.
     70    std::unique_ptr<WavReaderInterface> wavreader =
     71        wavreader_abstract_factory_->Create(
     72            test::JoinFilename(audiotracks_path_, turn.audiotrack_file_name));
     73 
     74    if (sample_rate_hz_ == 0) {
     75      sample_rate_hz_ = wavreader->SampleRate();
     76    } else if (sample_rate_hz_ != wavreader->SampleRate()) {
     77      RTC_LOG(LS_ERROR)
     78          << "All the audio tracks should have the same sample rate.";
     79      return false;
     80    }
     81 
     82    if (wavreader->NumChannels() != 1) {
     83      RTC_LOG(LS_ERROR) << "Only mono audio tracks supported.";
     84      return false;
     85    }
     86 
     87    audiotrack_readers_.emplace(turn.audiotrack_file_name,
     88                                std::move(wavreader));
     89  }
     90 
     91  return true;
     92 }
     93 
     94 bool MultiEndCall::CheckTiming() {
     95  struct Interval {
     96    size_t begin;
     97    size_t end;
     98  };
     99  size_t number_of_turns = timing_.size();
    100  auto millisecond_to_samples = [](int ms, int sr) -> int {
    101    // Truncation may happen if the sampling rate is not an integer multiple
    102    // of 1000 (e.g., 44100).
    103    return ms * sr / 1000;
    104  };
    105  auto in_interval = [](size_t value, const Interval& interval) {
    106    return interval.begin <= value && value < interval.end;
    107  };
    108  total_duration_samples_ = 0;
    109  speaking_turns_.clear();
    110 
    111  // Begin and end timestamps for the last two turns (unit: number of samples).
    112  Interval second_last_turn = {.begin = 0, .end = 0};
    113  Interval last_turn = {.begin = 0, .end = 0};
    114 
    115  // Initialize map to store speaking turn indices of each speaker (used to
    116  // detect self cross-talk).
    117  std::map<std::string, std::vector<size_t>> speaking_turn_indices;
    118  for (const std::string& speaker_name : speaker_names_) {
    119    speaking_turn_indices.emplace(std::piecewise_construct,
    120                                  std::forward_as_tuple(speaker_name),
    121                                  std::forward_as_tuple());
    122  }
    123 
    124  // Parse turns.
    125  for (size_t turn_index = 0; turn_index < number_of_turns; ++turn_index) {
    126    const Turn& turn = timing_[turn_index];
    127    auto it = audiotrack_readers_.find(turn.audiotrack_file_name);
    128    RTC_CHECK(it != audiotrack_readers_.end())
    129        << "Audio track reader not created";
    130 
    131    // Begin and end timestamps for the current turn.
    132    int offset_samples =
    133        millisecond_to_samples(turn.offset, it->second->SampleRate());
    134    std::size_t begin_timestamp = last_turn.end + offset_samples;
    135    std::size_t end_timestamp = begin_timestamp + it->second->NumSamples();
    136    RTC_LOG(LS_INFO) << "turn #" << turn_index << " " << begin_timestamp << "-"
    137                     << end_timestamp << " ms";
    138 
    139    // The order is invalid if the offset is negative and its absolute value is
    140    // larger then the duration of the previous turn.
    141    if (offset_samples < 0 &&
    142        -offset_samples > static_cast<int>(last_turn.end - last_turn.begin)) {
    143      RTC_LOG(LS_ERROR) << "invalid order";
    144      return false;
    145    }
    146 
    147    // Cross-talk with 3 or more speakers occurs when the beginning of the
    148    // current interval falls in the last two turns.
    149    if (turn_index > 1 && in_interval(begin_timestamp, last_turn) &&
    150        in_interval(begin_timestamp, second_last_turn)) {
    151      RTC_LOG(LS_ERROR) << "cross-talk with 3+ speakers";
    152      return false;
    153    }
    154 
    155    // Append turn.
    156    speaking_turns_.emplace_back(turn.speaker_name, turn.audiotrack_file_name,
    157                                 begin_timestamp, end_timestamp, turn.gain);
    158 
    159    // Save speaking turn index for self cross-talk detection.
    160    RTC_DCHECK_EQ(speaking_turns_.size(), turn_index + 1);
    161    speaking_turn_indices[turn.speaker_name].push_back(turn_index);
    162 
    163    // Update total duration of the consversational speech.
    164    if (total_duration_samples_ < end_timestamp)
    165      total_duration_samples_ = end_timestamp;
    166 
    167    // Update and continue with next turn.
    168    second_last_turn = last_turn;
    169    last_turn.begin = begin_timestamp;
    170    last_turn.end = end_timestamp;
    171  }
    172 
    173  // Detect self cross-talk.
    174  for (const std::string& speaker_name : speaker_names_) {
    175    RTC_LOG(LS_INFO) << "checking self cross-talk for <" << speaker_name << ">";
    176 
    177    // Copy all turns for this speaker to new vector.
    178    std::vector<SpeakingTurn> speaking_turns_for_name;
    179    std::copy_if(speaking_turns_.begin(), speaking_turns_.end(),
    180                 std::back_inserter(speaking_turns_for_name),
    181                 [&speaker_name](const SpeakingTurn& st) {
    182                   return st.speaker_name == speaker_name;
    183                 });
    184 
    185    // Check for overlap between adjacent elements.
    186    // This is a sufficient condition for self cross-talk since the intervals
    187    // are sorted by begin timestamp.
    188    auto overlap = std::adjacent_find(
    189        speaking_turns_for_name.begin(), speaking_turns_for_name.end(),
    190        [](const SpeakingTurn& a, const SpeakingTurn& b) {
    191          return a.end > b.begin;
    192        });
    193 
    194    if (overlap != speaking_turns_for_name.end()) {
    195      RTC_LOG(LS_ERROR) << "Self cross-talk detected";
    196      return false;
    197    }
    198  }
    199 
    200  return true;
    201 }
    202 
    203 }  // namespace conversational_speech
    204 }  // namespace test
    205 }  // namespace webrtc