tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

audio_encoder_cng.cc (11199B)


      1 /*
      2 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #include "modules/audio_coding/codecs/cng/audio_encoder_cng.h"
     12 
     13 #include <cstddef>
     14 #include <cstdint>
     15 #include <memory>
     16 #include <optional>
     17 #include <utility>
     18 #include <vector>
     19 
     20 #include "api/array_view.h"
     21 #include "api/audio_codecs/audio_encoder.h"
     22 #include "api/units/time_delta.h"
     23 #include "common_audio/vad/include/vad.h"
     24 #include "modules/audio_coding/codecs/cng/webrtc_cng.h"
     25 #include "rtc_base/buffer.h"
     26 #include "rtc_base/checks.h"
     27 
     28 namespace webrtc {
     29 
     30 namespace {
     31 
     // Longest packet duration, in milliseconds, that EncodeImpl() accepts when
     // VAD/CNG is in use; a longer packet trips an RTC_CHECK there.
     constexpr int kMaxFrameSizeMs = 60;
     33 
     34 class AudioEncoderCng final : public AudioEncoder {
     35 public:
     36  explicit AudioEncoderCng(AudioEncoderCngConfig&& config);
     37  ~AudioEncoderCng() override;
     38 
     39  // Not copyable or moveable.
     40  AudioEncoderCng(const AudioEncoderCng&) = delete;
     41  AudioEncoderCng(AudioEncoderCng&&) = delete;
     42  AudioEncoderCng& operator=(const AudioEncoderCng&) = delete;
     43  AudioEncoderCng& operator=(AudioEncoderCng&&) = delete;
     44 
     45  int SampleRateHz() const override;
     46  size_t NumChannels() const override;
     47  int RtpTimestampRateHz() const override;
     48  size_t Num10MsFramesInNextPacket() const override;
     49  size_t Max10MsFramesInAPacket() const override;
     50  int GetTargetBitrate() const override;
     51  EncodedInfo EncodeImpl(uint32_t rtp_timestamp,
     52                         ArrayView<const int16_t> audio,
     53                         Buffer* encoded) override;
     54  void Reset() override;
     55  bool SetFec(bool enable) override;
     56  bool SetDtx(bool enable) override;
     57  bool SetApplication(Application application) override;
     58  void SetMaxPlaybackRate(int frequency_hz) override;
     59  ArrayView<std::unique_ptr<AudioEncoder>> ReclaimContainedEncoders() override;
     60  void OnReceivedUplinkPacketLossFraction(
     61      float uplink_packet_loss_fraction) override;
     62  void OnReceivedUplinkBandwidth(int target_audio_bitrate_bps,
     63                                 std::optional<int64_t> bwe_period_ms) override;
     64  std::optional<std::pair<TimeDelta, TimeDelta>> GetFrameLengthRange()
     65      const override;
     66 
     67 private:
     68  EncodedInfo EncodePassive(size_t frames_to_encode, Buffer* encoded);
     69  EncodedInfo EncodeActive(size_t frames_to_encode, Buffer* encoded);
     70  size_t SamplesPer10msFrame() const;
     71 
     72  std::unique_ptr<AudioEncoder> speech_encoder_;
     73  const int cng_payload_type_;
     74  const int num_cng_coefficients_;
     75  const int sid_frame_interval_ms_;
     76  std::vector<int16_t> speech_buffer_;
     77  std::vector<uint32_t> rtp_timestamps_;
     78  bool last_frame_active_;
     79  std::unique_ptr<Vad> vad_;
     80  std::unique_ptr<ComfortNoiseEncoder> cng_encoder_;
     81 };
     82 
     83 AudioEncoderCng::AudioEncoderCng(AudioEncoderCngConfig&& config)
     84    : speech_encoder_((static_cast<void>([&] {
     85                         RTC_CHECK(config.IsOk()) << "Invalid configuration.";
     86                       }()),
     87                       std::move(config.speech_encoder))),
     88      cng_payload_type_(config.payload_type),
     89      num_cng_coefficients_(config.num_cng_coefficients),
     90      sid_frame_interval_ms_(config.sid_frame_interval_ms),
     91      last_frame_active_(true),
     92      vad_(config.vad ? std::unique_ptr<Vad>(config.vad)
     93                      : CreateVad(config.vad_mode)),
     94      cng_encoder_(new ComfortNoiseEncoder(SampleRateHz(),
     95                                           sid_frame_interval_ms_,
     96                                           num_cng_coefficients_)) {
     97  speech_encoder_->Reset();
     98 }
     99 
    // Defaulted destructor: the owned encoders, the VAD and the buffers are all
    // held in std::unique_ptr / std::vector members that clean up after
    // themselves.
    100 AudioEncoderCng::~AudioEncoderCng() = default;
    101 
    102 int AudioEncoderCng::SampleRateHz() const {
    103  return speech_encoder_->SampleRateHz();
    104 }
    105 
    106 size_t AudioEncoderCng::NumChannels() const {
    107  return 1;
    108 }
    109 
    110 int AudioEncoderCng::RtpTimestampRateHz() const {
    111  return speech_encoder_->RtpTimestampRateHz();
    112 }
    113 
    114 size_t AudioEncoderCng::Num10MsFramesInNextPacket() const {
    115  return speech_encoder_->Num10MsFramesInNextPacket();
    116 }
    117 
    118 size_t AudioEncoderCng::Max10MsFramesInAPacket() const {
    119  return speech_encoder_->Max10MsFramesInAPacket();
    120 }
    121 
    122 int AudioEncoderCng::GetTargetBitrate() const {
    123  return speech_encoder_->GetTargetBitrate();
    124 }
    125 
    126 AudioEncoder::EncodedInfo AudioEncoderCng::EncodeImpl(
    127    uint32_t rtp_timestamp,
    128    ArrayView<const int16_t> audio,
    129    Buffer* encoded) {
    130  const size_t samples_per_10ms_frame = SamplesPer10msFrame();
    131  RTC_CHECK_EQ(speech_buffer_.size(),
    132               rtp_timestamps_.size() * samples_per_10ms_frame);
    133  rtp_timestamps_.push_back(rtp_timestamp);
    134  RTC_DCHECK_EQ(samples_per_10ms_frame, audio.size());
    135  speech_buffer_.insert(speech_buffer_.end(), audio.cbegin(), audio.cend());
    136  const size_t frames_to_encode = speech_encoder_->Num10MsFramesInNextPacket();
    137  if (rtp_timestamps_.size() < frames_to_encode) {
    138    return EncodedInfo();
    139  }
    140  RTC_CHECK_LE(frames_to_encode * 10, kMaxFrameSizeMs)
    141      << "Frame size cannot be larger than " << kMaxFrameSizeMs
    142      << " ms when using VAD/CNG.";
    143 
    144  // Group several 10 ms blocks per VAD call. Call VAD once or twice using the
    145  // following split sizes:
    146  // 10 ms = 10 + 0 ms; 20 ms = 20 + 0 ms; 30 ms = 30 + 0 ms;
    147  // 40 ms = 20 + 20 ms; 50 ms = 30 + 20 ms; 60 ms = 30 + 30 ms.
    148  size_t blocks_in_first_vad_call =
    149      (frames_to_encode > 3 ? 3 : frames_to_encode);
    150  if (frames_to_encode == 4)
    151    blocks_in_first_vad_call = 2;
    152  RTC_CHECK_GE(frames_to_encode, blocks_in_first_vad_call);
    153  const size_t blocks_in_second_vad_call =
    154      frames_to_encode - blocks_in_first_vad_call;
    155 
    156  // Check if all of the buffer is passive speech. Start with checking the first
    157  // block.
    158  Vad::Activity activity = vad_->VoiceActivity(
    159      &speech_buffer_[0], samples_per_10ms_frame * blocks_in_first_vad_call,
    160      SampleRateHz());
    161  if (activity == Vad::kPassive && blocks_in_second_vad_call > 0) {
    162    // Only check the second block if the first was passive.
    163    activity = vad_->VoiceActivity(
    164        &speech_buffer_[samples_per_10ms_frame * blocks_in_first_vad_call],
    165        samples_per_10ms_frame * blocks_in_second_vad_call, SampleRateHz());
    166  }
    167 
    168  EncodedInfo info;
    169  switch (activity) {
    170    case Vad::kPassive: {
    171      info = EncodePassive(frames_to_encode, encoded);
    172      last_frame_active_ = false;
    173      break;
    174    }
    175    case Vad::kActive: {
    176      info = EncodeActive(frames_to_encode, encoded);
    177      last_frame_active_ = true;
    178      break;
    179    }
    180    default: {
    181      RTC_CHECK_NOTREACHED();
    182    }
    183  }
    184 
    185  speech_buffer_.erase(
    186      speech_buffer_.begin(),
    187      speech_buffer_.begin() + frames_to_encode * samples_per_10ms_frame);
    188  rtp_timestamps_.erase(rtp_timestamps_.begin(),
    189                        rtp_timestamps_.begin() + frames_to_encode);
    190  return info;
    191 }
    192 
    193 void AudioEncoderCng::Reset() {
    194  speech_encoder_->Reset();
    195  speech_buffer_.clear();
    196  rtp_timestamps_.clear();
    197  last_frame_active_ = true;
    198  vad_->Reset();
    199  cng_encoder_.reset(new ComfortNoiseEncoder(
    200      SampleRateHz(), sid_frame_interval_ms_, num_cng_coefficients_));
    201 }
    202 
    203 bool AudioEncoderCng::SetFec(bool enable) {
    204  return speech_encoder_->SetFec(enable);
    205 }
    206 
    207 bool AudioEncoderCng::SetDtx(bool enable) {
    208  return speech_encoder_->SetDtx(enable);
    209 }
    210 
    211 bool AudioEncoderCng::SetApplication(Application application) {
    212  return speech_encoder_->SetApplication(application);
    213 }
    214 
    215 void AudioEncoderCng::SetMaxPlaybackRate(int frequency_hz) {
    216  speech_encoder_->SetMaxPlaybackRate(frequency_hz);
    217 }
    218 
    219 ArrayView<std::unique_ptr<AudioEncoder>>
    220 AudioEncoderCng::ReclaimContainedEncoders() {
    221  return ArrayView<std::unique_ptr<AudioEncoder>>(&speech_encoder_, 1);
    222 }
    223 
    224 void AudioEncoderCng::OnReceivedUplinkPacketLossFraction(
    225    float uplink_packet_loss_fraction) {
    226  speech_encoder_->OnReceivedUplinkPacketLossFraction(
    227      uplink_packet_loss_fraction);
    228 }
    229 
    230 void AudioEncoderCng::OnReceivedUplinkBandwidth(
    231    int target_audio_bitrate_bps,
    232    std::optional<int64_t> bwe_period_ms) {
    233  speech_encoder_->OnReceivedUplinkBandwidth(target_audio_bitrate_bps,
    234                                             bwe_period_ms);
    235 }
    236 
    237 std::optional<std::pair<TimeDelta, TimeDelta>>
    238 AudioEncoderCng::GetFrameLengthRange() const {
    239  return speech_encoder_->GetFrameLengthRange();
    240 }
    241 
    242 AudioEncoder::EncodedInfo AudioEncoderCng::EncodePassive(
    243    size_t frames_to_encode,
    244    Buffer* encoded) {
    245  bool force_sid = last_frame_active_;
    246  bool output_produced = false;
    247  const size_t samples_per_10ms_frame = SamplesPer10msFrame();
    248  AudioEncoder::EncodedInfo info;
    249 
    250  for (size_t i = 0; i < frames_to_encode; ++i) {
    251    // It's important not to pass &info.encoded_bytes directly to
    252    // WebRtcCng_Encode(), since later loop iterations may return zero in
    253    // that value, in which case we don't want to overwrite any value from
    254    // an earlier iteration.
    255    size_t encoded_bytes_tmp = cng_encoder_->Encode(
    256        ArrayView<const int16_t>(&speech_buffer_[i * samples_per_10ms_frame],
    257                                 samples_per_10ms_frame),
    258        force_sid, encoded);
    259 
    260    if (encoded_bytes_tmp > 0) {
    261      RTC_CHECK(!output_produced);
    262      info.encoded_bytes = encoded_bytes_tmp;
    263      output_produced = true;
    264      force_sid = false;
    265    }
    266  }
    267 
    268  info.encoded_timestamp = rtp_timestamps_.front();
    269  info.payload_type = cng_payload_type_;
    270  info.send_even_if_empty = true;
    271  info.speech = false;
    272  return info;
    273 }
    274 
    275 AudioEncoder::EncodedInfo AudioEncoderCng::EncodeActive(size_t frames_to_encode,
    276                                                        Buffer* encoded) {
    277  const size_t samples_per_10ms_frame = SamplesPer10msFrame();
    278  AudioEncoder::EncodedInfo info;
    279  for (size_t i = 0; i < frames_to_encode; ++i) {
    280    info = speech_encoder_->Encode(
    281        rtp_timestamps_.front(),
    282        ArrayView<const int16_t>(&speech_buffer_[i * samples_per_10ms_frame],
    283                                 samples_per_10ms_frame),
    284        encoded);
    285    if (i + 1 == frames_to_encode) {
    286      RTC_CHECK_GT(info.encoded_bytes, 0) << "Encoder didn't deliver data.";
    287    } else {
    288      RTC_CHECK_EQ(info.encoded_bytes, 0)
    289          << "Encoder delivered data too early.";
    290    }
    291  }
    292  return info;
    293 }
    294 
    295 size_t AudioEncoderCng::SamplesPer10msFrame() const {
    296  return CheckedDivExact(10 * SampleRateHz(), 1000);
    297 }
    298 
    299 }  // namespace
    300 
    // Out-of-line, compiler-generated default constructor, move constructor and
    // destructor for AudioEncoderCngConfig.
    // NOTE(review): the struct's fields and their default values are declared in
    // the header and are not visible here.
    301 AudioEncoderCngConfig::AudioEncoderCngConfig() = default;
    302 AudioEncoderCngConfig::AudioEncoderCngConfig(AudioEncoderCngConfig&&) = default;
    303 AudioEncoderCngConfig::~AudioEncoderCngConfig() = default;
    304 
    305 bool AudioEncoderCngConfig::IsOk() const {
    306  if (num_channels != 1)
    307    return false;
    308  if (!speech_encoder)
    309    return false;
    310  if (num_channels != speech_encoder->NumChannels())
    311    return false;
    312  if (sid_frame_interval_ms <
    313      static_cast<int>(speech_encoder->Max10MsFramesInAPacket() * 10))
    314    return false;
    315  if (num_cng_coefficients > WEBRTC_CNG_MAX_LPC_ORDER ||
    316      num_cng_coefficients <= 0)
    317    return false;
    318  return true;
    319 }
    320 
    321 std::unique_ptr<AudioEncoder> CreateComfortNoiseEncoder(
    322    AudioEncoderCngConfig&& config) {
    323  return std::make_unique<AudioEncoderCng>(std::move(config));
    324 }
    325 
    326 }  // namespace webrtc