tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

audio_frame.h (8899B)


      1 /*
      2 *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #ifndef API_AUDIO_AUDIO_FRAME_H_
     12 #define API_AUDIO_AUDIO_FRAME_H_
     13 
     14 #include <stddef.h>
     15 #include <stdint.h>
     16 
     17 #include <array>
     18 #include <optional>
     19 
     20 #include "api/array_view.h"
     21 #include "api/audio/audio_view.h"
     22 #include "api/audio/channel_layout.h"
     23 #include "api/rtp_packet_infos.h"
     24 #include "rtc_base/checks.h"
     25 
     26 namespace webrtc {
     27 
// Default webrtc buffer size in milliseconds. One audio "frame" in this API
// covers 10 ms of samples regardless of sample rate.
constexpr size_t kDefaultAudioBufferLengthMs = 10u;

// Default total number of audio buffers per second based on the default length.
// 1000 ms / 10 ms per buffer == 100 buffers per second.
constexpr size_t kDefaultAudioBuffersPerSec =
    1000u / kDefaultAudioBufferLengthMs;
     34 
     35 // Returns the number of samples a buffer needs to hold for ~10ms of a single
     36 // audio channel at a given sample rate.
     37 // See also `AudioProcessing::GetFrameSize()`.
     38 inline size_t SampleRateToDefaultChannelSize(size_t sample_rate) {
     39  // Basic sanity check. 192kHz is the highest supported input sample rate.
     40  RTC_DCHECK_LE(sample_rate, 192000);
     41  return sample_rate / kDefaultAudioBuffersPerSec;
     42 }
     43 /////////////////////////////////////////////////////////////////////
     44 
/* This class holds up to 120 ms of super-wideband (32 kHz) stereo audio. It
 * allows for adding and subtracting frames while keeping track of the resulting
 * states.
 *
 * Notes
 * - This is a de-facto api, not designed for external use. The AudioFrame class
 *   is in need of overhaul or even replacement, and anyone depending on it
 *   should be prepared for that.
 * - The total number of samples is samples_per_channel_ * num_channels_.
 * - Stereo data is interleaved starting with the left channel.
 */
class AudioFrame {
 public:
  // Using constexpr here causes linker errors unless the variable also has an
  // out-of-class definition, which is impractical in this header-only class.
  // (This makes no sense because it compiles as an enum value, which we most
  // certainly cannot take the address of, just fine.) C++17 introduces inline
  // variables which should allow us to switch to constexpr and keep this a
  // header-only class.
  enum : size_t {
    // All of the following combinations produce the same worst case of 7680
    // samples:
    // Stereo, 32 kHz, 120 ms (2 * 32 * 120)
    // Stereo, 192 kHz, 20 ms (2 * 192 * 20)
    // 8 channels (kMaxConcurrentChannels), 48 kHz, 20 ms (8 * 48 * 20).
    // 24 channels (kMaxNumberOfAudioChannels), 32 kHz, 10 ms (24 * 32 * 10)
    // At 48 kHz, 10 ms buffers, the maximum number of channels AudioFrame can
    // hold, is 16. (16 * 48 * 10).
    kMaxDataSizeSamples = 7680,
    kMaxDataSizeBytes = kMaxDataSizeSamples * sizeof(int16_t),
  };

  // Result of voice activity detection for this frame.
  enum VADActivity { kVadActive = 0, kVadPassive = 1, kVadUnknown = 2 };
  // Origin of the decoded audio in this frame (normal decode, packet loss
  // concealment, comfort noise, etc.).
  // NOTE(review): enumerator values are explicit and intentionally out of
  // declaration order (kCodecPLC = 5 precedes kUndefined = 4) — presumably to
  // keep existing numeric values stable; confirm before renumbering.
  enum SpeechType {
    kNormalSpeech = 0,
    kPLC = 1,
    kCNG = 2,
    kPLCCNG = 3,
    kCodecPLC = 5,
    kUndefined = 4
  };

  // Constructs an empty, muted frame with default (zero) dimensions.
  AudioFrame();

  // Construct an audio frame with frame length properties and channel
  // information. `samples_per_channel()` will be initialized to a 10ms buffer
  // size and if `layout` is not specified (default value of
  // CHANNEL_LAYOUT_UNSUPPORTED is set), then the channel layout is derived
  // (guessed) from `num_channels`.
  AudioFrame(int sample_rate_hz,
             size_t num_channels,
             ChannelLayout layout = CHANNEL_LAYOUT_UNSUPPORTED);

  // Not copyable; use CopyFrom() for an explicit deep copy.
  AudioFrame(const AudioFrame&) = delete;
  AudioFrame& operator=(const AudioFrame&) = delete;

  // Resets all members to their default state.
  void Reset();
  // Same as Reset(), but leaves mute state unchanged. Muting a frame requires
  // the buffer to be zeroed on the next call to mutable_data(). Callers
  // intending to write to the buffer immediately after Reset() can instead use
  // ResetWithoutMuting() to skip this wasteful zeroing.
  void ResetWithoutMuting();

  // Replaces the frame's contents and properties in one call. `data` is
  // expected to hold `samples_per_channel * num_channels` interleaved samples.
  // TODO: b/335805780 - Accept InterleavedView.
  void UpdateFrame(uint32_t timestamp,
                   const int16_t* data,
                   size_t samples_per_channel,
                   int sample_rate_hz,
                   SpeechType speech_type,
                   VADActivity vad_activity,
                   size_t num_channels = 1);

  // Deep-copies the audio data and all frame properties from `src`.
  void CopyFrom(const AudioFrame& src);

  // data() returns a zeroed static buffer if the frame is muted.
  // TODO: b/335805780 - Return InterleavedView.
  const int16_t* data() const;

  // Returns a read-only view of all the valid samples held by the AudioFrame.
  // For a muted AudioFrame, the samples will all be 0.
  InterleavedView<const int16_t> data_view() const;

  // mutable_data() always returns a non-static buffer; the first call to
  // mutable_data() zeros the buffer and marks the frame as unmuted.
  // TODO: b/335805780 - Return an InterleavedView.
  int16_t* mutable_data();

  // Grants write access to the audio buffer. The size of the returned writable
  // view is determined by the `samples_per_channel` and `num_channels`
  // dimensions which the function checks for correctness and stores in the
  // internal member variables; `samples_per_channel()` and `num_channels()`
  // respectively.
  // If the state is currently muted, the returned view will be zeroed out.
  InterleavedView<int16_t> mutable_data(size_t samples_per_channel,
                                        size_t num_channels);

  // Prefer to mute frames using AudioFrameOperations::Mute.
  void Mute();
  // Frame is muted by default.
  bool muted() const;

  // Total capacity of the backing buffer, in 16-bit samples
  // (kMaxDataSizeSamples).
  size_t max_16bit_samples() const { return data_.size(); }
  // Number of valid samples per channel currently held by the frame.
  size_t samples_per_channel() const { return samples_per_channel_; }
  // Number of interleaved channels currently held by the frame.
  size_t num_channels() const { return num_channels_; }

  ChannelLayout channel_layout() const { return channel_layout_; }
  // Sets the `channel_layout` property as well as `num_channels`.
  void SetLayoutAndNumChannels(ChannelLayout layout, size_t num_channels);

  int sample_rate_hz() const { return sample_rate_hz_; }

  // Sets the absolute capture timestamp; see the private member below for the
  // clock requirements.
  void set_absolute_capture_timestamp_ms(
      int64_t absolute_capture_time_stamp_ms) {
    absolute_capture_timestamp_ms_ = absolute_capture_time_stamp_ms;
  }

  // Returns the capture timestamp, or nullopt if it was never set (e.g. for
  // frames received over the network rather than captured locally).
  std::optional<int64_t> absolute_capture_timestamp_ms() const {
    return absolute_capture_timestamp_ms_;
  }

  // Sets the sample_rate_hz and samples_per_channel properties based on a
  // given sample rate and calculates a default 10ms samples_per_channel value.
  void SetSampleRateAndChannelSize(int sample_rate);

  // NOTE(review): the members below are public and directly mutable by
  // callers; invariants are not enforced here.
  // RTP timestamp of the first sample in the AudioFrame.
  uint32_t timestamp_ = 0;
  // Time since the first frame in milliseconds.
  // -1 represents an uninitialized value.
  int64_t elapsed_time_ms_ = -1;
  // NTP time of the estimated capture time in local timebase in milliseconds.
  // -1 represents an uninitialized value.
  int64_t ntp_time_ms_ = -1;
  size_t samples_per_channel_ = 0;
  int sample_rate_hz_ = 0;
  size_t num_channels_ = 0;
  SpeechType speech_type_ = kUndefined;
  VADActivity vad_activity_ = kVadUnknown;

  // Information about packets used to assemble this audio frame. This is needed
  // by `SourceTracker` when the frame is delivered to the RTCRtpReceiver's
  // MediaStreamTrack, in order to implement getContributingSources(). See:
  // https://w3c.github.io/webrtc-pc/#dom-rtcrtpreceiver-getcontributingsources
  //
  // TODO(bugs.webrtc.org/10757):
  //   Note that this information might not be fully accurate since we currently
  //   don't have a proper way to track it across the audio sync buffer. The
  //   sync buffer is the small sample-holding buffer located after the audio
  //   decoder and before where samples are assembled into output frames.
  //
  // `RtpPacketInfos` may also be empty if the audio samples did not come from
  // RTP packets. E.g. if the audio were locally generated by packet loss
  // concealment, comfort noise generation, etc.
  RtpPacketInfos packet_infos_;

 private:
  // A permanently zeroed out buffer to represent muted frames. This is a
  // header-only class, so the only way to avoid creating a separate zeroed
  // buffer per translation unit is to wrap a static in an inline function.
  static ArrayView<const int16_t> zeroed_data();

  // Backing storage for interleaved 16-bit PCM samples.
  std::array<int16_t, kMaxDataSizeSamples> data_;
  // Frames start out muted; see muted() / Mute() above.
  bool muted_ = true;
  ChannelLayout channel_layout_ = CHANNEL_LAYOUT_NONE;

  // Absolute capture timestamp when this audio frame was originally captured.
  // This is only valid for audio frames captured on this machine. The absolute
  // capture timestamp of a received frame is found in `packet_infos_`.
  // This timestamp MUST be based on the same clock as TimeMillis().
  std::optional<int64_t> absolute_capture_timestamp_ms_;
};
    214 
    215 }  // namespace webrtc
    216 
    217 #endif  // API_AUDIO_AUDIO_FRAME_H_