audio_transport_impl.cc (11819B)
1 /* 2 * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "audio/audio_transport_impl.h" 12 13 #include <algorithm> 14 #include <cstddef> 15 #include <cstdint> 16 #include <memory> 17 #include <optional> 18 #include <utility> 19 #include <vector> 20 21 #include "api/audio/audio_frame.h" 22 #include "api/audio/audio_mixer.h" 23 #include "api/audio/audio_view.h" 24 #include "audio/remix_resample.h" 25 #include "audio/utility/audio_frame_operations.h" 26 #include "call/audio_sender.h" 27 #include "common_audio/resampler/include/push_resampler.h" 28 #include "modules/async_audio_processing/async_audio_processing.h" 29 #include "modules/audio_processing/include/audio_frame_proxies.h" 30 #include "rtc_base/checks.h" 31 #include "rtc_base/synchronization/mutex.h" 32 #include "rtc_base/trace_event.h" 33 34 namespace webrtc { 35 36 namespace { 37 38 // We want to process at the lowest sample rate and channel count possible 39 // without losing information. Choose the lowest native rate at least equal to 40 // the minimum of input and codec rates, choose lowest channel count, and 41 // configure the audio frame. 42 void InitializeCaptureFrame(int input_sample_rate, 43 int send_sample_rate_hz, 44 size_t input_num_channels, 45 size_t send_num_channels, 46 AudioFrame* audio_frame) { 47 RTC_DCHECK(audio_frame); 48 int min_processing_rate_hz = std::min(input_sample_rate, send_sample_rate_hz); 49 for (int native_rate_hz : AudioProcessing::kNativeSampleRatesHz) { 50 audio_frame->SetSampleRateAndChannelSize(native_rate_hz); 51 if (native_rate_hz >= min_processing_rate_hz) { 52 break; 53 } 54 } 55 audio_frame->num_channels_ = std::min(input_num_channels, send_num_channels); 56 } 57 58 void ProcessCaptureFrame(uint32_t delay_ms, 59 bool key_pressed, 60 bool swap_stereo_channels, 61 AudioProcessing* audio_processing, 62 AudioFrame* audio_frame) { 63 RTC_DCHECK(audio_frame); 64 if (audio_processing) { 65 audio_processing->set_stream_delay_ms(delay_ms); 66 audio_processing->set_stream_key_pressed(key_pressed); 67 int error = ProcessAudioFrame(audio_processing, audio_frame); 68 69 RTC_DCHECK_EQ(0, error) << "ProcessStream() error: " << error; 70 } 71 72 if (swap_stereo_channels) { 73 AudioFrameOperations::SwapStereoChannels(audio_frame); 74 } 75 } 76 77 // Resample audio in `frame` to given sample rate preserving the 78 // channel count and place the result in `destination`. 79 void Resample(const AudioFrame& frame, 80 const int destination_sample_rate, 81 PushResampler<int16_t>* resampler, 82 InterleavedView<int16_t> destination) { 83 TRACE_EVENT2("webrtc", "Resample", "frame sample rate", frame.sample_rate_hz_, 84 "destination_sample_rate", destination_sample_rate); 85 const size_t target_number_of_samples_per_channel = 86 SampleRateToDefaultChannelSize(destination_sample_rate); 87 RTC_DCHECK_EQ(NumChannels(destination), frame.num_channels_); 88 RTC_DCHECK_EQ(SamplesPerChannel(destination), 89 target_number_of_samples_per_channel); 90 RTC_CHECK_EQ(destination.data().size(), 91 frame.num_channels_ * target_number_of_samples_per_channel); 92 93 // TODO(yujo): Add special case handling of muted frames. 94 resampler->Resample(frame.data_view(), destination); 95 } 96 } // namespace 97 98 AudioTransportImpl::AudioTransportImpl( 99 AudioMixer* mixer, 100 AudioProcessing* audio_processing, 101 AsyncAudioProcessing::Factory* async_audio_processing_factory) 102 : audio_processing_(audio_processing), 103 async_audio_processing_( 104 async_audio_processing_factory 105 ? async_audio_processing_factory->CreateAsyncAudioProcessing( 106 [this](std::unique_ptr<AudioFrame> frame) { 107 this->SendProcessedData(std::move(frame)); 108 }) 109 : nullptr), 110 mixer_(mixer) { 111 RTC_DCHECK(mixer); 112 } 113 114 AudioTransportImpl::~AudioTransportImpl() {} 115 116 int32_t AudioTransportImpl::RecordedDataIsAvailable( 117 const void* audio_data, 118 size_t number_of_frames, 119 size_t bytes_per_sample, 120 size_t number_of_channels, 121 uint32_t sample_rate, 122 uint32_t audio_delay_milliseconds, 123 int32_t clock_drift, 124 uint32_t volume, 125 bool key_pressed, 126 uint32_t& new_mic_volume) { // NOLINT: to avoid changing APIs 127 return RecordedDataIsAvailable( 128 audio_data, number_of_frames, bytes_per_sample, number_of_channels, 129 sample_rate, audio_delay_milliseconds, clock_drift, volume, key_pressed, 130 new_mic_volume, /*estimated_capture_time_ns=*/std::nullopt); 131 } 132 133 // Not used in Chromium. Process captured audio and distribute to all sending 134 // streams, and try to do this at the lowest possible sample rate. 135 int32_t AudioTransportImpl::RecordedDataIsAvailable( 136 const void* audio_data, 137 size_t number_of_frames, 138 size_t bytes_per_sample, 139 size_t number_of_channels, 140 uint32_t sample_rate, 141 uint32_t audio_delay_milliseconds, 142 int32_t /*clock_drift*/, 143 uint32_t /*volume*/, 144 bool key_pressed, 145 uint32_t& /*new_mic_volume*/, 146 std::optional<int64_t> 147 estimated_capture_time_ns) { // NOLINT: to avoid changing APIs 148 RTC_DCHECK(audio_data); 149 RTC_DCHECK_GE(number_of_channels, 1); 150 RTC_DCHECK_LE(number_of_channels, 2); 151 RTC_DCHECK_EQ(2 * number_of_channels, bytes_per_sample); 152 RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz); 153 // 100 = 1 second / data duration (10 ms). 154 RTC_DCHECK_EQ(number_of_frames * 100, sample_rate); 155 RTC_DCHECK_LE(bytes_per_sample * number_of_frames * number_of_channels, 156 AudioFrame::kMaxDataSizeBytes); 157 158 InterleavedView<const int16_t> source(static_cast<const int16_t*>(audio_data), 159 number_of_frames, number_of_channels); 160 161 int send_sample_rate_hz = 0; 162 size_t send_num_channels = 0; 163 bool swap_stereo_channels = false; 164 { 165 MutexLock lock(&capture_lock_); 166 send_sample_rate_hz = send_sample_rate_hz_; 167 send_num_channels = send_num_channels_; 168 swap_stereo_channels = swap_stereo_channels_; 169 } 170 171 std::unique_ptr<AudioFrame> audio_frame(new AudioFrame()); 172 InitializeCaptureFrame(sample_rate, send_sample_rate_hz, number_of_channels, 173 send_num_channels, audio_frame.get()); 174 voe::RemixAndResample(source, sample_rate, &capture_resampler_, 175 audio_frame.get()); 176 ProcessCaptureFrame(audio_delay_milliseconds, key_pressed, 177 swap_stereo_channels, audio_processing_, 178 audio_frame.get()); 179 180 if (estimated_capture_time_ns) { 181 audio_frame->set_absolute_capture_timestamp_ms(*estimated_capture_time_ns / 182 1000000); 183 } 184 185 RTC_DCHECK_GT(audio_frame->samples_per_channel_, 0); 186 if (async_audio_processing_) 187 async_audio_processing_->Process(std::move(audio_frame)); 188 else 189 SendProcessedData(std::move(audio_frame)); 190 191 return 0; 192 } 193 194 void AudioTransportImpl::SendProcessedData( 195 std::unique_ptr<AudioFrame> audio_frame) { 196 TRACE_EVENT0("webrtc", "AudioTransportImpl::SendProcessedData"); 197 RTC_DCHECK_GT(audio_frame->samples_per_channel_, 0); 198 MutexLock lock(&capture_lock_); 199 if (audio_senders_.empty()) 200 return; 201 202 auto it = audio_senders_.begin(); 203 while (++it != audio_senders_.end()) { 204 auto audio_frame_copy = std::make_unique<AudioFrame>(); 205 audio_frame_copy->CopyFrom(*audio_frame); 206 (*it)->SendAudioData(std::move(audio_frame_copy)); 207 } 208 // Send the original frame to the first stream w/o copying. 209 (*audio_senders_.begin())->SendAudioData(std::move(audio_frame)); 210 } 211 212 // Mix all received streams, feed the result to the AudioProcessing module, then 213 // resample the result to the requested output rate. 214 int32_t AudioTransportImpl::NeedMorePlayData(const size_t nSamples, 215 const size_t nBytesPerSample, 216 const size_t nChannels, 217 const uint32_t samplesPerSec, 218 void* audioSamples, 219 size_t& nSamplesOut, 220 int64_t* elapsed_time_ms, 221 int64_t* ntp_time_ms) { 222 TRACE_EVENT0("webrtc", "AudioTransportImpl::NeedMorePlayData"); 223 RTC_DCHECK_EQ(sizeof(int16_t) * nChannels, nBytesPerSample); 224 RTC_DCHECK_GE(nChannels, 1); 225 RTC_DCHECK_LE(nChannels, 2); 226 RTC_DCHECK_GE( 227 samplesPerSec, 228 static_cast<uint32_t>(AudioProcessing::NativeRate::kSampleRate8kHz)); 229 230 // 100 = 1 second / data duration (10 ms). 231 RTC_DCHECK_EQ(nSamples * 100, samplesPerSec); 232 RTC_DCHECK_LE(nBytesPerSample * nSamples * nChannels, 233 AudioFrame::kMaxDataSizeBytes); 234 235 mixer_->Mix(nChannels, &mixed_frame_); 236 *elapsed_time_ms = mixed_frame_.elapsed_time_ms_; 237 *ntp_time_ms = mixed_frame_.ntp_time_ms_; 238 239 if (audio_processing_) { 240 const auto error = 241 ProcessReverseAudioFrame(audio_processing_, &mixed_frame_); 242 RTC_DCHECK_EQ(error, AudioProcessing::kNoError); 243 } 244 245 InterleavedView<int16_t> resampled(static_cast<int16_t*>(audioSamples), 246 nSamples, nChannels); 247 Resample(mixed_frame_, samplesPerSec, &render_resampler_, resampled); 248 nSamplesOut = resampled.size(); 249 return 0; 250 } 251 252 // Used by Chromium - same as NeedMorePlayData() but because Chrome has its 253 // own APM instance, does not call audio_processing_->ProcessReverseStream(). 254 void AudioTransportImpl::PullRenderData(int bits_per_sample, 255 int sample_rate, 256 size_t number_of_channels, 257 size_t number_of_frames, 258 void* audio_data, 259 int64_t* elapsed_time_ms, 260 int64_t* ntp_time_ms) { 261 TRACE_EVENT2("webrtc", "AudioTransportImpl::PullRenderData", "sample_rate", 262 sample_rate, "number_of_frames", number_of_frames); 263 RTC_DCHECK_EQ(bits_per_sample, 16); 264 RTC_DCHECK_GE(number_of_channels, 1); 265 RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz); 266 267 // 100 = 1 second / data duration (10 ms). 268 RTC_DCHECK_EQ(number_of_frames * 100, sample_rate); 269 270 // 8 = bits per byte. 271 RTC_DCHECK_LE(bits_per_sample / 8 * number_of_frames * number_of_channels, 272 AudioFrame::kMaxDataSizeBytes); 273 mixer_->Mix(number_of_channels, &mixed_frame_); 274 *elapsed_time_ms = mixed_frame_.elapsed_time_ms_; 275 *ntp_time_ms = mixed_frame_.ntp_time_ms_; 276 277 Resample(mixed_frame_, sample_rate, &render_resampler_, 278 InterleavedView<int16_t>(static_cast<int16_t*>(audio_data), 279 number_of_frames, number_of_channels)); 280 } 281 282 void AudioTransportImpl::UpdateAudioSenders(std::vector<AudioSender*> senders, 283 int send_sample_rate_hz, 284 size_t send_num_channels) { 285 MutexLock lock(&capture_lock_); 286 audio_senders_ = std::move(senders); 287 send_sample_rate_hz_ = send_sample_rate_hz; 288 send_num_channels_ = send_num_channels; 289 } 290 291 void AudioTransportImpl::SetStereoChannelSwapping(bool enable) { 292 MutexLock lock(&capture_lock_); 293 swap_stereo_channels_ = enable; 294 } 295 296 } // namespace webrtc