audio_device_buffer.cc (21031B)
1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/audio_device/audio_device_buffer.h" 12 13 #include <cmath> 14 #include <cstddef> 15 #include <cstdint> 16 #include <cstring> 17 #include <optional> 18 19 #include "api/audio/audio_device_defines.h" 20 #include "api/environment/environment.h" 21 #include "api/sequence_checker.h" 22 #include "api/task_queue/task_queue_factory.h" 23 #include "api/units/time_delta.h" 24 #include "common_audio/signal_processing/include/signal_processing_library.h" 25 #include "rtc_base/checks.h" 26 #include "rtc_base/logging.h" 27 #include "rtc_base/synchronization/mutex.h" 28 #include "rtc_base/time_utils.h" 29 #include "rtc_base/timestamp_aligner.h" 30 #include "rtc_base/trace_event.h" 31 #include "system_wrappers/include/metrics.h" 32 33 namespace webrtc { 34 35 static const char kTimerQueueName[] = "AudioDeviceBufferTimer"; 36 37 // Time between two sucessive calls to LogStats(). 38 static const size_t kTimerIntervalInSeconds = 10; 39 static const size_t kTimerIntervalInMilliseconds = 40 kTimerIntervalInSeconds * kNumMillisecsPerSec; 41 // Min time required to qualify an audio session as a "call". If playout or 42 // recording has been active for less than this time we will not store any 43 // logs or UMA stats but instead consider the call as too short. 44 static const size_t kMinValidCallTimeTimeInSeconds = 10; 45 static const size_t kMinValidCallTimeTimeInMilliseconds = 46 kMinValidCallTimeTimeInSeconds * kNumMillisecsPerSec; 47 #ifdef AUDIO_DEVICE_PLAYS_SINUS_TONE 48 static const double k2Pi = 6.28318530717959; 49 #endif 50 51 AudioDeviceBuffer::AudioDeviceBuffer(const Environment& env, 52 bool create_detached) 53 : env_(env), 54 task_queue_(env_.task_queue_factory().CreateTaskQueue( 55 kTimerQueueName, 56 TaskQueueFactory::Priority::NORMAL)), 57 audio_transport_cb_(nullptr), 58 rec_sample_rate_(0), 59 play_sample_rate_(0), 60 rec_channels_(0), 61 play_channels_(0), 62 playing_(false), 63 recording_(false), 64 typing_status_(false), 65 play_delay_ms_(0), 66 rec_delay_ms_(0), 67 num_stat_reports_(0), 68 last_timer_task_time_(0), 69 rec_stat_count_(0), 70 play_stat_count_(0), 71 play_start_time_(0), 72 only_silence_recorded_(true), 73 log_stats_(false) { 74 RTC_LOG(LS_INFO) << "AudioDeviceBuffer::ctor"; 75 #ifdef AUDIO_DEVICE_PLAYS_SINUS_TONE 76 phase_ = 0.0; 77 RTC_LOG(LS_WARNING) << "AUDIO_DEVICE_PLAYS_SINUS_TONE is defined!"; 78 #endif 79 if (create_detached) { 80 main_thread_checker_.Detach(); 81 } 82 } 83 84 AudioDeviceBuffer::~AudioDeviceBuffer() { 85 RTC_DCHECK_RUN_ON(&main_thread_checker_); 86 RTC_DCHECK(!playing_); 87 RTC_DCHECK(!recording_); 88 RTC_LOG(LS_INFO) << "AudioDeviceBuffer::~dtor"; 89 90 // Delete and and thus stop task queue before deleting other members to avoid 91 // race with running tasks. Even though !playing_ and !recording_ called 92 // StopPeriodicLogging, such stop is asynchronous and may race with the 93 // AudioDeviceBuffer destructor. In particular there might be regular LogStats 94 // that attempts to repost task to the task_queue_. 95 // Thus task_queue_ should be deleted before pointer to it is invalidated. 96 // std::unique_ptr destructor does the same two operations in reverse order as 97 // it doesn't expect member would be used after its destruction has started. 98 task_queue_.get_deleter()(task_queue_.get()); 99 task_queue_.release(); 100 } 101 102 int32_t AudioDeviceBuffer::RegisterAudioCallback( 103 AudioTransport* audio_callback) { 104 RTC_DCHECK_RUN_ON(&main_thread_checker_); 105 RTC_DLOG(LS_INFO) << __FUNCTION__; 106 if (playing_ || recording_) { 107 RTC_LOG(LS_ERROR) << "Failed to set audio transport since media was active"; 108 return -1; 109 } 110 audio_transport_cb_ = audio_callback; 111 return 0; 112 } 113 114 void AudioDeviceBuffer::StartPlayout() { 115 RTC_DCHECK_RUN_ON(&main_thread_checker_); 116 // TODO(henrika): allow for usage of DCHECK(!playing_) here instead. Today the 117 // ADM allows calling Start(), Start() by ignoring the second call but it 118 // makes more sense to only allow one call. 119 if (playing_) { 120 return; 121 } 122 RTC_DLOG(LS_INFO) << __FUNCTION__; 123 // Clear members tracking playout stats and do it on the task queue. 124 task_queue_->PostTask([this] { ResetPlayStats(); }); 125 // Start a periodic timer based on task queue if not already done by the 126 // recording side. 127 if (!recording_) { 128 StartPeriodicLogging(); 129 } 130 const int64_t now_time = env_.clock().TimeInMilliseconds(); 131 // Clear members that are only touched on the main (creating) thread. 132 play_start_time_ = now_time; 133 playing_ = true; 134 } 135 136 void AudioDeviceBuffer::StartRecording() { 137 RTC_DCHECK_RUN_ON(&main_thread_checker_); 138 if (recording_) { 139 return; 140 } 141 RTC_DLOG(LS_INFO) << __FUNCTION__; 142 // Clear members tracking recording stats and do it on the task queue. 143 task_queue_->PostTask([this] { ResetRecStats(); }); 144 // Start a periodic timer based on task queue if not already done by the 145 // playout side. 146 if (!playing_) { 147 StartPeriodicLogging(); 148 } 149 // Clear members that will be touched on the main (creating) thread. 150 rec_start_time_ = env_.clock().TimeInMilliseconds(); 151 recording_ = true; 152 // And finally a member which can be modified on the native audio thread. 153 // It is safe to do so since we know by design that the owning ADM has not 154 // yet started the native audio recording. 155 only_silence_recorded_ = true; 156 } 157 158 void AudioDeviceBuffer::StopPlayout() { 159 RTC_DCHECK_RUN_ON(&main_thread_checker_); 160 if (!playing_) { 161 return; 162 } 163 RTC_DLOG(LS_INFO) << __FUNCTION__; 164 playing_ = false; 165 // Stop periodic logging if no more media is active. 166 if (!recording_) { 167 StopPeriodicLogging(); 168 } 169 RTC_LOG(LS_INFO) << "total playout time: " 170 << (env_.clock().TimeInMilliseconds() - play_start_time_); 171 } 172 173 void AudioDeviceBuffer::StopRecording() { 174 RTC_DCHECK_RUN_ON(&main_thread_checker_); 175 if (!recording_) { 176 return; 177 } 178 RTC_DLOG(LS_INFO) << __FUNCTION__; 179 recording_ = false; 180 // Stop periodic logging if no more media is active. 181 if (!playing_) { 182 StopPeriodicLogging(); 183 } 184 // Add UMA histogram to keep track of the case when only zeros have been 185 // recorded. Measurements (max of absolute level) are taken twice per second, 186 // which means that if e.g 10 seconds of audio has been recorded, a total of 187 // 20 level estimates must all be identical to zero to trigger the histogram. 188 // `only_silence_recorded_` can only be cleared on the native audio thread 189 // that drives audio capture but we know by design that the audio has stopped 190 // when this method is called, hence there should not be aby conflicts. Also, 191 // the fact that `only_silence_recorded_` can be affected during the complete 192 // call makes chances of conflicts with potentially one last callback very 193 // small. 194 const size_t time_since_start = 195 env_.clock().TimeInMilliseconds() - rec_start_time_; 196 if (time_since_start > kMinValidCallTimeTimeInMilliseconds) { 197 const int only_zeros = static_cast<int>(only_silence_recorded_); 198 RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.RecordedOnlyZeros", only_zeros); 199 RTC_LOG(LS_INFO) << "HISTOGRAM(WebRTC.Audio.RecordedOnlyZeros): " 200 << only_zeros; 201 } 202 RTC_LOG(LS_INFO) << "total recording time: " << time_since_start; 203 } 204 205 int32_t AudioDeviceBuffer::SetRecordingSampleRate(uint32_t fsHz) { 206 RTC_LOG(LS_INFO) << "SetRecordingSampleRate(" << fsHz << ")"; 207 rec_sample_rate_ = fsHz; 208 return 0; 209 } 210 211 int32_t AudioDeviceBuffer::SetPlayoutSampleRate(uint32_t fsHz) { 212 RTC_LOG(LS_INFO) << "SetPlayoutSampleRate(" << fsHz << ")"; 213 play_sample_rate_ = fsHz; 214 return 0; 215 } 216 217 uint32_t AudioDeviceBuffer::RecordingSampleRate() const { 218 return rec_sample_rate_; 219 } 220 221 uint32_t AudioDeviceBuffer::PlayoutSampleRate() const { 222 return play_sample_rate_; 223 } 224 225 int32_t AudioDeviceBuffer::SetRecordingChannels(size_t channels) { 226 RTC_LOG(LS_INFO) << "SetRecordingChannels(" << channels << ")"; 227 rec_channels_ = channels; 228 return 0; 229 } 230 231 int32_t AudioDeviceBuffer::SetPlayoutChannels(size_t channels) { 232 RTC_LOG(LS_INFO) << "SetPlayoutChannels(" << channels << ")"; 233 play_channels_ = channels; 234 return 0; 235 } 236 237 size_t AudioDeviceBuffer::RecordingChannels() const { 238 return rec_channels_; 239 } 240 241 size_t AudioDeviceBuffer::PlayoutChannels() const { 242 return play_channels_; 243 } 244 245 int32_t AudioDeviceBuffer::SetTypingStatus(bool typing_status) { 246 typing_status_ = typing_status; 247 return 0; 248 } 249 250 void AudioDeviceBuffer::SetVQEData(int play_delay_ms, int rec_delay_ms) { 251 play_delay_ms_ = play_delay_ms; 252 rec_delay_ms_ = rec_delay_ms; 253 } 254 255 int32_t AudioDeviceBuffer::SetRecordedBuffer(const void* audio_buffer, 256 size_t samples_per_channel) { 257 return SetRecordedBuffer(audio_buffer, samples_per_channel, std::nullopt); 258 } 259 260 int32_t AudioDeviceBuffer::SetRecordedBuffer( 261 const void* audio_buffer, 262 size_t samples_per_channel, 263 std::optional<int64_t> capture_timestamp_ns) { 264 // Copy the complete input buffer to the local buffer. 265 const size_t old_size = rec_buffer_.size(); 266 rec_buffer_.SetData(static_cast<const int16_t*>(audio_buffer), 267 rec_channels_ * samples_per_channel); 268 // Keep track of the size of the recording buffer. Only updated when the 269 // size changes, which is a rare event. 270 if (old_size != rec_buffer_.size()) { 271 RTC_LOG(LS_INFO) << "Size of recording buffer: " << rec_buffer_.size(); 272 } 273 274 if (capture_timestamp_ns) { 275 int64_t align_offsync_estimation_time = env_.clock().TimeInMicroseconds(); 276 if (align_offsync_estimation_time - TimestampAligner::kMinFrameIntervalUs > 277 align_offsync_estimation_time_) { 278 align_offsync_estimation_time_ = align_offsync_estimation_time; 279 capture_timestamp_ns_ = 280 kNumNanosecsPerMicrosec * 281 timestamp_aligner_.TranslateTimestamp( 282 *capture_timestamp_ns / kNumNanosecsPerMicrosec, 283 align_offsync_estimation_time); 284 } else { 285 // The Timestamp aligner is designed to prevent timestamps that are too 286 // similar, and produces warnings if it is called to often. We do not care 287 // about that here, so we do this workaround. If we where to call the 288 // aligner within a millisecond, we instead call this, that do not update 289 // the clock offset estimation. This get us timestamps without generating 290 // warnings, but could generate two timestamps within a millisecond. 291 capture_timestamp_ns_ = 292 kNumNanosecsPerMicrosec * 293 timestamp_aligner_.TranslateTimestamp(*capture_timestamp_ns / 294 kNumNanosecsPerMicrosec); 295 } 296 } 297 // Derive a new level value twice per second and check if it is non-zero. 298 int16_t max_abs = 0; 299 RTC_DCHECK_LT(rec_stat_count_, 50); 300 if (++rec_stat_count_ >= 50) { 301 // Returns the largest absolute value in a signed 16-bit vector. 302 max_abs = WebRtcSpl_MaxAbsValueW16(rec_buffer_.data(), rec_buffer_.size()); 303 rec_stat_count_ = 0; 304 // Set `only_silence_recorded_` to false as soon as at least one detection 305 // of a non-zero audio packet is found. It can only be restored to true 306 // again by restarting the call. 307 if (max_abs > 0) { 308 only_silence_recorded_ = false; 309 } 310 } 311 // Update recording stats which is used as base for periodic logging of the 312 // audio input state. 313 UpdateRecStats(max_abs, samples_per_channel); 314 return 0; 315 } 316 317 int32_t AudioDeviceBuffer::DeliverRecordedData() { 318 if (!audio_transport_cb_) { 319 RTC_LOG(LS_WARNING) << "Invalid audio transport"; 320 return 0; 321 } 322 const size_t frames = rec_buffer_.size() / rec_channels_; 323 const size_t bytes_per_frame = rec_channels_ * sizeof(int16_t); 324 uint32_t new_mic_level_dummy = 0; 325 uint32_t total_delay_ms = play_delay_ms_ + rec_delay_ms_; 326 int32_t res = audio_transport_cb_->RecordedDataIsAvailable( 327 rec_buffer_.data(), frames, bytes_per_frame, rec_channels_, 328 rec_sample_rate_, total_delay_ms, 0, 0, typing_status_, 329 new_mic_level_dummy, capture_timestamp_ns_); 330 if (res == -1) { 331 RTC_LOG(LS_ERROR) << "RecordedDataIsAvailable() failed"; 332 } 333 return 0; 334 } 335 336 int32_t AudioDeviceBuffer::RequestPlayoutData(size_t samples_per_channel) { 337 TRACE_EVENT1("webrtc", "AudioDeviceBuffer::RequestPlayoutData", 338 "samples_per_channel", samples_per_channel); 339 340 // The consumer can change the requested size on the fly and we therefore 341 // resize the buffer accordingly. Also takes place at the first call to this 342 // method. 343 const size_t total_samples = play_channels_ * samples_per_channel; 344 if (play_buffer_.size() != total_samples) { 345 play_buffer_.SetSize(total_samples); 346 RTC_LOG(LS_INFO) << "Size of playout buffer: " << play_buffer_.size(); 347 } 348 349 size_t num_samples_out(0); 350 // It is currently supported to start playout without a valid audio 351 // transport object. Leads to warning and silence. 352 if (!audio_transport_cb_) { 353 RTC_LOG(LS_WARNING) << "Invalid audio transport"; 354 return 0; 355 } 356 357 // Retrieve new 16-bit PCM audio data using the audio transport instance. 358 int64_t elapsed_time_ms = -1; 359 int64_t ntp_time_ms = -1; 360 const size_t bytes_per_frame = play_channels_ * sizeof(int16_t); 361 uint32_t res = audio_transport_cb_->NeedMorePlayData( 362 samples_per_channel, bytes_per_frame, play_channels_, play_sample_rate_, 363 play_buffer_.data(), num_samples_out, &elapsed_time_ms, &ntp_time_ms); 364 if (res != 0) { 365 RTC_LOG(LS_ERROR) << "NeedMorePlayData() failed"; 366 } 367 368 // Derive a new level value twice per second. 369 int16_t max_abs = 0; 370 RTC_DCHECK_LT(play_stat_count_, 50); 371 if (++play_stat_count_ >= 50) { 372 // Returns the largest absolute value in a signed 16-bit vector. 373 max_abs = 374 WebRtcSpl_MaxAbsValueW16(play_buffer_.data(), play_buffer_.size()); 375 play_stat_count_ = 0; 376 } 377 // Update playout stats which is used as base for periodic logging of the 378 // audio output state. 379 UpdatePlayStats(max_abs, num_samples_out / play_channels_); 380 return static_cast<int32_t>(num_samples_out / play_channels_); 381 } 382 383 int32_t AudioDeviceBuffer::GetPlayoutData(void* audio_buffer) { 384 RTC_DCHECK_GT(play_buffer_.size(), 0); 385 #ifdef AUDIO_DEVICE_PLAYS_SINUS_TONE 386 const double phase_increment = 387 k2Pi * 440.0 / static_cast<double>(play_sample_rate_); 388 int16_t* destination_r = reinterpret_cast<int16_t*>(audio_buffer); 389 if (play_channels_ == 1) { 390 for (size_t i = 0; i < play_buffer_.size(); ++i) { 391 destination_r[i] = static_cast<int16_t>((sin(phase_) * (1 << 14))); 392 phase_ += phase_increment; 393 } 394 } else if (play_channels_ == 2) { 395 for (size_t i = 0; i < play_buffer_.size() / 2; ++i) { 396 destination_r[2 * i] = destination_r[2 * i + 1] = 397 static_cast<int16_t>((sin(phase_) * (1 << 14))); 398 phase_ += phase_increment; 399 } 400 } 401 #else 402 memcpy(audio_buffer, play_buffer_.data(), 403 play_buffer_.size() * sizeof(int16_t)); 404 #endif 405 // Return samples per channel or number of frames. 406 return static_cast<int32_t>(play_buffer_.size() / play_channels_); 407 } 408 409 void AudioDeviceBuffer::StartPeriodicLogging() { 410 task_queue_->PostTask([this] { LogStats(AudioDeviceBuffer::LOG_START); }); 411 } 412 413 void AudioDeviceBuffer::StopPeriodicLogging() { 414 task_queue_->PostTask([this] { LogStats(AudioDeviceBuffer::LOG_STOP); }); 415 } 416 417 void AudioDeviceBuffer::LogStats(LogState state) { 418 RTC_DCHECK_RUN_ON(task_queue_.get()); 419 int64_t now_time = env_.clock().TimeInMilliseconds(); 420 421 if (state == AudioDeviceBuffer::LOG_START) { 422 // Reset counters at start. We will not add any logging in this state but 423 // the timer will started by posting a new (delayed) task. 424 num_stat_reports_ = 0; 425 last_timer_task_time_ = now_time; 426 log_stats_ = true; 427 } else if (state == AudioDeviceBuffer::LOG_STOP) { 428 // Stop logging and posting new tasks. 429 log_stats_ = false; 430 } else if (state == AudioDeviceBuffer::LOG_ACTIVE) { 431 // Keep logging unless logging was disabled while task was posted. 432 } 433 434 // Avoid adding more logs since we are in STOP mode. 435 if (!log_stats_) { 436 return; 437 } 438 439 int64_t next_callback_time = now_time + kTimerIntervalInMilliseconds; 440 int64_t time_since_last = TimeDiff(now_time, last_timer_task_time_); 441 last_timer_task_time_ = now_time; 442 443 Stats stats; 444 { 445 MutexLock lock(&lock_); 446 stats = stats_; 447 stats_.max_rec_level = 0; 448 stats_.max_play_level = 0; 449 } 450 451 // Cache current sample rate from atomic members. 452 const uint32_t rec_sample_rate = rec_sample_rate_; 453 const uint32_t play_sample_rate = play_sample_rate_; 454 455 // Log the latest statistics but skip the first two rounds just after state 456 // was set to LOG_START to ensure that we have at least one full stable 457 // 10-second interval for sample-rate estimation. Hence, first printed log 458 // will be after ~20 seconds. 459 if (++num_stat_reports_ > 2 && 460 static_cast<size_t>(time_since_last) > kTimerIntervalInMilliseconds / 2) { 461 uint32_t diff_samples = stats.rec_samples - last_stats_.rec_samples; 462 float rate = diff_samples / (static_cast<float>(time_since_last) / 1000.0); 463 uint32_t abs_diff_rate_in_percent = 0; 464 if (rec_sample_rate > 0 && rate > 0) { 465 abs_diff_rate_in_percent = static_cast<uint32_t>( 466 0.5f + 467 ((100.0f * std::abs(rate - rec_sample_rate)) / rec_sample_rate)); 468 RTC_HISTOGRAM_PERCENTAGE("WebRTC.Audio.RecordSampleRateOffsetInPercent", 469 abs_diff_rate_in_percent); 470 RTC_LOG(LS_INFO) << "[REC : " << time_since_last << "msec, " 471 << rec_sample_rate / 1000 << "kHz] callbacks: " 472 << stats.rec_callbacks - last_stats_.rec_callbacks 473 << ", " 474 "samples: " 475 << diff_samples 476 << ", " 477 "rate: " 478 << static_cast<int>(rate + 0.5) 479 << ", " 480 "rate diff: " 481 << abs_diff_rate_in_percent 482 << "%, " 483 "level: " 484 << stats.max_rec_level; 485 } 486 487 diff_samples = stats.play_samples - last_stats_.play_samples; 488 rate = diff_samples / (static_cast<float>(time_since_last) / 1000.0); 489 abs_diff_rate_in_percent = 0; 490 if (play_sample_rate > 0 && rate > 0) { 491 abs_diff_rate_in_percent = static_cast<uint32_t>( 492 0.5f + 493 ((100.0f * std::abs(rate - play_sample_rate)) / play_sample_rate)); 494 RTC_HISTOGRAM_PERCENTAGE("WebRTC.Audio.PlayoutSampleRateOffsetInPercent", 495 abs_diff_rate_in_percent); 496 RTC_LOG(LS_INFO) << "[PLAY: " << time_since_last << "msec, " 497 << play_sample_rate / 1000 << "kHz] callbacks: " 498 << stats.play_callbacks - last_stats_.play_callbacks 499 << ", " 500 "samples: " 501 << diff_samples 502 << ", " 503 "rate: " 504 << static_cast<int>(rate + 0.5) 505 << ", " 506 "rate diff: " 507 << abs_diff_rate_in_percent 508 << "%, " 509 "level: " 510 << stats.max_play_level; 511 } 512 } 513 last_stats_ = stats; 514 515 int64_t time_to_wait_ms = 516 next_callback_time - env_.clock().TimeInMilliseconds(); 517 RTC_DCHECK_GT(time_to_wait_ms, 0) << "Invalid timer interval"; 518 519 // Keep posting new (delayed) tasks until state is changed to kLogStop. 520 task_queue_->PostDelayedTask( 521 [this] { AudioDeviceBuffer::LogStats(AudioDeviceBuffer::LOG_ACTIVE); }, 522 TimeDelta::Millis(time_to_wait_ms)); 523 } 524 525 void AudioDeviceBuffer::ResetRecStats() { 526 RTC_DCHECK_RUN_ON(task_queue_.get()); 527 last_stats_.ResetRecStats(); 528 MutexLock lock(&lock_); 529 stats_.ResetRecStats(); 530 } 531 532 void AudioDeviceBuffer::ResetPlayStats() { 533 RTC_DCHECK_RUN_ON(task_queue_.get()); 534 last_stats_.ResetPlayStats(); 535 MutexLock lock(&lock_); 536 stats_.ResetPlayStats(); 537 } 538 539 void AudioDeviceBuffer::UpdateRecStats(int16_t max_abs, 540 size_t samples_per_channel) { 541 MutexLock lock(&lock_); 542 ++stats_.rec_callbacks; 543 stats_.rec_samples += samples_per_channel; 544 if (max_abs > stats_.max_rec_level) { 545 stats_.max_rec_level = max_abs; 546 } 547 } 548 549 void AudioDeviceBuffer::UpdatePlayStats(int16_t max_abs, 550 size_t samples_per_channel) { 551 MutexLock lock(&lock_); 552 ++stats_.play_callbacks; 553 stats_.play_samples += samples_per_channel; 554 if (max_abs > stats_.max_play_level) { 555 stats_.max_play_level = max_abs; 556 } 557 } 558 559 } // namespace webrtc