AudioDecoderInputTrack.cpp (24633B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 #include "AudioDecoderInputTrack.h" 6 7 #include "MediaData.h" 8 #include "RLBoxSoundTouch.h" 9 #include "Tracing.h" 10 #include "mozilla/ScopeExit.h" 11 #include "mozilla/StaticPrefs_media.h" 12 13 namespace mozilla { 14 15 extern LazyLogModule gMediaDecoderLog; 16 17 #define LOG(msg, ...) \ 18 MOZ_LOG(gMediaDecoderLog, LogLevel::Debug, \ 19 ("AudioDecoderInputTrack=%p " msg, this, ##__VA_ARGS__)) 20 21 #define LOG_M(msg, this, ...) \ 22 MOZ_LOG(gMediaDecoderLog, LogLevel::Debug, \ 23 ("AudioDecoderInputTrack=%p " msg, this, ##__VA_ARGS__)) 24 25 /* static */ 26 AudioDecoderInputTrack* AudioDecoderInputTrack::Create( 27 MediaTrackGraph* aGraph, nsISerialEventTarget* aDecoderThread, 28 const AudioInfo& aInfo, float aPlaybackRate, float aVolume, 29 bool aPreservesPitch) { 30 MOZ_ASSERT(aGraph); 31 MOZ_ASSERT(aDecoderThread); 32 AudioDecoderInputTrack* track = 33 new AudioDecoderInputTrack(aDecoderThread, aGraph->GraphRate(), aInfo, 34 aPlaybackRate, aVolume, aPreservesPitch); 35 aGraph->AddTrack(track); 36 return track; 37 } 38 39 AudioDecoderInputTrack::AudioDecoderInputTrack( 40 nsISerialEventTarget* aDecoderThread, TrackRate aGraphRate, 41 const AudioInfo& aInfo, float aPlaybackRate, float aVolume, 42 bool aPreservesPitch) 43 : ProcessedMediaTrack(aGraphRate, MediaSegment::AUDIO, new AudioSegment()), 44 mDecoderThread(aDecoderThread), 45 mResamplerChannelCount(0), 46 mInitialInputChannels(aInfo.mChannels), 47 mInputSampleRate(aInfo.mRate), 48 mDelayedScheduler(mDecoderThread), 49 mPlaybackRate(aPlaybackRate), 50 mVolume(aVolume), 51 mPreservesPitch(aPreservesPitch) {} 52 53 bool AudioDecoderInputTrack::ConvertAudioDataToSegment( 54 AudioData* aAudio, AudioSegment& aSegment, 55 const PrincipalHandle& aPrincipalHandle) { 56 AssertOnDecoderThread(); 57 MOZ_ASSERT(aAudio); 58 MOZ_ASSERT(aSegment.IsEmpty()); 59 if (!aAudio->Frames()) { 60 LOG("Ignore audio with zero frame"); 61 return false; 62 } 63 64 aAudio->EnsureAudioBuffer(); 65 RefPtr<SharedBuffer> buffer = aAudio->mAudioBuffer; 66 AudioDataValue* bufferData = static_cast<AudioDataValue*>(buffer->Data()); 67 AutoTArray<const AudioDataValue*, 2> channels; 68 for (uint32_t i = 0; i < aAudio->mChannels; ++i) { 69 channels.AppendElement(bufferData + i * aAudio->Frames()); 70 } 71 aSegment.AppendFrames(buffer.forget(), channels, aAudio->Frames(), 72 aPrincipalHandle); 73 const TrackRate newInputRate = static_cast<TrackRate>(aAudio->mRate); 74 if (newInputRate != mInputSampleRate) { 75 LOG("Input sample rate changed %u -> %u", mInputSampleRate, newInputRate); 76 mInputSampleRate = newInputRate; 77 mResampler.own(nullptr); 78 mResamplerChannelCount = 0; 79 } 80 if (mInputSampleRate != Graph()->GraphRate()) { 81 aSegment.ResampleChunks(mResampler, &mResamplerChannelCount, 82 mInputSampleRate, Graph()->GraphRate()); 83 } 84 return aSegment.GetDuration() > 0; 85 } 86 87 void AudioDecoderInputTrack::AppendData( 88 AudioData* aAudio, const PrincipalHandle& aPrincipalHandle) { 89 AssertOnDecoderThread(); 90 MOZ_ASSERT(aAudio); 91 nsTArray<RefPtr<AudioData>> audio; 92 audio.AppendElement(aAudio); 93 AppendData(audio, aPrincipalHandle); 94 } 95 96 void AudioDecoderInputTrack::AppendData( 97 nsTArray<RefPtr<AudioData>>& aAudioArray, 98 const PrincipalHandle& aPrincipalHandle) { 99 AssertOnDecoderThread(); 100 MOZ_ASSERT(!mShutdownSPSCQueue); 101 102 // Batching all new data together in order to push them as a single unit that 103 // gives the SPSC queue more spaces. 104 for (const auto& audio : aAudioArray) { 105 BatchData(audio, aPrincipalHandle); 106 } 107 108 // If SPSC queue doesn't have much available capacity now, we would push 109 // batched later. 110 if (ShouldBatchData()) { 111 return; 112 } 113 PushBatchedDataIfNeeded(); 114 } 115 116 bool AudioDecoderInputTrack::ShouldBatchData() const { 117 AssertOnDecoderThread(); 118 // If the SPSC queue has less available capacity than the threshold, then all 119 // input audio data should be batched together, in order not to increase the 120 // pressure of SPSC queue. 121 static const int kThresholdNumerator = 3; 122 static const int kThresholdDenominator = 10; 123 return mSPSCQueue.AvailableWrite() < 124 mSPSCQueue.Capacity() * kThresholdNumerator / kThresholdDenominator; 125 } 126 127 bool AudioDecoderInputTrack::HasBatchedData() const { 128 AssertOnDecoderThread(); 129 return !mBatchedData.mSegment.IsEmpty(); 130 } 131 132 void AudioDecoderInputTrack::BatchData( 133 AudioData* aAudio, const PrincipalHandle& aPrincipalHandle) { 134 AssertOnDecoderThread(); 135 AudioSegment segment; 136 if (!ConvertAudioDataToSegment(aAudio, segment, aPrincipalHandle)) { 137 return; 138 } 139 mBatchedData.mSegment.AppendFrom(&segment); 140 if (!mBatchedData.mStartTime.IsValid()) { 141 mBatchedData.mStartTime = aAudio->mTime; 142 } 143 mBatchedData.mEndTime = aAudio->GetEndTime(); 144 LOG("batched data [%" PRId64 ":%" PRId64 "] sz=%" PRId64, 145 aAudio->mTime.ToMicroseconds(), aAudio->GetEndTime().ToMicroseconds(), 146 mBatchedData.mSegment.GetDuration()); 147 DispatchPushBatchedDataIfNeeded(); 148 } 149 150 void AudioDecoderInputTrack::DispatchPushBatchedDataIfNeeded() { 151 AssertOnDecoderThread(); 152 MOZ_ASSERT(!mShutdownSPSCQueue); 153 // The graph thread runs iteration around per 2~10ms. Doing this to ensure 154 // that we can keep consuming data. If the producer stops pushing new data 155 // due to MDSM stops decoding, which is because MDSM thinks the data stored 156 // in the audio queue are enough. The way to remove those data from the 157 // audio queue is driven by us, so we have to keep consuming data. 158 // Otherwise, we would get stuck because those batched data would never be 159 // consumed. 160 static const uint8_t kTimeoutMS = 10; 161 TimeStamp target = 162 TimeStamp::Now() + TimeDuration::FromMilliseconds(kTimeoutMS); 163 mDelayedScheduler.Ensure( 164 target, 165 [self = RefPtr<AudioDecoderInputTrack>(this), this]() { 166 LOG("In the task of DispatchPushBatchedDataIfNeeded"); 167 mDelayedScheduler.CompleteRequest(); 168 MOZ_ASSERT(!mShutdownSPSCQueue); 169 MOZ_ASSERT(HasBatchedData()); 170 // The capacity in SPSC is still not enough, so we can't push data now. 171 // Retrigger another task to push batched data. 172 if (ShouldBatchData()) { 173 DispatchPushBatchedDataIfNeeded(); 174 return; 175 } 176 PushBatchedDataIfNeeded(); 177 }, 178 []() { MOZ_DIAGNOSTIC_CRASH("DispatchPushBatchedDataIfNeeded reject"); }); 179 } 180 181 void AudioDecoderInputTrack::PushBatchedDataIfNeeded() { 182 AssertOnDecoderThread(); 183 if (!HasBatchedData()) { 184 return; 185 } 186 LOG("Append batched data [%" PRId64 ":%" PRId64 "], available SPSC sz=%u", 187 mBatchedData.mStartTime.ToMicroseconds(), 188 mBatchedData.mEndTime.ToMicroseconds(), mSPSCQueue.AvailableWrite()); 189 SPSCData data({SPSCData::DecodedData(std::move(mBatchedData))}); 190 PushDataToSPSCQueue(data); 191 MOZ_ASSERT(mBatchedData.mSegment.IsEmpty()); 192 // No batched data remains, we can cancel the pending tasks. 193 mDelayedScheduler.Reset(); 194 } 195 196 void AudioDecoderInputTrack::NotifyEndOfStream() { 197 AssertOnDecoderThread(); 198 // Force to push all data before EOS. Otherwise, the track would be ended too 199 // early without sending all data. 200 PushBatchedDataIfNeeded(); 201 SPSCData data({SPSCData::EOS()}); 202 LOG("Set EOS, available SPSC sz=%u", mSPSCQueue.AvailableWrite()); 203 PushDataToSPSCQueue(data); 204 } 205 206 void AudioDecoderInputTrack::ClearFutureData() { 207 AssertOnDecoderThread(); 208 // Clear the data hasn't been pushed to SPSC queue yet. 209 mBatchedData.Clear(); 210 mDelayedScheduler.Reset(); 211 SPSCData data({SPSCData::ClearFutureData()}); 212 LOG("Set clear future data, available SPSC sz=%u", 213 mSPSCQueue.AvailableWrite()); 214 PushDataToSPSCQueue(data); 215 } 216 217 void AudioDecoderInputTrack::PushDataToSPSCQueue(SPSCData& data) { 218 AssertOnDecoderThread(); 219 const bool rv = mSPSCQueue.Enqueue(data); 220 MOZ_DIAGNOSTIC_ASSERT(rv, "Failed to push data, SPSC queue is full!"); 221 (void)rv; 222 } 223 224 void AudioDecoderInputTrack::SetVolume(float aVolume) { 225 AssertOnDecoderThread(); 226 LOG("Set volume=%f", aVolume); 227 GetMainThreadSerialEventTarget()->Dispatch( 228 NS_NewRunnableFunction("AudioDecoderInputTrack::SetVolume", 229 [self = RefPtr<AudioDecoderInputTrack>(this), 230 aVolume] { self->SetVolumeImpl(aVolume); })); 231 } 232 233 void AudioDecoderInputTrack::SetVolumeImpl(float aVolume) { 234 MOZ_ASSERT(NS_IsMainThread()); 235 QueueControlMessageWithNoShutdown([self = RefPtr{this}, this, aVolume] { 236 TRACE_COMMENT("AudioDecoderInputTrack::SetVolume ControlMessage", "%f", 237 aVolume); 238 LOG_M("Apply volume=%f", this, aVolume); 239 mVolume = aVolume; 240 }); 241 } 242 243 void AudioDecoderInputTrack::SetPlaybackRate(float aPlaybackRate) { 244 AssertOnDecoderThread(); 245 LOG("Set playback rate=%f", aPlaybackRate); 246 GetMainThreadSerialEventTarget()->Dispatch(NS_NewRunnableFunction( 247 "AudioDecoderInputTrack::SetPlaybackRate", 248 [self = RefPtr<AudioDecoderInputTrack>(this), aPlaybackRate] { 249 self->SetPlaybackRateImpl(aPlaybackRate); 250 })); 251 } 252 253 void AudioDecoderInputTrack::SetPlaybackRateImpl(float aPlaybackRate) { 254 MOZ_ASSERT(NS_IsMainThread()); 255 QueueControlMessageWithNoShutdown([self = RefPtr{this}, this, aPlaybackRate] { 256 TRACE_COMMENT("AudioDecoderInputTrack::SetPlaybackRate ControlMessage", 257 "%f", aPlaybackRate); 258 LOG_M("Apply playback rate=%f", this, aPlaybackRate); 259 mPlaybackRate = aPlaybackRate; 260 SetTempoAndRateForTimeStretcher(); 261 }); 262 } 263 264 void AudioDecoderInputTrack::SetPreservesPitch(bool aPreservesPitch) { 265 AssertOnDecoderThread(); 266 LOG("Set preserves pitch=%d", aPreservesPitch); 267 GetMainThreadSerialEventTarget()->Dispatch(NS_NewRunnableFunction( 268 "AudioDecoderInputTrack::SetPreservesPitch", 269 [self = RefPtr<AudioDecoderInputTrack>(this), aPreservesPitch] { 270 self->SetPreservesPitchImpl(aPreservesPitch); 271 })); 272 } 273 274 void AudioDecoderInputTrack::SetPreservesPitchImpl(bool aPreservesPitch) { 275 MOZ_ASSERT(NS_IsMainThread()); 276 QueueControlMessageWithNoShutdown( 277 [self = RefPtr{this}, this, aPreservesPitch] { 278 TRACE_COMMENT("AudioDecoderInputTrack::SetPreservesPitch", "%s", 279 aPreservesPitch ? "true" : "false"); 280 LOG_M("Apply preserves pitch=%d", this, aPreservesPitch); 281 mPreservesPitch = aPreservesPitch; 282 SetTempoAndRateForTimeStretcher(); 283 }); 284 } 285 286 void AudioDecoderInputTrack::Close() { 287 AssertOnDecoderThread(); 288 LOG("Close"); 289 mShutdownSPSCQueue = true; 290 mBatchedData.Clear(); 291 mDelayedScheduler.Reset(); 292 } 293 294 void AudioDecoderInputTrack::DestroyImpl() { 295 LOG("DestroyImpl"); 296 AssertOnGraphThreadOrNotRunning(); 297 mBufferedData.Clear(); 298 if (mTimeStretcher) { 299 delete mTimeStretcher; 300 mTimeStretcher = nullptr; 301 } 302 ProcessedMediaTrack::DestroyImpl(); 303 } 304 305 AudioDecoderInputTrack::~AudioDecoderInputTrack() { 306 MOZ_ASSERT(mBatchedData.mSegment.IsEmpty()); 307 MOZ_ASSERT(mShutdownSPSCQueue); 308 mResampler.own(nullptr); 309 } 310 311 void AudioDecoderInputTrack::ProcessInput(GraphTime aFrom, GraphTime aTo, 312 uint32_t aFlags) { 313 AssertOnGraphThread(); 314 if (Ended()) { 315 return; 316 } 317 318 TrackTime consumedDuration = 0; 319 auto notify = MakeScopeExit([this, &consumedDuration] { 320 NotifyInTheEndOfProcessInput(consumedDuration); 321 }); 322 323 if (mSentAllData && (aFlags & ALLOW_END)) { 324 LOG("End track"); 325 mEnded = true; 326 return; 327 } 328 329 const TrackTime expectedDuration = aTo - aFrom; 330 LOG("ProcessInput [%" PRId64 " to %" PRId64 "], duration=%" PRId64, aFrom, 331 aTo, expectedDuration); 332 333 // Drain all data from SPSC queue first, because we want that the SPSC queue 334 // always has capacity of accepting data from the producer. In addition, we 335 // also need to check if there is any control related data that should be 336 // applied to output segment, eg. `ClearFutureData`. 337 SPSCData data; 338 while (mSPSCQueue.Dequeue(&data, 1) > 0) { 339 HandleSPSCData(data); 340 } 341 342 consumedDuration += AppendBufferedDataToOutput(expectedDuration); 343 if (HasSentAllData()) { 344 LOG("Sent all data, should end track in next iteration"); 345 mSentAllData = true; 346 } 347 } 348 349 void AudioDecoderInputTrack::HandleSPSCData(SPSCData& aData) { 350 AssertOnGraphThread(); 351 if (aData.IsDecodedData()) { 352 MOZ_ASSERT(!mReceivedEOS); 353 AudioSegment& segment = aData.AsDecodedData()->mSegment; 354 LOG("popped out data [%" PRId64 ":%" PRId64 "] sz=%" PRId64, 355 aData.AsDecodedData()->mStartTime.ToMicroseconds(), 356 aData.AsDecodedData()->mEndTime.ToMicroseconds(), 357 segment.GetDuration()); 358 mBufferedData.AppendFrom(&segment); 359 return; 360 } 361 if (aData.IsEOS()) { 362 MOZ_ASSERT(!Ended()); 363 LOG("Received EOS"); 364 mReceivedEOS = true; 365 return; 366 } 367 if (aData.IsClearFutureData()) { 368 LOG("Clear future data"); 369 mBufferedData.Clear(); 370 if (!Ended()) { 371 LOG("Clear EOS"); 372 mReceivedEOS = false; 373 } 374 return; 375 } 376 MOZ_ASSERT_UNREACHABLE("unsupported SPSC data"); 377 } 378 379 TrackTime AudioDecoderInputTrack::AppendBufferedDataToOutput( 380 TrackTime aExpectedDuration) { 381 AssertOnGraphThread(); 382 383 // Remove the necessary part from `mBufferedData` to create a new 384 // segment in order to apply some operation without affecting all data. 385 AudioSegment outputSegment; 386 TrackTime consumedDuration = 0; 387 if (mPlaybackRate != 1.0) { 388 consumedDuration = 389 AppendTimeStretchedDataToSegment(aExpectedDuration, outputSegment); 390 } else { 391 consumedDuration = 392 AppendUnstretchedDataToSegment(aExpectedDuration, outputSegment); 393 } 394 395 // Apply any necessary change on the segement which would be outputed to the 396 // graph. 397 const TrackTime appendedDuration = outputSegment.GetDuration(); 398 outputSegment.ApplyVolume(mVolume); 399 ApplyTrackDisabling(&outputSegment); 400 mSegment->AppendFrom(&outputSegment); 401 402 unsigned int numSamples = 0; 403 if (mTimeStretcher) { 404 numSamples = mTimeStretcher->numSamples().unverified_safe_because( 405 "Only used for logging."); 406 } 407 408 LOG("Appended %" PRId64 ", consumed %" PRId64 409 ", remaining raw buffered %" PRId64 ", remaining time-stretched %u", 410 appendedDuration, consumedDuration, mBufferedData.GetDuration(), 411 numSamples); 412 if (auto gap = aExpectedDuration - appendedDuration; gap > 0) { 413 LOG("Audio underrun, fill silence %" PRId64, gap); 414 MOZ_ASSERT(mBufferedData.IsEmpty()); 415 mSegment->AppendNullData(gap); 416 } 417 return consumedDuration; 418 } 419 420 TrackTime AudioDecoderInputTrack::AppendTimeStretchedDataToSegment( 421 TrackTime aExpectedDuration, AudioSegment& aOutput) { 422 AssertOnGraphThread(); 423 EnsureTimeStretcher(); 424 425 MOZ_ASSERT(mPlaybackRate != 1.0f); 426 MOZ_ASSERT(aExpectedDuration >= 0); 427 MOZ_ASSERT(mTimeStretcher); 428 MOZ_ASSERT(aOutput.IsEmpty()); 429 430 // If we don't have enough data that have been time-stretched, fill raw data 431 // into the time stretcher until the amount of samples that time stretcher 432 // finishes processed reaches or exceeds the expected duration. 433 TrackTime consumedDuration = 0; 434 mTimeStretcher->numSamples().copy_and_verify([&](auto numSamples) { 435 // Attacker controlled soundtouch can return a bogus numSamples, which 436 // can result in filling data into the time stretcher (or not). This is 437 // safe as long as filling (and getting) data is checked. 438 if (numSamples < aExpectedDuration) { 439 consumedDuration = FillDataToTimeStretcher(aExpectedDuration); 440 } 441 }); 442 MOZ_ASSERT(consumedDuration >= 0); 443 (void)GetDataFromTimeStretcher(aExpectedDuration, aOutput); 444 return consumedDuration; 445 } 446 447 TrackTime AudioDecoderInputTrack::FillDataToTimeStretcher( 448 TrackTime aExpectedDuration) { 449 AssertOnGraphThread(); 450 MOZ_ASSERT(mPlaybackRate != 1.0f); 451 MOZ_ASSERT(aExpectedDuration >= 0); 452 MOZ_ASSERT(mTimeStretcher); 453 454 TrackTime consumedDuration = 0; 455 const uint32_t channels = GetChannelCountForTimeStretcher(); 456 mBufferedData.IterateOnChunks([&](AudioChunk* aChunk) { 457 MOZ_ASSERT(aChunk); 458 if (aChunk->IsNull() && aChunk->GetDuration() == 0) { 459 // Skip this chunk and wait for next one. 460 return false; 461 } 462 const uint32_t bufferLength = channels * aChunk->GetDuration(); 463 if (bufferLength > mInterleavedBuffer.Capacity()) { 464 mInterleavedBuffer.SetCapacity(bufferLength); 465 } 466 mInterleavedBuffer.SetLengthAndRetainStorage(bufferLength); 467 if (aChunk->IsNull()) { 468 MOZ_ASSERT(aChunk->GetDuration(), "chunk with only silence"); 469 memset(mInterleavedBuffer.Elements(), 0, mInterleavedBuffer.Length()); 470 } else { 471 // Do the up-mix/down-mix first if necessary that forces to change the 472 // data's channel count to the time stretcher's channel count. Then 473 // perform a transformation from planar to interleaved. 474 switch (aChunk->mBufferFormat) { 475 case AUDIO_FORMAT_S16: 476 WriteChunk<int16_t>(*aChunk, channels, 1.0f, 477 mInterleavedBuffer.Elements()); 478 break; 479 case AUDIO_FORMAT_FLOAT32: 480 WriteChunk<float>(*aChunk, channels, 1.0f, 481 mInterleavedBuffer.Elements()); 482 break; 483 default: 484 MOZ_ASSERT_UNREACHABLE("Not expected format"); 485 } 486 } 487 mTimeStretcher->putSamples(mInterleavedBuffer.Elements(), 488 aChunk->GetDuration()); 489 consumedDuration += aChunk->GetDuration(); 490 return mTimeStretcher->numSamples().copy_and_verify( 491 [&aExpectedDuration](auto numSamples) { 492 // Attacker controlled soundtouch can return a bogus numSamples to 493 // return early or force additional iterations. This is safe 494 // as long as all the writes in the lambda are checked. 495 return numSamples >= aExpectedDuration; 496 }); 497 }); 498 mBufferedData.RemoveLeading(consumedDuration); 499 return consumedDuration; 500 } 501 502 TrackTime AudioDecoderInputTrack::AppendUnstretchedDataToSegment( 503 TrackTime aExpectedDuration, AudioSegment& aOutput) { 504 AssertOnGraphThread(); 505 MOZ_ASSERT(mPlaybackRate == 1.0f); 506 MOZ_ASSERT(aExpectedDuration >= 0); 507 MOZ_ASSERT(aOutput.IsEmpty()); 508 509 const TrackTime drained = 510 DrainStretchedDataIfNeeded(aExpectedDuration, aOutput); 511 const TrackTime available = 512 std::min(aExpectedDuration - drained, mBufferedData.GetDuration()); 513 aOutput.AppendSlice(mBufferedData, 0, available); 514 MOZ_ASSERT(aOutput.GetDuration() <= aExpectedDuration); 515 mBufferedData.RemoveLeading(available); 516 return available; 517 } 518 519 TrackTime AudioDecoderInputTrack::DrainStretchedDataIfNeeded( 520 TrackTime aExpectedDuration, AudioSegment& aOutput) { 521 AssertOnGraphThread(); 522 MOZ_ASSERT(mPlaybackRate == 1.0f); 523 MOZ_ASSERT(aExpectedDuration >= 0); 524 525 if (!mTimeStretcher) { 526 return 0; 527 } 528 auto numSamples = mTimeStretcher->numSamples().unverified_safe_because( 529 "Bogus numSamples can result in draining the stretched data (or not)."); 530 if (numSamples == 0) { 531 return 0; 532 } 533 return GetDataFromTimeStretcher(aExpectedDuration, aOutput); 534 } 535 536 TrackTime AudioDecoderInputTrack::GetDataFromTimeStretcher( 537 TrackTime aExpectedDuration, AudioSegment& aOutput) { 538 AssertOnGraphThread(); 539 MOZ_ASSERT(mTimeStretcher); 540 MOZ_ASSERT(aExpectedDuration >= 0); 541 542 auto numSamples = 543 mTimeStretcher->numSamples().unverified_safe_because("Used for logging"); 544 545 mTimeStretcher->numUnprocessedSamples().copy_and_verify([&](auto samples) { 546 if (HasSentAllData() && samples) { 547 mTimeStretcher->flush(); 548 LOG("Flush %u frames from the time stretcher", numSamples); 549 } 550 }); 551 552 // Flushing may have change the number of samples 553 numSamples = mTimeStretcher->numSamples().unverified_safe_because( 554 "Used to decide to flush (or not), which is checked."); 555 556 const TrackTime available = 557 std::min((TrackTime)numSamples, aExpectedDuration); 558 if (available == 0) { 559 // Either running out of stretched data, or the raw data we filled into 560 // the time stretcher were not enough for producing stretched data. 561 return 0; 562 } 563 564 // Retrieve interleaved data from the time stretcher. 565 const uint32_t channelCount = GetChannelCountForTimeStretcher(); 566 const uint32_t bufferLength = channelCount * available; 567 if (bufferLength > mInterleavedBuffer.Capacity()) { 568 mInterleavedBuffer.SetCapacity(bufferLength); 569 } 570 mInterleavedBuffer.SetLengthAndRetainStorage(bufferLength); 571 mTimeStretcher->receiveSamples(mInterleavedBuffer.Elements(), available); 572 573 // Perform a transformation from interleaved to planar. 574 CheckedInt<size_t> bufferSize(sizeof(AudioDataValue)); 575 bufferSize *= bufferLength; 576 RefPtr<SharedBuffer> buffer = SharedBuffer::Create(bufferSize); 577 AudioDataValue* bufferData = static_cast<AudioDataValue*>(buffer->Data()); 578 AutoTArray<AudioDataValue*, 2> planarBuffer; 579 planarBuffer.SetLength(channelCount); 580 for (size_t idx = 0; idx < channelCount; idx++) { 581 planarBuffer[idx] = bufferData + idx * available; 582 } 583 DeinterleaveAndConvertBuffer(mInterleavedBuffer.Elements(), available, 584 channelCount, planarBuffer.Elements()); 585 AutoTArray<const AudioDataValue*, 2> outputChannels; 586 outputChannels.AppendElements(planarBuffer); 587 aOutput.AppendFrames(buffer.forget(), outputChannels, 588 static_cast<int32_t>(available), 589 mBufferedData.GetOldestPrinciple()); 590 return available; 591 } 592 593 void AudioDecoderInputTrack::NotifyInTheEndOfProcessInput( 594 TrackTime aFillDuration) { 595 AssertOnGraphThread(); 596 mWrittenFrames += aFillDuration; 597 LOG("Notify, fill=%" PRId64 ", total written=%" PRId64 ", ended=%d", 598 aFillDuration, mWrittenFrames, Ended()); 599 if (aFillDuration > 0) { 600 mOnOutput.Notify(mWrittenFrames); 601 } 602 if (Ended()) { 603 mOnEnd.Notify(); 604 } 605 } 606 607 bool AudioDecoderInputTrack::HasSentAllData() const { 608 AssertOnGraphThread(); 609 return mReceivedEOS && mSPSCQueue.AvailableRead() == 0 && 610 mBufferedData.IsEmpty(); 611 } 612 613 uint32_t AudioDecoderInputTrack::NumberOfChannels() const { 614 AssertOnGraphThread(); 615 const uint32_t maxChannelCount = GetData<AudioSegment>()->MaxChannelCount(); 616 return maxChannelCount ? maxChannelCount : mInitialInputChannels; 617 } 618 619 void AudioDecoderInputTrack::EnsureTimeStretcher() { 620 AssertOnGraphThread(); 621 if (!mTimeStretcher) { 622 mTimeStretcher = new RLBoxSoundTouch(); 623 MOZ_RELEASE_ASSERT(mTimeStretcher); 624 MOZ_RELEASE_ASSERT(mTimeStretcher->Init()); 625 626 mTimeStretcher->setSampleRate(Graph()->GraphRate()); 627 mTimeStretcher->setChannels(GetChannelCountForTimeStretcher()); 628 mTimeStretcher->setPitch(1.0); 629 630 // SoundTouch v2.1.2 uses automatic time-stretch settings with the following 631 // values: 632 // Tempo 0.5: 90ms sequence, 20ms seekwindow, 8ms overlap 633 // Tempo 2.0: 40ms sequence, 15ms seekwindow, 8ms overlap 634 // We are going to use a smaller 10ms sequence size to improve speech 635 // clarity, giving more resolution at high tempo and less reverb at low 636 // tempo. Maintain 15ms seekwindow and 8ms overlap for smoothness. 637 mTimeStretcher->setSetting( 638 SETTING_SEQUENCE_MS, 639 StaticPrefs::media_audio_playbackrate_soundtouch_sequence_ms()); 640 mTimeStretcher->setSetting( 641 SETTING_SEEKWINDOW_MS, 642 StaticPrefs::media_audio_playbackrate_soundtouch_seekwindow_ms()); 643 mTimeStretcher->setSetting( 644 SETTING_OVERLAP_MS, 645 StaticPrefs::media_audio_playbackrate_soundtouch_overlap_ms()); 646 SetTempoAndRateForTimeStretcher(); 647 LOG("Create TimeStretcher (channel=%d, playbackRate=%f, preservePitch=%d)", 648 GetChannelCountForTimeStretcher(), mPlaybackRate, mPreservesPitch); 649 } 650 } 651 652 void AudioDecoderInputTrack::SetTempoAndRateForTimeStretcher() { 653 AssertOnGraphThread(); 654 if (!mTimeStretcher) { 655 return; 656 } 657 if (mPreservesPitch) { 658 mTimeStretcher->setTempo(mPlaybackRate); 659 mTimeStretcher->setRate(1.0f); 660 } else { 661 mTimeStretcher->setTempo(1.0f); 662 mTimeStretcher->setRate(mPlaybackRate); 663 } 664 } 665 666 uint32_t AudioDecoderInputTrack::GetChannelCountForTimeStretcher() const { 667 // The time stretcher MUST be initialized with a fixed channel count, but the 668 // channel count in audio chunks might vary. Therefore, we always use the 669 // initial input channel count to initialize the time stretcher and perform a 670 // real-time down-mix/up-mix for audio chunks which have different channel 671 // count than the initial input channel count. 672 return mInitialInputChannels; 673 } 674 675 #undef LOG 676 } // namespace mozilla