TrackEncoder.h (15231B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 * You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 #ifndef TrackEncoder_h_ 7 #define TrackEncoder_h_ 8 9 #include "AudioSegment.h" 10 #include "EncodedFrame.h" 11 #include "MediaQueue.h" 12 #include "MediaTrackGraph.h" 13 #include "TrackMetadataBase.h" 14 #include "VideoSegment.h" 15 16 namespace mozilla { 17 18 class AbstractThread; 19 class DriftCompensator; 20 class TrackEncoder; 21 22 class TrackEncoderListener { 23 public: 24 NS_INLINE_DECL_THREADSAFE_REFCOUNTING(TrackEncoderListener) 25 26 /** 27 * Called when the TrackEncoder has received its first real data. 28 */ 29 virtual void Started(TrackEncoder* aEncoder) = 0; 30 31 /** 32 * Called when the TrackEncoder's underlying encoder has been successfully 33 * initialized and there's non-null data ready to be encoded. 34 */ 35 virtual void Initialized(TrackEncoder* aEncoder) = 0; 36 37 /** 38 * Called after the TrackEncoder hit an unexpected error, causing it to 39 * abort operation. 40 */ 41 virtual void Error(TrackEncoder* aEncoder) = 0; 42 43 protected: 44 virtual ~TrackEncoderListener() = default; 45 }; 46 47 /** 48 * Base class of AudioTrackEncoder and VideoTrackEncoder. Lifetime managed by 49 * MediaEncoder. All methods are to be called only on the worker thread. 50 * 51 * The control APIs are all called by MediaEncoder on its dedicated thread. Data 52 * is encoded as soon as it has been appended (and time has advanced past its 53 * end in case of video) and pushed to mEncodedDataQueue. 54 */ 55 class TrackEncoder { 56 public: 57 TrackEncoder(TrackRate aTrackRate, 58 MediaQueue<EncodedFrame>& aEncodedDataQueue); 59 60 /** 61 * Called by MediaEncoder to cancel the encoding. 62 */ 63 virtual void Cancel() = 0; 64 65 /** 66 * Notifies us that we have reached the end of the stream and no more data 67 * will be appended. 68 */ 69 virtual void NotifyEndOfStream() = 0; 70 71 /** 72 * Creates and sets up meta data for a specific codec, called on the worker 73 * thread. 74 */ 75 virtual already_AddRefed<TrackMetadataBase> GetMetadata() = 0; 76 77 /** 78 * MediaQueue containing encoded data, that is pushed as soon as it's ready. 79 */ 80 MediaQueue<EncodedFrame>& EncodedDataQueue() { return mEncodedDataQueue; } 81 82 /** 83 * Returns true once this TrackEncoder is initialized. 84 */ 85 bool IsInitialized(); 86 87 /** 88 * Returns true once this TrackEncoder has received some data. 89 */ 90 bool IsStarted(); 91 92 /** 93 * True if the track encoder has encoded all source segments coming from 94 * MediaTrackGraph. Call on the worker thread. 95 */ 96 bool IsEncodingComplete() const; 97 98 /** 99 * Registers a listener to events from this TrackEncoder. 100 * We hold a strong reference to the listener. 101 */ 102 void RegisterListener(TrackEncoderListener* aListener); 103 104 /** 105 * Unregisters a listener from events from this TrackEncoder. 106 * The listener will stop receiving events synchronously. 107 */ 108 bool UnregisterListener(TrackEncoderListener* aListener); 109 110 virtual void SetBitrate(const uint32_t aBitrate) = 0; 111 112 /** 113 * It's optional to set the worker thread, but if you do we'll assert that 114 * we are in the worker thread in every method that gets called. 115 */ 116 void SetWorkerThread(AbstractThread* aWorkerThread); 117 118 /** 119 * Measure size of internal buffers. 120 */ 121 virtual size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) = 0; 122 123 protected: 124 virtual ~TrackEncoder() { MOZ_ASSERT(mListeners.IsEmpty()); } 125 126 /** 127 * If this TrackEncoder was not already initialized, it is set to initialized 128 * and listeners are notified. 129 */ 130 void SetInitialized(); 131 132 /** 133 * If this TrackEncoder was not already marked started, its started state is 134 * set and listeners are notified. 135 */ 136 void SetStarted(); 137 138 /** 139 * Called after an error. Cancels the encoding and notifies listeners. 140 */ 141 void OnError(); 142 143 /** 144 * True if the track encoder has been initialized successfully. 145 */ 146 bool mInitialized; 147 148 /** 149 * True if the track encoder has received data. 150 */ 151 bool mStarted; 152 153 /** 154 * True once all data until the end of the input track has been received. 155 */ 156 bool mEndOfStream; 157 158 /** 159 * True once this encoding has been cancelled. 160 */ 161 bool mCanceled; 162 163 // How many times we have tried to initialize the encoder. 164 uint32_t mInitCounter; 165 166 /** 167 * True if this TrackEncoder is currently suspended. 168 */ 169 bool mSuspended; 170 171 /** 172 * The track rate of source media. 173 */ 174 const TrackRate mTrackRate; 175 176 /** 177 * If set we assert that all methods are called on this thread. 178 */ 179 RefPtr<AbstractThread> mWorkerThread; 180 181 /** 182 * MediaQueue where encoded data ends up. Note that metadata goes out of band. 183 */ 184 MediaQueue<EncodedFrame>& mEncodedDataQueue; 185 186 nsTArray<RefPtr<TrackEncoderListener>> mListeners; 187 }; 188 189 class AudioTrackEncoder : public TrackEncoder { 190 public: 191 AudioTrackEncoder(TrackRate aTrackRate, 192 MediaQueue<EncodedFrame>& aEncodedDataQueue) 193 : TrackEncoder(aTrackRate, aEncodedDataQueue), 194 mChannels(0), 195 mNotInitDuration(0), 196 mAudioBitrate(0) {} 197 198 /** 199 * Suspends encoding from now, i.e., all future audio data received through 200 * AppendAudioSegment() until the next Resume() will be dropped. 201 */ 202 void Suspend(); 203 204 /** 205 * Resumes encoding starting now, i.e., data from the next 206 * AppendAudioSegment() will get encoded. 207 */ 208 void Resume(); 209 210 /** 211 * Appends and consumes track data from aSegment. 212 */ 213 void AppendAudioSegment(AudioSegment&& aSegment); 214 215 template <typename T> 216 static void InterleaveTrackData(nsTArray<const T*>& aInput, int32_t aDuration, 217 uint32_t aOutputChannels, 218 AudioDataValue* aOutput, float aVolume) { 219 if (aInput.Length() < aOutputChannels) { 220 // Up-mix. This might make the mChannelData have more than aChannels. 221 AudioChannelsUpMix(&aInput, aOutputChannels, 222 SilentChannel::ZeroChannel<T>()); 223 } 224 225 if (aInput.Length() > aOutputChannels) { 226 DownmixAndInterleave<T>(aInput, aDuration, aVolume, aOutputChannels, 227 aOutput); 228 } else { 229 InterleaveAndConvertBuffer(aInput.Elements(), aDuration, aVolume, 230 aOutputChannels, aOutput); 231 } 232 } 233 234 /** 235 * Interleaves the track data and stores the result into aOutput. Might need 236 * to up-mix or down-mix the channel data if the channels number of this chunk 237 * is different from aOutputChannels. The channel data from aChunk might be 238 * modified by up-mixing. 239 */ 240 static void InterleaveTrackData(AudioChunk& aChunk, int32_t aDuration, 241 uint32_t aOutputChannels, 242 AudioDataValue* aOutput); 243 244 /** 245 * De-interleaves the aInput data and stores the result into aOutput. 246 * No up-mix or down-mix operations inside. 247 */ 248 static void DeInterleaveTrackData(AudioDataValue* aInput, int32_t aDuration, 249 int32_t aChannels, AudioDataValue* aOutput); 250 251 /** 252 * Measure size of internal buffers. 253 */ 254 size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) override; 255 256 void SetBitrate(const uint32_t aBitrate) override { 257 mAudioBitrate = aBitrate; 258 } 259 260 /** 261 * Tries to initiate the AudioEncoder based on data in aSegment. 262 * This can be re-called often, as it will exit early should we already be 263 * initiated. mInitiated will only be set if there was enough data in 264 * aSegment to infer metadata. If mInitiated gets set, listeners are notified. 265 * 266 * Not having enough data in aSegment to initiate the encoder for an 267 * accumulated aDuration of one second will make us initiate with a default 268 * number of channels. 269 * 270 * If we attempt to initiate the underlying encoder but fail, we Cancel() and 271 * notify listeners. 272 */ 273 void TryInit(const AudioSegment& aSegment, TrackTime aDuration); 274 275 void Cancel() override; 276 277 /** 278 * Dispatched from MediaTrackGraph when we have finished feeding data to 279 * mOutgoingBuffer. 280 */ 281 void NotifyEndOfStream() override; 282 283 protected: 284 /** 285 * Number of samples per channel in a pcm buffer. This is also the value of 286 * frame size required by audio encoder, and listeners will be notified when 287 * at least this much data has been added to mOutgoingBuffer. 288 */ 289 virtual int NumInputFramesPerPacket() const { return 0; } 290 291 /** 292 * Initializes the audio encoder. The call of this method is delayed until we 293 * have received the first valid track from MediaTrackGraph. 294 */ 295 virtual nsresult Init(int aChannels) = 0; 296 297 /** 298 * Encodes buffered data and pushes it to mEncodedDataQueue. 299 */ 300 virtual nsresult Encode(AudioSegment* aSegment) = 0; 301 302 /** 303 * The number of channels are used for processing PCM data in the audio 304 * encoder. This value comes from the first valid audio chunk. If encoder 305 * can't support the channels in the chunk, downmix PCM stream can be 306 * performed. This value also be used to initialize the audio encoder. 307 */ 308 int mChannels; 309 310 /** 311 * A segment queue of outgoing audio track data to the encoder. 312 * The contents of mOutgoingBuffer will always be what has been appended on 313 * the encoder thread but not yet consumed by the encoder sub class. 314 */ 315 AudioSegment mOutgoingBuffer; 316 317 TrackTime mNotInitDuration; 318 319 uint32_t mAudioBitrate; 320 }; 321 322 enum class FrameDroppingMode { 323 ALLOW, // Allowed to drop frames to keep up under load 324 DISALLOW, // Must not drop any frames, even if it means we will OOM 325 }; 326 327 class VideoTrackEncoder : public TrackEncoder { 328 public: 329 VideoTrackEncoder(RefPtr<DriftCompensator> aDriftCompensator, 330 TrackRate aTrackRate, 331 MediaQueue<EncodedFrame>& aEncodedDataQueue, 332 FrameDroppingMode aFrameDroppingMode); 333 334 /** 335 * Suspends encoding from aTime, i.e., all video frame with a timestamp 336 * between aTime and the timestamp of the next Resume() will be dropped. 337 */ 338 void Suspend(const TimeStamp& aTime); 339 340 /** 341 * Resumes encoding starting at aTime. 342 */ 343 void Resume(const TimeStamp& aTime); 344 345 /** 346 * Makes the video black from aTime. 347 */ 348 void Disable(const TimeStamp& aTime); 349 350 /** 351 * Makes the video non-black from aTime. 352 * 353 * NB that it could still be forced black for other reasons, like principals. 354 */ 355 void Enable(const TimeStamp& aTime); 356 357 /** 358 * Appends source video frames to mIncomingBuffer. We only append the source 359 * chunk if the image is different from mLastChunk's image. Called on the 360 * MediaTrackGraph thread. 361 */ 362 void AppendVideoSegment(VideoSegment&& aSegment); 363 364 /** 365 * Measure size of internal buffers. 366 */ 367 size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) override; 368 369 void SetBitrate(const uint32_t aBitrate) override { 370 mVideoBitrate = aBitrate; 371 } 372 373 /** 374 * Tries to initiate the VideoEncoder based on data in aSegment. 375 * This can be re-called often, as it will exit early should we already be 376 * initiated. mInitiated will only be set if there was enough data in 377 * aSegment to infer metadata. If mInitiated gets set, listeners are notified. 378 * The amount of chunks needed can be controlled by 379 * aFrameRateDetectionMinChunks which denotes the minimum number of chunks 380 * needed to infer the framerate. 381 * 382 * Failing to initiate the encoder for an accumulated aDuration of 30 seconds 383 * is seen as an error and will cancel the current encoding. 384 */ 385 void Init(const VideoSegment& aSegment, const TimeStamp& aTime, 386 size_t aFrameRateDetectionMinChunks); 387 388 TrackTime SecondsToMediaTime(double aS) const { 389 NS_ASSERTION(0 <= aS && aS <= TRACK_TICKS_MAX / TRACK_RATE_MAX, 390 "Bad seconds"); 391 return mTrackRate * aS; 392 } 393 394 /** 395 * MediaTrackGraph notifies us about the time of the track's start. 396 * This gets called on the MediaEncoder thread after a dispatch. 397 */ 398 void SetStartOffset(const TimeStamp& aStartOffset); 399 400 void Cancel() override; 401 402 /** 403 * Notifies us that we have reached the end of the stream and no more data 404 * will be appended to mIncomingBuffer. 405 */ 406 void NotifyEndOfStream() override; 407 408 /** 409 * Dispatched from MediaTrackGraph when it has run an iteration so we can 410 * hand more data to the encoder. 411 */ 412 void AdvanceCurrentTime(const TimeStamp& aTime); 413 414 protected: 415 /** 416 * Initialize the video encoder. In order to collect the value of width and 417 * height of source frames, this initialization is delayed until we have 418 * received the first valid video frame from MediaTrackGraph. 419 * Listeners will be notified after it has been successfully initialized. 420 */ 421 virtual nsresult Init(int32_t aWidth, int32_t aHeight, int32_t aDisplayWidth, 422 int32_t aDisplayHeight, float aEstimatedFrameRate) = 0; 423 424 /** 425 * Encodes data in the outgoing buffer and pushes it to mEncodedDataQueue. 426 */ 427 virtual nsresult Encode(VideoSegment* aSegment) = 0; 428 429 /** 430 * Drift compensator for re-clocking incoming video frame wall-clock 431 * timestamps to audio time. 432 */ 433 const RefPtr<DriftCompensator> mDriftCompensator; 434 435 /** 436 * The last unique frame and duration so far handled by 437 * NotifyAdvanceCurrentTime. When a new frame is detected, mLastChunk is added 438 * to mOutgoingBuffer. 439 */ 440 VideoChunk mLastChunk; 441 442 /** 443 * A segment queue of incoming video track data, from listeners. 444 * The duration of mIncomingBuffer is irrelevant as we only look at TimeStamps 445 * of frames. Consumed data is replaced by null data. 446 */ 447 VideoSegment mIncomingBuffer; 448 449 /** 450 * A segment queue of outgoing video track data to the encoder. 451 * The contents of mOutgoingBuffer will always be what has been consumed from 452 * mIncomingBuffer (up to mCurrentTime) but not yet consumed by the encoder 453 * sub class. There won't be any null data at the beginning of mOutgoingBuffer 454 * unless explicitly pushed by the producer. 455 */ 456 VideoSegment mOutgoingBuffer; 457 458 /** 459 * The number of mTrackRate ticks we have passed to mOutgoingBuffer. 460 */ 461 TrackTime mEncodedTicks; 462 463 /** 464 * The time up to which we have forwarded data from mIncomingBuffer to 465 * mOutgoingBuffer. 466 */ 467 TimeStamp mCurrentTime; 468 469 /** 470 * The time the video track started, so the start of the video track can be 471 * synced to the start of the audio track. 472 * 473 * Note that this time will progress during suspension, to make sure the 474 * incoming frames stay in sync with the output. 475 */ 476 TimeStamp mStartTime; 477 478 /** 479 * The time Suspend was called on the MediaRecorder, so we can calculate the 480 * duration on the next Resume(). 481 */ 482 TimeStamp mSuspendTime; 483 484 uint32_t mVideoBitrate; 485 486 /** 487 * ALLOW to drop frames under load. 488 * DISALLOW to encode all frames, mainly for testing. 489 */ 490 FrameDroppingMode mFrameDroppingMode; 491 492 /** 493 * True if the video MediaTrackTrack this VideoTrackEncoder is attached to is 494 * currently enabled. While false, we encode all frames as black. 495 */ 496 bool mEnabled; 497 }; 498 499 } // namespace mozilla 500 501 #endif