videoprocessor.cc (30735B)
/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/video_coding/codecs/test/videoprocessor.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <limits>
#include <memory>
#include <utility>
#include <vector>

#include "api/environment/environment.h"
#include "api/scoped_refptr.h"
#include "api/sequence_checker.h"
#include "api/task_queue/task_queue_base.h"
#include "api/test/videocodec_test_fixture.h"
#include "api/test/videocodec_test_stats.h"
#include "api/video/builtin_video_bitrate_allocator_factory.h"
#include "api/video/encoded_image.h"
#include "api/video/i420_buffer.h"
#include "api/video/resolution.h"
#include "api/video/video_bitrate_allocator.h"
#include "api/video/video_bitrate_allocator_factory.h"
#include "api/video/video_codec_type.h"
#include "api/video/video_frame.h"
#include "api/video/video_frame_buffer.h"
#include "api/video/video_frame_type.h"
#include "api/video/video_rotation.h"
#include "api/video_codecs/video_codec.h"
#include "api/video_codecs/video_decoder.h"
#include "api/video_codecs/video_encoder.h"
#include "common_video/h264/h264_common.h"
#include "common_video/libyuv/include/webrtc_libyuv.h"
#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
#include "modules/video_coding/codecs/interface/common_constants.h"
#include "modules/video_coding/codecs/test/videocodec_test_stats_impl.h"
#include "modules/video_coding/include/video_codec_interface.h"
#include "modules/video_coding/include/video_error_codes.h"
#include "rtc_base/checks.h"
#include "rtc_base/time_utils.h"
#include "test/gtest.h"
#include "test/testsupport/frame_reader.h"
#include "test/testsupport/frame_writer.h"
#include "third_party/libyuv/include/libyuv/compare.h"
#include "third_party/libyuv/include/libyuv/scale.h"

namespace webrtc {
namespace test {

namespace {
// Conversion factor from milliseconds to the RTP timestamp clock
// (kVideoPayloadTypeFrequency ticks per second).
const int kMsToRtpTimestamp = kVideoPayloadTypeFrequency / 1000;
// Upper bound on input frames kept around for quality comparison; protects
// against unbounded growth when a layer drops many consecutive frames.
const int kMaxBufferedInputFrames = 20;

const VideoEncoder::Capabilities kCapabilities(false);

// Returns the payload size of the largest NAL unit in `encoded_frame`.
// Returns 0 for any codec other than H.264.
size_t GetMaxNaluSizeBytes(const EncodedImage& encoded_frame,
                           const VideoCodecTestFixture::Config& config) {
  if (config.codec_settings.codecType != kVideoCodecH264)
    return 0;

  std::vector<H264::NaluIndex> nalu_indices =
      H264::FindNaluIndices(encoded_frame);

  RTC_CHECK(!nalu_indices.empty());

  size_t max_size = 0;
  for (const H264::NaluIndex& index : nalu_indices)
    max_size = std::max(max_size, index.payload_size);

  return max_size;
}

// Extracts the temporal layer index from codec-specific info (VP8/VP9 only;
// other codecs report layer 0). kNoTemporalIdx is normalized to 0.
size_t GetTemporalLayerIndex(const CodecSpecificInfo& codec_specific) {
  size_t temporal_idx = 0;
  if (codec_specific.codecType == kVideoCodecVP8) {
    temporal_idx = codec_specific.codecSpecific.VP8.temporalIdx;
  } else if (codec_specific.codecType == kVideoCodecVP9) {
    temporal_idx = codec_specific.codecSpecific.VP9.temporal_idx;
  }
  if (temporal_idx == kNoTemporalIdx) {
    temporal_idx = 0;
  }
  return temporal_idx;
}

// Returns (stop_ns - start_ns) converted to microseconds, DCHECKing that the
// result fits in an int.
int GetElapsedTimeMicroseconds(int64_t start_ns, int64_t stop_ns) {
  int64_t diff_us = (stop_ns - start_ns) / kNumNanosecsPerMicrosec;
  RTC_DCHECK_GE(diff_us, std::numeric_limits<int>::min());
  RTC_DCHECK_LE(diff_us, std::numeric_limits<int>::max());
  return static_cast<int>(diff_us);
}

// Computes per-plane and combined PSNR (and optionally SSIM) of `dec_buffer`
// against `ref_buffer`, storing the results in `frame_stat`. If the
// resolutions differ, the reference is downscaled to the decoded resolution
// first (reference must be >= decoded in both dimensions).
void CalculateFrameQuality(const I420BufferInterface& ref_buffer,
                           const I420BufferInterface& dec_buffer,
                           VideoCodecTestStats::FrameStatistics* frame_stat,
                           bool calc_ssim) {
  if (ref_buffer.width() != dec_buffer.width() ||
      ref_buffer.height() != dec_buffer.height()) {
    RTC_CHECK_GE(ref_buffer.width(), dec_buffer.width());
    RTC_CHECK_GE(ref_buffer.height(), dec_buffer.height());
    // Downscale reference frame.
    scoped_refptr<I420Buffer> scaled_buffer =
        I420Buffer::Create(dec_buffer.width(), dec_buffer.height());
    I420Scale(ref_buffer.DataY(), ref_buffer.StrideY(), ref_buffer.DataU(),
              ref_buffer.StrideU(), ref_buffer.DataV(), ref_buffer.StrideV(),
              ref_buffer.width(), ref_buffer.height(),
              scaled_buffer->MutableDataY(), scaled_buffer->StrideY(),
              scaled_buffer->MutableDataU(), scaled_buffer->StrideU(),
              scaled_buffer->MutableDataV(), scaled_buffer->StrideV(),
              scaled_buffer->width(), scaled_buffer->height(),
              libyuv::kFilterBox);

    // Recurse with matching resolutions.
    CalculateFrameQuality(*scaled_buffer, dec_buffer, frame_stat, calc_ssim);
  } else {
    const uint64_t sse_y = libyuv::ComputeSumSquareErrorPlane(
        dec_buffer.DataY(), dec_buffer.StrideY(), ref_buffer.DataY(),
        ref_buffer.StrideY(), dec_buffer.width(), dec_buffer.height());

    const uint64_t sse_u = libyuv::ComputeSumSquareErrorPlane(
        dec_buffer.DataU(), dec_buffer.StrideU(), ref_buffer.DataU(),
        ref_buffer.StrideU(), dec_buffer.width() / 2, dec_buffer.height() / 2);

    const uint64_t sse_v = libyuv::ComputeSumSquareErrorPlane(
        dec_buffer.DataV(), dec_buffer.StrideV(), ref_buffer.DataV(),
        ref_buffer.StrideV(), dec_buffer.width() / 2, dec_buffer.height() / 2);

    const size_t num_y_samples = dec_buffer.width() * dec_buffer.height();
    const size_t num_u_samples =
        dec_buffer.width() / 2 * dec_buffer.height() / 2;

    frame_stat->psnr_y = libyuv::SumSquareErrorToPsnr(sse_y, num_y_samples);
    frame_stat->psnr_u = libyuv::SumSquareErrorToPsnr(sse_u, num_u_samples);
    // U and V planes have the same sample count in I420, so num_u_samples is
    // reused here.
    frame_stat->psnr_v = libyuv::SumSquareErrorToPsnr(sse_v, num_u_samples);
    frame_stat->psnr = libyuv::SumSquareErrorToPsnr(
        sse_y + sse_u + sse_v, num_y_samples + 2 * num_u_samples);

    if (calc_ssim) {
      frame_stat->ssim = I420SSIM(ref_buffer, dec_buffer);
    }
  }
}

}  // namespace

// Wires up the encoder, per-layer decoders and their completion callbacks.
// Must be constructed (and used) on a task queue; all per-frame state is
// guarded by `sequence_checker_`.
VideoProcessor::VideoProcessor(const Environment& env,
                               VideoEncoder* encoder,
                               VideoDecoderList* decoders,
                               FrameReader* input_frame_reader,
                               const VideoCodecTestFixture::Config& config,
                               VideoCodecTestStatsImpl* stats,
                               IvfFileWriterMap* encoded_frame_writers,
                               FrameWriterList* decoded_frame_writers)
    : config_(config),
      num_simulcast_or_spatial_layers_(
          std::max(config_.NumberOfSimulcastStreams(),
                   config_.NumberOfSpatialLayers())),
      analyze_frame_quality_(!config_.measure_cpu),
      stats_(stats),
      encoder_(encoder),
      decoders_(decoders),
      bitrate_allocator_(CreateBuiltinVideoBitrateAllocatorFactory()->Create(
          env,
          config_.codec_settings)),
      encode_callback_(this),
      input_frame_reader_(input_frame_reader),
      merged_encoded_frames_(num_simulcast_or_spatial_layers_),
      encoded_frame_writers_(encoded_frame_writers),
      decoded_frame_writers_(decoded_frame_writers),
      last_inputed_frame_num_(0),
      last_inputed_timestamp_(0),
      first_encoded_frame_(num_simulcast_or_spatial_layers_, true),
      last_encoded_frame_num_(num_simulcast_or_spatial_layers_),
      first_decoded_frame_(num_simulcast_or_spatial_layers_, true),
      last_decoded_frame_num_(num_simulcast_or_spatial_layers_),
      last_decoded_frame_buffer_(num_simulcast_or_spatial_layers_),
      post_encode_time_ns_(0),
      is_finalized_(false) {
  // Sanity checks.
  RTC_CHECK(TaskQueueBase::Current())
      << "VideoProcessor must be run on a task queue.";
  RTC_CHECK(stats_);
  RTC_CHECK(encoder_);
  RTC_CHECK(decoders_);
  RTC_CHECK_EQ(decoders_->size(), num_simulcast_or_spatial_layers_);
  RTC_CHECK(input_frame_reader_);
  RTC_CHECK(encoded_frame_writers_);
  RTC_CHECK(!decoded_frame_writers ||
            decoded_frame_writers->size() == num_simulcast_or_spatial_layers_);

  // Setup required callbacks for the encoder and decoder and initialize them.
  RTC_CHECK_EQ(encoder_->RegisterEncodeCompleteCallback(&encode_callback_),
               WEBRTC_VIDEO_CODEC_OK);

  // Initialize codecs so that they are ready to receive frames.
  RTC_CHECK_EQ(encoder_->InitEncode(
                   &config_.codec_settings,
                   VideoEncoder::Settings(
                       kCapabilities, static_cast<int>(config_.NumberOfCores()),
                       config_.max_payload_size_bytes)),
               WEBRTC_VIDEO_CODEC_OK);

  // One decoder (and decode callback) per simulcast/spatial layer.
  for (size_t i = 0; i < num_simulcast_or_spatial_layers_; ++i) {
    decode_callback_.push_back(
        std::make_unique<VideoProcessorDecodeCompleteCallback>(this, i));
    VideoDecoder::Settings decoder_settings;
    decoder_settings.set_max_render_resolution(
        {config_.codec_settings.width, config_.codec_settings.height});
    decoder_settings.set_codec_type(config_.codec_settings.codecType);
    decoder_settings.set_number_of_cores(config_.NumberOfCores());
    RTC_CHECK(decoders_->at(i)->Configure(decoder_settings));
    RTC_CHECK_EQ(decoders_->at(i)->RegisterDecodeCompleteCallback(
                     decode_callback_.at(i).get()),
                 WEBRTC_VIDEO_CODEC_OK);
  }
}

VideoProcessor::~VideoProcessor() {
  RTC_DCHECK_RUN_ON(&sequence_checker_);

  // Flush stats for trailing dropped frames if the caller did not already.
  if (!is_finalized_) {
    Finalize();
  }

  // Explicitly reset codecs, in case they don't do that themselves when they
  // go out of scope.
  RTC_CHECK_EQ(encoder_->Release(), WEBRTC_VIDEO_CODEC_OK);
  encoder_->RegisterEncodeCompleteCallback(nullptr);
  for (auto& decoder : *decoders_) {
    RTC_CHECK_EQ(decoder->Release(), WEBRTC_VIDEO_CODEC_OK);
    decoder->RegisterDecodeCompleteCallback(nullptr);
  }

  // Sanity check.
  RTC_CHECK_LE(input_frames_.size(), kMaxBufferedInputFrames);
}

// Pulls one frame from the input reader, stores it as a quality reference
// (optionally scaled), creates per-layer frame statistics, and feeds the
// frame to the encoder. Encoded/decoded results arrive via callbacks.
void VideoProcessor::ProcessFrame() {
  RTC_DCHECK_RUN_ON(&sequence_checker_);
  RTC_DCHECK(!is_finalized_);

  // Pick the rate profile in effect for this frame number (SetRates keys
  // profiles by the first frame they apply to).
  RTC_DCHECK_GT(target_rates_.size(), 0u);
  RTC_DCHECK_EQ(target_rates_.begin()->first, 0u);
  RateProfile target_rate =
      std::prev(target_rates_.upper_bound(last_inputed_frame_num_))->second;

  const size_t frame_number = last_inputed_frame_num_++;

  // Get input frame and store for future quality calculation.
  Resolution resolution = Resolution({.width = config_.codec_settings.width,
                                      .height = config_.codec_settings.height});
  FrameReader::Ratio framerate_scale = FrameReader::Ratio(
      {.num = config_.clip_fps.value_or(config_.codec_settings.maxFramerate),
       .den = static_cast<int>(config_.codec_settings.maxFramerate)});
  scoped_refptr<I420BufferInterface> buffer = input_frame_reader_->PullFrame(
      /*frame_num*/ nullptr, resolution, framerate_scale);

  RTC_CHECK(buffer) << "Tried to read too many frames from the file.";
  // Advance the RTP timestamp by one frame interval at the target input fps.
  const size_t timestamp =
      last_inputed_timestamp_ +
      static_cast<size_t>(kVideoPayloadTypeFrequency / target_rate.input_fps);
  VideoFrame input_frame =
      VideoFrame::Builder()
          .set_video_frame_buffer(buffer)
          .set_rtp_timestamp(static_cast<uint32_t>(timestamp))
          .set_timestamp_ms(static_cast<int64_t>(timestamp / kMsToRtpTimestamp))
          .set_rotation(kVideoRotation_0)
          .build();
  // Store input frame as a reference for quality calculations.
  if (config_.decode && !config_.measure_cpu) {
    // Bound the reference buffer; oldest entry is evicted first.
    if (input_frames_.size() == kMaxBufferedInputFrames) {
      input_frames_.erase(input_frames_.begin());
    }

    if (config_.reference_width != -1 && config_.reference_height != -1 &&
        (input_frame.width() != config_.reference_width ||
         input_frame.height() != config_.reference_height)) {
      scoped_refptr<I420Buffer> scaled_buffer = I420Buffer::Create(
          config_.codec_settings.width, config_.codec_settings.height);
      scaled_buffer->ScaleFrom(*input_frame.video_frame_buffer()->ToI420());

      VideoFrame scaled_reference_frame = input_frame;
      scaled_reference_frame.set_video_frame_buffer(scaled_buffer);
      input_frames_.emplace(frame_number, scaled_reference_frame);

      if (config_.reference_width == config_.codec_settings.width &&
          config_.reference_height == config_.codec_settings.height) {
        // Both encoding and comparison uses the same down-scale factor, reuse
        // it for encoder below.
        input_frame = scaled_reference_frame;
      }
    } else {
      input_frames_.emplace(frame_number, input_frame);
    }
  }
  last_inputed_timestamp_ = timestamp;

  post_encode_time_ns_ = 0;

  // Create frame statistics object for all simulcast/spatial layers.
  for (size_t i = 0; i < num_simulcast_or_spatial_layers_; ++i) {
    FrameStatistics frame_stat(frame_number, timestamp, i);
    stats_->AddFrame(frame_stat);
  }

  // For the highest measurement accuracy of the encode time, the start/stop
  // time recordings should wrap the Encode call as tightly as possible.
  const int64_t encode_start_ns = TimeNanos();
  for (size_t i = 0; i < num_simulcast_or_spatial_layers_; ++i) {
    FrameStatistics* frame_stat = stats_->GetFrame(frame_number, i);
    frame_stat->encode_start_ns = encode_start_ns;
  }

  // Scale down to the encode resolution if the input is larger.
  if (input_frame.width() != config_.codec_settings.width ||
      input_frame.height() != config_.codec_settings.height) {
    scoped_refptr<I420Buffer> scaled_buffer = I420Buffer::Create(
        config_.codec_settings.width, config_.codec_settings.height);
    scaled_buffer->ScaleFrom(*input_frame.video_frame_buffer()->ToI420());
    input_frame.set_video_frame_buffer(scaled_buffer);
  }

  // Encode. Force a key frame on the very first frame only.
  const std::vector<VideoFrameType> frame_types =
      (frame_number == 0)
          ? std::vector<VideoFrameType>(num_simulcast_or_spatial_layers_,
                                        VideoFrameType::kVideoFrameKey)
          : std::vector<VideoFrameType>(num_simulcast_or_spatial_layers_,
                                        VideoFrameType::kVideoFrameDelta);
  const int encode_return_code = encoder_->Encode(input_frame, &frame_types);
  for (size_t i = 0; i < num_simulcast_or_spatial_layers_; ++i) {
    FrameStatistics* frame_stat = stats_->GetFrame(frame_number, i);
    frame_stat->encode_return_code = encode_return_code;
  }
}

// Records a new rate profile taking effect at the next input frame, and
// pushes the corresponding bitrate allocation to the encoder.
void VideoProcessor::SetRates(size_t bitrate_kbps, double framerate_fps) {
  RTC_DCHECK_RUN_ON(&sequence_checker_);
  RTC_DCHECK(!is_finalized_);

  target_rates_[last_inputed_frame_num_] =
      RateProfile({.target_kbps = bitrate_kbps, .input_fps = framerate_fps});

  auto bitrate_allocation =
      bitrate_allocator_->Allocate(VideoBitrateAllocationParameters(
          static_cast<uint32_t>(bitrate_kbps * 1000), framerate_fps));
  encoder_->SetRates(
      VideoEncoder::RateControlParameters(bitrate_allocation, framerate_fps));
}

// Decode-complete callback; forwards to FrameDecoded on the processor's task
// queue, copying the frame if the callback fires on another thread.
int32_t VideoProcessor::VideoProcessorDecodeCompleteCallback::Decoded(
    VideoFrame& image) {
  // Post the callback to the right task queue, if needed.
  if (!task_queue_->IsCurrent()) {
    // There might be a limited amount of output buffers, make a copy to make
    // sure we don't block the decoder.
    VideoFrame copy = VideoFrame::Builder()
                          .set_video_frame_buffer(I420Buffer::Copy(
                              *image.video_frame_buffer()->ToI420()))
                          .set_rotation(image.rotation())
                          .set_timestamp_us(image.timestamp_us())
                          .set_id(image.id())
                          .build();
    copy.set_rtp_timestamp(image.rtp_timestamp());

    task_queue_->PostTask([this, copy]() {
      video_processor_->FrameDecoded(copy, simulcast_svc_idx_);
    });
    return 0;
  }
  video_processor_->FrameDecoded(image, simulcast_svc_idx_);
  return 0;
}

// Encode-complete handler: records per-layer encode statistics, optionally
// builds an SVC superframe, triggers decoding, and dumps the bitstream to the
// configured IVF writers.
void VideoProcessor::FrameEncoded(const EncodedImage& encoded_image,
                                  const CodecSpecificInfo& codec_specific) {
  RTC_DCHECK_RUN_ON(&sequence_checker_);

  // For the highest measurement accuracy of the encode time, the start/stop
  // time recordings should wrap the Encode call as tightly as possible.
  const int64_t encode_stop_ns = TimeNanos();

  const VideoCodecType codec_type = codec_specific.codecType;
  if (config_.encoded_frame_checker) {
    config_.encoded_frame_checker->CheckEncodedFrame(codec_type, encoded_image);
  }

  // Layer metadata.
  // We could either have simulcast layers or spatial layers.
  // TODO(https://crbug.com/webrtc/14891): If we want to support a mix of
  // simulcast and SVC we'll also need to consider the case where we have both
  // simulcast and spatial indices.
  size_t stream_idx = encoded_image.SpatialIndex().value_or(
      encoded_image.SimulcastIndex().value_or(0));
  size_t temporal_idx = GetTemporalLayerIndex(codec_specific);

  FrameStatistics* frame_stat =
      stats_->GetFrameWithTimestamp(encoded_image.RtpTimestamp(), stream_idx);
  const size_t frame_number = frame_stat->frame_number;

  // Ensure that the encode order is monotonically increasing, within this
  // simulcast/spatial layer.
  RTC_CHECK(first_encoded_frame_[stream_idx] ||
            last_encoded_frame_num_[stream_idx] < frame_number);

  // Ensure SVC spatial layers are delivered in ascending order.
  const size_t num_spatial_layers = config_.NumberOfSpatialLayers();
  if (!first_encoded_frame_[stream_idx] && num_spatial_layers > 1) {
    for (size_t i = 0; i < stream_idx; ++i) {
      RTC_CHECK_LE(last_encoded_frame_num_[i], frame_number);
    }
    for (size_t i = stream_idx + 1; i < num_simulcast_or_spatial_layers_; ++i) {
      RTC_CHECK_GT(frame_number, last_encoded_frame_num_[i]);
    }
  }
  first_encoded_frame_[stream_idx] = false;
  last_encoded_frame_num_[stream_idx] = frame_number;

  // Look up the rate profile that was in effect when this frame was input.
  RateProfile target_rate =
      std::prev(target_rates_.upper_bound(frame_number))->second;
  auto bitrate_allocation =
      bitrate_allocator_->Allocate(VideoBitrateAllocationParameters(
          static_cast<uint32_t>(target_rate.target_kbps * 1000),
          target_rate.input_fps));

  // Update frame statistics.
  frame_stat->encoding_successful = true;
  // Subtract time spent in previous layers' encode callbacks (see the
  // post_encode_time_ns_ accumulation at the end of this function).
  frame_stat->encode_time_us = GetElapsedTimeMicroseconds(
      frame_stat->encode_start_ns, encode_stop_ns - post_encode_time_ns_);
  frame_stat->target_bitrate_kbps =
      bitrate_allocation.GetTemporalLayerSum(stream_idx, temporal_idx) / 1000;
  frame_stat->target_framerate_fps = target_rate.input_fps;
  frame_stat->length_bytes = encoded_image.size();
  frame_stat->frame_type = encoded_image._frameType;
  frame_stat->temporal_idx = temporal_idx;
  frame_stat->max_nalu_size_bytes = GetMaxNaluSizeBytes(encoded_image, config_);
  frame_stat->qp = encoded_image.qp_;

  if (codec_type == kVideoCodecVP9) {
    const CodecSpecificInfoVP9& vp9_info = codec_specific.codecSpecific.VP9;
    frame_stat->inter_layer_predicted = vp9_info.inter_layer_predicted;
    frame_stat->non_ref_for_inter_layer_pred =
        vp9_info.non_ref_for_inter_layer_pred;
  } else {
    frame_stat->inter_layer_predicted = false;
    frame_stat->non_ref_for_inter_layer_pred = true;
  }

  // For SVC, decoding and dumping operate on the merged superframe rather
  // than the single-layer image.
  const EncodedImage* encoded_image_for_decode = &encoded_image;
  if (config_.decode || !encoded_frame_writers_->empty()) {
    if (num_spatial_layers > 1) {
      encoded_image_for_decode = BuildAndStoreSuperframe(
          encoded_image, codec_type, frame_number, stream_idx,
          frame_stat->inter_layer_predicted);
    }
  }

  if (config_.decode) {
    DecodeFrame(*encoded_image_for_decode, stream_idx);

    if (codec_specific.end_of_picture && num_spatial_layers > 1) {
      // If inter-layer prediction is enabled and upper layer was dropped then
      // base layer should be passed to upper layer decoder. Otherwise decoder
      // won't be able to decode next superframe.
      const EncodedImage* base_image = nullptr;
      const FrameStatistics* base_stat = nullptr;
      for (size_t i = 0; i < num_spatial_layers; ++i) {
        const bool layer_dropped = (first_decoded_frame_[i] ||
                                    last_decoded_frame_num_[i] < frame_number);

        // Ensure current layer was decoded.
        RTC_CHECK(layer_dropped == false || i != stream_idx);

        if (!layer_dropped) {
          base_image = &merged_encoded_frames_[i];
          base_stat =
              stats_->GetFrameWithTimestamp(encoded_image.RtpTimestamp(), i);
        } else if (base_image && !base_stat->non_ref_for_inter_layer_pred) {
          DecodeFrame(*base_image, i);
        }
      }
    }
  } else {
    frame_stat->decode_return_code = WEBRTC_VIDEO_CODEC_NO_OUTPUT;
  }

  // Since frames in higher TLs typically depend on frames in lower TLs,
  // write out frames in lower TLs to bitstream dumps of higher TLs.
  for (size_t write_temporal_idx = temporal_idx;
       write_temporal_idx < config_.NumberOfTemporalLayers();
       ++write_temporal_idx) {
    const VideoProcessor::LayerKey layer_key(stream_idx, write_temporal_idx);
    auto it = encoded_frame_writers_->find(layer_key);
    if (it != encoded_frame_writers_->cend()) {
      RTC_CHECK(it->second->WriteFrame(*encoded_image_for_decode,
                                       config_.codec_settings.codecType));
    }
  }

  if (!config_.encode_in_real_time) {
    // To get pure encode time for next layers, measure time spent in encode
    // callback and subtract it from encode time of next layers.
    post_encode_time_ns_ += TimeNanos() - encode_stop_ns;
  }
}

// Compares `decoded_frame` against the buffered input frame with the same
// frame number and stores the quality metrics in `frame_stat`.
void VideoProcessor::CalcFrameQuality(const I420BufferInterface& decoded_frame,
                                      FrameStatistics* frame_stat) {
  RTC_DCHECK_RUN_ON(&sequence_checker_);

  const auto reference_frame = input_frames_.find(frame_stat->frame_number);
  RTC_CHECK(reference_frame != input_frames_.cend())
      << "The codecs are either buffering too much, dropping too much, or "
         "being too slow relative to the input frame rate.";

  // SSIM calculation is not optimized. Skip it in real-time mode.
  const bool calc_ssim = !config_.encode_in_real_time;
  CalculateFrameQuality(*reference_frame->second.video_frame_buffer()->ToI420(),
                        decoded_frame, frame_stat, calc_ssim);

  frame_stat->quality_analysis_successful = true;
}

// Writes `decoded_frame` to `frame_writer`, upscaling to the configured input
// resolution first if needed. Requires an exact aspect-ratio match and a
// padding-free buffer so the Y/U/V planes can be written in one shot.
void VideoProcessor::WriteDecodedFrame(const I420BufferInterface& decoded_frame,
                                       FrameWriter& frame_writer) {
  int input_video_width = config_.codec_settings.width;
  int input_video_height = config_.codec_settings.height;

  scoped_refptr<I420Buffer> scaled_buffer;
  const I420BufferInterface* scaled_frame;

  if (decoded_frame.width() == input_video_width &&
      decoded_frame.height() == input_video_height) {
    scaled_frame = &decoded_frame;
  } else {
    EXPECT_DOUBLE_EQ(
        static_cast<double>(input_video_width) / input_video_height,
        static_cast<double>(decoded_frame.width()) / decoded_frame.height());

    scaled_buffer = I420Buffer::Create(input_video_width, input_video_height);
    scaled_buffer->ScaleFrom(decoded_frame);

    scaled_frame = scaled_buffer.get();
  }

  // Ensure there is no padding.
  RTC_CHECK_EQ(scaled_frame->StrideY(), input_video_width);
  RTC_CHECK_EQ(scaled_frame->StrideU(), input_video_width / 2);
  RTC_CHECK_EQ(scaled_frame->StrideV(), input_video_width / 2);

  RTC_CHECK_EQ(3 * input_video_width * input_video_height / 2,
               frame_writer.FrameLength());

  RTC_CHECK(frame_writer.WriteFrame(scaled_frame->DataY()));
}

// Decode-complete handler (on the processor task queue): fills in stats for
// any dropped frames since the last decode on this layer, records decode
// stats, runs quality analysis, and dumps the decoded frame if configured.
void VideoProcessor::FrameDecoded(const VideoFrame& decoded_frame,
                                  size_t spatial_idx) {
  RTC_DCHECK_RUN_ON(&sequence_checker_);

  // For the highest measurement accuracy of the decode time, the start/stop
  // time recordings should wrap the Decode call as tightly as possible.
  const int64_t decode_stop_ns = TimeNanos();

  FrameStatistics* frame_stat =
      stats_->GetFrameWithTimestamp(decoded_frame.rtp_timestamp(), spatial_idx);
  const size_t frame_number = frame_stat->frame_number;

  // Handle any frames dropped between the previous decode and this one.
  if (!first_decoded_frame_[spatial_idx]) {
    for (size_t dropped_frame_number = last_decoded_frame_num_[spatial_idx] + 1;
         dropped_frame_number < frame_number; ++dropped_frame_number) {
      FrameStatistics* dropped_frame_stat =
          stats_->GetFrame(dropped_frame_number, spatial_idx);

      if (analyze_frame_quality_ && config_.analyze_quality_of_dropped_frames) {
        // Calculate frame quality comparing input frame with last decoded one.
        CalcFrameQuality(*last_decoded_frame_buffer_[spatial_idx],
                         dropped_frame_stat);
      }

      if (decoded_frame_writers_ != nullptr) {
        // Fill drops with last decoded frame to make them look like freeze at
        // playback and to keep decoded layers in sync.
        WriteDecodedFrame(*last_decoded_frame_buffer_[spatial_idx],
                          *decoded_frame_writers_->at(spatial_idx));
      }
    }
  }

  // Ensure that the decode order is monotonically increasing, within this
  // simulcast/spatial layer.
  RTC_CHECK(first_decoded_frame_[spatial_idx] ||
            last_decoded_frame_num_[spatial_idx] < frame_number);
  first_decoded_frame_[spatial_idx] = false;
  last_decoded_frame_num_[spatial_idx] = frame_number;

  // Update frame statistics.
  frame_stat->decoding_successful = true;
  frame_stat->decode_time_us =
      GetElapsedTimeMicroseconds(frame_stat->decode_start_ns, decode_stop_ns);
  frame_stat->decoded_width = decoded_frame.width();
  frame_stat->decoded_height = decoded_frame.height();

  // Skip quality metrics calculation to not affect CPU usage.
  if (analyze_frame_quality_ || decoded_frame_writers_) {
    // Save last decoded frame to handle possible future drops.
    scoped_refptr<I420BufferInterface> i420buffer =
        decoded_frame.video_frame_buffer()->ToI420();

    // Copy decoded frame to a buffer without padding/stride such that we can
    // dump Y, U and V planes into a file in one shot.
    last_decoded_frame_buffer_[spatial_idx] = I420Buffer::Copy(
        i420buffer->width(), i420buffer->height(), i420buffer->DataY(),
        i420buffer->StrideY(), i420buffer->DataU(), i420buffer->StrideU(),
        i420buffer->DataV(), i420buffer->StrideV());
  }

  if (analyze_frame_quality_) {
    CalcFrameQuality(*decoded_frame.video_frame_buffer()->ToI420(), frame_stat);
  }

  if (decoded_frame_writers_ != nullptr) {
    WriteDecodedFrame(*last_decoded_frame_buffer_[spatial_idx],
                      *decoded_frame_writers_->at(spatial_idx));
  }

  // Erase all buffered input frames that we have moved past for all
  // simulcast/spatial layers. Never buffer more than
  // `kMaxBufferedInputFrames` frames, to protect against long runs of
  // consecutive frame drops for a particular layer.
  const auto min_last_decoded_frame_num = std::min_element(
      last_decoded_frame_num_.cbegin(), last_decoded_frame_num_.cend());
  const size_t min_buffered_frame_num =
      std::max(0, static_cast<int>(frame_number) - kMaxBufferedInputFrames + 1);
  RTC_CHECK(min_last_decoded_frame_num != last_decoded_frame_num_.cend());
  const auto input_frames_erase_before = input_frames_.lower_bound(
      std::max(*min_last_decoded_frame_num, min_buffered_frame_num));
  input_frames_.erase(input_frames_.cbegin(), input_frames_erase_before);
}

// Starts decoding `encoded_image` on the decoder for the given layer,
// recording the decode start time and return code in the frame stats.
void VideoProcessor::DecodeFrame(const EncodedImage& encoded_image,
                                 size_t spatial_idx) {
  RTC_DCHECK_RUN_ON(&sequence_checker_);
  FrameStatistics* frame_stat =
      stats_->GetFrameWithTimestamp(encoded_image.RtpTimestamp(), spatial_idx);

  frame_stat->decode_start_ns = TimeNanos();
  frame_stat->decode_return_code =
      decoders_->at(spatial_idx)->Decode(encoded_image, 0);
}

// Concatenates the nearest non-dropped lower spatial layer (when inter-layer
// prediction is used) with `encoded_image` into a superframe, stores it in
// `merged_encoded_frames_` for this layer, and returns a pointer to it.
const EncodedImage* VideoProcessor::BuildAndStoreSuperframe(
    const EncodedImage& encoded_image,
    const VideoCodecType /* codec */,
    size_t /* frame_number */,
    size_t spatial_idx,
    bool inter_layer_predicted) {
  // Should only be called for SVC.
  RTC_CHECK_GT(config_.NumberOfSpatialLayers(), 1);

  EncodedImage base_image;
  RTC_CHECK_EQ(base_image.size(), 0);

  // Each SVC layer is decoded with dedicated decoder. Find the nearest
  // non-dropped base frame and merge it and current frame into superframe.
  if (inter_layer_predicted) {
    for (int base_idx = static_cast<int>(spatial_idx) - 1; base_idx >= 0;
         --base_idx) {
      EncodedImage lower_layer = merged_encoded_frames_.at(base_idx);
      if (lower_layer.RtpTimestamp() == encoded_image.RtpTimestamp()) {
        base_image = lower_layer;
        break;
      }
    }
  }
  const size_t payload_size_bytes = base_image.size() + encoded_image.size();

  // Base layer bytes first, then the current layer's bytes.
  auto buffer = EncodedImageBuffer::Create(payload_size_bytes);
  if (base_image.size()) {
    RTC_CHECK(base_image.data());
    memcpy(buffer->data(), base_image.data(), base_image.size());
  }
  memcpy(buffer->data() + base_image.size(), encoded_image.data(),
         encoded_image.size());

  EncodedImage copied_image = encoded_image;
  copied_image.SetEncodedData(buffer);
  // The superframe inherits the base layer's frame type (e.g. key frame).
  if (base_image.size())
    copied_image._frameType = base_image._frameType;

  // Replace previous EncodedImage for this spatial layer.
  merged_encoded_frames_.at(spatial_idx) = std::move(copied_image);

  return &merged_encoded_frames_.at(spatial_idx);
}

// Fills in stats (and optionally dump output) for frames dropped after the
// last decoded frame on each layer. Called at most once; the destructor calls
// it if the owner did not.
void VideoProcessor::Finalize() {
  RTC_DCHECK_RUN_ON(&sequence_checker_);
  RTC_DCHECK(!is_finalized_);
  is_finalized_ = true;

  if (!(analyze_frame_quality_ && config_.analyze_quality_of_dropped_frames) &&
      decoded_frame_writers_ == nullptr) {
    return;
  }

  for (size_t spatial_idx = 0; spatial_idx < num_simulcast_or_spatial_layers_;
       ++spatial_idx) {
    if (first_decoded_frame_[spatial_idx]) {
      continue;  // No decoded frames on this spatial layer.
    }

    for (size_t dropped_frame_number = last_decoded_frame_num_[spatial_idx] + 1;
         dropped_frame_number < last_inputed_frame_num_;
         ++dropped_frame_number) {
      FrameStatistics* frame_stat =
          stats_->GetFrame(dropped_frame_number, spatial_idx);

      RTC_DCHECK(!frame_stat->decoding_successful);

      if (analyze_frame_quality_ && config_.analyze_quality_of_dropped_frames) {
        CalcFrameQuality(*last_decoded_frame_buffer_[spatial_idx], frame_stat);
      }

      if (decoded_frame_writers_ != nullptr) {
        WriteDecodedFrame(*last_decoded_frame_buffer_[spatial_idx],
                          *decoded_frame_writers_->at(spatial_idx));
      }
    }
  }
}

}  // namespace test
}  // namespace webrtc