video_adapter.cc (19359B)
1 /* 2 * Copyright (c) 2010 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "media/base/video_adapter.h" 12 13 #include <algorithm> 14 #include <cmath> 15 #include <cstdint> 16 #include <cstdlib> 17 #include <limits> 18 #include <numeric> 19 #include <optional> 20 #include <string> 21 #include <utility> 22 23 #include "api/video/resolution.h" 24 #include "api/video/video_source_interface.h" 25 #include "media/base/video_common.h" 26 #include "rtc_base/checks.h" 27 #include "rtc_base/logging.h" 28 #include "rtc_base/strings/string_builder.h" 29 #include "rtc_base/synchronization/mutex.h" 30 #include "rtc_base/time_utils.h" 31 32 namespace { 33 34 struct Fraction { 35 int numerator; 36 int denominator; 37 38 void DivideByGcd() { 39 int g = std::gcd(numerator, denominator); 40 numerator /= g; 41 denominator /= g; 42 } 43 44 // Determines number of output pixels if both width and height of an input of 45 // `input_pixels` pixels is scaled with the fraction numerator / denominator. 46 int scale_pixel_count(int input_pixels) { 47 return (numerator * numerator * static_cast<int64_t>(input_pixels)) / 48 (denominator * denominator); 49 } 50 }; 51 52 // Round `value_to_round` to a multiple of `multiple`. Prefer rounding upwards, 53 // but never more than `max_value`. 54 int roundUp(int value_to_round, int multiple, int max_value) { 55 const int rounded_value = 56 (value_to_round + multiple - 1) / multiple * multiple; 57 return rounded_value <= max_value ? rounded_value 58 : (max_value / multiple * multiple); 59 } 60 61 // Generates a scale factor that makes `input_pixels` close to `target_pixels`, 62 // but no higher than `max_pixels`. 63 Fraction FindScale(int input_width, 64 int input_height, 65 int target_pixels, 66 int max_pixels) { 67 // This function only makes sense for a positive target. 68 RTC_DCHECK_GT(target_pixels, 0); 69 RTC_DCHECK_GT(max_pixels, 0); 70 RTC_DCHECK_GE(max_pixels, target_pixels); 71 72 const int input_pixels = input_width * input_height; 73 74 // Don't scale up original. 75 if (target_pixels >= input_pixels) 76 return {.numerator = 1, .denominator = 1}; 77 78 Fraction current_scale = {.numerator = 1, .denominator = 1}; 79 Fraction best_scale = {.numerator = 1, .denominator = 1}; 80 81 // Start scaling down by 2/3 depending on `input_width` and `input_height`. 82 if (input_width % 3 == 0 && input_height % 3 == 0) { 83 // 2/3 (then alternates 3/4, 2/3, 3/4,...). 84 current_scale = {.numerator = 6, .denominator = 6}; 85 } 86 if (input_width % 9 == 0 && input_height % 9 == 0) { 87 // 2/3, 2/3 (then alternates 3/4, 2/3, 3/4,...). 88 current_scale = {.numerator = 36, .denominator = 36}; 89 } 90 91 // The minimum (absolute) difference between the number of output pixels and 92 // the target pixel count. 93 int min_pixel_diff = std::numeric_limits<int>::max(); 94 if (input_pixels <= max_pixels) { 95 // Start condition for 1/1 case, if it is less than max. 96 min_pixel_diff = std::abs(input_pixels - target_pixels); 97 } 98 99 // Alternately scale down by 3/4 and 2/3. This results in fractions which are 100 // effectively scalable. For instance, starting at 1280x720 will result in 101 // the series (3/4) => 960x540, (1/2) => 640x360, (3/8) => 480x270, 102 // (1/4) => 320x180, (3/16) => 240x125, (1/8) => 160x90. 103 while (current_scale.scale_pixel_count(input_pixels) > target_pixels) { 104 if (current_scale.numerator % 3 == 0 && 105 current_scale.denominator % 2 == 0) { 106 // Multiply by 2/3. 107 current_scale.numerator /= 3; 108 current_scale.denominator /= 2; 109 } else { 110 // Multiply by 3/4. 111 current_scale.numerator *= 3; 112 current_scale.denominator *= 4; 113 } 114 115 int output_pixels = current_scale.scale_pixel_count(input_pixels); 116 if (output_pixels <= max_pixels) { 117 int diff = std::abs(target_pixels - output_pixels); 118 if (diff < min_pixel_diff) { 119 min_pixel_diff = diff; 120 best_scale = current_scale; 121 } 122 } 123 } 124 best_scale.DivideByGcd(); 125 126 return best_scale; 127 } 128 129 std::optional<std::pair<int, int>> Swap( 130 const std::optional<std::pair<int, int>>& in) { 131 if (!in) { 132 return std::nullopt; 133 } 134 return std::make_pair(in->second, in->first); 135 } 136 137 } // namespace 138 139 namespace webrtc { 140 141 VideoAdapter::VideoAdapter(int source_resolution_alignment) 142 : frames_in_(0), 143 frames_out_(0), 144 frames_scaled_(0), 145 adaption_changes_(0), 146 previous_width_(0), 147 previous_height_(0), 148 source_resolution_alignment_(source_resolution_alignment), 149 resolution_alignment_(source_resolution_alignment), 150 resolution_request_target_pixel_count_(std::numeric_limits<int>::max()), 151 resolution_request_max_pixel_count_(std::numeric_limits<int>::max()), 152 max_framerate_request_(std::numeric_limits<int>::max()) {} 153 154 VideoAdapter::VideoAdapter() : VideoAdapter(1) {} 155 156 VideoAdapter::~VideoAdapter() {} 157 158 bool VideoAdapter::DropFrame(int64_t in_timestamp_ns) { 159 int max_fps = max_framerate_request_; 160 if (output_format_request_.max_fps) 161 max_fps = std::min(max_fps, *output_format_request_.max_fps); 162 163 framerate_controller_.SetMaxFramerate(max_fps); 164 return framerate_controller_.ShouldDropFrame(in_timestamp_ns); 165 } 166 167 bool VideoAdapter::AdaptFrameResolution(int in_width, 168 int in_height, 169 int64_t in_timestamp_ns, 170 int* cropped_width, 171 int* cropped_height, 172 int* out_width, 173 int* out_height) { 174 MutexLock lock(&mutex_); 175 ++frames_in_; 176 177 // The max output pixel count is the minimum of the requests from 178 // OnOutputFormatRequest and OnResolutionFramerateRequest. 179 int max_pixel_count = resolution_request_max_pixel_count_; 180 181 // Select target aspect ratio and max pixel count depending on input frame 182 // orientation. 183 std::optional<std::pair<int, int>> target_aspect_ratio; 184 if (in_width > in_height) { 185 target_aspect_ratio = output_format_request_.target_landscape_aspect_ratio; 186 if (output_format_request_.max_landscape_pixel_count) 187 max_pixel_count = std::min( 188 max_pixel_count, *output_format_request_.max_landscape_pixel_count); 189 } else { 190 target_aspect_ratio = output_format_request_.target_portrait_aspect_ratio; 191 if (output_format_request_.max_portrait_pixel_count) 192 max_pixel_count = std::min( 193 max_pixel_count, *output_format_request_.max_portrait_pixel_count); 194 } 195 196 int target_pixel_count = 197 std::min(resolution_request_target_pixel_count_, max_pixel_count); 198 199 // Drop the input frame if necessary. 200 if (max_pixel_count <= 0 || DropFrame(in_timestamp_ns)) { 201 // Show VAdapt log every 90 frames dropped. (3 seconds) 202 if ((frames_in_ - frames_out_) % 90 == 0) { 203 // TODO(fbarchard): Reduce to LS_VERBOSE when adapter info is not needed 204 // in default calls. 205 RTC_LOG(LS_INFO) << "VAdapt Drop Frame: scaled " << frames_scaled_ 206 << " / out " << frames_out_ << " / in " << frames_in_ 207 << " Changes: " << adaption_changes_ 208 << " Input: " << in_width << "x" << in_height 209 << " timestamp: " << in_timestamp_ns 210 << " Output fps: " << max_framerate_request_ << "/" 211 << output_format_request_.max_fps.value_or(-1) 212 << " alignment: " << resolution_alignment_; 213 } 214 215 // Drop frame. 216 return false; 217 } 218 219 // Calculate how the input should be cropped. 220 if (!target_aspect_ratio || target_aspect_ratio->first <= 0 || 221 target_aspect_ratio->second <= 0) { 222 *cropped_width = in_width; 223 *cropped_height = in_height; 224 } else { 225 const float requested_aspect = 226 target_aspect_ratio->first / 227 static_cast<float>(target_aspect_ratio->second); 228 *cropped_width = 229 std::min(in_width, static_cast<int>(in_height * requested_aspect)); 230 *cropped_height = 231 std::min(in_height, static_cast<int>(in_width / requested_aspect)); 232 } 233 const Fraction scale = FindScale(*cropped_width, *cropped_height, 234 target_pixel_count, max_pixel_count); 235 // Adjust cropping slightly to get correctly aligned output size and a perfect 236 // scale factor. 237 *cropped_width = roundUp(*cropped_width, 238 scale.denominator * resolution_alignment_, in_width); 239 *cropped_height = roundUp( 240 *cropped_height, scale.denominator * resolution_alignment_, in_height); 241 RTC_DCHECK_EQ(0, *cropped_width % scale.denominator); 242 RTC_DCHECK_EQ(0, *cropped_height % scale.denominator); 243 244 // Calculate output size. 245 *out_width = *cropped_width / scale.denominator * scale.numerator; 246 *out_height = *cropped_height / scale.denominator * scale.numerator; 247 RTC_DCHECK_EQ(0, *out_width % resolution_alignment_); 248 RTC_DCHECK_EQ(0, *out_height % resolution_alignment_); 249 250 // Lastly, make the output size fit within the resolution restrictions as 251 // specified by `scale_resolution_down_to_`. This does not modify aspect ratio 252 // or cropping, only `out_width` and `out_height`. 253 if (scale_resolution_down_to_.has_value()) { 254 // Make frame and "scale to" have matching orientation. 255 Resolution scale_resolution_down_to = scale_resolution_down_to_.value(); 256 if ((*out_width < *out_height) != (scale_resolution_down_to_->width < 257 scale_resolution_down_to_->height)) { 258 scale_resolution_down_to = {.width = scale_resolution_down_to_->height, 259 .height = scale_resolution_down_to_->width}; 260 } 261 // Downscale by smallest scaling factor, if necessary. 262 if (*out_width > 0 && *out_height > 0 && 263 (scale_resolution_down_to.width < *out_width || 264 scale_resolution_down_to.height < *out_height)) { 265 double scale_factor = std::min( 266 scale_resolution_down_to.width / static_cast<double>(*out_width), 267 scale_resolution_down_to.height / static_cast<double>(*out_height)); 268 *out_width = 269 roundUp(std::round(*out_width * scale_factor), resolution_alignment_, 270 scale_resolution_down_to.width); 271 *out_height = 272 roundUp(std::round(*out_height * scale_factor), resolution_alignment_, 273 scale_resolution_down_to.height); 274 RTC_DCHECK_EQ(0, *out_width % resolution_alignment_); 275 RTC_DCHECK_EQ(0, *out_height % resolution_alignment_); 276 } 277 } 278 279 ++frames_out_; 280 if (scale.numerator != scale.denominator) 281 ++frames_scaled_; 282 283 if (previous_width_ && 284 (previous_width_ != *out_width || previous_height_ != *out_height)) { 285 ++adaption_changes_; 286 RTC_LOG(LS_INFO) << "Frame size changed: scaled " << frames_scaled_ 287 << " / out " << frames_out_ << " / in " << frames_in_ 288 << " Changes: " << adaption_changes_ 289 << " Input: " << in_width << "x" << in_height 290 << " Scale: " << scale.numerator << "/" 291 << scale.denominator << " Output: " << *out_width << "x" 292 << *out_height << " fps: " << max_framerate_request_ << "/" 293 << output_format_request_.max_fps.value_or(-1) 294 << " alignment: " << resolution_alignment_; 295 } 296 297 previous_width_ = *out_width; 298 previous_height_ = *out_height; 299 300 return true; 301 } 302 303 void VideoAdapter::OnOutputFormatRequest( 304 const std::optional<VideoFormat>& format) { 305 std::optional<std::pair<int, int>> target_aspect_ratio; 306 std::optional<int> max_pixel_count; 307 std::optional<int> max_fps; 308 if (format) { 309 target_aspect_ratio = std::make_pair(format->width, format->height); 310 max_pixel_count = format->width * format->height; 311 if (format->interval > 0) 312 max_fps = kNumNanosecsPerSec / format->interval; 313 } 314 OnOutputFormatRequest(target_aspect_ratio, max_pixel_count, max_fps); 315 } 316 317 void VideoAdapter::OnOutputFormatRequest( 318 const std::optional<std::pair<int, int>>& target_aspect_ratio, 319 const std::optional<int>& max_pixel_count, 320 const std::optional<int>& max_fps) { 321 std::optional<std::pair<int, int>> target_landscape_aspect_ratio; 322 std::optional<std::pair<int, int>> target_portrait_aspect_ratio; 323 if (target_aspect_ratio && target_aspect_ratio->first > 0 && 324 target_aspect_ratio->second > 0) { 325 // Maintain input orientation. 326 const int max_side = 327 std::max(target_aspect_ratio->first, target_aspect_ratio->second); 328 const int min_side = 329 std::min(target_aspect_ratio->first, target_aspect_ratio->second); 330 target_landscape_aspect_ratio = std::make_pair(max_side, min_side); 331 target_portrait_aspect_ratio = std::make_pair(min_side, max_side); 332 } 333 OnOutputFormatRequest(target_landscape_aspect_ratio, max_pixel_count, 334 target_portrait_aspect_ratio, max_pixel_count, max_fps); 335 } 336 337 void VideoAdapter::OnOutputFormatRequest( 338 const std::optional<std::pair<int, int>>& target_landscape_aspect_ratio, 339 const std::optional<int>& max_landscape_pixel_count, 340 const std::optional<std::pair<int, int>>& target_portrait_aspect_ratio, 341 const std::optional<int>& max_portrait_pixel_count, 342 const std::optional<int>& max_fps) { 343 MutexLock lock(&mutex_); 344 345 OutputFormatRequest request = { 346 .target_landscape_aspect_ratio = target_landscape_aspect_ratio, 347 .max_landscape_pixel_count = max_landscape_pixel_count, 348 .target_portrait_aspect_ratio = target_portrait_aspect_ratio, 349 .max_portrait_pixel_count = max_portrait_pixel_count, 350 .max_fps = max_fps}; 351 352 if (stashed_output_format_request_) { 353 // Save the output format request for later use in case the encoder making 354 // this call would become active, because currently all active encoders use 355 // scale_resolution_down_to instead. 356 stashed_output_format_request_ = request; 357 RTC_LOG(LS_INFO) << "Stashing OnOutputFormatRequest: " 358 << stashed_output_format_request_->ToString(); 359 } else { 360 output_format_request_ = request; 361 RTC_LOG(LS_INFO) << "Setting output_format_request_: " 362 << output_format_request_.ToString(); 363 } 364 365 framerate_controller_.Reset(); 366 } 367 368 void VideoAdapter::OnSinkWants(const VideoSinkWants& sink_wants) { 369 MutexLock lock(&mutex_); 370 resolution_request_max_pixel_count_ = sink_wants.max_pixel_count; 371 resolution_request_target_pixel_count_ = 372 sink_wants.target_pixel_count.value_or( 373 resolution_request_max_pixel_count_); 374 max_framerate_request_ = sink_wants.max_framerate_fps; 375 resolution_alignment_ = 376 std::lcm(source_resolution_alignment_, sink_wants.resolution_alignment); 377 // Convert from std::optional<VideoSinkWants::FrameSize> to 378 // std::optional<Resolution>. Both are {int,int}. 379 scale_resolution_down_to_ = std::nullopt; 380 if (sink_wants.requested_resolution.has_value()) { 381 scale_resolution_down_to_ = { 382 .width = sink_wants.requested_resolution->width, 383 .height = sink_wants.requested_resolution->height}; 384 } 385 386 // If scale_resolution_down_to is used, and there are no active encoders 387 // that are NOT using scale_resolution_down_to (aka newapi), then override 388 // calls to OnOutputFormatRequest and use values from scale_resolution_down_to 389 // instead (combined with qualityscaling based on pixel counts above). 390 if (!sink_wants.requested_resolution) { 391 if (stashed_output_format_request_) { 392 // because current active_output_format_request is based on 393 // scale_resolution_down_to logic, while current encoder(s) doesn't want 394 // that, we have to restore the stashed request. 395 RTC_LOG(LS_INFO) << "Unstashing OnOutputFormatRequest: " 396 << stashed_output_format_request_->ToString(); 397 output_format_request_ = *stashed_output_format_request_; 398 stashed_output_format_request_.reset(); 399 } 400 return; 401 } 402 403 // The code below is only needed when `scale_resolution_down_to` is signalled 404 // back to the video source which only happens if 405 // `VideoStreamEncoderSettings::use_standard_scale_resolution_down_to` is 406 // false. 407 // TODO(https://crbug.com/webrtc/366284861): Delete the code below as part of 408 // deleting this flag and only supporting the standard behavior. 409 410 if (sink_wants.aggregates.has_value() && 411 sink_wants.aggregates->any_active_without_requested_resolution) { 412 return; 413 } 414 415 if (!stashed_output_format_request_) { 416 // The active output format request is about to be cleared due to 417 // request_resolution. We need to save it for later use in case the encoder 418 // which doesn't use request_resolution logic become active in the future. 419 stashed_output_format_request_ = output_format_request_; 420 RTC_LOG(LS_INFO) << "Stashing OnOutputFormatRequest: " 421 << stashed_output_format_request_->ToString(); 422 } 423 424 // Clear the output format request, `scale_resolution_down_to_` will be 425 // applied instead which happens inside AdaptFrameResolution(). 426 output_format_request_ = {}; 427 } 428 429 int VideoAdapter::GetTargetPixels() const { 430 MutexLock lock(&mutex_); 431 return resolution_request_target_pixel_count_; 432 } 433 434 float VideoAdapter::GetMaxFramerate() const { 435 MutexLock lock(&mutex_); 436 // Minimum of `output_format_request_.max_fps` and `max_framerate_request_` is 437 // used to throttle frame-rate. 438 int framerate = 439 std::min(max_framerate_request_, 440 output_format_request_.max_fps.value_or(max_framerate_request_)); 441 if (framerate == std::numeric_limits<int>::max()) { 442 return std::numeric_limits<float>::infinity(); 443 } else { 444 return max_framerate_request_; 445 } 446 } 447 448 std::string VideoAdapter::OutputFormatRequest::ToString() const { 449 StringBuilder oss; 450 oss << "[ "; 451 if (target_landscape_aspect_ratio == Swap(target_portrait_aspect_ratio) && 452 max_landscape_pixel_count == max_portrait_pixel_count) { 453 if (target_landscape_aspect_ratio) { 454 oss << target_landscape_aspect_ratio->first << "x" 455 << target_landscape_aspect_ratio->second; 456 } else { 457 oss << "unset-resolution"; 458 } 459 if (max_landscape_pixel_count) { 460 oss << " max_pixel_count: " << *max_landscape_pixel_count; 461 } 462 } else { 463 oss << "[ landscape: "; 464 if (target_landscape_aspect_ratio) { 465 oss << target_landscape_aspect_ratio->first << "x" 466 << target_landscape_aspect_ratio->second; 467 } else { 468 oss << "unset"; 469 } 470 if (max_landscape_pixel_count) { 471 oss << " max_pixel_count: " << *max_landscape_pixel_count; 472 } 473 oss << " ] [ portrait: "; 474 if (target_portrait_aspect_ratio) { 475 oss << target_portrait_aspect_ratio->first << "x" 476 << target_portrait_aspect_ratio->second; 477 } 478 if (max_portrait_pixel_count) { 479 oss << " max_pixel_count: " << *max_portrait_pixel_count; 480 } 481 oss << " ]"; 482 } 483 oss << " max_fps: "; 484 if (max_fps) { 485 oss << *max_fps; 486 } else { 487 oss << "unset"; 488 } 489 oss << " ]"; 490 return oss.Release(); 491 } 492 493 } // namespace webrtc