tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

video_adapter.cc (19359B)


      1 /*
      2 *  Copyright (c) 2010 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #include "media/base/video_adapter.h"
     12 
     13 #include <algorithm>
     14 #include <cmath>
     15 #include <cstdint>
     16 #include <cstdlib>
     17 #include <limits>
     18 #include <numeric>
     19 #include <optional>
     20 #include <string>
     21 #include <utility>
     22 
     23 #include "api/video/resolution.h"
     24 #include "api/video/video_source_interface.h"
     25 #include "media/base/video_common.h"
     26 #include "rtc_base/checks.h"
     27 #include "rtc_base/logging.h"
     28 #include "rtc_base/strings/string_builder.h"
     29 #include "rtc_base/synchronization/mutex.h"
     30 #include "rtc_base/time_utils.h"
     31 
     32 namespace {
     33 
     34 struct Fraction {
     35  int numerator;
     36  int denominator;
     37 
     38  void DivideByGcd() {
     39    int g = std::gcd(numerator, denominator);
     40    numerator /= g;
     41    denominator /= g;
     42  }
     43 
     44  // Determines number of output pixels if both width and height of an input of
     45  // `input_pixels` pixels is scaled with the fraction numerator / denominator.
     46  int scale_pixel_count(int input_pixels) {
     47    return (numerator * numerator * static_cast<int64_t>(input_pixels)) /
     48           (denominator * denominator);
     49  }
     50 };
     51 
     52 // Round `value_to_round` to a multiple of `multiple`. Prefer rounding upwards,
     53 // but never more than `max_value`.
     54 int roundUp(int value_to_round, int multiple, int max_value) {
     55  const int rounded_value =
     56      (value_to_round + multiple - 1) / multiple * multiple;
     57  return rounded_value <= max_value ? rounded_value
     58                                    : (max_value / multiple * multiple);
     59 }
     60 
     61 // Generates a scale factor that makes `input_pixels` close to `target_pixels`,
     62 // but no higher than `max_pixels`.
     63 Fraction FindScale(int input_width,
     64                   int input_height,
     65                   int target_pixels,
     66                   int max_pixels) {
     67  // This function only makes sense for a positive target.
     68  RTC_DCHECK_GT(target_pixels, 0);
     69  RTC_DCHECK_GT(max_pixels, 0);
     70  RTC_DCHECK_GE(max_pixels, target_pixels);
     71 
     72  const int input_pixels = input_width * input_height;
     73 
     74  // Don't scale up original.
     75  if (target_pixels >= input_pixels)
     76    return {.numerator = 1, .denominator = 1};
     77 
     78  Fraction current_scale = {.numerator = 1, .denominator = 1};
     79  Fraction best_scale = {.numerator = 1, .denominator = 1};
     80 
     81  // Start scaling down by 2/3 depending on `input_width` and `input_height`.
     82  if (input_width % 3 == 0 && input_height % 3 == 0) {
     83    // 2/3 (then alternates 3/4, 2/3, 3/4,...).
     84    current_scale = {.numerator = 6, .denominator = 6};
     85  }
     86  if (input_width % 9 == 0 && input_height % 9 == 0) {
     87    // 2/3, 2/3 (then alternates 3/4, 2/3, 3/4,...).
     88    current_scale = {.numerator = 36, .denominator = 36};
     89  }
     90 
     91  // The minimum (absolute) difference between the number of output pixels and
     92  // the target pixel count.
     93  int min_pixel_diff = std::numeric_limits<int>::max();
     94  if (input_pixels <= max_pixels) {
     95    // Start condition for 1/1 case, if it is less than max.
     96    min_pixel_diff = std::abs(input_pixels - target_pixels);
     97  }
     98 
     99  // Alternately scale down by 3/4 and 2/3. This results in fractions which are
    100  // effectively scalable. For instance, starting at 1280x720 will result in
    101  // the series (3/4) => 960x540, (1/2) => 640x360, (3/8) => 480x270,
    102  // (1/4) => 320x180, (3/16) => 240x125, (1/8) => 160x90.
    103  while (current_scale.scale_pixel_count(input_pixels) > target_pixels) {
    104    if (current_scale.numerator % 3 == 0 &&
    105        current_scale.denominator % 2 == 0) {
    106      // Multiply by 2/3.
    107      current_scale.numerator /= 3;
    108      current_scale.denominator /= 2;
    109    } else {
    110      // Multiply by 3/4.
    111      current_scale.numerator *= 3;
    112      current_scale.denominator *= 4;
    113    }
    114 
    115    int output_pixels = current_scale.scale_pixel_count(input_pixels);
    116    if (output_pixels <= max_pixels) {
    117      int diff = std::abs(target_pixels - output_pixels);
    118      if (diff < min_pixel_diff) {
    119        min_pixel_diff = diff;
    120        best_scale = current_scale;
    121      }
    122    }
    123  }
    124  best_scale.DivideByGcd();
    125 
    126  return best_scale;
    127 }
    128 
    129 std::optional<std::pair<int, int>> Swap(
    130    const std::optional<std::pair<int, int>>& in) {
    131  if (!in) {
    132    return std::nullopt;
    133  }
    134  return std::make_pair(in->second, in->first);
    135 }
    136 
    137 }  // namespace
    138 
    139 namespace webrtc {
    140 
    141 VideoAdapter::VideoAdapter(int source_resolution_alignment)
    142    : frames_in_(0),
    143      frames_out_(0),
    144      frames_scaled_(0),
    145      adaption_changes_(0),
    146      previous_width_(0),
    147      previous_height_(0),
    148      source_resolution_alignment_(source_resolution_alignment),
    149      resolution_alignment_(source_resolution_alignment),
    150      resolution_request_target_pixel_count_(std::numeric_limits<int>::max()),
    151      resolution_request_max_pixel_count_(std::numeric_limits<int>::max()),
    152      max_framerate_request_(std::numeric_limits<int>::max()) {}
    153 
    154 VideoAdapter::VideoAdapter() : VideoAdapter(1) {}
    155 
    156 VideoAdapter::~VideoAdapter() {}
    157 
    158 bool VideoAdapter::DropFrame(int64_t in_timestamp_ns) {
    159  int max_fps = max_framerate_request_;
    160  if (output_format_request_.max_fps)
    161    max_fps = std::min(max_fps, *output_format_request_.max_fps);
    162 
    163  framerate_controller_.SetMaxFramerate(max_fps);
    164  return framerate_controller_.ShouldDropFrame(in_timestamp_ns);
    165 }
    166 
    167 bool VideoAdapter::AdaptFrameResolution(int in_width,
    168                                        int in_height,
    169                                        int64_t in_timestamp_ns,
    170                                        int* cropped_width,
    171                                        int* cropped_height,
    172                                        int* out_width,
    173                                        int* out_height) {
    174  MutexLock lock(&mutex_);
    175  ++frames_in_;
    176 
    177  // The max output pixel count is the minimum of the requests from
    178  // OnOutputFormatRequest and OnResolutionFramerateRequest.
    179  int max_pixel_count = resolution_request_max_pixel_count_;
    180 
    181  // Select target aspect ratio and max pixel count depending on input frame
    182  // orientation.
    183  std::optional<std::pair<int, int>> target_aspect_ratio;
    184  if (in_width > in_height) {
    185    target_aspect_ratio = output_format_request_.target_landscape_aspect_ratio;
    186    if (output_format_request_.max_landscape_pixel_count)
    187      max_pixel_count = std::min(
    188          max_pixel_count, *output_format_request_.max_landscape_pixel_count);
    189  } else {
    190    target_aspect_ratio = output_format_request_.target_portrait_aspect_ratio;
    191    if (output_format_request_.max_portrait_pixel_count)
    192      max_pixel_count = std::min(
    193          max_pixel_count, *output_format_request_.max_portrait_pixel_count);
    194  }
    195 
    196  int target_pixel_count =
    197      std::min(resolution_request_target_pixel_count_, max_pixel_count);
    198 
    199  // Drop the input frame if necessary.
    200  if (max_pixel_count <= 0 || DropFrame(in_timestamp_ns)) {
    201    // Show VAdapt log every 90 frames dropped. (3 seconds)
    202    if ((frames_in_ - frames_out_) % 90 == 0) {
    203      // TODO(fbarchard): Reduce to LS_VERBOSE when adapter info is not needed
    204      // in default calls.
    205      RTC_LOG(LS_INFO) << "VAdapt Drop Frame: scaled " << frames_scaled_
    206                       << " / out " << frames_out_ << " / in " << frames_in_
    207                       << " Changes: " << adaption_changes_
    208                       << " Input: " << in_width << "x" << in_height
    209                       << " timestamp: " << in_timestamp_ns
    210                       << " Output fps: " << max_framerate_request_ << "/"
    211                       << output_format_request_.max_fps.value_or(-1)
    212                       << " alignment: " << resolution_alignment_;
    213    }
    214 
    215    // Drop frame.
    216    return false;
    217  }
    218 
    219  // Calculate how the input should be cropped.
    220  if (!target_aspect_ratio || target_aspect_ratio->first <= 0 ||
    221      target_aspect_ratio->second <= 0) {
    222    *cropped_width = in_width;
    223    *cropped_height = in_height;
    224  } else {
    225    const float requested_aspect =
    226        target_aspect_ratio->first /
    227        static_cast<float>(target_aspect_ratio->second);
    228    *cropped_width =
    229        std::min(in_width, static_cast<int>(in_height * requested_aspect));
    230    *cropped_height =
    231        std::min(in_height, static_cast<int>(in_width / requested_aspect));
    232  }
    233  const Fraction scale = FindScale(*cropped_width, *cropped_height,
    234                                   target_pixel_count, max_pixel_count);
    235  // Adjust cropping slightly to get correctly aligned output size and a perfect
    236  // scale factor.
    237  *cropped_width = roundUp(*cropped_width,
    238                           scale.denominator * resolution_alignment_, in_width);
    239  *cropped_height = roundUp(
    240      *cropped_height, scale.denominator * resolution_alignment_, in_height);
    241  RTC_DCHECK_EQ(0, *cropped_width % scale.denominator);
    242  RTC_DCHECK_EQ(0, *cropped_height % scale.denominator);
    243 
    244  // Calculate output size.
    245  *out_width = *cropped_width / scale.denominator * scale.numerator;
    246  *out_height = *cropped_height / scale.denominator * scale.numerator;
    247  RTC_DCHECK_EQ(0, *out_width % resolution_alignment_);
    248  RTC_DCHECK_EQ(0, *out_height % resolution_alignment_);
    249 
    250  // Lastly, make the output size fit within the resolution restrictions as
    251  // specified by `scale_resolution_down_to_`. This does not modify aspect ratio
    252  // or cropping, only `out_width` and `out_height`.
    253  if (scale_resolution_down_to_.has_value()) {
    254    // Make frame and "scale to" have matching orientation.
    255    Resolution scale_resolution_down_to = scale_resolution_down_to_.value();
    256    if ((*out_width < *out_height) != (scale_resolution_down_to_->width <
    257                                       scale_resolution_down_to_->height)) {
    258      scale_resolution_down_to = {.width = scale_resolution_down_to_->height,
    259                                  .height = scale_resolution_down_to_->width};
    260    }
    261    // Downscale by smallest scaling factor, if necessary.
    262    if (*out_width > 0 && *out_height > 0 &&
    263        (scale_resolution_down_to.width < *out_width ||
    264         scale_resolution_down_to.height < *out_height)) {
    265      double scale_factor = std::min(
    266          scale_resolution_down_to.width / static_cast<double>(*out_width),
    267          scale_resolution_down_to.height / static_cast<double>(*out_height));
    268      *out_width =
    269          roundUp(std::round(*out_width * scale_factor), resolution_alignment_,
    270                  scale_resolution_down_to.width);
    271      *out_height =
    272          roundUp(std::round(*out_height * scale_factor), resolution_alignment_,
    273                  scale_resolution_down_to.height);
    274      RTC_DCHECK_EQ(0, *out_width % resolution_alignment_);
    275      RTC_DCHECK_EQ(0, *out_height % resolution_alignment_);
    276    }
    277  }
    278 
    279  ++frames_out_;
    280  if (scale.numerator != scale.denominator)
    281    ++frames_scaled_;
    282 
    283  if (previous_width_ &&
    284      (previous_width_ != *out_width || previous_height_ != *out_height)) {
    285    ++adaption_changes_;
    286    RTC_LOG(LS_INFO) << "Frame size changed: scaled " << frames_scaled_
    287                     << " / out " << frames_out_ << " / in " << frames_in_
    288                     << " Changes: " << adaption_changes_
    289                     << " Input: " << in_width << "x" << in_height
    290                     << " Scale: " << scale.numerator << "/"
    291                     << scale.denominator << " Output: " << *out_width << "x"
    292                     << *out_height << " fps: " << max_framerate_request_ << "/"
    293                     << output_format_request_.max_fps.value_or(-1)
    294                     << " alignment: " << resolution_alignment_;
    295  }
    296 
    297  previous_width_ = *out_width;
    298  previous_height_ = *out_height;
    299 
    300  return true;
    301 }
    302 
    303 void VideoAdapter::OnOutputFormatRequest(
    304    const std::optional<VideoFormat>& format) {
    305  std::optional<std::pair<int, int>> target_aspect_ratio;
    306  std::optional<int> max_pixel_count;
    307  std::optional<int> max_fps;
    308  if (format) {
    309    target_aspect_ratio = std::make_pair(format->width, format->height);
    310    max_pixel_count = format->width * format->height;
    311    if (format->interval > 0)
    312      max_fps = kNumNanosecsPerSec / format->interval;
    313  }
    314  OnOutputFormatRequest(target_aspect_ratio, max_pixel_count, max_fps);
    315 }
    316 
    317 void VideoAdapter::OnOutputFormatRequest(
    318    const std::optional<std::pair<int, int>>& target_aspect_ratio,
    319    const std::optional<int>& max_pixel_count,
    320    const std::optional<int>& max_fps) {
    321  std::optional<std::pair<int, int>> target_landscape_aspect_ratio;
    322  std::optional<std::pair<int, int>> target_portrait_aspect_ratio;
    323  if (target_aspect_ratio && target_aspect_ratio->first > 0 &&
    324      target_aspect_ratio->second > 0) {
    325    // Maintain input orientation.
    326    const int max_side =
    327        std::max(target_aspect_ratio->first, target_aspect_ratio->second);
    328    const int min_side =
    329        std::min(target_aspect_ratio->first, target_aspect_ratio->second);
    330    target_landscape_aspect_ratio = std::make_pair(max_side, min_side);
    331    target_portrait_aspect_ratio = std::make_pair(min_side, max_side);
    332  }
    333  OnOutputFormatRequest(target_landscape_aspect_ratio, max_pixel_count,
    334                        target_portrait_aspect_ratio, max_pixel_count, max_fps);
    335 }
    336 
    337 void VideoAdapter::OnOutputFormatRequest(
    338    const std::optional<std::pair<int, int>>& target_landscape_aspect_ratio,
    339    const std::optional<int>& max_landscape_pixel_count,
    340    const std::optional<std::pair<int, int>>& target_portrait_aspect_ratio,
    341    const std::optional<int>& max_portrait_pixel_count,
    342    const std::optional<int>& max_fps) {
    343  MutexLock lock(&mutex_);
    344 
    345  OutputFormatRequest request = {
    346      .target_landscape_aspect_ratio = target_landscape_aspect_ratio,
    347      .max_landscape_pixel_count = max_landscape_pixel_count,
    348      .target_portrait_aspect_ratio = target_portrait_aspect_ratio,
    349      .max_portrait_pixel_count = max_portrait_pixel_count,
    350      .max_fps = max_fps};
    351 
    352  if (stashed_output_format_request_) {
    353    // Save the output format request for later use in case the encoder making
    354    // this call would become active, because currently all active encoders use
    355    // scale_resolution_down_to instead.
    356    stashed_output_format_request_ = request;
    357    RTC_LOG(LS_INFO) << "Stashing OnOutputFormatRequest: "
    358                     << stashed_output_format_request_->ToString();
    359  } else {
    360    output_format_request_ = request;
    361    RTC_LOG(LS_INFO) << "Setting output_format_request_: "
    362                     << output_format_request_.ToString();
    363  }
    364 
    365  framerate_controller_.Reset();
    366 }
    367 
    368 void VideoAdapter::OnSinkWants(const VideoSinkWants& sink_wants) {
    369  MutexLock lock(&mutex_);
    370  resolution_request_max_pixel_count_ = sink_wants.max_pixel_count;
    371  resolution_request_target_pixel_count_ =
    372      sink_wants.target_pixel_count.value_or(
    373          resolution_request_max_pixel_count_);
    374  max_framerate_request_ = sink_wants.max_framerate_fps;
    375  resolution_alignment_ =
    376      std::lcm(source_resolution_alignment_, sink_wants.resolution_alignment);
    377  // Convert from std::optional<VideoSinkWants::FrameSize> to
    378  // std::optional<Resolution>. Both are {int,int}.
    379  scale_resolution_down_to_ = std::nullopt;
    380  if (sink_wants.requested_resolution.has_value()) {
    381    scale_resolution_down_to_ = {
    382        .width = sink_wants.requested_resolution->width,
    383        .height = sink_wants.requested_resolution->height};
    384  }
    385 
    386  // If scale_resolution_down_to is used, and there are no active encoders
    387  // that are NOT using scale_resolution_down_to (aka newapi), then override
    388  // calls to OnOutputFormatRequest and use values from scale_resolution_down_to
    389  // instead (combined with qualityscaling based on pixel counts above).
    390  if (!sink_wants.requested_resolution) {
    391    if (stashed_output_format_request_) {
    392      // because current active_output_format_request is based on
    393      // scale_resolution_down_to logic, while current encoder(s) doesn't want
    394      // that, we have to restore the stashed request.
    395      RTC_LOG(LS_INFO) << "Unstashing OnOutputFormatRequest: "
    396                       << stashed_output_format_request_->ToString();
    397      output_format_request_ = *stashed_output_format_request_;
    398      stashed_output_format_request_.reset();
    399    }
    400    return;
    401  }
    402 
    403  // The code below is only needed when `scale_resolution_down_to` is signalled
    404  // back to the video source which only happens if
    405  // `VideoStreamEncoderSettings::use_standard_scale_resolution_down_to` is
    406  // false.
    407  // TODO(https://crbug.com/webrtc/366284861): Delete the code below as part of
    408  // deleting this flag and only supporting the standard behavior.
    409 
    410  if (sink_wants.aggregates.has_value() &&
    411      sink_wants.aggregates->any_active_without_requested_resolution) {
    412    return;
    413  }
    414 
    415  if (!stashed_output_format_request_) {
    416    // The active output format request is about to be cleared due to
    417    // request_resolution. We need to save it for later use in case the encoder
    418    // which doesn't use request_resolution logic become active in the future.
    419    stashed_output_format_request_ = output_format_request_;
    420    RTC_LOG(LS_INFO) << "Stashing OnOutputFormatRequest: "
    421                     << stashed_output_format_request_->ToString();
    422  }
    423 
    424  // Clear the output format request, `scale_resolution_down_to_` will be
    425  // applied instead which happens inside AdaptFrameResolution().
    426  output_format_request_ = {};
    427 }
    428 
    429 int VideoAdapter::GetTargetPixels() const {
    430  MutexLock lock(&mutex_);
    431  return resolution_request_target_pixel_count_;
    432 }
    433 
    434 float VideoAdapter::GetMaxFramerate() const {
    435  MutexLock lock(&mutex_);
    436  // Minimum of `output_format_request_.max_fps` and `max_framerate_request_` is
    437  // used to throttle frame-rate.
    438  int framerate =
    439      std::min(max_framerate_request_,
    440               output_format_request_.max_fps.value_or(max_framerate_request_));
    441  if (framerate == std::numeric_limits<int>::max()) {
    442    return std::numeric_limits<float>::infinity();
    443  } else {
    444    return max_framerate_request_;
    445  }
    446 }
    447 
    448 std::string VideoAdapter::OutputFormatRequest::ToString() const {
    449  StringBuilder oss;
    450  oss << "[ ";
    451  if (target_landscape_aspect_ratio == Swap(target_portrait_aspect_ratio) &&
    452      max_landscape_pixel_count == max_portrait_pixel_count) {
    453    if (target_landscape_aspect_ratio) {
    454      oss << target_landscape_aspect_ratio->first << "x"
    455          << target_landscape_aspect_ratio->second;
    456    } else {
    457      oss << "unset-resolution";
    458    }
    459    if (max_landscape_pixel_count) {
    460      oss << " max_pixel_count: " << *max_landscape_pixel_count;
    461    }
    462  } else {
    463    oss << "[ landscape: ";
    464    if (target_landscape_aspect_ratio) {
    465      oss << target_landscape_aspect_ratio->first << "x"
    466          << target_landscape_aspect_ratio->second;
    467    } else {
    468      oss << "unset";
    469    }
    470    if (max_landscape_pixel_count) {
    471      oss << " max_pixel_count: " << *max_landscape_pixel_count;
    472    }
    473    oss << " ] [ portrait: ";
    474    if (target_portrait_aspect_ratio) {
    475      oss << target_portrait_aspect_ratio->first << "x"
    476          << target_portrait_aspect_ratio->second;
    477    }
    478    if (max_portrait_pixel_count) {
    479      oss << " max_pixel_count: " << *max_portrait_pixel_count;
    480    }
    481    oss << " ]";
    482  }
    483  oss << " max_fps: ";
    484  if (max_fps) {
    485    oss << *max_fps;
    486  } else {
    487    oss << "unset";
    488  }
    489  oss << " ]";
    490  return oss.Release();
    491 }
    492 
    493 }  // namespace webrtc