tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ransac.c (17680B)


      1 /*
      2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <memory.h>
     13 #include <math.h>
     14 #include <time.h>
     15 #include <stdio.h>
     16 #include <stdbool.h>
     17 #include <string.h>
     18 #include <assert.h>
     19 
     20 #include "aom_dsp/flow_estimation/ransac.h"
     21 #include "aom_dsp/mathutils.h"
     22 #include "aom_mem/aom_mem.h"
     23 
     24 // TODO(rachelbarker): Remove dependence on code in av1/encoder/
     25 #include "av1/encoder/random.h"
     26 
     27 #define MAX_MINPTS 4
     28 #define MINPTS_MULTIPLIER 5
     29 
     30 #define INLIER_THRESHOLD 1.25
     31 #define INLIER_THRESHOLD_SQUARED (INLIER_THRESHOLD * INLIER_THRESHOLD)
     32 
     33 // Number of initial models to generate
     34 #define NUM_TRIALS 20
     35 
     36 // Number of times to refine the best model found
     37 #define NUM_REFINES 5
     38 
     39 // Flag to enable functions for finding TRANSLATION type models.
     40 //
     41 // These modes are not considered currently due to a spec bug (see comments
     42 // in gm_get_motion_vector() in av1/common/mv.h). Thus we don't need to compile
     43 // the corresponding search functions, but it is nice to keep the source around
     44 // but disabled, for completeness.
     45 #define ALLOW_TRANSLATION_MODELS 0
     46 
     47 typedef struct {
     48  int num_inliers;
     49  double sse;  // Sum of squared errors of inliers
     50  int *inlier_indices;
     51 } RANSAC_MOTION;
     52 
     53 ////////////////////////////////////////////////////////////////////////////////
     54 // ransac
     55 typedef bool (*FindTransformationFunc)(const Correspondence *points,
     56                                       const int *indices, int num_indices,
     57                                       double *params);
     58 typedef void (*ScoreModelFunc)(const double *mat, const Correspondence *points,
     59                               int num_points, RANSAC_MOTION *model);
     60 
     61 // vtable-like structure which stores all of the information needed by RANSAC
     62 // for a particular model type
     63 typedef struct {
     64  FindTransformationFunc find_transformation;
     65  ScoreModelFunc score_model;
     66 
     67  // The minimum number of points which can be passed to find_transformation
     68  // to generate a model.
     69  //
     70  // This should be set as small as possible. This is due to an observation
     71  // from section 4 of "Optimal Ransac" by A. Hast, J. Nysjö and
     72  // A. Marchetti (https://dspace5.zcu.cz/bitstream/11025/6869/1/Hast.pdf):
     73  // using the minimum possible number of points in the initial model maximizes
     74  // the chance that all of the selected points are inliers.
     75  //
     76  // That paper proposes a method which can deal with models which are
     77  // contaminated by outliers, which helps in cases where the inlier fraction
     78  // is low. However, for our purposes, global motion only gives significant
     79  // gains when the inlier fraction is high.
     80  //
     81  // So we do not use the method from this paper, but we do find that
     82  // minimizing the number of points used for initial model fitting helps
     83  // make the best use of the limited number of models we consider.
     84  int minpts;
     85 } RansacModelInfo;
     86 
     87 #if ALLOW_TRANSLATION_MODELS
     88 static void score_translation(const double *mat, const Correspondence *points,
     89                              int num_points, RANSAC_MOTION *model) {
     90  model->num_inliers = 0;
     91  model->sse = 0.0;
     92 
     93  for (int i = 0; i < num_points; ++i) {
     94    const double x1 = points[i].x;
     95    const double y1 = points[i].y;
     96    const double x2 = points[i].rx;
     97    const double y2 = points[i].ry;
     98 
     99    const double proj_x = x1 + mat[0];
    100    const double proj_y = y1 + mat[1];
    101 
    102    const double dx = proj_x - x2;
    103    const double dy = proj_y - y2;
    104    const double sse = dx * dx + dy * dy;
    105 
    106    if (sse < INLIER_THRESHOLD_SQUARED) {
    107      model->inlier_indices[model->num_inliers++] = i;
    108      model->sse += sse;
    109    }
    110  }
    111 }
    112 #endif  // ALLOW_TRANSLATION_MODELS
    113 
    114 static void score_affine(const double *mat, const Correspondence *points,
    115                         int num_points, RANSAC_MOTION *model) {
    116  model->num_inliers = 0;
    117  model->sse = 0.0;
    118 
    119  for (int i = 0; i < num_points; ++i) {
    120    const double x1 = points[i].x;
    121    const double y1 = points[i].y;
    122    const double x2 = points[i].rx;
    123    const double y2 = points[i].ry;
    124 
    125    const double proj_x = mat[2] * x1 + mat[3] * y1 + mat[0];
    126    const double proj_y = mat[4] * x1 + mat[5] * y1 + mat[1];
    127 
    128    const double dx = proj_x - x2;
    129    const double dy = proj_y - y2;
    130    const double sse = dx * dx + dy * dy;
    131 
    132    if (sse < INLIER_THRESHOLD_SQUARED) {
    133      model->inlier_indices[model->num_inliers++] = i;
    134      model->sse += sse;
    135    }
    136  }
    137 }
    138 
    139 #if ALLOW_TRANSLATION_MODELS
    140 static bool find_translation(const Correspondence *points, const int *indices,
    141                             int num_indices, double *params) {
    142  double sumx = 0;
    143  double sumy = 0;
    144 
    145  for (int i = 0; i < num_indices; ++i) {
    146    int index = indices[i];
    147    const double sx = points[index].x;
    148    const double sy = points[index].y;
    149    const double dx = points[index].rx;
    150    const double dy = points[index].ry;
    151 
    152    sumx += dx - sx;
    153    sumy += dy - sy;
    154  }
    155 
    156  params[0] = sumx / np;
    157  params[1] = sumy / np;
    158  params[2] = 1;
    159  params[3] = 0;
    160  params[4] = 0;
    161  params[5] = 1;
    162  return true;
    163 }
    164 #endif  // ALLOW_TRANSLATION_MODELS
    165 
    166 static bool find_rotzoom(const Correspondence *points, const int *indices,
    167                         int num_indices, double *params) {
    168  const int n = 4;    // Size of least-squares problem
    169  double mat[4 * 4];  // Accumulator for A'A
    170  double y[4];        // Accumulator for A'b
    171  double a[4];        // Single row of A
    172  double b;           // Single element of b
    173 
    174  least_squares_init(mat, y, n);
    175  for (int i = 0; i < num_indices; ++i) {
    176    int index = indices[i];
    177    const double sx = points[index].x;
    178    const double sy = points[index].y;
    179    const double dx = points[index].rx;
    180    const double dy = points[index].ry;
    181 
    182    a[0] = 1;
    183    a[1] = 0;
    184    a[2] = sx;
    185    a[3] = sy;
    186    b = dx;
    187    least_squares_accumulate(mat, y, a, b, n);
    188 
    189    a[0] = 0;
    190    a[1] = 1;
    191    a[2] = sy;
    192    a[3] = -sx;
    193    b = dy;
    194    least_squares_accumulate(mat, y, a, b, n);
    195  }
    196 
    197  // Fill in params[0] .. params[3] with output model
    198  if (!least_squares_solve(mat, y, params, n)) {
    199    return false;
    200  }
    201 
    202  // Fill in remaining parameters
    203  params[4] = -params[3];
    204  params[5] = params[2];
    205 
    206  return true;
    207 }
    208 
    209 static bool find_affine(const Correspondence *points, const int *indices,
    210                        int num_indices, double *params) {
    211  // Note: The least squares problem for affine models is 6-dimensional,
    212  // but it splits into two independent 3-dimensional subproblems.
    213  // Solving these two subproblems separately and recombining at the end
    214  // results in less total computation than solving the 6-dimensional
    215  // problem directly.
    216  //
    217  // The two subproblems correspond to all the parameters which contribute
    218  // to the x output of the model, and all the parameters which contribute
    219  // to the y output, respectively.
    220 
    221  const int n = 3;       // Size of each least-squares problem
    222  double mat[2][3 * 3];  // Accumulator for A'A
    223  double y[2][3];        // Accumulator for A'b
    224  double x[2][3];        // Output vector
    225  double a[2][3];        // Single row of A
    226  double b[2];           // Single element of b
    227 
    228  least_squares_init(mat[0], y[0], n);
    229  least_squares_init(mat[1], y[1], n);
    230  for (int i = 0; i < num_indices; ++i) {
    231    int index = indices[i];
    232    const double sx = points[index].x;
    233    const double sy = points[index].y;
    234    const double dx = points[index].rx;
    235    const double dy = points[index].ry;
    236 
    237    a[0][0] = 1;
    238    a[0][1] = sx;
    239    a[0][2] = sy;
    240    b[0] = dx;
    241    least_squares_accumulate(mat[0], y[0], a[0], b[0], n);
    242 
    243    a[1][0] = 1;
    244    a[1][1] = sx;
    245    a[1][2] = sy;
    246    b[1] = dy;
    247    least_squares_accumulate(mat[1], y[1], a[1], b[1], n);
    248  }
    249 
    250  if (!least_squares_solve(mat[0], y[0], x[0], n)) {
    251    return false;
    252  }
    253  if (!least_squares_solve(mat[1], y[1], x[1], n)) {
    254    return false;
    255  }
    256 
    257  // Rearrange least squares result to form output model
    258  params[0] = x[0][0];
    259  params[1] = x[1][0];
    260  params[2] = x[0][1];
    261  params[3] = x[0][2];
    262  params[4] = x[1][1];
    263  params[5] = x[1][2];
    264 
    265  return true;
    266 }
    267 
    268 // Return -1 if 'a' is a better motion, 1 if 'b' is better, 0 otherwise.
    269 static int compare_motions(const void *arg_a, const void *arg_b) {
    270  const RANSAC_MOTION *motion_a = (RANSAC_MOTION *)arg_a;
    271  const RANSAC_MOTION *motion_b = (RANSAC_MOTION *)arg_b;
    272 
    273  if (motion_a->num_inliers > motion_b->num_inliers) return -1;
    274  if (motion_a->num_inliers < motion_b->num_inliers) return 1;
    275  if (motion_a->sse < motion_b->sse) return -1;
    276  if (motion_a->sse > motion_b->sse) return 1;
    277  return 0;
    278 }
    279 
    280 static bool is_better_motion(const RANSAC_MOTION *motion_a,
    281                             const RANSAC_MOTION *motion_b) {
    282  return compare_motions(motion_a, motion_b) < 0;
    283 }
    284 
    285 // Returns true on success, false on error
    286 static bool ransac_internal(const Correspondence *matched_points, int npoints,
    287                            MotionModel *motion_models, int num_desired_motions,
    288                            const RansacModelInfo *model_info,
    289                            bool *mem_alloc_failed) {
    290  assert(npoints >= 0);
    291  int i = 0;
    292  int minpts = model_info->minpts;
    293  bool ret_val = true;
    294 
    295  unsigned int seed = (unsigned int)npoints;
    296 
    297  int indices[MAX_MINPTS] = { 0 };
    298 
    299  // Store information for the num_desired_motions best transformations found
    300  // and the worst motion among them, as well as the motion currently under
    301  // consideration.
    302  RANSAC_MOTION *motions, *worst_kept_motion = NULL;
    303  RANSAC_MOTION current_motion;
    304 
    305  // Store the parameters and the indices of the inlier points for the motion
    306  // currently under consideration.
    307  double params_this_motion[MAX_PARAMDIM];
    308 
    309  // Initialize output models, as a fallback in case we can't find a model
    310  for (i = 0; i < num_desired_motions; i++) {
    311    memcpy(motion_models[i].params, kIdentityParams,
    312           MAX_PARAMDIM * sizeof(*(motion_models[i].params)));
    313    motion_models[i].num_inliers = 0;
    314  }
    315 
    316  if (npoints < minpts * MINPTS_MULTIPLIER || npoints == 0) {
    317    return false;
    318  }
    319 
    320  int min_inliers = AOMMAX((int)(MIN_INLIER_PROB * npoints), minpts);
    321 
    322  motions =
    323      (RANSAC_MOTION *)aom_calloc(num_desired_motions, sizeof(RANSAC_MOTION));
    324 
    325  // Allocate one large buffer which will be carved up to store the inlier
    326  // indices for the current motion plus the num_desired_motions many
    327  // output models
    328  // This allows us to keep the allocation/deallocation logic simple, without
    329  // having to (for example) check that `motions` is non-null before allocating
    330  // the inlier arrays
    331  int *inlier_buffer = (int *)aom_malloc(sizeof(*inlier_buffer) * npoints *
    332                                         (num_desired_motions + 1));
    333 
    334  if (!(motions && inlier_buffer)) {
    335    ret_val = false;
    336    *mem_alloc_failed = true;
    337    goto finish_ransac;
    338  }
    339 
    340  // Once all our allocations are known-good, we can fill in our structures
    341  worst_kept_motion = motions;
    342 
    343  for (i = 0; i < num_desired_motions; ++i) {
    344    motions[i].inlier_indices = inlier_buffer + i * npoints;
    345  }
    346  memset(&current_motion, 0, sizeof(current_motion));
    347  current_motion.inlier_indices = inlier_buffer + num_desired_motions * npoints;
    348 
    349  for (int trial_count = 0; trial_count < NUM_TRIALS; trial_count++) {
    350    lcg_pick(npoints, minpts, indices, &seed);
    351 
    352    if (!model_info->find_transformation(matched_points, indices, minpts,
    353                                         params_this_motion)) {
    354      continue;
    355    }
    356 
    357    model_info->score_model(params_this_motion, matched_points, npoints,
    358                            &current_motion);
    359 
    360    if (current_motion.num_inliers < min_inliers) {
    361      // Reject models with too few inliers
    362      continue;
    363    }
    364 
    365    if (is_better_motion(&current_motion, worst_kept_motion)) {
    366      // This motion is better than the worst currently kept motion. Remember
    367      // the inlier points and sse. The parameters for each kept motion
    368      // will be recomputed later using only the inliers.
    369      worst_kept_motion->num_inliers = current_motion.num_inliers;
    370      worst_kept_motion->sse = current_motion.sse;
    371 
    372      // Rather than copying the (potentially many) inlier indices from
    373      // current_motion.inlier_indices to worst_kept_motion->inlier_indices,
    374      // we can swap the underlying pointers.
    375      //
    376      // This is okay because the next time current_motion.inlier_indices
    377      // is used will be in the next trial, where we ignore its previous
    378      // contents anyway. And both arrays will be deallocated together at the
    379      // end of this function, so there are no lifetime issues.
    380      int *tmp = worst_kept_motion->inlier_indices;
    381      worst_kept_motion->inlier_indices = current_motion.inlier_indices;
    382      current_motion.inlier_indices = tmp;
    383 
    384      // Determine the new worst kept motion and its num_inliers and sse.
    385      for (i = 0; i < num_desired_motions; ++i) {
    386        if (is_better_motion(worst_kept_motion, &motions[i])) {
    387          worst_kept_motion = &motions[i];
    388        }
    389      }
    390    }
    391  }
    392 
    393  // Sort the motions, best first.
    394  qsort(motions, num_desired_motions, sizeof(RANSAC_MOTION), compare_motions);
    395 
    396  // Refine each of the best N models using iterative estimation.
    397  //
    398  // The idea here is loosely based on the iterative method from
    399  // "Locally Optimized RANSAC" by O. Chum, J. Matas and Josef Kittler:
    400  // https://cmp.felk.cvut.cz/ftp/articles/matas/chum-dagm03.pdf
    401  //
    402  // However, we implement a simpler version than their proposal, and simply
    403  // refit the model repeatedly until the number of inliers stops increasing,
    404  // with a cap on the number of iterations to defend against edge cases which
    405  // only improve very slowly.
    406  for (i = 0; i < num_desired_motions; ++i) {
    407    if (motions[i].num_inliers <= 0) {
    408      // Output model has already been initialized to the identity model,
    409      // so just skip setup
    410      continue;
    411    }
    412 
    413    bool bad_model = false;
    414    for (int refine_count = 0; refine_count < NUM_REFINES; refine_count++) {
    415      int num_inliers = motions[i].num_inliers;
    416      assert(num_inliers >= min_inliers);
    417 
    418      if (!model_info->find_transformation(matched_points,
    419                                           motions[i].inlier_indices,
    420                                           num_inliers, params_this_motion)) {
    421        // In the unlikely event that this model fitting fails, we don't have a
    422        // good fallback. So leave this model set to the identity model
    423        bad_model = true;
    424        break;
    425      }
    426 
    427      // Score the newly generated model
    428      model_info->score_model(params_this_motion, matched_points, npoints,
    429                              &current_motion);
    430 
    431      // At this point, there are three possibilities:
    432      // 1) If we found more inliers, keep refining.
    433      // 2) If we found the same number of inliers but a lower SSE, we want to
    434      //    keep the new model, but further refinement is unlikely to gain much.
    435      //    So commit to this new model
    436      // 3) It is possible, but very unlikely, that the new model will have
    437      //    fewer inliers. If it does happen, we probably just lost a few
    438      //    borderline inliers. So treat the same as case (2).
    439      if (current_motion.num_inliers > motions[i].num_inliers) {
    440        motions[i].num_inliers = current_motion.num_inliers;
    441        motions[i].sse = current_motion.sse;
    442        int *tmp = motions[i].inlier_indices;
    443        motions[i].inlier_indices = current_motion.inlier_indices;
    444        current_motion.inlier_indices = tmp;
    445      } else {
    446        // Refined model is no better, so stop
    447        // This shouldn't be significantly worse than the previous model,
    448        // so it's fine to use the parameters in params_this_motion.
    449        // This saves us from having to cache the previous iteration's params.
    450        break;
    451      }
    452    }
    453 
    454    if (bad_model) continue;
    455 
    456    // Fill in output struct
    457    memcpy(motion_models[i].params, params_this_motion,
    458           MAX_PARAMDIM * sizeof(*motion_models[i].params));
    459    for (int j = 0; j < motions[i].num_inliers; j++) {
    460      int index = motions[i].inlier_indices[j];
    461      const Correspondence *corr = &matched_points[index];
    462      motion_models[i].inliers[2 * j + 0] = (int)rint(corr->x);
    463      motion_models[i].inliers[2 * j + 1] = (int)rint(corr->y);
    464    }
    465    motion_models[i].num_inliers = motions[i].num_inliers;
    466  }
    467 
    468 finish_ransac:
    469  aom_free(inlier_buffer);
    470  aom_free(motions);
    471 
    472  return ret_val;
    473 }
    474 
    475 static const RansacModelInfo ransac_model_info[TRANS_TYPES] = {
    476  // IDENTITY
    477  { NULL, NULL, 0 },
    478 // TRANSLATION
    479 #if ALLOW_TRANSLATION_MODELS
    480  { find_translation, score_translation, 1 },
    481 #else
    482  { NULL, NULL, 0 },
    483 #endif
    484  // ROTZOOM
    485  { find_rotzoom, score_affine, 2 },
    486  // AFFINE
    487  { find_affine, score_affine, 3 },
    488 };
    489 
    490 // Returns true on success, false on error
    491 bool ransac(const Correspondence *matched_points, int npoints,
    492            TransformationType type, MotionModel *motion_models,
    493            int num_desired_motions, bool *mem_alloc_failed) {
    494 #if ALLOW_TRANSLATION_MODELS
    495  assert(type > IDENTITY && type < TRANS_TYPES);
    496 #else
    497  assert(type > TRANSLATION && type < TRANS_TYPES);
    498 #endif  // ALLOW_TRANSLATION_MODELS
    499 
    500  return ransac_internal(matched_points, npoints, motion_models,
    501                         num_desired_motions, &ransac_model_info[type],
    502                         mem_alloc_failed);
    503 }