ransac.c (17680B)
1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <memory.h> 13 #include <math.h> 14 #include <time.h> 15 #include <stdio.h> 16 #include <stdbool.h> 17 #include <string.h> 18 #include <assert.h> 19 20 #include "aom_dsp/flow_estimation/ransac.h" 21 #include "aom_dsp/mathutils.h" 22 #include "aom_mem/aom_mem.h" 23 24 // TODO(rachelbarker): Remove dependence on code in av1/encoder/ 25 #include "av1/encoder/random.h" 26 27 #define MAX_MINPTS 4 28 #define MINPTS_MULTIPLIER 5 29 30 #define INLIER_THRESHOLD 1.25 31 #define INLIER_THRESHOLD_SQUARED (INLIER_THRESHOLD * INLIER_THRESHOLD) 32 33 // Number of initial models to generate 34 #define NUM_TRIALS 20 35 36 // Number of times to refine the best model found 37 #define NUM_REFINES 5 38 39 // Flag to enable functions for finding TRANSLATION type models. 40 // 41 // These modes are not considered currently due to a spec bug (see comments 42 // in gm_get_motion_vector() in av1/common/mv.h). Thus we don't need to compile 43 // the corresponding search functions, but it is nice to keep the source around 44 // but disabled, for completeness. 45 #define ALLOW_TRANSLATION_MODELS 0 46 47 typedef struct { 48 int num_inliers; 49 double sse; // Sum of squared errors of inliers 50 int *inlier_indices; 51 } RANSAC_MOTION; 52 53 //////////////////////////////////////////////////////////////////////////////// 54 // ransac 55 typedef bool (*FindTransformationFunc)(const Correspondence *points, 56 const int *indices, int num_indices, 57 double *params); 58 typedef void (*ScoreModelFunc)(const double *mat, const Correspondence *points, 59 int num_points, RANSAC_MOTION *model); 60 61 // vtable-like structure which stores all of the information needed by RANSAC 62 // for a particular model type 63 typedef struct { 64 FindTransformationFunc find_transformation; 65 ScoreModelFunc score_model; 66 67 // The minimum number of points which can be passed to find_transformation 68 // to generate a model. 69 // 70 // This should be set as small as possible. This is due to an observation 71 // from section 4 of "Optimal Ransac" by A. Hast, J. Nysjö and 72 // A. Marchetti (https://dspace5.zcu.cz/bitstream/11025/6869/1/Hast.pdf): 73 // using the minimum possible number of points in the initial model maximizes 74 // the chance that all of the selected points are inliers. 75 // 76 // That paper proposes a method which can deal with models which are 77 // contaminated by outliers, which helps in cases where the inlier fraction 78 // is low. However, for our purposes, global motion only gives significant 79 // gains when the inlier fraction is high. 80 // 81 // So we do not use the method from this paper, but we do find that 82 // minimizing the number of points used for initial model fitting helps 83 // make the best use of the limited number of models we consider. 84 int minpts; 85 } RansacModelInfo; 86 87 #if ALLOW_TRANSLATION_MODELS 88 static void score_translation(const double *mat, const Correspondence *points, 89 int num_points, RANSAC_MOTION *model) { 90 model->num_inliers = 0; 91 model->sse = 0.0; 92 93 for (int i = 0; i < num_points; ++i) { 94 const double x1 = points[i].x; 95 const double y1 = points[i].y; 96 const double x2 = points[i].rx; 97 const double y2 = points[i].ry; 98 99 const double proj_x = x1 + mat[0]; 100 const double proj_y = y1 + mat[1]; 101 102 const double dx = proj_x - x2; 103 const double dy = proj_y - y2; 104 const double sse = dx * dx + dy * dy; 105 106 if (sse < INLIER_THRESHOLD_SQUARED) { 107 model->inlier_indices[model->num_inliers++] = i; 108 model->sse += sse; 109 } 110 } 111 } 112 #endif // ALLOW_TRANSLATION_MODELS 113 114 static void score_affine(const double *mat, const Correspondence *points, 115 int num_points, RANSAC_MOTION *model) { 116 model->num_inliers = 0; 117 model->sse = 0.0; 118 119 for (int i = 0; i < num_points; ++i) { 120 const double x1 = points[i].x; 121 const double y1 = points[i].y; 122 const double x2 = points[i].rx; 123 const double y2 = points[i].ry; 124 125 const double proj_x = mat[2] * x1 + mat[3] * y1 + mat[0]; 126 const double proj_y = mat[4] * x1 + mat[5] * y1 + mat[1]; 127 128 const double dx = proj_x - x2; 129 const double dy = proj_y - y2; 130 const double sse = dx * dx + dy * dy; 131 132 if (sse < INLIER_THRESHOLD_SQUARED) { 133 model->inlier_indices[model->num_inliers++] = i; 134 model->sse += sse; 135 } 136 } 137 } 138 139 #if ALLOW_TRANSLATION_MODELS 140 static bool find_translation(const Correspondence *points, const int *indices, 141 int num_indices, double *params) { 142 double sumx = 0; 143 double sumy = 0; 144 145 for (int i = 0; i < num_indices; ++i) { 146 int index = indices[i]; 147 const double sx = points[index].x; 148 const double sy = points[index].y; 149 const double dx = points[index].rx; 150 const double dy = points[index].ry; 151 152 sumx += dx - sx; 153 sumy += dy - sy; 154 } 155 156 params[0] = sumx / np; 157 params[1] = sumy / np; 158 params[2] = 1; 159 params[3] = 0; 160 params[4] = 0; 161 params[5] = 1; 162 return true; 163 } 164 #endif // ALLOW_TRANSLATION_MODELS 165 166 static bool find_rotzoom(const Correspondence *points, const int *indices, 167 int num_indices, double *params) { 168 const int n = 4; // Size of least-squares problem 169 double mat[4 * 4]; // Accumulator for A'A 170 double y[4]; // Accumulator for A'b 171 double a[4]; // Single row of A 172 double b; // Single element of b 173 174 least_squares_init(mat, y, n); 175 for (int i = 0; i < num_indices; ++i) { 176 int index = indices[i]; 177 const double sx = points[index].x; 178 const double sy = points[index].y; 179 const double dx = points[index].rx; 180 const double dy = points[index].ry; 181 182 a[0] = 1; 183 a[1] = 0; 184 a[2] = sx; 185 a[3] = sy; 186 b = dx; 187 least_squares_accumulate(mat, y, a, b, n); 188 189 a[0] = 0; 190 a[1] = 1; 191 a[2] = sy; 192 a[3] = -sx; 193 b = dy; 194 least_squares_accumulate(mat, y, a, b, n); 195 } 196 197 // Fill in params[0] .. params[3] with output model 198 if (!least_squares_solve(mat, y, params, n)) { 199 return false; 200 } 201 202 // Fill in remaining parameters 203 params[4] = -params[3]; 204 params[5] = params[2]; 205 206 return true; 207 } 208 209 static bool find_affine(const Correspondence *points, const int *indices, 210 int num_indices, double *params) { 211 // Note: The least squares problem for affine models is 6-dimensional, 212 // but it splits into two independent 3-dimensional subproblems. 213 // Solving these two subproblems separately and recombining at the end 214 // results in less total computation than solving the 6-dimensional 215 // problem directly. 216 // 217 // The two subproblems correspond to all the parameters which contribute 218 // to the x output of the model, and all the parameters which contribute 219 // to the y output, respectively. 220 221 const int n = 3; // Size of each least-squares problem 222 double mat[2][3 * 3]; // Accumulator for A'A 223 double y[2][3]; // Accumulator for A'b 224 double x[2][3]; // Output vector 225 double a[2][3]; // Single row of A 226 double b[2]; // Single element of b 227 228 least_squares_init(mat[0], y[0], n); 229 least_squares_init(mat[1], y[1], n); 230 for (int i = 0; i < num_indices; ++i) { 231 int index = indices[i]; 232 const double sx = points[index].x; 233 const double sy = points[index].y; 234 const double dx = points[index].rx; 235 const double dy = points[index].ry; 236 237 a[0][0] = 1; 238 a[0][1] = sx; 239 a[0][2] = sy; 240 b[0] = dx; 241 least_squares_accumulate(mat[0], y[0], a[0], b[0], n); 242 243 a[1][0] = 1; 244 a[1][1] = sx; 245 a[1][2] = sy; 246 b[1] = dy; 247 least_squares_accumulate(mat[1], y[1], a[1], b[1], n); 248 } 249 250 if (!least_squares_solve(mat[0], y[0], x[0], n)) { 251 return false; 252 } 253 if (!least_squares_solve(mat[1], y[1], x[1], n)) { 254 return false; 255 } 256 257 // Rearrange least squares result to form output model 258 params[0] = x[0][0]; 259 params[1] = x[1][0]; 260 params[2] = x[0][1]; 261 params[3] = x[0][2]; 262 params[4] = x[1][1]; 263 params[5] = x[1][2]; 264 265 return true; 266 } 267 268 // Return -1 if 'a' is a better motion, 1 if 'b' is better, 0 otherwise. 269 static int compare_motions(const void *arg_a, const void *arg_b) { 270 const RANSAC_MOTION *motion_a = (RANSAC_MOTION *)arg_a; 271 const RANSAC_MOTION *motion_b = (RANSAC_MOTION *)arg_b; 272 273 if (motion_a->num_inliers > motion_b->num_inliers) return -1; 274 if (motion_a->num_inliers < motion_b->num_inliers) return 1; 275 if (motion_a->sse < motion_b->sse) return -1; 276 if (motion_a->sse > motion_b->sse) return 1; 277 return 0; 278 } 279 280 static bool is_better_motion(const RANSAC_MOTION *motion_a, 281 const RANSAC_MOTION *motion_b) { 282 return compare_motions(motion_a, motion_b) < 0; 283 } 284 285 // Returns true on success, false on error 286 static bool ransac_internal(const Correspondence *matched_points, int npoints, 287 MotionModel *motion_models, int num_desired_motions, 288 const RansacModelInfo *model_info, 289 bool *mem_alloc_failed) { 290 assert(npoints >= 0); 291 int i = 0; 292 int minpts = model_info->minpts; 293 bool ret_val = true; 294 295 unsigned int seed = (unsigned int)npoints; 296 297 int indices[MAX_MINPTS] = { 0 }; 298 299 // Store information for the num_desired_motions best transformations found 300 // and the worst motion among them, as well as the motion currently under 301 // consideration. 302 RANSAC_MOTION *motions, *worst_kept_motion = NULL; 303 RANSAC_MOTION current_motion; 304 305 // Store the parameters and the indices of the inlier points for the motion 306 // currently under consideration. 307 double params_this_motion[MAX_PARAMDIM]; 308 309 // Initialize output models, as a fallback in case we can't find a model 310 for (i = 0; i < num_desired_motions; i++) { 311 memcpy(motion_models[i].params, kIdentityParams, 312 MAX_PARAMDIM * sizeof(*(motion_models[i].params))); 313 motion_models[i].num_inliers = 0; 314 } 315 316 if (npoints < minpts * MINPTS_MULTIPLIER || npoints == 0) { 317 return false; 318 } 319 320 int min_inliers = AOMMAX((int)(MIN_INLIER_PROB * npoints), minpts); 321 322 motions = 323 (RANSAC_MOTION *)aom_calloc(num_desired_motions, sizeof(RANSAC_MOTION)); 324 325 // Allocate one large buffer which will be carved up to store the inlier 326 // indices for the current motion plus the num_desired_motions many 327 // output models 328 // This allows us to keep the allocation/deallocation logic simple, without 329 // having to (for example) check that `motions` is non-null before allocating 330 // the inlier arrays 331 int *inlier_buffer = (int *)aom_malloc(sizeof(*inlier_buffer) * npoints * 332 (num_desired_motions + 1)); 333 334 if (!(motions && inlier_buffer)) { 335 ret_val = false; 336 *mem_alloc_failed = true; 337 goto finish_ransac; 338 } 339 340 // Once all our allocations are known-good, we can fill in our structures 341 worst_kept_motion = motions; 342 343 for (i = 0; i < num_desired_motions; ++i) { 344 motions[i].inlier_indices = inlier_buffer + i * npoints; 345 } 346 memset(¤t_motion, 0, sizeof(current_motion)); 347 current_motion.inlier_indices = inlier_buffer + num_desired_motions * npoints; 348 349 for (int trial_count = 0; trial_count < NUM_TRIALS; trial_count++) { 350 lcg_pick(npoints, minpts, indices, &seed); 351 352 if (!model_info->find_transformation(matched_points, indices, minpts, 353 params_this_motion)) { 354 continue; 355 } 356 357 model_info->score_model(params_this_motion, matched_points, npoints, 358 ¤t_motion); 359 360 if (current_motion.num_inliers < min_inliers) { 361 // Reject models with too few inliers 362 continue; 363 } 364 365 if (is_better_motion(¤t_motion, worst_kept_motion)) { 366 // This motion is better than the worst currently kept motion. Remember 367 // the inlier points and sse. The parameters for each kept motion 368 // will be recomputed later using only the inliers. 369 worst_kept_motion->num_inliers = current_motion.num_inliers; 370 worst_kept_motion->sse = current_motion.sse; 371 372 // Rather than copying the (potentially many) inlier indices from 373 // current_motion.inlier_indices to worst_kept_motion->inlier_indices, 374 // we can swap the underlying pointers. 375 // 376 // This is okay because the next time current_motion.inlier_indices 377 // is used will be in the next trial, where we ignore its previous 378 // contents anyway. And both arrays will be deallocated together at the 379 // end of this function, so there are no lifetime issues. 380 int *tmp = worst_kept_motion->inlier_indices; 381 worst_kept_motion->inlier_indices = current_motion.inlier_indices; 382 current_motion.inlier_indices = tmp; 383 384 // Determine the new worst kept motion and its num_inliers and sse. 385 for (i = 0; i < num_desired_motions; ++i) { 386 if (is_better_motion(worst_kept_motion, &motions[i])) { 387 worst_kept_motion = &motions[i]; 388 } 389 } 390 } 391 } 392 393 // Sort the motions, best first. 394 qsort(motions, num_desired_motions, sizeof(RANSAC_MOTION), compare_motions); 395 396 // Refine each of the best N models using iterative estimation. 397 // 398 // The idea here is loosely based on the iterative method from 399 // "Locally Optimized RANSAC" by O. Chum, J. Matas and Josef Kittler: 400 // https://cmp.felk.cvut.cz/ftp/articles/matas/chum-dagm03.pdf 401 // 402 // However, we implement a simpler version than their proposal, and simply 403 // refit the model repeatedly until the number of inliers stops increasing, 404 // with a cap on the number of iterations to defend against edge cases which 405 // only improve very slowly. 406 for (i = 0; i < num_desired_motions; ++i) { 407 if (motions[i].num_inliers <= 0) { 408 // Output model has already been initialized to the identity model, 409 // so just skip setup 410 continue; 411 } 412 413 bool bad_model = false; 414 for (int refine_count = 0; refine_count < NUM_REFINES; refine_count++) { 415 int num_inliers = motions[i].num_inliers; 416 assert(num_inliers >= min_inliers); 417 418 if (!model_info->find_transformation(matched_points, 419 motions[i].inlier_indices, 420 num_inliers, params_this_motion)) { 421 // In the unlikely event that this model fitting fails, we don't have a 422 // good fallback. So leave this model set to the identity model 423 bad_model = true; 424 break; 425 } 426 427 // Score the newly generated model 428 model_info->score_model(params_this_motion, matched_points, npoints, 429 ¤t_motion); 430 431 // At this point, there are three possibilities: 432 // 1) If we found more inliers, keep refining. 433 // 2) If we found the same number of inliers but a lower SSE, we want to 434 // keep the new model, but further refinement is unlikely to gain much. 435 // So commit to this new model 436 // 3) It is possible, but very unlikely, that the new model will have 437 // fewer inliers. If it does happen, we probably just lost a few 438 // borderline inliers. So treat the same as case (2). 439 if (current_motion.num_inliers > motions[i].num_inliers) { 440 motions[i].num_inliers = current_motion.num_inliers; 441 motions[i].sse = current_motion.sse; 442 int *tmp = motions[i].inlier_indices; 443 motions[i].inlier_indices = current_motion.inlier_indices; 444 current_motion.inlier_indices = tmp; 445 } else { 446 // Refined model is no better, so stop 447 // This shouldn't be significantly worse than the previous model, 448 // so it's fine to use the parameters in params_this_motion. 449 // This saves us from having to cache the previous iteration's params. 450 break; 451 } 452 } 453 454 if (bad_model) continue; 455 456 // Fill in output struct 457 memcpy(motion_models[i].params, params_this_motion, 458 MAX_PARAMDIM * sizeof(*motion_models[i].params)); 459 for (int j = 0; j < motions[i].num_inliers; j++) { 460 int index = motions[i].inlier_indices[j]; 461 const Correspondence *corr = &matched_points[index]; 462 motion_models[i].inliers[2 * j + 0] = (int)rint(corr->x); 463 motion_models[i].inliers[2 * j + 1] = (int)rint(corr->y); 464 } 465 motion_models[i].num_inliers = motions[i].num_inliers; 466 } 467 468 finish_ransac: 469 aom_free(inlier_buffer); 470 aom_free(motions); 471 472 return ret_val; 473 } 474 475 static const RansacModelInfo ransac_model_info[TRANS_TYPES] = { 476 // IDENTITY 477 { NULL, NULL, 0 }, 478 // TRANSLATION 479 #if ALLOW_TRANSLATION_MODELS 480 { find_translation, score_translation, 1 }, 481 #else 482 { NULL, NULL, 0 }, 483 #endif 484 // ROTZOOM 485 { find_rotzoom, score_affine, 2 }, 486 // AFFINE 487 { find_affine, score_affine, 3 }, 488 }; 489 490 // Returns true on success, false on error 491 bool ransac(const Correspondence *matched_points, int npoints, 492 TransformationType type, MotionModel *motion_models, 493 int num_desired_motions, bool *mem_alloc_failed) { 494 #if ALLOW_TRANSLATION_MODELS 495 assert(type > IDENTITY && type < TRANS_TYPES); 496 #else 497 assert(type > TRANSLATION && type < TRANS_TYPES); 498 #endif // ALLOW_TRANSLATION_MODELS 499 500 return ransac_internal(matched_points, npoints, motion_models, 501 num_desired_motions, &ransac_model_info[type], 502 mem_alloc_failed); 503 }