optical_flow.c (44384B)
1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 #include <math.h> 12 #include <limits.h> 13 14 #include "config/aom_config.h" 15 16 #include "aom_dsp/mathutils.h" 17 #include "aom_mem/aom_mem.h" 18 19 #include "av1/common/av1_common_int.h" 20 #include "av1/encoder/encoder.h" 21 #include "av1/encoder/optical_flow.h" 22 #include "av1/encoder/sparse_linear_solver.h" 23 #include "av1/encoder/reconinter_enc.h" 24 25 #if CONFIG_OPTICAL_FLOW_API 26 27 void av1_init_opfl_params(OPFL_PARAMS *opfl_params) { 28 opfl_params->pyramid_levels = OPFL_PYRAMID_LEVELS; 29 opfl_params->warping_steps = OPFL_WARPING_STEPS; 30 opfl_params->lk_params = NULL; 31 } 32 33 void av1_init_lk_params(LK_PARAMS *lk_params) { 34 lk_params->window_size = OPFL_WINDOW_SIZE; 35 } 36 37 // Helper function to determine whether a frame is encoded with high bit-depth. 38 static inline int is_frame_high_bitdepth(const YV12_BUFFER_CONFIG *frame) { 39 return (frame->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0; 40 } 41 42 // Helper function to determine whether optical flow method is sparse. 43 static inline int is_sparse(const OPFL_PARAMS *opfl_params) { 44 return (opfl_params->flags & OPFL_FLAG_SPARSE) ? 1 : 0; 45 } 46 47 static void gradients_over_window(const YV12_BUFFER_CONFIG *frame, 48 const YV12_BUFFER_CONFIG *ref_frame, 49 const double x_coord, const double y_coord, 50 const int window_size, const int bit_depth, 51 double *ix, double *iy, double *it, 52 LOCALMV *mv); 53 54 // coefficients for bilinear interpolation on unit square 55 static int pixel_interp(const double x, const double y, const double b00, 56 const double b01, const double b10, const double b11) { 57 const int xint = (int)x; 58 const int yint = (int)y; 59 const double xdec = x - xint; 60 const double ydec = y - yint; 61 const double a = (1 - xdec) * (1 - ydec); 62 const double b = xdec * (1 - ydec); 63 const double c = (1 - xdec) * ydec; 64 const double d = xdec * ydec; 65 // if x, y are already integers, this results to b00 66 int interp = (int)round(a * b00 + b * b01 + c * b10 + d * b11); 67 return interp; 68 } 69 70 // Scharr filter to compute spatial gradient 71 static void spatial_gradient(const YV12_BUFFER_CONFIG *frame, const int x_coord, 72 const int y_coord, const int direction, 73 double *derivative) { 74 double *filter; 75 // Scharr filters 76 double gx[9] = { -3, 0, 3, -10, 0, 10, -3, 0, 3 }; 77 double gy[9] = { -3, -10, -3, 0, 0, 0, 3, 10, 3 }; 78 if (direction == 0) { // x direction 79 filter = gx; 80 } else { // y direction 81 filter = gy; 82 } 83 int idx = 0; 84 double d = 0; 85 for (int yy = -1; yy <= 1; yy++) { 86 for (int xx = -1; xx <= 1; xx++) { 87 d += filter[idx] * 88 frame->y_buffer[(y_coord + yy) * frame->y_stride + (x_coord + xx)]; 89 idx++; 90 } 91 } 92 // normalization scaling factor for scharr 93 *derivative = d / 32.0; 94 } 95 96 // Determine the spatial gradient at subpixel locations 97 // For example, when reducing images for pyramidal LK, 98 // corners found in original image may be at subpixel locations. 99 static void gradient_interp(double *fullpel_deriv, const double x_coord, 100 const double y_coord, const int w, const int h, 101 double *derivative) { 102 const int xint = (int)x_coord; 103 const int yint = (int)y_coord; 104 double interp; 105 if (xint + 1 > w - 1 || yint + 1 > h - 1) { 106 interp = fullpel_deriv[yint * w + xint]; 107 } else { 108 interp = pixel_interp(x_coord, y_coord, fullpel_deriv[yint * w + xint], 109 fullpel_deriv[yint * w + (xint + 1)], 110 fullpel_deriv[(yint + 1) * w + xint], 111 fullpel_deriv[(yint + 1) * w + (xint + 1)]); 112 } 113 114 *derivative = interp; 115 } 116 117 static void temporal_gradient(const YV12_BUFFER_CONFIG *frame, 118 const YV12_BUFFER_CONFIG *frame2, 119 const double x_coord, const double y_coord, 120 const int bit_depth, double *derivative, 121 LOCALMV *mv) { 122 const int w = 2; 123 const int h = 2; 124 uint8_t pred1[4]; 125 uint8_t pred2[4]; 126 127 const int y = (int)y_coord; 128 const int x = (int)x_coord; 129 const double ydec = y_coord - y; 130 const double xdec = x_coord - x; 131 const int is_intrabc = 0; // Is intra-copied? 132 const int is_high_bitdepth = is_frame_high_bitdepth(frame2); 133 const int subsampling_x = 0, subsampling_y = 0; // for y-buffer 134 const int_interpfilters interp_filters = 135 av1_broadcast_interp_filter(MULTITAP_SHARP); 136 const int plane = 0; // y-plane 137 const struct buf_2d ref_buf2 = { NULL, frame2->y_buffer, frame2->y_crop_width, 138 frame2->y_crop_height, frame2->y_stride }; 139 struct scale_factors scale; 140 av1_setup_scale_factors_for_frame(&scale, frame->y_crop_width, 141 frame->y_crop_height, frame->y_crop_width, 142 frame->y_crop_height); 143 InterPredParams inter_pred_params; 144 av1_init_inter_params(&inter_pred_params, w, h, y, x, subsampling_x, 145 subsampling_y, bit_depth, is_high_bitdepth, is_intrabc, 146 &scale, &ref_buf2, interp_filters); 147 inter_pred_params.interp_filter_params[0] = 148 &av1_interp_filter_params_list[interp_filters.as_filters.x_filter]; 149 inter_pred_params.interp_filter_params[1] = 150 &av1_interp_filter_params_list[interp_filters.as_filters.y_filter]; 151 inter_pred_params.conv_params = get_conv_params(0, plane, bit_depth); 152 MV newmv = { .row = (int16_t)round((mv->row + xdec) * 8), 153 .col = (int16_t)round((mv->col + ydec) * 8) }; 154 av1_enc_build_one_inter_predictor(pred2, w, &newmv, &inter_pred_params); 155 const struct buf_2d ref_buf1 = { NULL, frame->y_buffer, frame->y_crop_width, 156 frame->y_crop_height, frame->y_stride }; 157 av1_init_inter_params(&inter_pred_params, w, h, y, x, subsampling_x, 158 subsampling_y, bit_depth, is_high_bitdepth, is_intrabc, 159 &scale, &ref_buf1, interp_filters); 160 inter_pred_params.interp_filter_params[0] = 161 &av1_interp_filter_params_list[interp_filters.as_filters.x_filter]; 162 inter_pred_params.interp_filter_params[1] = 163 &av1_interp_filter_params_list[interp_filters.as_filters.y_filter]; 164 inter_pred_params.conv_params = get_conv_params(0, plane, bit_depth); 165 MV zeroMV = { .row = (int16_t)round(xdec * 8), 166 .col = (int16_t)round(ydec * 8) }; 167 av1_enc_build_one_inter_predictor(pred1, w, &zeroMV, &inter_pred_params); 168 169 *derivative = pred2[0] - pred1[0]; 170 } 171 172 // Numerical differentiate over window_size x window_size surrounding (x,y) 173 // location. Alters ix, iy, it to contain numerical partial derivatives 174 static void gradients_over_window(const YV12_BUFFER_CONFIG *frame, 175 const YV12_BUFFER_CONFIG *ref_frame, 176 const double x_coord, const double y_coord, 177 const int window_size, const int bit_depth, 178 double *ix, double *iy, double *it, 179 LOCALMV *mv) { 180 const double left = x_coord - window_size / 2.0; 181 const double top = y_coord - window_size / 2.0; 182 // gradient operators need pixel before and after (start at 1) 183 const double x_start = AOMMAX(1, left); 184 const double y_start = AOMMAX(1, top); 185 const int frame_height = frame->y_crop_height; 186 const int frame_width = frame->y_crop_width; 187 double deriv_x; 188 double deriv_y; 189 double deriv_t; 190 191 const double x_end = AOMMIN(x_coord + window_size / 2.0, frame_width - 2); 192 const double y_end = AOMMIN(y_coord + window_size / 2.0, frame_height - 2); 193 const int xs = (int)AOMMAX(1, x_start - 1); 194 const int ys = (int)AOMMAX(1, y_start - 1); 195 const int xe = (int)AOMMIN(x_end + 2, frame_width - 2); 196 const int ye = (int)AOMMIN(y_end + 2, frame_height - 2); 197 // with normalization, gradients may be double values 198 double *fullpel_dx = aom_malloc((ye - ys) * (xe - xs) * sizeof(deriv_x)); 199 double *fullpel_dy = aom_malloc((ye - ys) * (xe - xs) * sizeof(deriv_y)); 200 if (!fullpel_dx || !fullpel_dy) { 201 aom_free(fullpel_dx); 202 aom_free(fullpel_dy); 203 return; 204 } 205 206 // TODO(any): This could be more efficient in the case that x_coord 207 // and y_coord are integers.. but it may look more messy. 208 209 // calculate spatial gradients at full pixel locations 210 for (int j = ys; j < ye; j++) { 211 for (int i = xs; i < xe; i++) { 212 spatial_gradient(frame, i, j, 0, &deriv_x); 213 spatial_gradient(frame, i, j, 1, &deriv_y); 214 int idx = (j - ys) * (xe - xs) + (i - xs); 215 fullpel_dx[idx] = deriv_x; 216 fullpel_dy[idx] = deriv_y; 217 } 218 } 219 // compute numerical differentiation for every pixel in window 220 // (this potentially includes subpixels) 221 for (double j = y_start; j < y_end; j++) { 222 for (double i = x_start; i < x_end; i++) { 223 temporal_gradient(frame, ref_frame, i, j, bit_depth, &deriv_t, mv); 224 gradient_interp(fullpel_dx, i - xs, j - ys, xe - xs, ye - ys, &deriv_x); 225 gradient_interp(fullpel_dy, i - xs, j - ys, xe - xs, ye - ys, &deriv_y); 226 int idx = (int)(j - top) * window_size + (int)(i - left); 227 ix[idx] = deriv_x; 228 iy[idx] = deriv_y; 229 it[idx] = deriv_t; 230 } 231 } 232 // TODO(any): to avoid setting deriv arrays to zero for every iteration, 233 // could instead pass these two values back through function call 234 // int first_idx = (int)(y_start - top) * window_size + (int)(x_start - left); 235 // int width = window_size - ((int)(x_start - left) + (int)(left + window_size 236 // - x_end)); 237 238 aom_free(fullpel_dx); 239 aom_free(fullpel_dy); 240 } 241 242 // To compute eigenvalues of 2x2 matrix: Solve for lambda where 243 // Determinant(matrix - lambda*identity) == 0 244 static void eigenvalues_2x2(const double *matrix, double *eig) { 245 const double a = 1; 246 const double b = -1 * matrix[0] - matrix[3]; 247 const double c = -1 * matrix[1] * matrix[2] + matrix[0] * matrix[3]; 248 // quadratic formula 249 const double discriminant = b * b - 4 * a * c; 250 eig[0] = (-b - sqrt(discriminant)) / (2.0 * a); 251 eig[1] = (-b + sqrt(discriminant)) / (2.0 * a); 252 // double check that eigenvalues are ordered by magnitude 253 if (fabs(eig[0]) > fabs(eig[1])) { 254 double tmp = eig[0]; 255 eig[0] = eig[1]; 256 eig[1] = tmp; 257 } 258 } 259 260 // Shi-Tomasi corner detection criteria 261 static double corner_score(const YV12_BUFFER_CONFIG *frame_to_filter, 262 const YV12_BUFFER_CONFIG *ref_frame, const int x, 263 const int y, double *i_x, double *i_y, double *i_t, 264 const int n, const int bit_depth) { 265 double eig[2]; 266 LOCALMV mv = { .row = 0, .col = 0 }; 267 // TODO(any): technically, ref_frame and i_t are not used by corner score 268 // so these could be replaced by dummy variables, 269 // or change this to spatial gradient function over window only 270 gradients_over_window(frame_to_filter, ref_frame, x, y, n, bit_depth, i_x, 271 i_y, i_t, &mv); 272 double Mres1[1] = { 0 }, Mres2[1] = { 0 }, Mres3[1] = { 0 }; 273 multiply_mat(i_x, i_x, Mres1, 1, n * n, 1); 274 multiply_mat(i_x, i_y, Mres2, 1, n * n, 1); 275 multiply_mat(i_y, i_y, Mres3, 1, n * n, 1); 276 double M[4] = { Mres1[0], Mres2[0], Mres2[0], Mres3[0] }; 277 eigenvalues_2x2(M, eig); 278 return fabs(eig[0]); 279 } 280 281 // Finds corners in frame_to_filter 282 // For less strict requirements (i.e. more corners), decrease threshold 283 static int detect_corners(const YV12_BUFFER_CONFIG *frame_to_filter, 284 const YV12_BUFFER_CONFIG *ref_frame, 285 const int maxcorners, int *ref_corners, 286 const int bit_depth) { 287 const int frame_height = frame_to_filter->y_crop_height; 288 const int frame_width = frame_to_filter->y_crop_width; 289 // TODO(any): currently if maxcorners is decreased, then it only means 290 // corners will be omited from bottom-right of image. if maxcorners 291 // is actually used, then this algorithm would need to re-iterate 292 // and choose threshold based on that 293 assert(maxcorners == frame_height * frame_width); 294 int countcorners = 0; 295 const double threshold = 0.1; 296 double score; 297 const int n = 3; 298 double i_x[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; 299 double i_y[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; 300 double i_t[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; 301 const int fromedge = n; 302 double max_score = corner_score(frame_to_filter, ref_frame, fromedge, 303 fromedge, i_x, i_y, i_t, n, bit_depth); 304 // rough estimate of max corner score in image 305 for (int x = fromedge; x < frame_width - fromedge; x += 1) { 306 for (int y = fromedge; y < frame_height - fromedge; y += frame_height / 5) { 307 for (int i = 0; i < n * n; i++) { 308 i_x[i] = 0; 309 i_y[i] = 0; 310 i_t[i] = 0; 311 } 312 score = corner_score(frame_to_filter, ref_frame, x, y, i_x, i_y, i_t, n, 313 bit_depth); 314 if (score > max_score) { 315 max_score = score; 316 } 317 } 318 } 319 // score all the points and choose corners over threshold 320 for (int x = fromedge; x < frame_width - fromedge; x += 1) { 321 for (int y = fromedge; 322 (y < frame_height - fromedge) && countcorners < maxcorners; y += 1) { 323 for (int i = 0; i < n * n; i++) { 324 i_x[i] = 0; 325 i_y[i] = 0; 326 i_t[i] = 0; 327 } 328 score = corner_score(frame_to_filter, ref_frame, x, y, i_x, i_y, i_t, n, 329 bit_depth); 330 if (score > threshold * max_score) { 331 ref_corners[countcorners * 2] = x; 332 ref_corners[countcorners * 2 + 1] = y; 333 countcorners++; 334 } 335 } 336 } 337 return countcorners; 338 } 339 340 // weights is an nxn matrix. weights is filled with a gaussian function, 341 // with independent variable: distance from the center point. 342 static void gaussian(const double sigma, const int n, const int normalize, 343 double *weights) { 344 double total_weight = 0; 345 for (int j = 0; j < n; j++) { 346 for (int i = 0; i < n; i++) { 347 double distance = sqrt(pow(n / 2 - i, 2) + pow(n / 2 - j, 2)); 348 double weight = exp(-0.5 * pow(distance / sigma, 2)); 349 weights[j * n + i] = weight; 350 total_weight += weight; 351 } 352 } 353 if (normalize == 1) { 354 for (int j = 0; j < n; j++) { 355 weights[j] = weights[j] / total_weight; 356 } 357 } 358 } 359 360 static double convolve(const double *filter, const int *img, const int size) { 361 double result = 0; 362 for (int i = 0; i < size; i++) { 363 result += filter[i] * img[i]; 364 } 365 return result; 366 } 367 368 // Applies a Gaussian low-pass smoothing filter to produce 369 // a corresponding lower resolution image with halved dimensions 370 static void reduce(uint8_t *img, int height, int width, int stride, 371 uint8_t *reduced_img) { 372 const int new_width = width / 2; 373 const int window_size = 5; 374 const double gaussian_filter[25] = { 375 1. / 256, 1.0 / 64, 3. / 128, 1. / 64, 1. / 256, 1. / 64, 1. / 16, 376 3. / 32, 1. / 16, 1. / 64, 3. / 128, 3. / 32, 9. / 64, 3. / 32, 377 3. / 128, 1. / 64, 1. / 16, 3. / 32, 1. / 16, 1. / 64, 1. / 256, 378 1. / 64, 3. / 128, 1. / 64, 1. / 256 379 }; 380 // filter is 5x5 so need prev and forward 2 pixels 381 int img_section[25]; 382 for (int y = 0; y < height - 1; y += 2) { 383 for (int x = 0; x < width - 1; x += 2) { 384 int i = 0; 385 for (int yy = y - window_size / 2; yy <= y + window_size / 2; yy++) { 386 for (int xx = x - window_size / 2; xx <= x + window_size / 2; xx++) { 387 int yvalue = yy; 388 int xvalue = xx; 389 // copied pixels outside the boundary 390 if (yvalue < 0) yvalue = 0; 391 if (xvalue < 0) xvalue = 0; 392 if (yvalue >= height) yvalue = height - 1; 393 if (xvalue >= width) xvalue = width - 1; 394 img_section[i++] = img[yvalue * stride + xvalue]; 395 } 396 } 397 reduced_img[(y / 2) * new_width + (x / 2)] = (uint8_t)convolve( 398 gaussian_filter, img_section, window_size * window_size); 399 } 400 } 401 } 402 403 static int cmpfunc(const void *a, const void *b) { 404 return (*(int *)a - *(int *)b); 405 } 406 static void filter_mvs(const MV_FILTER_TYPE mv_filter, const int frame_height, 407 const int frame_width, LOCALMV *localmvs, MV *mvs) { 408 const int n = 5; // window size 409 // for smoothing filter 410 const double gaussian_filter[25] = { 411 1. / 256, 1. / 64, 3. / 128, 1. / 64, 1. / 256, 1. / 64, 1. / 16, 412 3. / 32, 1. / 16, 1. / 64, 3. / 128, 3. / 32, 9. / 64, 3. / 32, 413 3. / 128, 1. / 64, 1. / 16, 3. / 32, 1. / 16, 1. / 64, 1. / 256, 414 1. / 64, 3. / 128, 1. / 64, 1. / 256 415 }; 416 // for median filter 417 int mvrows[25]; 418 int mvcols[25]; 419 if (mv_filter != MV_FILTER_NONE) { 420 for (int y = 0; y < frame_height; y++) { 421 for (int x = 0; x < frame_width; x++) { 422 int center_idx = y * frame_width + x; 423 int i = 0; 424 double filtered_row = 0; 425 double filtered_col = 0; 426 for (int yy = y - n / 2; yy <= y + n / 2; yy++) { 427 for (int xx = x - n / 2; xx <= x + n / 2; xx++) { 428 int yvalue = yy; 429 int xvalue = xx; 430 // copied pixels outside the boundary 431 if (yvalue < 0) yvalue = 0; 432 if (xvalue < 0) xvalue = 0; 433 if (yvalue >= frame_height) yvalue = frame_height - 1; 434 if (xvalue >= frame_width) xvalue = frame_width - 1; 435 int index = yvalue * frame_width + xvalue; 436 if (mv_filter == MV_FILTER_SMOOTH) { 437 filtered_row += mvs[index].row * gaussian_filter[i]; 438 filtered_col += mvs[index].col * gaussian_filter[i]; 439 } else if (mv_filter == MV_FILTER_MEDIAN) { 440 mvrows[i] = mvs[index].row; 441 mvcols[i] = mvs[index].col; 442 } 443 i++; 444 } 445 } 446 447 MV mv = mvs[center_idx]; 448 if (mv_filter == MV_FILTER_SMOOTH) { 449 mv.row = (int16_t)filtered_row; 450 mv.col = (int16_t)filtered_col; 451 } else if (mv_filter == MV_FILTER_MEDIAN) { 452 qsort(mvrows, 25, sizeof(mv.row), cmpfunc); 453 qsort(mvcols, 25, sizeof(mv.col), cmpfunc); 454 mv.row = mvrows[25 / 2]; 455 mv.col = mvcols[25 / 2]; 456 } 457 LOCALMV localmv = { .row = ((double)mv.row) / 8, 458 .col = ((double)mv.row) / 8 }; 459 localmvs[y * frame_width + x] = localmv; 460 // if mvs array is immediately updated here, then the result may 461 // propagate to other pixels. 462 } 463 } 464 for (int i = 0; i < frame_height * frame_width; i++) { 465 MV mv = { .row = (int16_t)round(8 * localmvs[i].row), 466 .col = (int16_t)round(8 * localmvs[i].col) }; 467 mvs[i] = mv; 468 } 469 } 470 } 471 472 // Computes optical flow at a single pyramid level, 473 // using Lucas-Kanade algorithm. 474 // Modifies mvs array. 475 static void lucas_kanade(const YV12_BUFFER_CONFIG *from_frame, 476 const YV12_BUFFER_CONFIG *to_frame, const int level, 477 const LK_PARAMS *lk_params, const int num_ref_corners, 478 int *ref_corners, const int mv_stride, 479 const int bit_depth, LOCALMV *mvs) { 480 assert(lk_params->window_size > 0 && lk_params->window_size % 2 == 0); 481 const int n = lk_params->window_size; 482 // algorithm is sensitive to window size 483 double *i_x = (double *)aom_malloc(n * n * sizeof(*i_x)); 484 double *i_y = (double *)aom_malloc(n * n * sizeof(*i_y)); 485 double *i_t = (double *)aom_malloc(n * n * sizeof(*i_t)); 486 double *weights = (double *)aom_malloc(n * n * sizeof(*weights)); 487 if (!i_x || !i_y || !i_t || !weights) goto free_lk_buf; 488 489 const int expand_multiplier = (int)pow(2, level); 490 double sigma = 0.2 * n; 491 // normalizing doesn't really affect anything since it's applied 492 // to every component of M and b 493 gaussian(sigma, n, 0, weights); 494 for (int i = 0; i < num_ref_corners; i++) { 495 const double x_coord = 1.0 * ref_corners[i * 2] / expand_multiplier; 496 const double y_coord = 1.0 * ref_corners[i * 2 + 1] / expand_multiplier; 497 int highres_x = ref_corners[i * 2]; 498 int highres_y = ref_corners[i * 2 + 1]; 499 int mv_idx = highres_y * (mv_stride) + highres_x; 500 LOCALMV mv_old = mvs[mv_idx]; 501 mv_old.row = mv_old.row / expand_multiplier; 502 mv_old.col = mv_old.col / expand_multiplier; 503 // using this instead of memset, since it's not completely 504 // clear if zero memset works on double arrays 505 for (int j = 0; j < n * n; j++) { 506 i_x[j] = 0; 507 i_y[j] = 0; 508 i_t[j] = 0; 509 } 510 gradients_over_window(from_frame, to_frame, x_coord, y_coord, n, bit_depth, 511 i_x, i_y, i_t, &mv_old); 512 double Mres1[1] = { 0 }, Mres2[1] = { 0 }, Mres3[1] = { 0 }; 513 double bres1[1] = { 0 }, bres2[1] = { 0 }; 514 for (int j = 0; j < n * n; j++) { 515 Mres1[0] += weights[j] * i_x[j] * i_x[j]; 516 Mres2[0] += weights[j] * i_x[j] * i_y[j]; 517 Mres3[0] += weights[j] * i_y[j] * i_y[j]; 518 bres1[0] += weights[j] * i_x[j] * i_t[j]; 519 bres2[0] += weights[j] * i_y[j] * i_t[j]; 520 } 521 double M[4] = { Mres1[0], Mres2[0], Mres2[0], Mres3[0] }; 522 double b[2] = { -1 * bres1[0], -1 * bres2[0] }; 523 double eig[2] = { 1, 1 }; 524 eigenvalues_2x2(M, eig); 525 double threshold = 0.1; 526 if (fabs(eig[0]) > threshold) { 527 // if M is not invertible, then displacement 528 // will default to zeros 529 double u[2] = { 0, 0 }; 530 linsolve(2, M, 2, b, u); 531 int mult = 1; 532 if (level != 0) 533 mult = expand_multiplier; // mv doubles when resolution doubles 534 LOCALMV mv = { .row = (mult * (u[0] + mv_old.row)), 535 .col = (mult * (u[1] + mv_old.col)) }; 536 mvs[mv_idx] = mv; 537 mvs[mv_idx] = mv; 538 } 539 } 540 free_lk_buf: 541 aom_free(weights); 542 aom_free(i_t); 543 aom_free(i_x); 544 aom_free(i_y); 545 } 546 547 // Warp the src_frame to warper_frame according to mvs. 548 // mvs point to src_frame 549 static void warp_back_frame(YV12_BUFFER_CONFIG *warped_frame, 550 const YV12_BUFFER_CONFIG *src_frame, 551 const LOCALMV *mvs, int mv_stride) { 552 int w, h; 553 const int fw = src_frame->y_crop_width; 554 const int fh = src_frame->y_crop_height; 555 const int src_fs = src_frame->y_stride, warped_fs = warped_frame->y_stride; 556 const uint8_t *src_buf = src_frame->y_buffer; 557 uint8_t *warped_buf = warped_frame->y_buffer; 558 double temp; 559 for (h = 0; h < fh; h++) { 560 for (w = 0; w < fw; w++) { 561 double cord_x = (double)w + mvs[h * mv_stride + w].col; 562 double cord_y = (double)h + mvs[h * mv_stride + w].row; 563 cord_x = fclamp(cord_x, 0, (double)(fw - 1)); 564 cord_y = fclamp(cord_y, 0, (double)(fh - 1)); 565 const int floorx = (int)floor(cord_x); 566 const int floory = (int)floor(cord_y); 567 const double fracx = cord_x - (double)floorx; 568 const double fracy = cord_y - (double)floory; 569 570 temp = 0; 571 for (int hh = 0; hh < 2; hh++) { 572 const double weighth = hh ? (fracy) : (1 - fracy); 573 for (int ww = 0; ww < 2; ww++) { 574 const double weightw = ww ? (fracx) : (1 - fracx); 575 int y = floory + hh; 576 int x = floorx + ww; 577 y = clamp(y, 0, fh - 1); 578 x = clamp(x, 0, fw - 1); 579 temp += (double)src_buf[y * src_fs + x] * weightw * weighth; 580 } 581 } 582 warped_buf[h * warped_fs + w] = (uint8_t)round(temp); 583 } 584 } 585 } 586 587 // Same as warp_back_frame, but using a better interpolation filter. 588 static void warp_back_frame_intp(YV12_BUFFER_CONFIG *warped_frame, 589 const YV12_BUFFER_CONFIG *src_frame, 590 const LOCALMV *mvs, int mv_stride) { 591 int w, h; 592 const int fw = src_frame->y_crop_width; 593 const int fh = src_frame->y_crop_height; 594 const int warped_fs = warped_frame->y_stride; 595 uint8_t *warped_buf = warped_frame->y_buffer; 596 const int blk = 2; 597 uint8_t temp_blk[4]; 598 599 const int is_intrabc = 0; // Is intra-copied? 600 const int is_high_bitdepth = is_frame_high_bitdepth(src_frame); 601 const int subsampling_x = 0, subsampling_y = 0; // for y-buffer 602 const int_interpfilters interp_filters = 603 av1_broadcast_interp_filter(MULTITAP_SHARP2); 604 const int plane = 0; // y-plane 605 const struct buf_2d ref_buf2 = { NULL, src_frame->y_buffer, 606 src_frame->y_crop_width, 607 src_frame->y_crop_height, 608 src_frame->y_stride }; 609 const int bit_depth = src_frame->bit_depth; 610 struct scale_factors scale; 611 av1_setup_scale_factors_for_frame( 612 &scale, src_frame->y_crop_width, src_frame->y_crop_height, 613 src_frame->y_crop_width, src_frame->y_crop_height); 614 615 for (h = 0; h < fh; h++) { 616 for (w = 0; w < fw; w++) { 617 InterPredParams inter_pred_params; 618 av1_init_inter_params(&inter_pred_params, blk, blk, h, w, subsampling_x, 619 subsampling_y, bit_depth, is_high_bitdepth, 620 is_intrabc, &scale, &ref_buf2, interp_filters); 621 inter_pred_params.interp_filter_params[0] = 622 &av1_interp_filter_params_list[interp_filters.as_filters.x_filter]; 623 inter_pred_params.interp_filter_params[1] = 624 &av1_interp_filter_params_list[interp_filters.as_filters.y_filter]; 625 inter_pred_params.conv_params = get_conv_params(0, plane, bit_depth); 626 MV newmv = { .row = (int16_t)round((mvs[h * mv_stride + w].row) * 8), 627 .col = (int16_t)round((mvs[h * mv_stride + w].col) * 8) }; 628 av1_enc_build_one_inter_predictor(temp_blk, blk, &newmv, 629 &inter_pred_params); 630 warped_buf[h * warped_fs + w] = temp_blk[0]; 631 } 632 } 633 } 634 635 #define DERIVATIVE_FILTER_LENGTH 7 636 double filter[DERIVATIVE_FILTER_LENGTH] = { -1.0 / 60, 9.0 / 60, -45.0 / 60, 0, 637 45.0 / 60, -9.0 / 60, 1.0 / 60 }; 638 639 // Get gradient of the whole frame 640 static void get_frame_gradients(const YV12_BUFFER_CONFIG *from_frame, 641 const YV12_BUFFER_CONFIG *to_frame, double *ix, 642 double *iy, double *it, int grad_stride) { 643 int w, h, k, idx; 644 const int fw = from_frame->y_crop_width; 645 const int fh = from_frame->y_crop_height; 646 const int from_fs = from_frame->y_stride, to_fs = to_frame->y_stride; 647 const uint8_t *from_buf = from_frame->y_buffer; 648 const uint8_t *to_buf = to_frame->y_buffer; 649 650 const int lh = DERIVATIVE_FILTER_LENGTH; 651 const int hleft = (lh - 1) / 2; 652 653 for (h = 0; h < fh; h++) { 654 for (w = 0; w < fw; w++) { 655 // x 656 ix[h * grad_stride + w] = 0; 657 for (k = 0; k < lh; k++) { 658 // if we want to make this block dependent, need to extend the 659 // boundaries using other initializations. 660 idx = w + k - hleft; 661 idx = clamp(idx, 0, fw - 1); 662 ix[h * grad_stride + w] += filter[k] * 0.5 * 663 ((double)from_buf[h * from_fs + idx] + 664 (double)to_buf[h * to_fs + idx]); 665 } 666 // y 667 iy[h * grad_stride + w] = 0; 668 for (k = 0; k < lh; k++) { 669 // if we want to make this block dependent, need to extend the 670 // boundaries using other initializations. 671 idx = h + k - hleft; 672 idx = clamp(idx, 0, fh - 1); 673 iy[h * grad_stride + w] += filter[k] * 0.5 * 674 ((double)from_buf[idx * from_fs + w] + 675 (double)to_buf[idx * to_fs + w]); 676 } 677 // t 678 it[h * grad_stride + w] = 679 (double)to_buf[h * to_fs + w] - (double)from_buf[h * from_fs + w]; 680 } 681 } 682 } 683 684 // Solve for linear equations given by the H-S method 685 static void solve_horn_schunck(const double *ix, const double *iy, 686 const double *it, int grad_stride, int width, 687 int height, const LOCALMV *init_mvs, 688 int init_mv_stride, LOCALMV *mvs, 689 int mv_stride) { 690 // TODO(bohanli): May just need to allocate the buffers once per optical flow 691 // calculation 692 int *row_pos = aom_calloc(width * height * 28, sizeof(*row_pos)); 693 int *col_pos = aom_calloc(width * height * 28, sizeof(*col_pos)); 694 double *values = aom_calloc(width * height * 28, sizeof(*values)); 695 double *mv_vec = aom_calloc(width * height * 2, sizeof(*mv_vec)); 696 double *mv_init_vec = aom_calloc(width * height * 2, sizeof(*mv_init_vec)); 697 double *temp_b = aom_calloc(width * height * 2, sizeof(*temp_b)); 698 double *b = aom_calloc(width * height * 2, sizeof(*b)); 699 if (!row_pos || !col_pos || !values || !mv_vec || !mv_init_vec || !temp_b || 700 !b) { 701 goto free_hs_solver_buf; 702 } 703 704 // the location idx for neighboring pixels, k < 4 are the 4 direct neighbors 705 const int check_locs_y[12] = { 0, 0, -1, 1, -1, -1, 1, 1, 0, 0, -2, 2 }; 706 const int check_locs_x[12] = { -1, 1, 0, 0, -1, 1, -1, 1, -2, 2, 0, 0 }; 707 708 int h, w, checkh, checkw, k, ret; 709 const int offset = height * width; 710 SPARSE_MTX A; 711 int c = 0; 712 const double lambda = 100; 713 714 for (w = 0; w < width; w++) { 715 for (h = 0; h < height; h++) { 716 mv_init_vec[w * height + h] = init_mvs[h * init_mv_stride + w].col; 717 mv_init_vec[w * height + h + offset] = 718 init_mvs[h * init_mv_stride + w].row; 719 } 720 } 721 722 // get matrix A 723 for (w = 0; w < width; w++) { 724 for (h = 0; h < height; h++) { 725 int center_num_direct = 4; 726 const int center_idx = w * height + h; 727 if (w == 0 || w == width - 1) center_num_direct--; 728 if (h == 0 || h == height - 1) center_num_direct--; 729 // diagonal entry for this row from the center pixel 730 double cor_w = center_num_direct * center_num_direct + center_num_direct; 731 row_pos[c] = center_idx; 732 col_pos[c] = center_idx; 733 values[c] = lambda * cor_w; 734 c++; 735 row_pos[c] = center_idx + offset; 736 col_pos[c] = center_idx + offset; 737 values[c] = lambda * cor_w; 738 c++; 739 // other entries from direct neighbors 740 for (k = 0; k < 4; k++) { 741 checkh = h + check_locs_y[k]; 742 checkw = w + check_locs_x[k]; 743 if (checkh < 0 || checkh >= height || checkw < 0 || checkw >= width) { 744 continue; 745 } 746 int this_idx = checkw * height + checkh; 747 int this_num_direct = 4; 748 if (checkw == 0 || checkw == width - 1) this_num_direct--; 749 if (checkh == 0 || checkh == height - 1) this_num_direct--; 750 cor_w = -center_num_direct - this_num_direct; 751 row_pos[c] = center_idx; 752 col_pos[c] = this_idx; 753 values[c] = lambda * cor_w; 754 c++; 755 row_pos[c] = center_idx + offset; 756 col_pos[c] = this_idx + offset; 757 values[c] = lambda * cor_w; 758 c++; 759 } 760 // entries from neighbors on the diagonal corners 761 for (k = 4; k < 8; k++) { 762 checkh = h + check_locs_y[k]; 763 checkw = w + check_locs_x[k]; 764 if (checkh < 0 || checkh >= height || checkw < 0 || checkw >= width) { 765 continue; 766 } 767 int this_idx = checkw * height + checkh; 768 cor_w = 2; 769 row_pos[c] = center_idx; 770 col_pos[c] = this_idx; 771 values[c] = lambda * cor_w; 772 c++; 773 row_pos[c] = center_idx + offset; 774 col_pos[c] = this_idx + offset; 775 values[c] = lambda * cor_w; 776 c++; 777 } 778 // entries from neighbors with dist of 2 779 for (k = 8; k < 12; k++) { 780 checkh = h + check_locs_y[k]; 781 checkw = w + check_locs_x[k]; 782 if (checkh < 0 || checkh >= height || checkw < 0 || checkw >= width) { 783 continue; 784 } 785 int this_idx = checkw * height + checkh; 786 cor_w = 1; 787 row_pos[c] = center_idx; 788 col_pos[c] = this_idx; 789 values[c] = lambda * cor_w; 790 c++; 791 row_pos[c] = center_idx + offset; 792 col_pos[c] = this_idx + offset; 793 values[c] = lambda * cor_w; 794 c++; 795 } 796 } 797 } 798 ret = av1_init_sparse_mtx(row_pos, col_pos, values, c, 2 * width * height, 799 2 * width * height, &A); 800 if (ret < 0) goto free_hs_solver_buf; 801 // subtract init mv part from b 802 av1_mtx_vect_multi_left(&A, mv_init_vec, temp_b, 2 * width * height); 803 for (int i = 0; i < 2 * width * height; i++) { 804 b[i] = -temp_b[i]; 805 } 806 av1_free_sparse_mtx_elems(&A); 807 808 // add cross terms to A and modify b with ExEt / EyEt 809 for (w = 0; w < width; w++) { 810 for (h = 0; h < height; h++) { 811 int curidx = w * height + h; 812 // modify b 813 b[curidx] += -ix[h * grad_stride + w] * it[h * grad_stride + w]; 814 b[curidx + offset] += -iy[h * grad_stride + w] * it[h * grad_stride + w]; 815 // add cross terms to A 816 row_pos[c] = curidx; 817 col_pos[c] = curidx + offset; 818 values[c] = ix[h * grad_stride + w] * iy[h * grad_stride + w]; 819 c++; 820 row_pos[c] = curidx + offset; 821 col_pos[c] = curidx; 822 values[c] = ix[h * grad_stride + w] * iy[h * grad_stride + w]; 823 c++; 824 } 825 } 826 // Add diagonal terms to A 827 for (int i = 0; i < c; i++) { 828 if (row_pos[i] == col_pos[i]) { 829 if (row_pos[i] < offset) { 830 w = row_pos[i] / height; 831 h = row_pos[i] % height; 832 values[i] += pow(ix[h * grad_stride + w], 2); 833 } else { 834 w = (row_pos[i] - offset) / height; 835 h = (row_pos[i] - offset) % height; 836 values[i] += pow(iy[h * grad_stride + w], 2); 837 } 838 } 839 } 840 841 ret = av1_init_sparse_mtx(row_pos, col_pos, values, c, 2 * width * height, 842 2 * width * height, &A); 843 if (ret < 0) goto free_hs_solver_buf; 844 845 // solve for the mvs 846 ret = av1_conjugate_gradient_sparse(&A, b, 2 * width * height, mv_vec); 847 if (ret < 0) goto free_hs_solver_buf; 848 849 // copy mvs 850 for (w = 0; w < width; w++) { 851 for (h = 0; h < height; h++) { 852 mvs[h * mv_stride + w].col = mv_vec[w * height + h]; 853 mvs[h * mv_stride + w].row = mv_vec[w * height + h + offset]; 854 } 855 } 856 free_hs_solver_buf: 857 aom_free(row_pos); 858 aom_free(col_pos); 859 aom_free(values); 860 aom_free(mv_vec); 861 aom_free(mv_init_vec); 862 aom_free(b); 863 aom_free(temp_b); 864 av1_free_sparse_mtx_elems(&A); 865 } 866 867 // Calculate optical flow from from_frame to to_frame using the H-S method. 868 static void horn_schunck(const YV12_BUFFER_CONFIG *from_frame, 869 const YV12_BUFFER_CONFIG *to_frame, const int level, 870 const int mv_stride, const int mv_height, 871 const int mv_width, const OPFL_PARAMS *opfl_params, 872 LOCALMV *mvs) { 873 // mvs are always on level 0, here we define two new mv arrays that is of size 874 // of this level. 875 const int fw = from_frame->y_crop_width; 876 const int fh = from_frame->y_crop_height; 877 const int factor = (int)pow(2, level); 878 int w, h, k, init_mv_stride; 879 LOCALMV *init_mvs = NULL, *refine_mvs = NULL; 880 double *ix = NULL, *iy = NULL, *it = NULL; 881 YV12_BUFFER_CONFIG temp_frame; 882 temp_frame.y_buffer = NULL; 883 if (level == 0) { 884 init_mvs = mvs; 885 init_mv_stride = mv_stride; 886 } else { 887 init_mvs = aom_calloc(fw * fh, sizeof(*mvs)); 888 if (!init_mvs) goto free_hs_buf; 889 init_mv_stride = fw; 890 for (h = 0; h < fh; h++) { 891 for (w = 0; w < fw; w++) { 892 init_mvs[h * init_mv_stride + w].row = 893 mvs[h * factor * mv_stride + w * factor].row / (double)factor; 894 init_mvs[h * init_mv_stride + w].col = 895 mvs[h * factor * mv_stride + w * factor].col / (double)factor; 896 } 897 } 898 } 899 refine_mvs = aom_calloc(fw * fh, sizeof(*mvs)); 900 if (!refine_mvs) goto free_hs_buf; 901 // temp frame for warping 902 temp_frame.y_buffer = 903 (uint8_t *)aom_calloc(fh * fw, sizeof(*temp_frame.y_buffer)); 904 if (!temp_frame.y_buffer) goto free_hs_buf; 905 temp_frame.y_crop_height = fh; 906 temp_frame.y_crop_width = fw; 907 temp_frame.y_stride = fw; 908 // gradient buffers 909 ix = aom_calloc(fw * fh, sizeof(*ix)); 910 iy = aom_calloc(fw * fh, sizeof(*iy)); 911 it = aom_calloc(fw * fh, sizeof(*it)); 912 if (!ix || !iy || !it) goto free_hs_buf; 913 // For each warping step 914 for (k = 0; k < opfl_params->warping_steps; k++) { 915 // warp from_frame with init_mv 916 if (level == 0) { 917 warp_back_frame_intp(&temp_frame, to_frame, init_mvs, init_mv_stride); 918 } else { 919 warp_back_frame(&temp_frame, to_frame, init_mvs, init_mv_stride); 920 } 921 // calculate frame gradients 922 get_frame_gradients(from_frame, &temp_frame, ix, iy, it, fw); 923 // form linear equations and solve mvs 924 solve_horn_schunck(ix, iy, it, fw, fw, fh, init_mvs, init_mv_stride, 925 refine_mvs, fw); 926 // update init_mvs 927 for (h = 0; h < fh; h++) { 928 for (w = 0; w < fw; w++) { 929 init_mvs[h * init_mv_stride + w].col += refine_mvs[h * fw + w].col; 930 init_mvs[h * init_mv_stride + w].row += refine_mvs[h * fw + w].row; 931 } 932 } 933 } 934 // copy back the mvs if needed 935 if (level != 0) { 936 for (h = 0; h < mv_height; h++) { 937 for (w = 0; w < mv_width; w++) { 938 mvs[h * mv_stride + w].row = 939 init_mvs[h / factor * init_mv_stride + w / factor].row * 940 (double)factor; 941 mvs[h * mv_stride + w].col = 942 init_mvs[h / factor * init_mv_stride + w / factor].col * 943 (double)factor; 944 } 945 } 946 } 947 free_hs_buf: 948 if (level != 0) aom_free(init_mvs); 949 aom_free(refine_mvs); 950 aom_free(temp_frame.y_buffer); 951 aom_free(ix); 952 aom_free(iy); 953 aom_free(it); 954 } 955 956 // Apply optical flow iteratively at each pyramid level 957 static void pyramid_optical_flow(const YV12_BUFFER_CONFIG *from_frame, 958 const YV12_BUFFER_CONFIG *to_frame, 959 const int bit_depth, 960 const OPFL_PARAMS *opfl_params, 961 const OPTFLOW_METHOD method, LOCALMV *mvs) { 962 assert(opfl_params->pyramid_levels > 0 && 963 opfl_params->pyramid_levels <= MAX_PYRAMID_LEVELS); 964 int levels = opfl_params->pyramid_levels; 965 const int frame_height = from_frame->y_crop_height; 966 const int frame_width = from_frame->y_crop_width; 967 if ((frame_height / pow(2.0, levels - 1) < 50 || 968 frame_height / pow(2.0, levels - 1) < 50) && 969 levels > 1) 970 levels = levels - 1; 971 uint8_t *images1[MAX_PYRAMID_LEVELS] = { NULL }; 972 uint8_t *images2[MAX_PYRAMID_LEVELS] = { NULL }; 973 int *ref_corners = NULL; 974 975 images1[0] = from_frame->y_buffer; 976 images2[0] = to_frame->y_buffer; 977 YV12_BUFFER_CONFIG *buffers1 = aom_malloc(levels * sizeof(*buffers1)); 978 YV12_BUFFER_CONFIG *buffers2 = aom_malloc(levels * sizeof(*buffers2)); 979 if (!buffers1 || !buffers2) goto free_pyramid_buf; 980 buffers1[0] = *from_frame; 981 buffers2[0] = *to_frame; 982 int fw = frame_width; 983 int fh = frame_height; 984 for (int i = 1; i < levels; i++) { 985 // TODO(bohanli): may need to extend buffers for better interpolation SIMD 986 images1[i] = (uint8_t *)aom_calloc(fh / 2 * fw / 2, sizeof(*images1[i])); 987 images2[i] = (uint8_t *)aom_calloc(fh / 2 * fw / 2, sizeof(*images2[i])); 988 if (!images1[i] || !images2[i]) goto free_pyramid_buf; 989 int stride; 990 if (i == 1) 991 stride = from_frame->y_stride; 992 else 993 stride = fw; 994 reduce(images1[i - 1], fh, fw, stride, images1[i]); 995 reduce(images2[i - 1], fh, fw, stride, images2[i]); 996 fh /= 2; 997 fw /= 2; 998 YV12_BUFFER_CONFIG a = { .y_buffer = images1[i], 999 .y_crop_width = fw, 1000 .y_crop_height = fh, 1001 .y_stride = fw }; 1002 YV12_BUFFER_CONFIG b = { .y_buffer = images2[i], 1003 .y_crop_width = fw, 1004 .y_crop_height = fh, 1005 .y_stride = fw }; 1006 buffers1[i] = a; 1007 buffers2[i] = b; 1008 } 1009 // Compute corners for specific frame 1010 int num_ref_corners = 0; 1011 if (is_sparse(opfl_params)) { 1012 int maxcorners = from_frame->y_crop_width * from_frame->y_crop_height; 1013 ref_corners = aom_malloc(maxcorners * 2 * sizeof(*ref_corners)); 1014 if (!ref_corners) goto free_pyramid_buf; 1015 num_ref_corners = detect_corners(from_frame, to_frame, maxcorners, 1016 ref_corners, bit_depth); 1017 } 1018 const int stop_level = 0; 1019 for (int i = levels - 1; i >= stop_level; i--) { 1020 if (method == LUCAS_KANADE) { 1021 assert(is_sparse(opfl_params)); 1022 lucas_kanade(&buffers1[i], &buffers2[i], i, opfl_params->lk_params, 1023 num_ref_corners, ref_corners, buffers1[0].y_crop_width, 1024 bit_depth, mvs); 1025 } else if (method == HORN_SCHUNCK) { 1026 assert(!is_sparse(opfl_params)); 1027 horn_schunck(&buffers1[i], &buffers2[i], i, buffers1[0].y_crop_width, 1028 buffers1[0].y_crop_height, buffers1[0].y_crop_width, 1029 opfl_params, mvs); 1030 } 1031 } 1032 free_pyramid_buf: 1033 for (int i = 1; i < levels; i++) { 1034 aom_free(images1[i]); 1035 aom_free(images2[i]); 1036 } 1037 aom_free(ref_corners); 1038 aom_free(buffers1); 1039 aom_free(buffers2); 1040 } 1041 // Computes optical flow by applying algorithm at 1042 // multiple pyramid levels of images (lower-resolution, smoothed images) 1043 // This accounts for larger motions. 1044 // Inputs: 1045 // from_frame Frame buffer. 1046 // to_frame: Frame buffer. MVs point from_frame -> to_frame. 1047 // from_frame_idx: Index of from_frame. 1048 // to_frame_idx: Index of to_frame. Return all zero MVs when idx are equal. 1049 // bit_depth: 1050 // opfl_params: contains algorithm-specific parameters. 1051 // mv_filter: MV_FILTER_NONE, MV_FILTER_SMOOTH, or MV_FILTER_MEDIAN. 1052 // method: LUCAS_KANADE, HORN_SCHUNCK 1053 // mvs: pointer to MVs. Contains initialization, and modified 1054 // based on optical flow. Must have 1055 // dimensions = from_frame->y_crop_width * from_frame->y_crop_height 1056 void av1_optical_flow(const YV12_BUFFER_CONFIG *from_frame, 1057 const YV12_BUFFER_CONFIG *to_frame, 1058 const int from_frame_idx, const int to_frame_idx, 1059 const int bit_depth, const OPFL_PARAMS *opfl_params, 1060 const MV_FILTER_TYPE mv_filter, 1061 const OPTFLOW_METHOD method, MV *mvs) { 1062 const int frame_height = from_frame->y_crop_height; 1063 const int frame_width = from_frame->y_crop_width; 1064 // TODO(any): deal with the case where frames are not of the same dimensions 1065 assert(frame_height == to_frame->y_crop_height && 1066 frame_width == to_frame->y_crop_width); 1067 if (from_frame_idx == to_frame_idx) { 1068 // immediately return all zero mvs when frame indices are equal 1069 for (int yy = 0; yy < frame_height; yy++) { 1070 for (int xx = 0; xx < frame_width; xx++) { 1071 MV mv = { .row = 0, .col = 0 }; 1072 mvs[yy * frame_width + xx] = mv; 1073 } 1074 } 1075 return; 1076 } 1077 1078 // Initialize double mvs based on input parameter mvs array 1079 LOCALMV *localmvs = 1080 aom_malloc(frame_height * frame_width * sizeof(*localmvs)); 1081 if (!localmvs) return; 1082 1083 filter_mvs(MV_FILTER_SMOOTH, frame_height, frame_width, localmvs, mvs); 1084 1085 for (int i = 0; i < frame_width * frame_height; i++) { 1086 MV mv = mvs[i]; 1087 LOCALMV localmv = { .row = ((double)mv.row) / 8, 1088 .col = ((double)mv.col) / 8 }; 1089 localmvs[i] = localmv; 1090 } 1091 // Apply optical flow algorithm 1092 pyramid_optical_flow(from_frame, to_frame, bit_depth, opfl_params, method, 1093 localmvs); 1094 1095 // Update original mvs array 1096 for (int j = 0; j < frame_height; j++) { 1097 for (int i = 0; i < frame_width; i++) { 1098 int idx = j * frame_width + i; 1099 if (j + localmvs[idx].row < 0 || j + localmvs[idx].row >= frame_height || 1100 i + localmvs[idx].col < 0 || i + localmvs[idx].col >= frame_width) { 1101 continue; 1102 } 1103 MV mv = { .row = (int16_t)round(8 * localmvs[idx].row), 1104 .col = (int16_t)round(8 * localmvs[idx].col) }; 1105 mvs[idx] = mv; 1106 } 1107 } 1108 1109 filter_mvs(mv_filter, frame_height, frame_width, localmvs, mvs); 1110 1111 aom_free(localmvs); 1112 } 1113 #endif